zstd_decompress_block.c source code [linux/lib/zstd/decompress/zstd_decompress_block.c]

1	/*
2	* Copyright (c) Yann Collet, Facebook, Inc.
3	* All rights reserved.
4	*
5	* This source code is licensed under both the BSD-style license (found in the
6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7	* in the COPYING file in the root directory of this source tree).
8	* You may select, at your option, one of the above-listed licenses.
9	*/
10
11	/ zstd_decompress_block :*
12	* this module takes care of decompressing _compressed_ block */
13
14	/-******************************************************
15	* Dependencies
16	*********************************************************/
17	#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18	#include "../common/compiler.h" /* prefetch */
19	#include "../common/cpu.h" /* bmi2 */
20	#include "../common/mem.h" /* low level memory routines */
21	#define FSE_STATIC_LINKING_ONLY
22	#include "../common/fse.h"
23	#define HUF_STATIC_LINKING_ONLY
24	#include "../common/huf.h"
25	#include "../common/zstd_internal.h"
26	#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27	#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28	#include "zstd_decompress_block.h"
29
30	/_******************************************************
31	* Macros
32	**********************************************************/
33
34	/ These two optional macros force the use one way or another of the two*
35	* ZSTD_decompressSequences implementations. You can't force in both directions
36	* at the same time.
37	*/
38	#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39	defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40	#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41	#endif
42
43
44	/_******************************************************
45	* Memory operations
46	**********************************************************/
47	static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, `4`); }
48
49
50	/-************************************************************
51	* Block decoding
52	***************************************************************/
53
54	/! ZSTD_getcBlockSize() :*
55	* Provides the size of compressed block from block header `src` */
56	size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57	blockProperties_t* bpPtr)
58	{
59	RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
60
61	{ U32 const cBlockHeader = MEM_readLE24(memPtr: src);
62	U32 const cSize = cBlockHeader >> `3`;
63	bpPtr->lastBlock = cBlockHeader & `1`;
64	bpPtr->blockType = (blockType_e)((cBlockHeader >> `1`) & `3`);
65	bpPtr->origSize = cSize; / only useful for RLE /
66	if (bpPtr->blockType == bt_rle) return `1`;
67	RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68	return cSize;
69	}
70	}
71
72	/ Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer /
73	static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
74	const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
75	{
76	if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
77	{
78	/ room for litbuffer to fit without read faulting /
79	dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
80	dctx->litBufferEnd = dctx->litBuffer + litSize;
81	dctx->litBufferLocation = ZSTD_in_dst;
82	}
83	else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
84	{
85	/ won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer /
86	if (splitImmediately) {
87	/ won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer /
88	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
89	dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90	}
91	else {
92	/ initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after /
93	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94	dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95	}
96	dctx->litBufferLocation = ZSTD_split;
97	}
98	else
99	{
100	/ fits entirely within litExtraBuffer, so no split is necessary /
101	dctx->litBuffer = dctx->litExtraBuffer;
102	dctx->litBufferEnd = dctx->litBuffer + litSize;
103	dctx->litBufferLocation = ZSTD_not_in_dst;
104	}
105	}
106
107	/ Hidden declaration for fullbench /
108	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
109	const void* src, size_t srcSize,
110	void* dst, size_t dstCapacity, const streaming_operation streaming);
111	/! ZSTD_decodeLiteralsBlock() :*
112	* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
113	* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
114	* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
115	* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
116	*
117	* @return : nb of bytes read from src (< srcSize )
118	* note : symbol not declared but exposed for fullbench */
119	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
120	const void* src, size_t srcSize, / note : srcSize < BLOCKSIZE /
121	void* dst, size_t dstCapacity, const streaming_operation streaming)
122	{
123	DEBUGLOG(`5`, "ZSTD_decodeLiteralsBlock");
124	RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
125
126	{ const BYTE* const istart = (const BYTE*) src;
127	symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[`0`] & `3`);
128
129	switch(litEncType)
130	{
131	case set_repeat:
132	DEBUGLOG(`5`, "set_repeat flag : re-using stats from previous compressed literals block");
133	RETURN_ERROR_IF(dctx->litEntropy==`0`, dictionary_corrupted, "");
134	ZSTD_FALLTHROUGH;
135
136	case set_compressed:
137	RETURN_ERROR_IF(srcSize < `5`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
138	{ size_t lhSize, litSize, litCSize;
139	U32 singleStream=`0`;
140	U32 const lhlCode = (istart[`0`] >> `2`) & `3`;
141	U32 const lhc = MEM_readLE32(memPtr: istart);
142	size_t hufSuccess;
143	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
144	switch(lhlCode)
145	{
146	case `0`: case `1`: default: / note : default is impossible, since lhlCode into [0..3] /
147	/ 2 - 2 - 10 - 10 /
148	singleStream = !lhlCode;
149	lhSize = `3`;
150	litSize = (lhc >> `4`) & `0x3FF`;
151	litCSize = (lhc >> `14`) & `0x3FF`;
152	break;
153	case `2`:
154	/ 2 - 2 - 14 - 14 /
155	lhSize = `4`;
156	litSize = (lhc >> `4`) & `0x3FFF`;
157	litCSize = lhc >> `18`;
158	break;
159	case `3`:
160	/ 2 - 2 - 18 - 18 /
161	lhSize = `5`;
162	litSize = (lhc >> `4`) & `0x3FFFF`;
163	litCSize = (lhc >> `22`) + ((size_t)istart[`4`] << `10`);
164	break;
165	}
166	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
167	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
168	RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
169	RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
170	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, splitImmediately: `0`);
171
172	/ prefetch huffman table if cold /
173	if (dctx->ddictIsCold && (litSize > `768` / heuristic /)) {
174	PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
175	}
176
177	if (litEncType==set_repeat) {
178	if (singleStream) {
179	hufSuccess = HUF_decompress1X_usingDTable_bmi2(
180	dst: dctx->litBuffer, maxDstSize: litSize, cSrc: istart+lhSize, cSrcSize: litCSize,
181	DTable: dctx->HUFptr, bmi2: ZSTD_DCtx_get_bmi2(dctx));
182	} else {
183	hufSuccess = HUF_decompress4X_usingDTable_bmi2(
184	dst: dctx->litBuffer, maxDstSize: litSize, cSrc: istart+lhSize, cSrcSize: litCSize,
185	DTable: dctx->HUFptr, bmi2: ZSTD_DCtx_get_bmi2(dctx));
186	}
187	} else {
188	if (singleStream) {
189	#if defined(HUF_FORCE_DECOMPRESS_X2)
190	hufSuccess = HUF_decompress1X_DCtx_wksp(
191	dctx->entropy.hufTable, dctx->litBuffer, litSize,
192	istart+lhSize, litCSize, dctx->workspace,
193	sizeof(dctx->workspace));
194	#else
195	hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
196	dctx: dctx->entropy.hufTable, dst: dctx->litBuffer, dstSize: litSize,
197	cSrc: istart+lhSize, cSrcSize: litCSize, workSpace: dctx->workspace,
198	wkspSize: sizeof(dctx->workspace), bmi2: ZSTD_DCtx_get_bmi2(dctx));
199	#endif
200	} else {
201	hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
202	dctx: dctx->entropy.hufTable, dst: dctx->litBuffer, dstSize: litSize,
203	cSrc: istart+lhSize, cSrcSize: litCSize, workSpace: dctx->workspace,
204	wkspSize: sizeof(dctx->workspace), bmi2: ZSTD_DCtx_get_bmi2(dctx));
205	}
206	}
207	if (dctx->litBufferLocation == ZSTD_split)
208	{
209	ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
210	ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
211	dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
212	dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
213	}
214
215	RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
216
217	dctx->litPtr = dctx->litBuffer;
218	dctx->litSize = litSize;
219	dctx->litEntropy = `1`;
220	if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
221	return litCSize + lhSize;
222	}
223
224	case set_basic:
225	{ size_t litSize, lhSize;
226	U32 const lhlCode = ((istart[`0`]) >> `2`) & `3`;
227	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
228	switch(lhlCode)
229	{
230	case `0`: case `2`: default: / note : default is impossible, since lhlCode into [0..3] /
231	lhSize = `1`;
232	litSize = istart[`0`] >> `3`;
233	break;
234	case `1`:
235	lhSize = `2`;
236	litSize = MEM_readLE16(memPtr: istart) >> `4`;
237	break;
238	case `3`:
239	lhSize = `3`;
240	litSize = MEM_readLE24(memPtr: istart) >> `4`;
241	break;
242	}
243
244	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
245	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
246	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, splitImmediately: `1`);
247	if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { / risk reading beyond src buffer with wildcopy /
248	RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
249	if (dctx->litBufferLocation == ZSTD_split)
250	{
251	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
252	ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
253	}
254	else
255	{
256	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
257	}
258	dctx->litPtr = dctx->litBuffer;
259	dctx->litSize = litSize;
260	return lhSize+litSize;
261	}
262	/ direct reference into compressed stream /
263	dctx->litPtr = istart+lhSize;
264	dctx->litSize = litSize;
265	dctx->litBufferEnd = dctx->litPtr + litSize;
266	dctx->litBufferLocation = ZSTD_not_in_dst;
267	return lhSize+litSize;
268	}
269
270	case set_rle:
271	{ U32 const lhlCode = ((istart[`0`]) >> `2`) & `3`;
272	size_t litSize, lhSize;
273	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
274	switch(lhlCode)
275	{
276	case `0`: case `2`: default: / note : default is impossible, since lhlCode into [0..3] /
277	lhSize = `1`;
278	litSize = istart[`0`] >> `3`;
279	break;
280	case `1`:
281	lhSize = `2`;
282	litSize = MEM_readLE16(memPtr: istart) >> `4`;
283	break;
284	case `3`:
285	lhSize = `3`;
286	litSize = MEM_readLE24(memPtr: istart) >> `4`;
287	RETURN_ERROR_IF(srcSize<`4`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
288	break;
289	}
290	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
291	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
292	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
293	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, splitImmediately: `1`);
294	if (dctx->litBufferLocation == ZSTD_split)
295	{
296	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
297	ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
298	}
299	else
300	{
301	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
302	}
303	dctx->litPtr = dctx->litBuffer;
304	dctx->litSize = litSize;
305	return lhSize+`1`;
306	}
307	default:
308	RETURN_ERROR(corruption_detected, "impossible");
309	}
310	}
311	}
312
313	/ Default FSE distribution tables.*
314	* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
315	* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
316	* They were generated programmatically with following method :
317	* - start from default distributions, present in /lib/common/zstd_internal.h
318	* - generate tables normally, using ZSTD_buildFSETable()
319	* - printout the content of tables
320	* - pretify output, report below, test with fuzzer to ensure it's correct */
321
322	/ Default FSE distribution table for Literal Lengths /
323	static const ZSTD_seqSymbol LL_defaultDTable[(`1`<<LL_DEFAULTNORMLOG)+`1`] = {
324	{ `1`, `1`, `1`, LL_DEFAULTNORMLOG}, / header : fastMode, tableLog /
325	/ nextState, nbAddBits, nbBits, baseVal /
326	{ `0`, `0`, `4`, `0`}, { `16`, `0`, `4`, `0`},
327	{ `32`, `0`, `5`, `1`}, { `0`, `0`, `5`, `3`},
328	{ `0`, `0`, `5`, `4`}, { `0`, `0`, `5`, `6`},
329	{ `0`, `0`, `5`, `7`}, { `0`, `0`, `5`, `9`},
330	{ `0`, `0`, `5`, `10`}, { `0`, `0`, `5`, `12`},
331	{ `0`, `0`, `6`, `14`}, { `0`, `1`, `5`, `16`},
332	{ `0`, `1`, `5`, `20`}, { `0`, `1`, `5`, `22`},
333	{ `0`, `2`, `5`, `28`}, { `0`, `3`, `5`, `32`},
334	{ `0`, `4`, `5`, `48`}, { `32`, `6`, `5`, `64`},
335	{ `0`, `7`, `5`, `128`}, { `0`, `8`, `6`, `256`},
336	{ `0`, `10`, `6`, `1024`}, { `0`, `12`, `6`, `4096`},
337	{ `32`, `0`, `4`, `0`}, { `0`, `0`, `4`, `1`},
338	{ `0`, `0`, `5`, `2`}, { `32`, `0`, `5`, `4`},
339	{ `0`, `0`, `5`, `5`}, { `32`, `0`, `5`, `7`},
340	{ `0`, `0`, `5`, `8`}, { `32`, `0`, `5`, `10`},
341	{ `0`, `0`, `5`, `11`}, { `0`, `0`, `6`, `13`},
342	{ `32`, `1`, `5`, `16`}, { `0`, `1`, `5`, `18`},
343	{ `32`, `1`, `5`, `22`}, { `0`, `2`, `5`, `24`},
344	{ `32`, `3`, `5`, `32`}, { `0`, `3`, `5`, `40`},
345	{ `0`, `6`, `4`, `64`}, { `16`, `6`, `4`, `64`},
346	{ `32`, `7`, `5`, `128`}, { `0`, `9`, `6`, `512`},
347	{ `0`, `11`, `6`, `2048`}, { `48`, `0`, `4`, `0`},
348	{ `16`, `0`, `4`, `1`}, { `32`, `0`, `5`, `2`},
349	{ `32`, `0`, `5`, `3`}, { `32`, `0`, `5`, `5`},
350	{ `32`, `0`, `5`, `6`}, { `32`, `0`, `5`, `8`},
351	{ `32`, `0`, `5`, `9`}, { `32`, `0`, `5`, `11`},
352	{ `32`, `0`, `5`, `12`}, { `0`, `0`, `6`, `15`},
353	{ `32`, `1`, `5`, `18`}, { `32`, `1`, `5`, `20`},
354	{ `32`, `2`, `5`, `24`}, { `32`, `2`, `5`, `28`},
355	{ `32`, `3`, `5`, `40`}, { `32`, `4`, `5`, `48`},
356	{ `0`, `16`, `6`,`65536`}, { `0`, `15`, `6`,`32768`},
357	{ `0`, `14`, `6`,`16384`}, { `0`, `13`, `6`, `8192`},
358	}; / LL_defaultDTable /
359
360	/ Default FSE distribution table for Offset Codes /
361	static const ZSTD_seqSymbol OF_defaultDTable[(`1`<<OF_DEFAULTNORMLOG)+`1`] = {
362	{ `1`, `1`, `1`, OF_DEFAULTNORMLOG}, / header : fastMode, tableLog /
363	/ nextState, nbAddBits, nbBits, baseVal /
364	{ `0`, `0`, `5`, `0`}, { `0`, `6`, `4`, `61`},
365	{ `0`, `9`, `5`, `509`}, { `0`, `15`, `5`,`32765`},
366	{ `0`, `21`, `5`,`2097149`}, { `0`, `3`, `5`, `5`},
367	{ `0`, `7`, `4`, `125`}, { `0`, `12`, `5`, `4093`},
368	{ `0`, `18`, `5`,`262141`}, { `0`, `23`, `5`,`8388605`},
369	{ `0`, `5`, `5`, `29`}, { `0`, `8`, `4`, `253`},
370	{ `0`, `14`, `5`,`16381`}, { `0`, `20`, `5`,`1048573`},
371	{ `0`, `2`, `5`, `1`}, { `16`, `7`, `4`, `125`},
372	{ `0`, `11`, `5`, `2045`}, { `0`, `17`, `5`,`131069`},
373	{ `0`, `22`, `5`,`4194301`}, { `0`, `4`, `5`, `13`},
374	{ `16`, `8`, `4`, `253`}, { `0`, `13`, `5`, `8189`},
375	{ `0`, `19`, `5`,`524285`}, { `0`, `1`, `5`, `1`},
376	{ `16`, `6`, `4`, `61`}, { `0`, `10`, `5`, `1021`},
377	{ `0`, `16`, `5`,`65533`}, { `0`, `28`, `5`,`268435453`},
378	{ `0`, `27`, `5`,`134217725`}, { `0`, `26`, `5`,`67108861`},
379	{ `0`, `25`, `5`,`33554429`}, { `0`, `24`, `5`,`16777213`},
380	}; / OF_defaultDTable /
381
382
383	/ Default FSE distribution table for Match Lengths /
384	static const ZSTD_seqSymbol ML_defaultDTable[(`1`<<ML_DEFAULTNORMLOG)+`1`] = {
385	{ `1`, `1`, `1`, ML_DEFAULTNORMLOG}, / header : fastMode, tableLog /
386	/ nextState, nbAddBits, nbBits, baseVal /
387	{ `0`, `0`, `6`, `3`}, { `0`, `0`, `4`, `4`},
388	{ `32`, `0`, `5`, `5`}, { `0`, `0`, `5`, `6`},
389	{ `0`, `0`, `5`, `8`}, { `0`, `0`, `5`, `9`},
390	{ `0`, `0`, `5`, `11`}, { `0`, `0`, `6`, `13`},
391	{ `0`, `0`, `6`, `16`}, { `0`, `0`, `6`, `19`},
392	{ `0`, `0`, `6`, `22`}, { `0`, `0`, `6`, `25`},
393	{ `0`, `0`, `6`, `28`}, { `0`, `0`, `6`, `31`},
394	{ `0`, `0`, `6`, `34`}, { `0`, `1`, `6`, `37`},
395	{ `0`, `1`, `6`, `41`}, { `0`, `2`, `6`, `47`},
396	{ `0`, `3`, `6`, `59`}, { `0`, `4`, `6`, `83`},
397	{ `0`, `7`, `6`, `131`}, { `0`, `9`, `6`, `515`},
398	{ `16`, `0`, `4`, `4`}, { `0`, `0`, `4`, `5`},
399	{ `32`, `0`, `5`, `6`}, { `0`, `0`, `5`, `7`},
400	{ `32`, `0`, `5`, `9`}, { `0`, `0`, `5`, `10`},
401	{ `0`, `0`, `6`, `12`}, { `0`, `0`, `6`, `15`},
402	{ `0`, `0`, `6`, `18`}, { `0`, `0`, `6`, `21`},
403	{ `0`, `0`, `6`, `24`}, { `0`, `0`, `6`, `27`},
404	{ `0`, `0`, `6`, `30`}, { `0`, `0`, `6`, `33`},
405	{ `0`, `1`, `6`, `35`}, { `0`, `1`, `6`, `39`},
406	{ `0`, `2`, `6`, `43`}, { `0`, `3`, `6`, `51`},
407	{ `0`, `4`, `6`, `67`}, { `0`, `5`, `6`, `99`},
408	{ `0`, `8`, `6`, `259`}, { `32`, `0`, `4`, `4`},
409	{ `48`, `0`, `4`, `4`}, { `16`, `0`, `4`, `5`},
410	{ `32`, `0`, `5`, `7`}, { `32`, `0`, `5`, `8`},
411	{ `32`, `0`, `5`, `10`}, { `32`, `0`, `5`, `11`},
412	{ `0`, `0`, `6`, `14`}, { `0`, `0`, `6`, `17`},
413	{ `0`, `0`, `6`, `20`}, { `0`, `0`, `6`, `23`},
414	{ `0`, `0`, `6`, `26`}, { `0`, `0`, `6`, `29`},
415	{ `0`, `0`, `6`, `32`}, { `0`, `16`, `6`,`65539`},
416	{ `0`, `15`, `6`,`32771`}, { `0`, `14`, `6`,`16387`},
417	{ `0`, `13`, `6`, `8195`}, { `0`, `12`, `6`, `4099`},
418	{ `0`, `11`, `6`, `2051`}, { `0`, `10`, `6`, `1027`},
419	}; / ML_defaultDTable /
420
421
422	static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
423	{
424	void* ptr = dt;
425	ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
426	ZSTD_seqSymbol* const cell = dt + `1`;
427
428	DTableH->tableLog = `0`;
429	DTableH->fastMode = `0`;
430
431	cell->nbBits = `0`;
432	cell->nextState = `0`;
433	assert(nbAddBits < `255`);
434	cell->nbAdditionalBits = nbAddBits;
435	cell->baseValue = baseValue;
436	}
437
438
439	/ ZSTD_buildFSETable() :*
440	* generate FSE decoding table for one symbol (ll, ml or off)
441	* cannot fail if input is valid =>
442	* all inputs are presumed validated at this stage */
443	FORCE_INLINE_TEMPLATE
444	void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
445	const short* normalizedCounter, unsigned maxSymbolValue,
446	const U32* baseValue, const U8* nbAdditionalBits,
447	unsigned tableLog, void* wksp, size_t wkspSize)
448	{
449	ZSTD_seqSymbol* const tableDecode = dt+`1`;
450	U32 const maxSV1 = maxSymbolValue + `1`;
451	U32 const tableSize = `1` << tableLog;
452
453	U16* symbolNext = (U16*)wksp;
454	BYTE* spread = (BYTE*)(symbolNext + MaxSeq + `1`);
455	U32 highThreshold = tableSize - `1`;
456
457
458	/ Sanity Checks /
459	assert(maxSymbolValue <= MaxSeq);
460	assert(tableLog <= MaxFSELog);
461	assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
462	(void)wkspSize;
463	/ Init, lay down lowprob symbols /
464	{ ZSTD_seqSymbol_header DTableH;
465	DTableH.tableLog = tableLog;
466	DTableH.fastMode = `1`;
467	{ S16 const largeLimit= (S16)(`1` << (tableLog-`1`));
468	U32 s;
469	for (s=`0`; s<maxSV1; s++) {
470	if (normalizedCounter[s]==-`1`) {
471	tableDecode[highThreshold--].baseValue = s;
472	symbolNext[s] = `1`;
473	} else {
474	if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=`0`;
475	assert(normalizedCounter[s]>=`0`);
476	symbolNext[s] = (U16)normalizedCounter[s];
477	} } }
478	ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
479	}
480
481	/ Spread symbols /
482	assert(tableSize <= `512`);
483	/ Specialized symbol spreading for the case when there are*
484	* no low probability (-1 count) symbols. When compressing
485	* small blocks we avoid low probability symbols to hit this
486	* case, since header decoding speed matters more.
487	*/
488	if (highThreshold == tableSize - `1`) {
489	size_t const tableMask = tableSize-`1`;
490	size_t const step = FSE_TABLESTEP(tableSize);
491	/ First lay down the symbols in order.*
492	* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
493	* misses since small blocks generally have small table logs, so nearly
494	* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
495	* our buffer to handle the over-write.
496	*/
497	{
498	U64 const add = `0x0101010101010101ull`;
499	size_t pos = `0`;
500	U64 sv = `0`;
501	U32 s;
502	for (s=`0`; s<maxSV1; ++s, sv += add) {
503	int i;
504	int const n = normalizedCounter[s];
505	MEM_write64(memPtr: spread + pos, value: sv);
506	for (i = `8`; i < n; i += `8`) {
507	MEM_write64(memPtr: spread + pos + i, value: sv);
508	}
509	pos += n;
510	}
511	}
512	/ Now we spread those positions across the table.*
513	* The benefit of doing it in two stages is that we avoid the the
514	* variable size inner loop, which caused lots of branch misses.
515	* Now we can run through all the positions without any branch misses.
516	* We unroll the loop twice, since that is what emperically worked best.
517	*/
518	{
519	size_t position = `0`;
520	size_t s;
521	size_t const unroll = `2`;
522	assert(tableSize % unroll == `0`); / FSE_MIN_TABLELOG is 5 /
523	for (s = `0`; s < (size_t)tableSize; s += unroll) {
524	size_t u;
525	for (u = `0`; u < unroll; ++u) {
526	size_t const uPosition = (position + (u * step)) & tableMask;
527	tableDecode[uPosition].baseValue = spread[s + u];
528	}
529	position = (position + (unroll * step)) & tableMask;
530	}
531	assert(position == `0`);
532	}
533	} else {
534	U32 const tableMask = tableSize-`1`;
535	U32 const step = FSE_TABLESTEP(tableSize);
536	U32 s, position = `0`;
537	for (s=`0`; s<maxSV1; s++) {
538	int i;
539	int const n = normalizedCounter[s];
540	for (i=`0`; i<n; i++) {
541	tableDecode[position].baseValue = s;
542	position = (position + step) & tableMask;
543	while (position > highThreshold) position = (position + step) & tableMask; / lowprob area /
544	} }
545	assert(position == `0`); / position must reach all cells once, otherwise normalizedCounter is incorrect /
546	}
547
548	/ Build Decoding table /
549	{
550	U32 u;
551	for (u=`0`; u<tableSize; u++) {
552	U32 const symbol = tableDecode[u].baseValue;
553	U32 const nextState = symbolNext[symbol]++;
554	tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(val: nextState) );
555	tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
556	assert(nbAdditionalBits[symbol] < `255`);
557	tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
558	tableDecode[u].baseValue = baseValue[symbol];
559	}
560	}
561	}
562
563	/ Avoids the FORCE_INLINE of the _body() function. /
564	static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
565	const short* normalizedCounter, unsigned maxSymbolValue,
566	const U32* baseValue, const U8* nbAdditionalBits,
567	unsigned tableLog, void* wksp, size_t wkspSize)
568	{
569	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
570	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
571	}
572
573	#if DYNAMIC_BMI2
574	BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
575	const short* normalizedCounter, unsigned maxSymbolValue,
576	const U32* baseValue, const U8* nbAdditionalBits,
577	unsigned tableLog, void* wksp, size_t wkspSize)
578	{
579	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
580	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
581	}
582	#endif
583
584	void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
585	const short* normalizedCounter, unsigned maxSymbolValue,
586	const U32* baseValue, const U8* nbAdditionalBits,
587	unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
588	{
589	#if DYNAMIC_BMI2
590	if (bmi2) {
591	ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
592	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
593	return;
594	}
595	#endif
596	(void)bmi2;
597	ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
598	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
599	}
600
601
602	/! ZSTD_buildSeqTable() :*
603	* @return : nb bytes read from src,
604	* or an error code if it fails */
605	static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
606	symbolEncodingType_e type, unsigned max, U32 maxLog,
607	const void* src, size_t srcSize,
608	const U32* baseValue, const U8* nbAdditionalBits,
609	const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
610	int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
611	int bmi2)
612	{
613	switch(type)
614	{
615	case set_rle :
616	RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
617	RETURN_ERROR_IF(((const* BYTE*)src) > max, corruption_detected, "");
618	{ U32 const symbol = (const* BYTE*)src;
619	U32 const baseline = baseValue[symbol];
620	U8 const nbBits = nbAdditionalBits[symbol];
621	ZSTD_buildSeqTable_rle(dt: DTableSpace, baseValue: baseline, nbAddBits: nbBits);
622	}
623	*DTablePtr = DTableSpace;
624	return `1`;
625	case set_basic :
626	*DTablePtr = defaultTable;
627	return `0`;
628	case set_repeat:
629	RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
630	/ prefetch FSE table if used /
631	if (ddictIsCold && (nbSeq > `24` / heuristic /)) {
632	const void* const pStart = *DTablePtr;
633	size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
634	PREFETCH_AREA(pStart, pSize);
635	}
636	return `0`;
637	case set_compressed :
638	{ unsigned tableLog;
639	S16 norm[MaxSeq+`1`];
640	size_t const headerSize = FSE_readNCount(normalizedCounter: norm, maxSymbolValuePtr: &max, tableLogPtr: &tableLog, rBuffer: src, rBuffSize: srcSize);
641	RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
642	RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
643	ZSTD_buildFSETable(dt: DTableSpace, normalizedCounter: norm, maxSymbolValue: max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
644	*DTablePtr = DTableSpace;
645	return headerSize;
646	}
647	default :
648	assert(`0`);
649	RETURN_ERROR(GENERIC, "impossible");
650	}
651	}
652
653	size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
654	const void* src, size_t srcSize)
655	{
656	const BYTE* const istart = (const BYTE*)src;
657	const BYTE* const iend = istart + srcSize;
658	const BYTE* ip = istart;
659	int nbSeq;
660	DEBUGLOG(`5`, "ZSTD_decodeSeqHeaders");
661
662	/ check /
663	RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
664
665	/ SeqHead /
666	nbSeq = *ip++;
667	if (!nbSeq) {
668	*nbSeqPtr=`0`;
669	RETURN_ERROR_IF(srcSize != `1`, srcSize_wrong, "");
670	return `1`;
671	}
672	if (nbSeq > `0x7F`) {
673	if (nbSeq == `0xFF`) {
674	RETURN_ERROR_IF(ip+`2` > iend, srcSize_wrong, "");
675	nbSeq = MEM_readLE16(memPtr: ip) + LONGNBSEQ;
676	ip+=`2`;
677	} else {
678	RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
679	nbSeq = ((nbSeq-`0x80`)<<`8`) + *ip++;
680	}
681	}
682	*nbSeqPtr = nbSeq;
683
684	/ FSE table descriptors /
685	RETURN_ERROR_IF(ip+`1` > iend, srcSize_wrong, ""); / minimum possible size: 1 byte for symbol encoding types /
686	{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> `6`);
687	symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> `4`) & `3`);
688	symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> `2`) & `3`);
689	ip++;
690
691	/ Build DTables /
692	{ size_t const llhSize = ZSTD_buildSeqTable(DTableSpace: dctx->entropy.LLTable, DTablePtr: &dctx->LLTptr,
693	type: LLtype, MaxLL, LLFSELog,
694	src: ip, srcSize: iend-ip,
695	baseValue: LL_base, nbAdditionalBits: LL_bits,
696	defaultTable: LL_defaultDTable, flagRepeatTable: dctx->fseEntropy,
697	ddictIsCold: dctx->ddictIsCold, nbSeq,
698	wksp: dctx->workspace, wkspSize: sizeof(dctx->workspace),
699	bmi2: ZSTD_DCtx_get_bmi2(dctx));
700	RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
701	ip += llhSize;
702	}
703
704	{ size_t const ofhSize = ZSTD_buildSeqTable(DTableSpace: dctx->entropy.OFTable, DTablePtr: &dctx->OFTptr,
705	type: OFtype, MaxOff, OffFSELog,
706	src: ip, srcSize: iend-ip,
707	baseValue: OF_base, nbAdditionalBits: OF_bits,
708	defaultTable: OF_defaultDTable, flagRepeatTable: dctx->fseEntropy,
709	ddictIsCold: dctx->ddictIsCold, nbSeq,
710	wksp: dctx->workspace, wkspSize: sizeof(dctx->workspace),
711	bmi2: ZSTD_DCtx_get_bmi2(dctx));
712	RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
713	ip += ofhSize;
714	}
715
716	{ size_t const mlhSize = ZSTD_buildSeqTable(DTableSpace: dctx->entropy.MLTable, DTablePtr: &dctx->MLTptr,
717	type: MLtype, MaxML, MLFSELog,
718	src: ip, srcSize: iend-ip,
719	baseValue: ML_base, nbAdditionalBits: ML_bits,
720	defaultTable: ML_defaultDTable, flagRepeatTable: dctx->fseEntropy,
721	ddictIsCold: dctx->ddictIsCold, nbSeq,
722	wksp: dctx->workspace, wkspSize: sizeof(dctx->workspace),
723	bmi2: ZSTD_DCtx_get_bmi2(dctx));
724	RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
725	ip += mlhSize;
726	}
727	}
728
729	return ip-istart;
730	}
731
732
733	typedef struct {
734	size_t litLength;
735	size_t matchLength;
736	size_t offset;
737	} seq_t;
738
739	typedef struct {
740	size_t state;
741	const ZSTD_seqSymbol* table;
742	} ZSTD_fseState;
743
744	typedef struct {
745	BIT_DStream_t DStream;
746	ZSTD_fseState stateLL;
747	ZSTD_fseState stateOffb;
748	ZSTD_fseState stateML;
749	size_t prevOffset[ZSTD_REP_NUM];
750	} seqState_t;
751
752	/! ZSTD_overlapCopy8() :*
753	* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
754	* If the offset is < 8 then the offset is spread to at least 8 bytes.
755	*
756	* Precondition: ip <= op
757	* Postcondition: op - op >= 8
758	*/
759	HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
760	assert(ip <= op);
761	if (offset < `8`) {
762	/ close range match, overlap /
763	static const U32 dec32table[] = { `0`, `1`, `2`, `1`, `4`, `4`, `4`, `4` }; / added /
764	static const int dec64table[] = { `8`, `8`, `8`, `7`, `8`, `9`,`10`,`11` }; / subtracted /
765	int const sub2 = dec64table[offset];
766	(op)[`0`] = (ip)[`0`];
767	(op)[`1`] = (ip)[`1`];
768	(op)[`2`] = (ip)[`2`];
769	(op)[`3`] = (ip)[`3`];
770	*ip += dec32table[offset];
771	ZSTD_copy4(dst: op+`4`, src: ip);
772	*ip -= sub2;
773	} else {
774	ZSTD_copy8(dst: op, src: ip);
775	}
776	*ip += `8`;
777	*op += `8`;
778	assert(op - ip >= `8`);
779	}
780
781	/! ZSTD_safecopy() :*
782	* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
783	* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
784	* This function is only called in the uncommon case where the sequence is near the end of the block. It
785	* should be fast for a single long sequence, but can be slow for several short sequences.
786	*
787	* @param ovtype controls the overlap detection
788	* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
789	* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
790	* The src buffer must be before the dst buffer.
791	*/
792	static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
793	ptrdiff_t const diff = op - ip;
794	BYTE* const oend = op + length;
795
796	assert((ovtype == ZSTD_no_overlap && (diff <= -`8` \|\| diff >= `8` \|\| op >= oend_w)) \|\|
797	(ovtype == ZSTD_overlap_src_before_dst && diff >= `0`));
798
799	if (length < `8`) {
800	/ Handle short lengths. /
801	while (op < oend) op++ = ip++;
802	return;
803	}
804	if (ovtype == ZSTD_overlap_src_before_dst) {
805	/ Copy 8 bytes and ensure the offset >= 8 when there can be overlap. /
806	assert(length >= `8`);
807	ZSTD_overlapCopy8(op: &op, ip: &ip, offset: diff);
808	length -= `8`;
809	assert(op - ip >= `8`);
810	assert(op <= oend);
811	}
812
813	if (oend <= oend_w) {
814	/ No risk of overwrite. /
815	ZSTD_wildcopy(dst: op, src: ip, length, ovtype);
816	return;
817	}
818	if (op <= oend_w) {
819	/ Wildcopy until we get close to the end. /
820	assert(oend > oend_w);
821	ZSTD_wildcopy(dst: op, src: ip, length: oend_w - op, ovtype);
822	ip += oend_w - op;
823	op += oend_w - op;
824	}
825	/ Handle the leftovers. /
826	while (op < oend) op++ = ip++;
827	}
828
829	/ ZSTD_safecopyDstBeforeSrc():*
830	* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
831	* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
832	static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
833	ptrdiff_t const diff = op - ip;
834	BYTE* const oend = op + length;
835
836	if (length < `8` \|\| diff > -`8`) {
837	/ Handle short lengths, close overlaps, and dst not before src. /
838	while (op < oend) op++ = ip++;
839	return;
840	}
841
842	if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
843	ZSTD_wildcopy(dst: op, src: ip, length: oend - WILDCOPY_OVERLENGTH - op, ovtype: ZSTD_no_overlap);
844	ip += oend - WILDCOPY_OVERLENGTH - op;
845	op += oend - WILDCOPY_OVERLENGTH - op;
846	}
847
848	/ Handle the leftovers. /
849	while (op < oend) op++ = ip++;
850	}
851
852	/ ZSTD_execSequenceEnd():*
853	* This version handles cases that are near the end of the output buffer. It requires
854	* more careful checks to make sure there is no overflow. By separating out these hard
855	* and unlikely cases, we can speed up the common cases.
856	*
857	* NOTE: This function needs to be fast for a single long sequence, but doesn't need
858	* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
859	*/
860	FORCE_NOINLINE
861	size_t ZSTD_execSequenceEnd(BYTE* op,
862	BYTE* const oend, seq_t sequence,
863	const BYTE** litPtr, const BYTE* const litLimit,
864	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
865	{
866	BYTE* const oLitEnd = op + sequence.litLength;
867	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
868	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
869	const BYTE* match = oLitEnd - sequence.offset;
870	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
871
872	/ bounds checks : careful of address space overflow in 32-bit mode /
873	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
874	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
875	assert(op < op + sequenceLength);
876	assert(oLitEnd < op + sequenceLength);
877
878	/ copy literals /
879	ZSTD_safecopy(op, oend_w, ip: *litPtr, length: sequence.litLength, ovtype: ZSTD_no_overlap);
880	op = oLitEnd;
881	*litPtr = iLitEnd;
882
883	/ copy Match /
884	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
885	/ offset beyond prefix /
886	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
887	match = dictEnd - (prefixStart - match);
888	if (match + sequence.matchLength <= dictEnd) {
889	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
890	return sequenceLength;
891	}
892	/ span extDict & currentPrefixSegment /
893	{ size_t const length1 = dictEnd - match;
894	ZSTD_memmove(oLitEnd, match, length1);
895	op = oLitEnd + length1;
896	sequence.matchLength -= length1;
897	match = prefixStart;
898	}
899	}
900	ZSTD_safecopy(op, oend_w, ip: match, length: sequence.matchLength, ovtype: ZSTD_overlap_src_before_dst);
901	return sequenceLength;
902	}
903
904	/ ZSTD_execSequenceEndSplitLitBuffer():*
905	* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
906	*/
907	FORCE_NOINLINE
908	size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
909	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
910	const BYTE** litPtr, const BYTE* const litLimit,
911	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
912	{
913	BYTE* const oLitEnd = op + sequence.litLength;
914	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
915	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
916	const BYTE* match = oLitEnd - sequence.offset;
917
918
919	/ bounds checks : careful of address space overflow in 32-bit mode /
920	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
921	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
922	assert(op < op + sequenceLength);
923	assert(oLitEnd < op + sequenceLength);
924
925	/ copy literals /
926	RETURN_ERROR_IF(op > litPtr && op < litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
927	ZSTD_safecopyDstBeforeSrc(op, ip: *litPtr, length: sequence.litLength);
928	op = oLitEnd;
929	*litPtr = iLitEnd;
930
931	/ copy Match /
932	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
933	/ offset beyond prefix /
934	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
935	match = dictEnd - (prefixStart - match);
936	if (match + sequence.matchLength <= dictEnd) {
937	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
938	return sequenceLength;
939	}
940	/ span extDict & currentPrefixSegment /
941	{ size_t const length1 = dictEnd - match;
942	ZSTD_memmove(oLitEnd, match, length1);
943	op = oLitEnd + length1;
944	sequence.matchLength -= length1;
945	match = prefixStart;
946	}
947	}
948	ZSTD_safecopy(op, oend_w, ip: match, length: sequence.matchLength, ovtype: ZSTD_overlap_src_before_dst);
949	return sequenceLength;
950	}
951
952	HINT_INLINE
953	size_t ZSTD_execSequence(BYTE* op,
954	BYTE* const oend, seq_t sequence,
955	const BYTE** litPtr, const BYTE* const litLimit,
956	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
957	{
958	BYTE* const oLitEnd = op + sequence.litLength;
959	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
960	BYTE* const oMatchEnd = op + sequenceLength; / risk : address space overflow (32-bits) /
961	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; / risk : address space underflow on oend=NULL /
962	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
963	const BYTE* match = oLitEnd - sequence.offset;
964
965	assert(op != NULL / Precondition /);
966	assert(oend_w < oend / No underflow /);
967	/ Handle edge cases in a slow path:*
968	* - Read beyond end of literals
969	* - Match end is within WILDCOPY_OVERLIMIT of oend
970	* - 32-bit mode and the match length overflows
971	*/
972	if (UNLIKELY(
973	iLitEnd > litLimit \|\|
974	oMatchEnd > oend_w \|\|
975	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
976	return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
977
978	/ Assumptions (everything else goes into ZSTD_execSequenceEnd()) /
979	assert(op <= oLitEnd / No overflow /);
980	assert(oLitEnd < oMatchEnd / Non-zero match & no overflow /);
981	assert(oMatchEnd <= oend / No underflow /);
982	assert(iLitEnd <= litLimit / Literal length is in bounds /);
983	assert(oLitEnd <= oend_w / Can wildcopy literals /);
984	assert(oMatchEnd <= oend_w / Can wildcopy matches /);
985
986	/ Copy Literals:*
987	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
988	* We likely don't need the full 32-byte wildcopy.
989	*/
990	assert(WILDCOPY_OVERLENGTH >= `16`);
991	ZSTD_copy16(dst: op, src: (*litPtr));
992	if (UNLIKELY(sequence.litLength > `16`)) {
993	ZSTD_wildcopy(dst: op + `16`, src: (*litPtr) + `16`, length: sequence.litLength - `16`, ovtype: ZSTD_no_overlap);
994	}
995	op = oLitEnd;
996	litPtr = iLitEnd; /* update for next sequence /
997
998	/ Copy Match /
999	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1000	/ offset beyond prefix -> go into extDict /
1001	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1002	match = dictEnd + (match - prefixStart);
1003	if (match + sequence.matchLength <= dictEnd) {
1004	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1005	return sequenceLength;
1006	}
1007	/ span extDict & currentPrefixSegment /
1008	{ size_t const length1 = dictEnd - match;
1009	ZSTD_memmove(oLitEnd, match, length1);
1010	op = oLitEnd + length1;
1011	sequence.matchLength -= length1;
1012	match = prefixStart;
1013	}
1014	}
1015	/ Match within prefix of 1 or more bytes /
1016	assert(op <= oMatchEnd);
1017	assert(oMatchEnd <= oend_w);
1018	assert(match >= prefixStart);
1019	assert(sequence.matchLength >= `1`);
1020
1021	/ Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy*
1022	* without overlap checking.
1023	*/
1024	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1025	/ We bet on a full wildcopy for matches, since we expect matches to be*
1026	* longer than literals (in general). In silesia, ~10% of matches are longer
1027	* than 16 bytes.
1028	*/
1029	ZSTD_wildcopy(dst: op, src: match, length: (ptrdiff_t)sequence.matchLength, ovtype: ZSTD_no_overlap);
1030	return sequenceLength;
1031	}
1032	assert(sequence.offset < WILDCOPY_VECLEN);
1033
1034	/ Copy 8 bytes and spread the offset to be >= 8. /
1035	ZSTD_overlapCopy8(op: &op, ip: &match, offset: sequence.offset);
1036
1037	/ If the match length is > 8 bytes, then continue with the wildcopy. /
1038	if (sequence.matchLength > `8`) {
1039	assert(op < oMatchEnd);
1040	ZSTD_wildcopy(dst: op, src: match, length: (ptrdiff_t)sequence.matchLength - `8`, ovtype: ZSTD_overlap_src_before_dst);
1041	}
1042	return sequenceLength;
1043	}
1044
1045	HINT_INLINE
1046	size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1047	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1048	const BYTE** litPtr, const BYTE* const litLimit,
1049	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1050	{
1051	BYTE* const oLitEnd = op + sequence.litLength;
1052	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1053	BYTE* const oMatchEnd = op + sequenceLength; / risk : address space overflow (32-bits) /
1054	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1055	const BYTE* match = oLitEnd - sequence.offset;
1056
1057	assert(op != NULL / Precondition /);
1058	assert(oend_w < oend / No underflow /);
1059	/ Handle edge cases in a slow path:*
1060	* - Read beyond end of literals
1061	* - Match end is within WILDCOPY_OVERLIMIT of oend
1062	* - 32-bit mode and the match length overflows
1063	*/
1064	if (UNLIKELY(
1065	iLitEnd > litLimit \|\|
1066	oMatchEnd > oend_w \|\|
1067	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1068	return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1069
1070	/ Assumptions (everything else goes into ZSTD_execSequenceEnd()) /
1071	assert(op <= oLitEnd / No overflow /);
1072	assert(oLitEnd < oMatchEnd / Non-zero match & no overflow /);
1073	assert(oMatchEnd <= oend / No underflow /);
1074	assert(iLitEnd <= litLimit / Literal length is in bounds /);
1075	assert(oLitEnd <= oend_w / Can wildcopy literals /);
1076	assert(oMatchEnd <= oend_w / Can wildcopy matches /);
1077
1078	/ Copy Literals:*
1079	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1080	* We likely don't need the full 32-byte wildcopy.
1081	*/
1082	assert(WILDCOPY_OVERLENGTH >= `16`);
1083	ZSTD_copy16(dst: op, src: (*litPtr));
1084	if (UNLIKELY(sequence.litLength > `16`)) {
1085	ZSTD_wildcopy(dst: op+`16`, src: (*litPtr)+`16`, length: sequence.litLength-`16`, ovtype: ZSTD_no_overlap);
1086	}
1087	op = oLitEnd;
1088	litPtr = iLitEnd; /* update for next sequence /
1089
1090	/ Copy Match /
1091	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1092	/ offset beyond prefix -> go into extDict /
1093	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1094	match = dictEnd + (match - prefixStart);
1095	if (match + sequence.matchLength <= dictEnd) {
1096	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1097	return sequenceLength;
1098	}
1099	/ span extDict & currentPrefixSegment /
1100	{ size_t const length1 = dictEnd - match;
1101	ZSTD_memmove(oLitEnd, match, length1);
1102	op = oLitEnd + length1;
1103	sequence.matchLength -= length1;
1104	match = prefixStart;
1105	} }
1106	/ Match within prefix of 1 or more bytes /
1107	assert(op <= oMatchEnd);
1108	assert(oMatchEnd <= oend_w);
1109	assert(match >= prefixStart);
1110	assert(sequence.matchLength >= `1`);
1111
1112	/ Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy*
1113	* without overlap checking.
1114	*/
1115	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1116	/ We bet on a full wildcopy for matches, since we expect matches to be*
1117	* longer than literals (in general). In silesia, ~10% of matches are longer
1118	* than 16 bytes.
1119	*/
1120	ZSTD_wildcopy(dst: op, src: match, length: (ptrdiff_t)sequence.matchLength, ovtype: ZSTD_no_overlap);
1121	return sequenceLength;
1122	}
1123	assert(sequence.offset < WILDCOPY_VECLEN);
1124
1125	/ Copy 8 bytes and spread the offset to be >= 8. /
1126	ZSTD_overlapCopy8(op: &op, ip: &match, offset: sequence.offset);
1127
1128	/ If the match length is > 8 bytes, then continue with the wildcopy. /
1129	if (sequence.matchLength > `8`) {
1130	assert(op < oMatchEnd);
1131	ZSTD_wildcopy(dst: op, src: match, length: (ptrdiff_t)sequence.matchLength-`8`, ovtype: ZSTD_overlap_src_before_dst);
1132	}
1133	return sequenceLength;
1134	}
1135
1136
1137	static void
1138	ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1139	{
1140	const void* ptr = dt;
1141	const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1142	DStatePtr->state = BIT_readBits(bitD, nbBits: DTableH->tableLog);
1143	DEBUGLOG(`6`, "ZSTD_initFseState : val=%u using %u bits",
1144	(U32)DStatePtr->state, DTableH->tableLog);
1145	BIT_reloadDStream(bitD);
1146	DStatePtr->table = dt + `1`;
1147	}
1148
1149	FORCE_INLINE_TEMPLATE void
1150	ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1151	{
1152	size_t const lowBits = BIT_readBits(bitD, nbBits);
1153	DStatePtr->state = nextState + lowBits;
1154	}
1155
1156	/ We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum*
1157	* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1158	* bits before reloading. This value is the maximum number of bytes we read
1159	* after reloading when we are decoding long offsets.
1160	*/
1161	#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1162	(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1163	? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1164	: 0)
1165
1166	typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=`1` } ZSTD_longOffset_e;
1167
1168	FORCE_INLINE_TEMPLATE seq_t
1169	ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1170	{
1171	seq_t seq;
1172	const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1173	const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1174	const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1175	seq.matchLength = mlDInfo->baseValue;
1176	seq.litLength = llDInfo->baseValue;
1177	{ U32 const ofBase = ofDInfo->baseValue;
1178	BYTE const llBits = llDInfo->nbAdditionalBits;
1179	BYTE const mlBits = mlDInfo->nbAdditionalBits;
1180	BYTE const ofBits = ofDInfo->nbAdditionalBits;
1181	BYTE const totalBits = llBits+mlBits+ofBits;
1182
1183	U16 const llNext = llDInfo->nextState;
1184	U16 const mlNext = mlDInfo->nextState;
1185	U16 const ofNext = ofDInfo->nextState;
1186	U32 const llnbBits = llDInfo->nbBits;
1187	U32 const mlnbBits = mlDInfo->nbBits;
1188	U32 const ofnbBits = ofDInfo->nbBits;
1189	/*
1190	* As gcc has better branch and block analyzers, sometimes it is only
1191	* valuable to mark likelyness for clang, it gives around 3-4% of
1192	* performance.
1193	*/
1194
1195	/ sequence /
1196	{ size_t offset;
1197	#if defined(__clang__)
1198	if (LIKELY(ofBits > `1`)) {
1199	#else
1200	if (ofBits > `1`) {
1201	#endif
1202	ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == `1`);
1203	ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == `5`);
1204	assert(ofBits <= MaxOff);
1205	if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1206	U32 const extraBits = ofBits - MIN(ofBits, `32` - seqState->DStream.bitsConsumed);
1207	offset = ofBase + (BIT_readBitsFast(bitD: &seqState->DStream, nbBits: ofBits - extraBits) << extraBits);
1208	BIT_reloadDStream(bitD: &seqState->DStream);
1209	if (extraBits) offset += BIT_readBitsFast(bitD: &seqState->DStream, nbBits: extraBits);
1210	assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); / to avoid another reload /
1211	} else {
1212	offset = ofBase + BIT_readBitsFast(bitD: &seqState->DStream, nbBits: ofBits/>0/); / <= (ZSTD_WINDOWLOG_MAX-1) bits /
1213	if (MEM_32bits()) BIT_reloadDStream(bitD: &seqState->DStream);
1214	}
1215	seqState->prevOffset[`2`] = seqState->prevOffset[`1`];
1216	seqState->prevOffset[`1`] = seqState->prevOffset[`0`];
1217	seqState->prevOffset[`0`] = offset;
1218	} else {
1219	U32 const ll0 = (llDInfo->baseValue == `0`);
1220	if (LIKELY((ofBits == `0`))) {
1221	offset = seqState->prevOffset[ll0];
1222	seqState->prevOffset[`1`] = seqState->prevOffset[!ll0];
1223	seqState->prevOffset[`0`] = offset;
1224	} else {
1225	offset = ofBase + ll0 + BIT_readBitsFast(bitD: &seqState->DStream, nbBits: `1`);
1226	{ size_t temp = (offset==`3`) ? seqState->prevOffset[`0`] - `1` : seqState->prevOffset[offset];
1227	temp += !temp; / 0 is not valid; input is corrupted; force offset to 1 /
1228	if (offset != `1`) seqState->prevOffset[`2`] = seqState->prevOffset[`1`];
1229	seqState->prevOffset[`1`] = seqState->prevOffset[`0`];
1230	seqState->prevOffset[`0`] = offset = temp;
1231	} } }
1232	seq.offset = offset;
1233	}
1234
1235	#if defined(__clang__)
1236	if (UNLIKELY(mlBits > `0`))
1237	#else
1238	if (mlBits > `0`)
1239	#endif
1240	seq.matchLength += BIT_readBitsFast(bitD: &seqState->DStream, nbBits: mlBits/>0/);
1241
1242	if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1243	BIT_reloadDStream(bitD: &seqState->DStream);
1244	if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1245	BIT_reloadDStream(bitD: &seqState->DStream);
1246	/ Ensure there are enough bits to read the rest of data in 64-bit mode. /
1247	ZSTD_STATIC_ASSERT(`16`+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1248
1249	#if defined(__clang__)
1250	if (UNLIKELY(llBits > `0`))
1251	#else
1252	if (llBits > `0`)
1253	#endif
1254	seq.litLength += BIT_readBitsFast(bitD: &seqState->DStream, nbBits: llBits/>0/);
1255
1256	if (MEM_32bits())
1257	BIT_reloadDStream(bitD: &seqState->DStream);
1258
1259	DEBUGLOG(`6`, "seq: litL=%u, matchL=%u, offset=%u",
1260	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1261
1262	ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateLL, bitD: &seqState->DStream, nextState: llNext, nbBits: llnbBits); / <= 9 bits /
1263	ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateML, bitD: &seqState->DStream, nextState: mlNext, nbBits: mlnbBits); / <= 9 bits /
1264	if (MEM_32bits()) BIT_reloadDStream(bitD: &seqState->DStream); / <= 18 bits /
1265	ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateOffb, bitD: &seqState->DStream, nextState: ofNext, nbBits: ofnbBits); / <= 8 bits /
1266	}
1267
1268	return seq;
1269	}
1270
1271	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1272	MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1273	{
1274	size_t const windowSize = dctx->fParams.windowSize;
1275	/ No dictionary used. /
1276	if (dctx->dictContentEndForFuzzing == NULL) return `0`;
1277	/ Dictionary is our prefix. /
1278	if (prefixStart == dctx->dictContentBeginForFuzzing) return `1`;
1279	/ Dictionary is not our ext-dict. /
1280	if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return `0`;
1281	/ Dictionary is not within our window size. /
1282	if ((size_t)(oLitEnd - prefixStart) >= windowSize) return `0`;
1283	/ Dictionary is active. /
1284	return `1`;
1285	}
1286
1287	MEM_STATIC void ZSTD_assertValidSequence(
1288	ZSTD_DCtx const* dctx,
1289	BYTE const* op, BYTE const* oend,
1290	seq_t const seq,
1291	BYTE const* prefixStart, BYTE const* virtualStart)
1292	{
1293	#if DEBUGLEVEL >= 1
1294	size_t const windowSize = dctx->fParams.windowSize;
1295	size_t const sequenceSize = seq.litLength + seq.matchLength;
1296	BYTE const* const oLitEnd = op + seq.litLength;
1297	DEBUGLOG(`6`, "Checking sequence: litL=%u matchL=%u offset=%u",
1298	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1299	assert(op <= oend);
1300	assert((size_t)(oend - op) >= sequenceSize);
1301	assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
1302	if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1303	size_t const dictSize = (size_t)((char const)dctx->dictContentEndForFuzzing - (char* const*)dctx->dictContentBeginForFuzzing);
1304	/ Offset must be within the dictionary. /
1305	assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1306	assert(seq.offset <= windowSize + dictSize);
1307	} else {
1308	/ Offset must be within our window. /
1309	assert(seq.offset <= windowSize);
1310	}
1311	#else
1312	(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1313	#endif
1314	}
1315	#endif
1316
1317	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1318
1319
1320	FORCE_INLINE_TEMPLATE size_t
1321	DONT_VECTORIZE
1322	ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1323	void* dst, size_t maxDstSize,
1324	const void* seqStart, size_t seqSize, int nbSeq,
1325	const ZSTD_longOffset_e isLongOffset,
1326	const int frame)
1327	{
1328	const BYTE* ip = (const BYTE*)seqStart;
1329	const BYTE* const iend = ip + seqSize;
1330	BYTE* const ostart = (BYTE*)dst;
1331	BYTE* const oend = ostart + maxDstSize;
1332	BYTE* op = ostart;
1333	const BYTE* litPtr = dctx->litPtr;
1334	const BYTE* litBufferEnd = dctx->litBufferEnd;
1335	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1336	const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1337	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1338	DEBUGLOG(`5`, "ZSTD_decompressSequences_bodySplitLitBuffer");
1339	(void)frame;
1340
1341	/ Regen sequences /
1342	if (nbSeq) {
1343	seqState_t seqState;
1344	dctx->fseEntropy = `1`;
1345	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1346	RETURN_ERROR_IF(
1347	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1348	corruption_detected, "");
1349	ZSTD_initFseState(DStatePtr: &seqState.stateLL, bitD: &seqState.DStream, dt: dctx->LLTptr);
1350	ZSTD_initFseState(DStatePtr: &seqState.stateOffb, bitD: &seqState.DStream, dt: dctx->OFTptr);
1351	ZSTD_initFseState(DStatePtr: &seqState.stateML, bitD: &seqState.DStream, dt: dctx->MLTptr);
1352	assert(dst != NULL);
1353
1354	ZSTD_STATIC_ASSERT(
1355	BIT_DStream_unfinished < BIT_DStream_completed &&
1356	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1357	BIT_DStream_completed < BIT_DStream_overflow);
1358
1359	/ decompress without overrunning litPtr begins /
1360	{
1361	seq_t sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset);
1362	/ Align the decompression loop to 32 + 16 bytes.*
1363	*
1364	* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1365	* speed swings based on the alignment of the decompression loop. This
1366	* performance swing is caused by parts of the decompression loop falling
1367	* out of the DSB. The entire decompression loop should fit in the DSB,
1368	* when it can't we get much worse performance. You can measure if you've
1369	* hit the good case or the bad case with this perf command for some
1370	* compressed file test.zst:
1371	*
1372	* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1373	* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1374	*
1375	* If you see most cycles served out of the MITE you've hit the bad case.
1376	* If you see most cycles served out of the DSB you've hit the good case.
1377	* If it is pretty even then you may be in an okay case.
1378	*
1379	* This issue has been reproduced on the following CPUs:
1380	* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1381	* Use Instruments->Counters to get DSB/MITE cycles.
1382	* I never got performance swings, but I was able to
1383	* go from the good case of mostly DSB to half of the
1384	* cycles served from MITE.
1385	* - Coffeelake: Intel i9-9900k
1386	* - Coffeelake: Intel i7-9700k
1387	*
1388	* I haven't been able to reproduce the instability or DSB misses on any
1389	* of the following CPUS:
1390	* - Haswell
1391	* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1392	* - Skylake
1393	*
1394	* Alignment is done for each of the three major decompression loops:
1395	* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1396	* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1397	* - ZSTD_decompressSequences_body
1398	* Alignment choices are made to minimize large swings on bad cases and influence on performance
1399	* from changes external to this code, rather than to overoptimize on the current commit.
1400	*
1401	* If you are seeing performance stability this script can help test.
1402	* It tests on 4 commits in zstd where I saw performance change.
1403	*
1404	* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1405	*/
1406	#if defined(__x86_64__)
1407	__asm__(".p2align 6");
1408	# if __GNUC__ >= 7
1409	/ good for gcc-7, gcc-9, and gcc-11 /
1410	__asm__("nop");
1411	__asm__(".p2align 5");
1412	__asm__("nop");
1413	__asm__(".p2align 4");
1414	# if __GNUC__ == 8 \|\| __GNUC__ == 10
1415	/ good for gcc-8 and gcc-10 /
1416	__asm__("nop");
1417	__asm__(".p2align 3");
1418	# endif
1419	# endif
1420	#endif
1421
1422	/ Handle the initial state where litBuffer is currently split between dst and litExtraBuffer /
1423	for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
1424	size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, oend_w: litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: vBase, dictEnd);
1425	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1426	assert(!ZSTD_isError(oneSeqSize));
1427	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1428	#endif
1429	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1430	return oneSeqSize;
1431	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1432	op += oneSeqSize;
1433	if (UNLIKELY(!--nbSeq))
1434	break;
1435	BIT_reloadDStream(bitD: &(seqState.DStream));
1436	sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset);
1437	}
1438
1439	/ If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd /
1440	if (nbSeq > `0`) {
1441	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1442	if (leftoverLit)
1443	{
1444	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1445	ZSTD_safecopyDstBeforeSrc(op, ip: litPtr, length: leftoverLit);
1446	sequence.litLength -= leftoverLit;
1447	op += leftoverLit;
1448	}
1449	litPtr = dctx->litExtraBuffer;
1450	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1451	dctx->litBufferLocation = ZSTD_not_in_dst;
1452	{
1453	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: vBase, dictEnd);
1454	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1455	assert(!ZSTD_isError(oneSeqSize));
1456	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1457	#endif
1458	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1459	return oneSeqSize;
1460	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1461	op += oneSeqSize;
1462	if (--nbSeq)
1463	BIT_reloadDStream(bitD: &(seqState.DStream));
1464	}
1465	}
1466	}
1467
1468	if (nbSeq > `0`) / there is remaining lit from extra buffer /
1469	{
1470
1471	#if defined(__x86_64__)
1472	__asm__(".p2align 6");
1473	__asm__("nop");
1474	# if __GNUC__ != 7
1475	/ worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang /
1476	__asm__(".p2align 4");
1477	__asm__("nop");
1478	__asm__(".p2align 3");
1479	# elif __GNUC__ >= 11
1480	__asm__(".p2align 3");
1481	# else
1482	__asm__(".p2align 5");
1483	__asm__("nop");
1484	__asm__(".p2align 3");
1485	# endif
1486	#endif
1487
1488	for (; ; ) {
1489	seq_t const sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset);
1490	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: vBase, dictEnd);
1491	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1492	assert(!ZSTD_isError(oneSeqSize));
1493	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1494	#endif
1495	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1496	return oneSeqSize;
1497	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1498	op += oneSeqSize;
1499	if (UNLIKELY(!--nbSeq))
1500	break;
1501	BIT_reloadDStream(bitD: &(seqState.DStream));
1502	}
1503	}
1504
1505	/ check if reached exact end /
1506	DEBUGLOG(`5`, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1507	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1508	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1509	/ save reps for next block /
1510	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1511	}
1512
1513	/ last literal segment /
1514	if (dctx->litBufferLocation == ZSTD_split) / split hasn't been reached yet, first get dst then copy litExtraBuffer /
1515	{
1516	size_t const lastLLSize = litBufferEnd - litPtr;
1517	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1518	if (op != NULL) {
1519	ZSTD_memmove(op, litPtr, lastLLSize);
1520	op += lastLLSize;
1521	}
1522	litPtr = dctx->litExtraBuffer;
1523	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1524	dctx->litBufferLocation = ZSTD_not_in_dst;
1525	}
1526	{ size_t const lastLLSize = litBufferEnd - litPtr;
1527	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1528	if (op != NULL) {
1529	ZSTD_memcpy(op, litPtr, lastLLSize);
1530	op += lastLLSize;
1531	}
1532	}
1533
1534	return op-ostart;
1535	}
1536
1537	FORCE_INLINE_TEMPLATE size_t
1538	DONT_VECTORIZE
1539	ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1540	void* dst, size_t maxDstSize,
1541	const void* seqStart, size_t seqSize, int nbSeq,
1542	const ZSTD_longOffset_e isLongOffset,
1543	const int frame)
1544	{
1545	const BYTE* ip = (const BYTE*)seqStart;
1546	const BYTE* const iend = ip + seqSize;
1547	BYTE* const ostart = (BYTE*)dst;
1548	BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
1549	BYTE* op = ostart;
1550	const BYTE* litPtr = dctx->litPtr;
1551	const BYTE* const litEnd = litPtr + dctx->litSize;
1552	const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1553	const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1554	const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1555	DEBUGLOG(`5`, "ZSTD_decompressSequences_body");
1556	(void)frame;
1557
1558	/ Regen sequences /
1559	if (nbSeq) {
1560	seqState_t seqState;
1561	dctx->fseEntropy = `1`;
1562	{ U32 i; for (i = `0`; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1563	RETURN_ERROR_IF(
1564	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1565	corruption_detected, "");
1566	ZSTD_initFseState(DStatePtr: &seqState.stateLL, bitD: &seqState.DStream, dt: dctx->LLTptr);
1567	ZSTD_initFseState(DStatePtr: &seqState.stateOffb, bitD: &seqState.DStream, dt: dctx->OFTptr);
1568	ZSTD_initFseState(DStatePtr: &seqState.stateML, bitD: &seqState.DStream, dt: dctx->MLTptr);
1569	assert(dst != NULL);
1570
1571	ZSTD_STATIC_ASSERT(
1572	BIT_DStream_unfinished < BIT_DStream_completed &&
1573	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1574	BIT_DStream_completed < BIT_DStream_overflow);
1575
1576	#if defined(__x86_64__)
1577	__asm__(".p2align 6");
1578	__asm__("nop");
1579	# if __GNUC__ >= 7
1580	__asm__(".p2align 5");
1581	__asm__("nop");
1582	__asm__(".p2align 3");
1583	# else
1584	__asm__(".p2align 4");
1585	__asm__("nop");
1586	__asm__(".p2align 3");
1587	# endif
1588	#endif
1589
1590	for ( ; ; ) {
1591	seq_t const sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset);
1592	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, litPtr: &litPtr, litLimit: litEnd, prefixStart, virtualStart: vBase, dictEnd);
1593	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1594	assert(!ZSTD_isError(oneSeqSize));
1595	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1596	#endif
1597	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1598	return oneSeqSize;
1599	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1600	op += oneSeqSize;
1601	if (UNLIKELY(!--nbSeq))
1602	break;
1603	BIT_reloadDStream(bitD: &(seqState.DStream));
1604	}
1605
1606	/ check if reached exact end /
1607	DEBUGLOG(`5`, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1608	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1609	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1610	/ save reps for next block /
1611	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1612	}
1613
1614	/ last literal segment /
1615	{ size_t const lastLLSize = litEnd - litPtr;
1616	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1617	if (op != NULL) {
1618	ZSTD_memcpy(op, litPtr, lastLLSize);
1619	op += lastLLSize;
1620	}
1621	}
1622
1623	return op-ostart;
1624	}
1625
1626	static size_t
1627	ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1628	void* dst, size_t maxDstSize,
1629	const void* seqStart, size_t seqSize, int nbSeq,
1630	const ZSTD_longOffset_e isLongOffset,
1631	const int frame)
1632	{
1633	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1634	}
1635
1636	static size_t
1637	ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1638	void* dst, size_t maxDstSize,
1639	const void* seqStart, size_t seqSize, int nbSeq,
1640	const ZSTD_longOffset_e isLongOffset,
1641	const int frame)
1642	{
1643	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1644	}
1645	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1646
1647	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1648
1649	FORCE_INLINE_TEMPLATE size_t
1650	ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1651	const BYTE* const prefixStart, const BYTE* const dictEnd)
1652	{
1653	prefetchPos += sequence.litLength;
1654	{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1655	const BYTE* const match = matchBase + prefetchPos - sequence.offset; / note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.*
1656	* No consequence though : memory address is only used for prefetching, not for dereferencing */
1657	PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); / note : it's safe to invoke PREFETCH() on any memory address, including invalid ones /
1658	}
1659	return prefetchPos + sequence.matchLength;
1660	}
1661
1662	/ This decoding function employs prefetching*
1663	* to reduce latency impact of cache misses.
1664	* It's generally employed when block contains a significant portion of long-distance matches
1665	* or when coupled with a "cold" dictionary */
1666	FORCE_INLINE_TEMPLATE size_t
1667	ZSTD_decompressSequencesLong_body(
1668	ZSTD_DCtx* dctx,
1669	void* dst, size_t maxDstSize,
1670	const void* seqStart, size_t seqSize, int nbSeq,
1671	const ZSTD_longOffset_e isLongOffset,
1672	const int frame)
1673	{
1674	const BYTE* ip = (const BYTE*)seqStart;
1675	const BYTE* const iend = ip + seqSize;
1676	BYTE* const ostart = (BYTE*)dst;
1677	BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
1678	BYTE* op = ostart;
1679	const BYTE* litPtr = dctx->litPtr;
1680	const BYTE* litBufferEnd = dctx->litBufferEnd;
1681	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1682	const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1683	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1684	(void)frame;
1685
1686	/ Regen sequences /
1687	if (nbSeq) {
1688	#define STORED_SEQS 8
1689	#define STORED_SEQS_MASK (STORED_SEQS-1)
1690	#define ADVANCED_SEQS STORED_SEQS
1691	seq_t sequences[STORED_SEQS];
1692	int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1693	seqState_t seqState;
1694	int seqNb;
1695	size_t prefetchPos = (size_t)(op-prefixStart); / track position relative to prefixStart /
1696
1697	dctx->fseEntropy = `1`;
1698	{ int i; for (i=`0`; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1699	assert(dst != NULL);
1700	assert(iend >= ip);
1701	RETURN_ERROR_IF(
1702	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1703	corruption_detected, "");
1704	ZSTD_initFseState(DStatePtr: &seqState.stateLL, bitD: &seqState.DStream, dt: dctx->LLTptr);
1705	ZSTD_initFseState(DStatePtr: &seqState.stateOffb, bitD: &seqState.DStream, dt: dctx->OFTptr);
1706	ZSTD_initFseState(DStatePtr: &seqState.stateML, bitD: &seqState.DStream, dt: dctx->MLTptr);
1707
1708	/ prepare in advance /
1709	for (seqNb=`0`; (BIT_reloadDStream(bitD: &seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1710	seq_t const sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset);
1711	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1712	sequences[seqNb] = sequence;
1713	}
1714	RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1715
1716	/ decompress without stomping litBuffer /
1717	for (; (BIT_reloadDStream(bitD: &(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
1718	seq_t sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset);
1719	size_t oneSeqSize;
1720
1721	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
1722	{
1723	/ lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer /
1724	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1725	if (leftoverLit)
1726	{
1727	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1728	ZSTD_safecopyDstBeforeSrc(op, ip: litPtr, length: leftoverLit);
1729	sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1730	op += leftoverLit;
1731	}
1732	litPtr = dctx->litExtraBuffer;
1733	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1734	dctx->litBufferLocation = ZSTD_not_in_dst;
1735	oneSeqSize = ZSTD_execSequence(op, oend, sequence: sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd);
1736	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1737	assert(!ZSTD_isError(oneSeqSize));
1738	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1739	#endif
1740	if (ZSTD_isError(code: oneSeqSize)) return oneSeqSize;
1741
1742	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1743	sequences[seqNb & STORED_SEQS_MASK] = sequence;
1744	op += oneSeqSize;
1745	}
1746	else
1747	{
1748	/ lit buffer is either wholly contained in first or second split, or not split at all/
1749	oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1750	ZSTD_execSequenceSplitLitBuffer(op, oend, oend_w: litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequence: sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd) :
1751	ZSTD_execSequence(op, oend, sequence: sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd);
1752	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1753	assert(!ZSTD_isError(oneSeqSize));
1754	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1755	#endif
1756	if (ZSTD_isError(code: oneSeqSize)) return oneSeqSize;
1757
1758	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1759	sequences[seqNb & STORED_SEQS_MASK] = sequence;
1760	op += oneSeqSize;
1761	}
1762	}
1763	RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1764
1765	/ finish queue /
1766	seqNb -= seqAdvance;
1767	for ( ; seqNb<nbSeq ; seqNb++) {
1768	seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1769	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
1770	{
1771	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1772	if (leftoverLit)
1773	{
1774	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1775	ZSTD_safecopyDstBeforeSrc(op, ip: litPtr, length: leftoverLit);
1776	sequence->litLength -= leftoverLit;
1777	op += leftoverLit;
1778	}
1779	litPtr = dctx->litExtraBuffer;
1780	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1781	dctx->litBufferLocation = ZSTD_not_in_dst;
1782	{
1783	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence: *sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd);
1784	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1785	assert(!ZSTD_isError(oneSeqSize));
1786	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1787	#endif
1788	if (ZSTD_isError(code: oneSeqSize)) return oneSeqSize;
1789	op += oneSeqSize;
1790	}
1791	}
1792	else
1793	{
1794	size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1795	ZSTD_execSequenceSplitLitBuffer(op, oend, oend_w: litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, sequence: *sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd) :
1796	ZSTD_execSequence(op, oend, sequence: *sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd);
1797	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1798	assert(!ZSTD_isError(oneSeqSize));
1799	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1800	#endif
1801	if (ZSTD_isError(code: oneSeqSize)) return oneSeqSize;
1802	op += oneSeqSize;
1803	}
1804	}
1805
1806	/ save reps for next block /
1807	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1808	}
1809
1810	/ last literal segment /
1811	if (dctx->litBufferLocation == ZSTD_split) / first deplete literal buffer in dst, then copy litExtraBuffer /
1812	{
1813	size_t const lastLLSize = litBufferEnd - litPtr;
1814	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1815	if (op != NULL) {
1816	ZSTD_memmove(op, litPtr, lastLLSize);
1817	op += lastLLSize;
1818	}
1819	litPtr = dctx->litExtraBuffer;
1820	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1821	}
1822	{ size_t const lastLLSize = litBufferEnd - litPtr;
1823	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1824	if (op != NULL) {
1825	ZSTD_memmove(op, litPtr, lastLLSize);
1826	op += lastLLSize;
1827	}
1828	}
1829
1830	return op-ostart;
1831	}
1832
1833	static size_t
1834	ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1835	void* dst, size_t maxDstSize,
1836	const void* seqStart, size_t seqSize, int nbSeq,
1837	const ZSTD_longOffset_e isLongOffset,
1838	const int frame)
1839	{
1840	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1841	}
1842	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1843
1844
1845
1846	#if DYNAMIC_BMI2
1847
1848	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1849	static BMI2_TARGET_ATTRIBUTE size_t
1850	DONT_VECTORIZE
1851	ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1852	void* dst, size_t maxDstSize,
1853	const void* seqStart, size_t seqSize, int nbSeq,
1854	const ZSTD_longOffset_e isLongOffset,
1855	const int frame)
1856	{
1857	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1858	}
1859	static BMI2_TARGET_ATTRIBUTE size_t
1860	DONT_VECTORIZE
1861	ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1862	void* dst, size_t maxDstSize,
1863	const void* seqStart, size_t seqSize, int nbSeq,
1864	const ZSTD_longOffset_e isLongOffset,
1865	const int frame)
1866	{
1867	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1868	}
1869	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1870
1871	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1872	static BMI2_TARGET_ATTRIBUTE size_t
1873	ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1874	void* dst, size_t maxDstSize,
1875	const void* seqStart, size_t seqSize, int nbSeq,
1876	const ZSTD_longOffset_e isLongOffset,
1877	const int frame)
1878	{
1879	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1880	}
1881	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1882
1883	#endif /* DYNAMIC_BMI2 */
1884
1885	typedef size_t (*ZSTD_decompressSequences_t)(
1886	ZSTD_DCtx* dctx,
1887	void* dst, size_t maxDstSize,
1888	const void* seqStart, size_t seqSize, int nbSeq,
1889	const ZSTD_longOffset_e isLongOffset,
1890	const int frame);
1891
1892	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1893	static size_t
1894	ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1895	const void* seqStart, size_t seqSize, int nbSeq,
1896	const ZSTD_longOffset_e isLongOffset,
1897	const int frame)
1898	{
1899	DEBUGLOG(`5`, "ZSTD_decompressSequences");
1900	#if DYNAMIC_BMI2
1901	if (ZSTD_DCtx_get_bmi2(dctx)) {
1902	return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1903	}
1904	#endif
1905	return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1906	}
1907	static size_t
1908	ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1909	const void* seqStart, size_t seqSize, int nbSeq,
1910	const ZSTD_longOffset_e isLongOffset,
1911	const int frame)
1912	{
1913	DEBUGLOG(`5`, "ZSTD_decompressSequencesSplitLitBuffer");
1914	#if DYNAMIC_BMI2
1915	if (ZSTD_DCtx_get_bmi2(dctx)) {
1916	return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1917	}
1918	#endif
1919	return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1920	}
1921	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1922
1923
1924	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1925	/ ZSTD_decompressSequencesLong() :*
1926	* decompression function triggered when a minimum share of offsets is considered "long",
1927	* aka out of cache.
1928	* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1929	* This function will try to mitigate main memory latency through the use of prefetching */
1930	static size_t
1931	ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1932	void* dst, size_t maxDstSize,
1933	const void* seqStart, size_t seqSize, int nbSeq,
1934	const ZSTD_longOffset_e isLongOffset,
1935	const int frame)
1936	{
1937	DEBUGLOG(`5`, "ZSTD_decompressSequencesLong");
1938	#if DYNAMIC_BMI2
1939	if (ZSTD_DCtx_get_bmi2(dctx)) {
1940	return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1941	}
1942	#endif
1943	return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1944	}
1945	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1946
1947
1948
1949	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1950	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1951	/ ZSTD_getLongOffsetsShare() :*
1952	* condition : offTable must be valid
1953	* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1954	* compared to maximum possible of (1<<OffFSELog) */
1955	static unsigned
1956	ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1957	{
1958	const void* ptr = offTable;
1959	U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[`0`].tableLog;
1960	const ZSTD_seqSymbol* table = offTable + `1`;
1961	U32 const max = `1` << tableLog;
1962	U32 u, total = `0`;
1963	DEBUGLOG(`5`, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1964
1965	assert(max <= (`1` << OffFSELog)); / max not too large /
1966	for (u=`0`; u<max; u++) {
1967	if (table[u].nbAdditionalBits > `22`) total += `1`;
1968	}
1969
1970	assert(tableLog <= OffFSELog);
1971	total <<= (OffFSELog - tableLog); / scale to OffFSELog /
1972
1973	return total;
1974	}
1975	#endif
1976
1977	size_t
1978	ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1979	void* dst, size_t dstCapacity,
1980	const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
1981	{ / blockType == blockCompressed /
1982	const BYTE* ip = (const BYTE*)src;
1983	/ isLongOffset must be true if there are long offsets.*
1984	* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1985	* We don't expect that to be the case in 64-bit mode.
1986	* In block mode, window size is not known, so we have to be conservative.
1987	* (note: but it could be evaluated from current-lowLimit)
1988	*/
1989	ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame \|\| (dctx->fParams.windowSize > (`1ULL` << STREAM_ACCUMULATOR_MIN))));
1990	DEBUGLOG(`5`, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1991
1992	RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1993
1994	/ Decode literals section /
1995	{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
1996	DEBUGLOG(`5`, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1997	if (ZSTD_isError(code: litCSize)) return litCSize;
1998	ip += litCSize;
1999	srcSize -= litCSize;
2000	}
2001
2002	/ Build Decoding Tables /
2003	{
2004	/ These macros control at build-time which decompressor implementation*
2005	* we use. If neither is defined, we do some inspection and dispatch at
2006	* runtime.
2007	*/
2008	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2009	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2010	int usePrefetchDecoder = dctx->ddictIsCold;
2011	#endif
2012	int nbSeq;
2013	size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, nbSeqPtr: &nbSeq, src: ip, srcSize);
2014	if (ZSTD_isError(code: seqHSize)) return seqHSize;
2015	ip += seqHSize;
2016	srcSize -= seqHSize;
2017
2018	RETURN_ERROR_IF(dst == NULL && nbSeq > `0`, dstSize_tooSmall, "NULL not handled");
2019
2020	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2021	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2022	if ( !usePrefetchDecoder
2023	&& (!frame \|\| (dctx->fParams.windowSize > (`1`<<`24`)))
2024	&& (nbSeq>ADVANCED_SEQS) ) { / could probably use a larger nbSeq limit /
2025	U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(offTable: dctx->OFTptr);
2026	U32 const minShare = MEM_64bits() ? `7` : `20`; / heuristic values, correspond to 2.73% and 7.81% /
2027	usePrefetchDecoder = (shareLongOffsets >= minShare);
2028	}
2029	#endif
2030
2031	dctx->ddictIsCold = `0`;
2032
2033	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2034	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2035	if (usePrefetchDecoder)
2036	#endif
2037	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2038	return ZSTD_decompressSequencesLong(dctx, dst, maxDstSize: dstCapacity, seqStart: ip, seqSize: srcSize, nbSeq, isLongOffset, frame);
2039	#endif
2040
2041	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2042	/ else /
2043	if (dctx->litBufferLocation == ZSTD_split)
2044	return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, maxDstSize: dstCapacity, seqStart: ip, seqSize: srcSize, nbSeq, isLongOffset, frame);
2045	else
2046	return ZSTD_decompressSequences(dctx, dst, maxDstSize: dstCapacity, seqStart: ip, seqSize: srcSize, nbSeq, isLongOffset, frame);
2047	#endif
2048	}
2049	}
2050
2051
2052	void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2053	{
2054	if (dst != dctx->previousDstEnd && dstSize > `0`) { / not contiguous /
2055	dctx->dictEnd = dctx->previousDstEnd;
2056	dctx->virtualStart = (const char)dst - ((const* char)(dctx->previousDstEnd) - (const* char*)(dctx->prefixStart));
2057	dctx->prefixStart = dst;
2058	dctx->previousDstEnd = dst;
2059	}
2060	}
2061
2062
2063	size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2064	void* dst, size_t dstCapacity,
2065	const void* src, size_t srcSize)
2066	{
2067	size_t dSize;
2068	ZSTD_checkContinuity(dctx, dst, dstSize: dstCapacity);
2069	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, / frame / `0`, streaming: not_streaming);
2070	dctx->previousDstEnd = (char*)dst + dSize;
2071	return dSize;
2072	}
2073

source code of linux/lib/zstd/decompress/zstd_decompress_block.c