huf_compress.c source code [linux/lib/zstd/compress/huf_compress.c]

1	/* ******************************************************************
2	* Huffman encoder, part of New Generation Entropy library
3	* Copyright (c) Yann Collet, Facebook, Inc.
4	*
5	* You can contact the author at :
6	* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
7	* - Public forum : https://groups.google.com/forum/#!forum/lz4c
8	*
9	* This source code is licensed under both the BSD-style license (found in the
10	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
11	* in the COPYING file in the root directory of this source tree).
12	* You may select, at your option, one of the above-listed licenses.
13	****************************************************************** */
14
15	/* **************************************************************
16	* Compiler specifics
17	****************************************************************/
18
19
20	/* **************************************************************
21	* Includes
22	****************************************************************/
23	#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
24	#include "../common/compiler.h"
25	#include "../common/bitstream.h"
26	#include "hist.h"
27	#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
28	#include "../common/fse.h" /* header compression */
29	#define HUF_STATIC_LINKING_ONLY
30	#include "../common/huf.h"
31	#include "../common/error_private.h"
32
33
34	/* **************************************************************
35	* Error Management
36	****************************************************************/
37	#define HUF_isError ERR_isError
38	#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only after variable declarations */
39
40
41	/* **************************************************************
42	* Utils
43	****************************************************************/
/* HUF_optimalTableLog() :
 * Thin wrapper : forwards its arguments to FSE_optimalTableLog_internal(),
 * with the trailing `minus` argument fixed to 1.
 * @return : the value returned by FSE_optimalTableLog_internal(). */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}
48
49
50	/* *******************************************************
51	* HUF : Huffman block compression
52	*********************************************************/
53	#define HUF_WORKSPACE_MAX_ALIGNMENT 8
54
55	static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
56	{
57	size_t const mask = align - `1`;
58	size_t const rem = (size_t)workspace & mask;
59	size_t const add = (align - rem) & mask;
60	BYTE* const aligned = (BYTE*)workspace + add;
61	assert((align & (align - `1`)) == `0`); / pow 2 /
62	assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
63	if (*workspaceSizePtr >= add) {
64	assert(add < align);
65	assert(((size_t)aligned & mask) == `0`);
66	*workspaceSizePtr -= add;
67	return aligned;
68	} else {
69	*workspaceSizePtr = `0`;
70	return NULL;
71	}
72	}
73
74
/*! HUF_compressWeights() :
 *  Same as FSE_compress(), but dedicated to huff0's weights compression.
 *  The use case needs much less stack memory.
 *  Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
80	#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
81
82	typedef struct {
83	FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
84	U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
85	unsigned count[HUF_TABLELOG_MAX+`1`];
86	S16 norm[HUF_TABLELOG_MAX+`1`];
87	} HUF_CompressWeightsWksp;
88
89	static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
90	{
91	BYTE* const ostart = (BYTE*) dst;
92	BYTE* op = ostart;
93	BYTE* const oend = ostart + dstSize;
94
95	unsigned maxSymbolValue = HUF_TABLELOG_MAX;
96	U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
97	HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, workspaceSizePtr: &workspaceSize, ZSTD_ALIGNOF(U32));
98
99	if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
100
101	/ init conditions /
102	if (wtSize <= `1`) return `0`; / Not compressible /
103
104	/ Scan input and build symbol stats /
105	{ unsigned const maxCount = HIST_count_simple(count: wksp->count, maxSymbolValuePtr: &maxSymbolValue, src: weightTable, srcSize: wtSize); / never fails /
106	if (maxCount == wtSize) return `1`; / only a single symbol in src : rle /
107	if (maxCount == `1`) return `0`; / each symbol present maximum once => not compressible /
108	}
109
110	tableLog = FSE_optimalTableLog(maxTableLog: tableLog, srcSize: wtSize, maxSymbolValue);
111	CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, / useLowProbCount / `0`) );
112
113	/ Write table description header /
114	{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
115	op += hSize;
116	}
117
118	/ Compress /
119	CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
120	{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
121	if (cSize == `0`) return `0`; / not enough space for compressed data /
122	op += cSize;
123	}
124
125	return (size_t)(op-ostart);
126	}
127
128	static size_t HUF_getNbBits(HUF_CElt elt)
129	{
130	return elt & `0xFF`;
131	}
132
133	static size_t HUF_getNbBitsFast(HUF_CElt elt)
134	{
135	return elt;
136	}
137
138	static size_t HUF_getValue(HUF_CElt elt)
139	{
140	return elt & ~`0xFF`;
141	}
142
143	static size_t HUF_getValueFast(HUF_CElt elt)
144	{
145	return elt;
146	}
147
148	static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
149	{
150	assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
151	*elt = nbBits;
152	}
153
154	static void HUF_setValue(HUF_CElt* elt, size_t value)
155	{
156	size_t const nbBits = HUF_getNbBits(elt: *elt);
157	if (nbBits > `0`) {
158	assert((value >> nbBits) == `0`);
159	elt \|= value << (sizeof(HUF_CElt) `8` - nbBits);
160	}
161	}
162
163	typedef struct {
164	HUF_CompressWeightsWksp wksp;
165	BYTE bitsToWeight[HUF_TABLELOG_MAX + `1`]; / precomputed conversion table /
166	BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
167	} HUF_WriteCTableWksp;
168
169	size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
170	const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
171	void* workspace, size_t workspaceSize)
172	{
173	HUF_CElt const* const ct = CTable + `1`;
174	BYTE* op = (BYTE*)dst;
175	U32 n;
176	HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, workspaceSizePtr: &workspaceSize, ZSTD_ALIGNOF(U32));
177
178	/ check conditions /
179	if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
180	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
181
182	/ convert to weight /
183	wksp->bitsToWeight[`0`] = `0`;
184	for (n=`1`; n<huffLog+`1`; n++)
185	wksp->bitsToWeight[n] = (BYTE)(huffLog + `1` - n);
186	for (n=`0`; n<maxSymbolValue; n++)
187	wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(elt: ct[n])];
188
189	/ attempt weights compression by FSE /
190	if (maxDstSize < `1`) return ERROR(dstSize_tooSmall);
191	{ CHECK_V_F(hSize, HUF_compressWeights(op+`1`, maxDstSize-`1`, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
192	if ((hSize>`1`) & (hSize < maxSymbolValue/`2`)) { / FSE compressed /
193	op[`0`] = (BYTE)hSize;
194	return hSize+`1`;
195	} }
196
197	/ write raw values as 4-bits (max : 15) /
198	if (maxSymbolValue > (`256`-`128`)) return ERROR(GENERIC); / should not happen : likely means source cannot be compressed /
199	if (((maxSymbolValue+`1`)/`2`) + `1` > maxDstSize) return ERROR(dstSize_tooSmall); / not enough space within dst buffer /
200	op[`0`] = (BYTE)(`128` /special case/ + (maxSymbolValue-`1`));
201	wksp->huffWeight[maxSymbolValue] = `0`; / to be sure it doesn't cause msan issue in final combination /
202	for (n=`0`; n<maxSymbolValue; n+=`2`)
203	op[(n/`2`)+`1`] = (BYTE)((wksp->huffWeight[n] << `4`) + wksp->huffWeight[n+`1`]);
204	return ((maxSymbolValue+`1`)/`2`) + `1`;
205	}
206
207	/! HUF_writeCTable() :*
208	`CTable` : Huffman tree to save, using huf representation.
209	@return : size of saved CTable /*
210	size_t HUF_writeCTable (void* dst, size_t maxDstSize,
211	const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
212	{
213	HUF_WriteCTableWksp wksp;
214	return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, workspace: &wksp, workspaceSize: sizeof(wksp));
215	}
216
217
218	size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
219	{
220	BYTE huffWeight[HUF_SYMBOLVALUE_MAX + `1`]; / init not required, even though some static analyzer may complain /
221	U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + `1`]; / large enough for values from 0 to 16 /
222	U32 tableLog = `0`;
223	U32 nbSymbols = `0`;
224	HUF_CElt* const ct = CTable + `1`;
225
226	/ get symbol weights /
227	CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+`1`, rankVal, &nbSymbols, &tableLog, src, srcSize));
228	*hasZeroWeights = (rankVal[`0`] > `0`);
229
230	/ check result /
231	if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
232	if (nbSymbols > maxSymbolValuePtr+`1`) return* ERROR(maxSymbolValue_tooSmall);
233
234	CTable[`0`] = tableLog;
235
236	/ Prepare base value per rank /
237	{ U32 n, nextRankStart = `0`;
238	for (n=`1`; n<=tableLog; n++) {
239	U32 curr = nextRankStart;
240	nextRankStart += (rankVal[n] << (n-`1`));
241	rankVal[n] = curr;
242	} }
243
244	/ fill nbBits /
245	{ U32 n; for (n=`0`; n<nbSymbols; n++) {
246	const U32 w = huffWeight[n];
247	HUF_setNbBits(elt: ct + n, nbBits: (BYTE)(tableLog + `1` - w) & -(w != `0`));
248	} }
249
250	/ fill val /
251	{ U16 nbPerRank[HUF_TABLELOG_MAX+`2`] = {`0`}; / support w=0=>n=tableLog+1 /
252	U16 valPerRank[HUF_TABLELOG_MAX+`2`] = {`0`};
253	{ U32 n; for (n=`0`; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(elt: ct[n])]++; }
254	/ determine stating value per rank /
255	valPerRank[tableLog+`1`] = `0`; / for w==0 /
256	{ U16 min = `0`;
257	U32 n; for (n=tableLog; n>`0`; n--) { / start at n=tablelog <-> w=1 /
258	valPerRank[n] = min; / get starting value within each rank /
259	min += nbPerRank[n];
260	min >>= `1`;
261	} }
262	/ assign value within rank, symbol order /
263	{ U32 n; for (n=`0`; n<nbSymbols; n++) HUF_setValue(elt: ct + n, value: valPerRank[HUF_getNbBits(elt: ct[n])]++); }
264	}
265
266	*maxSymbolValuePtr = nbSymbols - `1`;
267	return readSize;
268	}
269
270	U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
271	{
272	const HUF_CElt* ct = CTable + `1`;
273	assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
274	return (U32)HUF_getNbBits(elt: ct[symbolValue]);
275	}
276
277
278	typedef struct nodeElt_s {
279	U32 count;
280	U16 parent;
281	BYTE byte;
282	BYTE nbBits;
283	} nodeElt;
284
285	/*
286	* HUF_setMaxHeight():
287	* Enforces maxNbBits on the Huffman tree described in huffNode.
288	*
289	* It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
290	* the tree to so that it is a valid canonical Huffman tree.
291	*
292	* @pre The sum of the ranks of each symbol == 2^largestBits,
293	* where largestBits == huffNode[lastNonNull].nbBits.
294	* @post The sum of the ranks of each symbol == 2^largestBits,
295	* where largestBits is the return value <= maxNbBits.
296	*
297	* @param huffNode The Huffman tree modified in place to enforce maxNbBits.
298	* @param lastNonNull The symbol with the lowest count in the Huffman tree.
299	* @param maxNbBits The maximum allowed number of bits, which the Huffman tree
300	* may not respect. After this function the Huffman tree will
301	* respect maxNbBits.
302	* @return The maximum number of bits of the Huffman tree after adjustment,
303	* necessarily no more than maxNbBits.
304	*/
305	static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
306	{
307	const U32 largestBits = huffNode[lastNonNull].nbBits;
308	/ early exit : no elt > maxNbBits, so the tree is already valid. /
309	if (largestBits <= maxNbBits) return largestBits;
310
311	/ there are several too large elements (at least >= 2) /
312	{ int totalCost = `0`;
313	const U32 baseCost = `1` << (largestBits - maxNbBits);
314	int n = (int)lastNonNull;
315
316	/ Adjust any ranks > maxNbBits to maxNbBits.*
317	* Compute totalCost, which is how far the sum of the ranks is
318	* we are over 2^largestBits after adjust the offending ranks.
319	*/
320	while (huffNode[n].nbBits > maxNbBits) {
321	totalCost += baseCost - (`1` << (largestBits - huffNode[n].nbBits));
322	huffNode[n].nbBits = (BYTE)maxNbBits;
323	n--;
324	}
325	/ n stops at huffNode[n].nbBits <= maxNbBits /
326	assert(huffNode[n].nbBits <= maxNbBits);
327	/ n end at index of smallest symbol using < maxNbBits /
328	while (huffNode[n].nbBits == maxNbBits) --n;
329
330	/ renorm totalCost from 2^largestBits to 2^maxNbBits*
331	* note : totalCost is necessarily a multiple of baseCost */
332	assert((totalCost & (baseCost - `1`)) == `0`);
333	totalCost >>= (largestBits - maxNbBits);
334	assert(totalCost > `0`);
335
336	/ repay normalized cost /
337	{ U32 const noSymbol = `0xF0F0F0F0`;
338	U32 rankLast[HUF_TABLELOG_MAX+`2`];
339
340	/ Get pos of last (smallest = lowest cum. count) symbol per rank /
341	ZSTD_memset(rankLast, `0xF0`, sizeof(rankLast));
342	{ U32 currentNbBits = maxNbBits;
343	int pos;
344	for (pos=n ; pos >= `0`; pos--) {
345	if (huffNode[pos].nbBits >= currentNbBits) continue;
346	currentNbBits = huffNode[pos].nbBits; / < maxNbBits /
347	rankLast[maxNbBits-currentNbBits] = (U32)pos;
348	} }
349
350	while (totalCost > `0`) {
351	/ Try to reduce the next power of 2 above totalCost because we*
352	* gain back half the rank.
353	*/
354	U32 nBitsToDecrease = BIT_highbit32(val: (U32)totalCost) + `1`;
355	for ( ; nBitsToDecrease > `1`; nBitsToDecrease--) {
356	U32 const highPos = rankLast[nBitsToDecrease];
357	U32 const lowPos = rankLast[nBitsToDecrease-`1`];
358	if (highPos == noSymbol) continue;
359	/ Decrease highPos if no symbols of lowPos or if it is*
360	* not cheaper to remove 2 lowPos than highPos.
361	*/
362	if (lowPos == noSymbol) break;
363	{ U32 const highTotal = huffNode[highPos].count;
364	U32 const lowTotal = `2` * huffNode[lowPos].count;
365	if (highTotal <= lowTotal) break;
366	} }
367	/ only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) /
368	assert(rankLast[nBitsToDecrease] != noSymbol \|\| nBitsToDecrease == `1`);
369	/ HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary /
370	while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
371	nBitsToDecrease++;
372	assert(rankLast[nBitsToDecrease] != noSymbol);
373	/ Increase the number of bits to gain back half the rank cost. /
374	totalCost -= `1` << (nBitsToDecrease-`1`);
375	huffNode[rankLast[nBitsToDecrease]].nbBits++;
376
377	/ Fix up the new rank.*
378	* If the new rank was empty, this symbol is now its smallest.
379	* Otherwise, this symbol will be the largest in the new rank so no adjustment.
380	*/
381	if (rankLast[nBitsToDecrease-`1`] == noSymbol)
382	rankLast[nBitsToDecrease-`1`] = rankLast[nBitsToDecrease];
383	/ Fix up the old rank.*
384	* If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
385	* it must be the only symbol in its rank, so the old rank now has no symbols.
386	* Otherwise, since the Huffman nodes are sorted by count, the previous position is now
387	* the smallest node in the rank. If the previous position belongs to a different rank,
388	* then the rank is now empty.
389	*/
390	if (rankLast[nBitsToDecrease] == `0`) / special case, reached largest symbol /
391	rankLast[nBitsToDecrease] = noSymbol;
392	else {
393	rankLast[nBitsToDecrease]--;
394	if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
395	rankLast[nBitsToDecrease] = noSymbol; / this rank is now empty /
396	}
397	} / while (totalCost > 0) /
398
399	/ If we've removed too much weight, then we have to add it back.*
400	* To avoid overshooting again, we only adjust the smallest rank.
401	* We take the largest nodes from the lowest rank 0 and move them
402	* to rank 1. There's guaranteed to be enough rank 0 symbols because
403	* TODO.
404	*/
405	while (totalCost < `0`) { / Sometimes, cost correction overshoot /
406	/ special case : no rank 1 symbol (using maxNbBits-1);*
407	* let's create one from largest rank 0 (using maxNbBits).
408	*/
409	if (rankLast[`1`] == noSymbol) {
410	while (huffNode[n].nbBits == maxNbBits) n--;
411	huffNode[n+`1`].nbBits--;
412	assert(n >= `0`);
413	rankLast[`1`] = (U32)(n+`1`);
414	totalCost++;
415	continue;
416	}
417	huffNode[ rankLast[`1`] + `1` ].nbBits--;
418	rankLast[`1`]++;
419	totalCost ++;
420	}
421	} / repay normalized cost /
422	} / there are several too large elements (at least >= 2) /
423
424	return maxNbBits;
425	}
426
427	typedef struct {
428	U16 base;
429	U16 curr;
430	} rankPos;
431
432	typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
433
434	/ Number of buckets available for HUF_sort() /
435	#define RANK_POSITION_TABLE_SIZE 192
436
437	typedef struct {
438	huffNodeTable huffNodeTbl;
439	rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
440	} HUF_buildCTable_wksp_tables;
441
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 *
 * NOTE(review): if BIT_highbit32() returns the index of the highest set bit,
 * the cutoff below evaluates to 158 + 7 == 165 rather than 166 - confirm.
 *
 * The macro bodies are fully parenthesized so they expand safely inside any expression.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
453
454	/ Return the appropriate bucket index for a given count. See definition of*
455	* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
456	*/
457	static U32 HUF_getIndex(U32 const count) {
458	return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
459	? count
460	: BIT_highbit32(val: count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
461	}
462
463	/ Helper swap function for HUF_quickSortPartition() /
464	static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
465	nodeElt tmp = *a;
466	a = b;
467	*b = tmp;
468	}
469
470	/ Returns 0 if the huffNode array is not sorted by descending count /
471	MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
472	U32 i;
473	for (i = `1`; i < maxSymbolValue1; ++i) {
474	if (huffNode[i].count > huffNode[i-`1`].count) {
475	return `0`;
476	}
477	}
478	return `1`;
479	}
480
481	/ Insertion sort by descending order /
482	HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
483	int i;
484	int const size = high-low+`1`;
485	huffNode += low;
486	for (i = `1`; i < size; ++i) {
487	nodeElt const key = huffNode[i];
488	int j = i - `1`;
489	while (j >= `0` && huffNode[j].count < key.count) {
490	huffNode[j + `1`] = huffNode[j];
491	j--;
492	}
493	huffNode[j + `1`] = key;
494	}
495	}
496
497	/ Pivot helper function for quicksort. /
498	static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
499	/ Simply select rightmost element as pivot. "Better" selectors like*
500	* median-of-three don't experimentally appear to have any benefit.
501	*/
502	U32 const pivot = arr[high].count;
503	int i = low - `1`;
504	int j = low;
505	for ( ; j < high; j++) {
506	if (arr[j].count > pivot) {
507	i++;
508	HUF_swapNodes(a: &arr[i], b: &arr[j]);
509	}
510	}
511	HUF_swapNodes(a: &arr[i + `1`], b: &arr[high]);
512	return i + `1`;
513	}
514
515	/ Classic quicksort by descending with partially iterative calls*
516	* to reduce worst case callstack size.
517	*/
518	static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
519	int const kInsertionSortThreshold = `8`;
520	if (high - low < kInsertionSortThreshold) {
521	HUF_insertionSort(huffNode: arr, low, high);
522	return;
523	}
524	while (low < high) {
525	int const idx = HUF_quickSortPartition(arr, low, high);
526	if (idx - low < high - idx) {
527	HUF_simpleQuickSort(arr, low, high: idx - `1`);
528	low = idx + `1`;
529	} else {
530	HUF_simpleQuickSort(arr, low: idx + `1`, high);
531	high = idx - `1`;
532	}
533	}
534	}
535
536	/*
537	* HUF_sort():
538	* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
539	* This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
540	*
541	* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
542	* Must have (maxSymbolValue + 1) entries.
543	* @param[in] count Histogram of the symbols.
544	* @param[in] maxSymbolValue Maximum symbol value.
545	* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
546	*/
547	static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
548	U32 n;
549	U32 const maxSymbolValue1 = maxSymbolValue+`1`;
550
551	/ Compute base and set curr to base.*
552	* For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
553	* See HUF_getIndex to see bucketing strategy.
554	* We attribute each symbol to lowerRank's base value, because we want to know where
555	* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
556	*/
557	ZSTD_memset(rankPosition, `0`, sizeof(rankPosition) RANK_POSITION_TABLE_SIZE);
558	for (n = `0`; n < maxSymbolValue1; ++n) {
559	U32 lowerRank = HUF_getIndex(count: count[n]);
560	assert(lowerRank < RANK_POSITION_TABLE_SIZE - `1`);
561	rankPosition[lowerRank].base++;
562	}
563
564	assert(rankPosition[RANK_POSITION_TABLE_SIZE - `1`].base == `0`);
565	/ Set up the rankPosition table /
566	for (n = RANK_POSITION_TABLE_SIZE - `1`; n > `0`; --n) {
567	rankPosition[n-`1`].base += rankPosition[n].base;
568	rankPosition[n-`1`].curr = rankPosition[n-`1`].base;
569	}
570
571	/ Insert each symbol into their appropriate bucket, setting up rankPosition table. /
572	for (n = `0`; n < maxSymbolValue1; ++n) {
573	U32 const c = count[n];
574	U32 const r = HUF_getIndex(count: c) + `1`;
575	U32 const pos = rankPosition[r].curr++;
576	assert(pos < maxSymbolValue1);
577	huffNode[pos].count = c;
578	huffNode[pos].byte = (BYTE)n;
579	}
580
581	/ Sort each bucket. /
582	for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - `1`; ++n) {
583	U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
584	U32 const bucketStartIdx = rankPosition[n].base;
585	if (bucketSize > `1`) {
586	assert(bucketStartIdx < maxSymbolValue1);
587	HUF_simpleQuickSort(arr: huffNode + bucketStartIdx, low: `0`, high: bucketSize-`1`);
588	}
589	}
590
591	assert(HUF_isSorted(huffNode, maxSymbolValue1));
592	}
593
/* HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
598	#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
599
600	/ HUF_buildTree():*
601	* Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
602	*
603	* @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
604	* @param maxSymbolValue The maximum symbol value.
605	* @return The smallest node in the Huffman tree (by count).
606	*/
607	static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
608	{
609	nodeElt* const huffNode0 = huffNode - `1`;
610	int nonNullRank;
611	int lowS, lowN;
612	int nodeNb = STARTNODE;
613	int n, nodeRoot;
614	/ init for parents /
615	nonNullRank = (int)maxSymbolValue;
616	while(huffNode[nonNullRank].count == `0`) nonNullRank--;
617	lowS = nonNullRank; nodeRoot = nodeNb + lowS - `1`; lowN = nodeNb;
618	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-`1`].count;
619	huffNode[lowS].parent = huffNode[lowS-`1`].parent = (U16)nodeNb;
620	nodeNb++; lowS-=`2`;
621	for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(`1U`<<`30`);
622	huffNode0[`0`].count = (U32)(`1U`<<`31`); / fake entry, strong barrier /
623
624	/ create parents /
625	while (nodeNb <= nodeRoot) {
626	int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
627	int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
628	huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
629	huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
630	nodeNb++;
631	}
632
633	/ distribute weights (unlimited tree height) /
634	huffNode[nodeRoot].nbBits = `0`;
635	for (n=nodeRoot-`1`; n>=STARTNODE; n--)
636	huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + `1`;
637	for (n=`0`; n<=nonNullRank; n++)
638	huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + `1`;
639
640	return nonNullRank;
641	}
642
643	/*
644	* HUF_buildCTableFromTree():
645	* Build the CTable given the Huffman tree in huffNode.
646	*
647	* @param[out] CTable The output Huffman CTable.
648	* @param huffNode The Huffman tree.
649	* @param nonNullRank The last and smallest node in the Huffman tree.
650	* @param maxSymbolValue The maximum symbol value.
651	* @param maxNbBits The exact maximum number of bits used in the Huffman tree.
652	*/
653	static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
654	{
655	HUF_CElt* const ct = CTable + `1`;
656	/ fill result into ctable (val, nbBits) /
657	int n;
658	U16 nbPerRank[HUF_TABLELOG_MAX+`1`] = {`0`};
659	U16 valPerRank[HUF_TABLELOG_MAX+`1`] = {`0`};
660	int const alphabetSize = (int)(maxSymbolValue + `1`);
661	for (n=`0`; n<=nonNullRank; n++)
662	nbPerRank[huffNode[n].nbBits]++;
663	/ determine starting value per rank /
664	{ U16 min = `0`;
665	for (n=(int)maxNbBits; n>`0`; n--) {
666	valPerRank[n] = min; / get starting value within each rank /
667	min += nbPerRank[n];
668	min >>= `1`;
669	} }
670	for (n=`0`; n<alphabetSize; n++)
671	HUF_setNbBits(elt: ct + huffNode[n].byte, nbBits: huffNode[n].nbBits); / push nbBits per symbol, symbol order /
672	for (n=`0`; n<alphabetSize; n++)
673	HUF_setValue(elt: ct + n, value: valPerRank[HUF_getNbBits(elt: ct[n])]++); / assign value within rank, symbol order /
674	CTable[`0`] = maxNbBits;
675	}
676
677	size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
678	{
679	HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workspace: workSpace, workspaceSizePtr: &wkspSize, ZSTD_ALIGNOF(U32));
680	nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
681	nodeElt* const huffNode = huffNode0+`1`;
682	int nonNullRank;
683
684	/ safety checks /
685	if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
686	return ERROR(workSpace_tooSmall);
687	if (maxNbBits == `0`) maxNbBits = HUF_TABLELOG_DEFAULT;
688	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
689	return ERROR(maxSymbolValue_tooLarge);
690	ZSTD_memset(huffNode0, `0`, sizeof(huffNodeTable));
691
692	/ sort, decreasing order /
693	HUF_sort(huffNode, count, maxSymbolValue, rankPosition: wksp_tables->rankPosition);
694
695	/ build tree /
696	nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
697
698	/ enforce maxTableLog /
699	maxNbBits = HUF_setMaxHeight(huffNode, lastNonNull: (U32)nonNullRank, maxNbBits);
700	if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); / check fit into table /
701
702	HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
703
704	return maxNbBits;
705	}
706
707	size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
708	{
709	HUF_CElt const* ct = CTable + `1`;
710	size_t nbBits = `0`;
711	int s;
712	for (s = `0`; s <= (int)maxSymbolValue; ++s) {
713	nbBits += HUF_getNbBits(elt: ct[s]) * count[s];
714	}
715	return nbBits >> `3`;
716	}
717
718	int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
719	HUF_CElt const* ct = CTable + `1`;
720	int bad = `0`;
721	int s;
722	for (s = `0`; s <= (int)maxSymbolValue; ++s) {
723	bad \|= (count[s] != `0`) & (HUF_getNbBits(elt: ct[s]) == `0`);
724	}
725	return !bad;
726	}
727
/* HUF_compressBound() :
 * Function form of the HUF_COMPRESSBOUND() macro. */
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
729
/* HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */
747
748	#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
749
750	typedef struct {
751	size_t bitContainer[`2`];
752	size_t bitPos[`2`];
753
754	BYTE* startPtr;
755	BYTE* ptr;
756	BYTE* endPtr;
757	} HUF_CStream_t;
758
759	/! HUF_initCStream():*
760	* Initializes the bitstream.
761	* @returns 0 or an error code.
762	*/
763	static size_t HUF_initCStream(HUF_CStream_t* bitC,
764	void* startPtr, size_t dstCapacity)
765	{
766	ZSTD_memset(bitC, `0`, sizeof(*bitC));
767	bitC->startPtr = (BYTE*)startPtr;
768	bitC->ptr = bitC->startPtr;
769	bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[`0`]);
770	if (dstCapacity <= sizeof(bitC->bitContainer[`0`])) return ERROR(dstSize_tooSmall);
771	return `0`;
772	}
773
774	/! HUF_addBits():*
775	* Adds the symbol stored in HUF_CElt elt to the bitstream.
776	*
777	* @param elt The element we're adding. This is a (nbBits, value) pair.
778	* See the HUF_CStream_t docs for the format.
779	* @param idx Insert into the bitstream at this idx.
780	* @param kFast This is a template parameter. If the bitstream is guaranteed
781	* to have at least 4 unused bits after this call it may be 1,
782	* otherwise it must be 0. HUF_addBits() is faster when fast is set.
783	*/
784	FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
785	{
786	assert(idx <= `1`);
787	assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
788	/ This is efficient on x86-64 with BMI2 because shrx*
789	* only reads the low 6 bits of the register. The compiler
790	* knows this and elides the mask. When fast is set,
791	* every operation can use the same value loaded from elt.
792	*/
793	bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
794	bitC->bitContainer[idx] \|= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
795	/ We only read the low 8 bits of bitC->bitPos[idx] so it*
796	* doesn't matter that the high bits have noise from the value.
797	*/
798	bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
799	assert((bitC->bitPos[idx] & `0xFF`) <= HUF_BITS_IN_CONTAINER);
800	/ The last 4-bits of elt are dirty if fast is set,*
801	* so we must not be overwriting bits that have already been
802	* inserted into the bit container.
803	*/
804	#if DEBUGLEVEL >= 1
805	{
806	size_t const nbBits = HUF_getNbBits(elt);
807	size_t const dirtyBits = nbBits == `0` ? `0` : BIT_highbit32((U32)nbBits) + `1`;
808	(void)dirtyBits;
809	/ Middle bits are 0. /
810	assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == `0`);
811	/ We didn't overwrite any bits in the bit container. /
812	assert(!kFast \|\| (bitC->bitPos[idx] & `0xFF`) <= HUF_BITS_IN_CONTAINER);
813	(void)dirtyBits;
814	}
815	#endif
816	}
817
818	FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
819	{
820	bitC->bitContainer[`1`] = `0`;
821	bitC->bitPos[`1`] = `0`;
822	}
823
824	/! HUF_mergeIndex1() :*
825	* Merges the bit container @ index 1 into the bit container @ index 0
826	* and zeros the bit container @ index 1.
827	*/
828	FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
829	{
830	assert((bitC->bitPos[`1`] & `0xFF`) < HUF_BITS_IN_CONTAINER);
831	bitC->bitContainer[`0`] >>= (bitC->bitPos[`1`] & `0xFF`);
832	bitC->bitContainer[`0`] \|= bitC->bitContainer[`1`];
833	bitC->bitPos[`0`] += bitC->bitPos[`1`];
834	assert((bitC->bitPos[`0`] & `0xFF`) <= HUF_BITS_IN_CONTAINER);
835	}
836
837	/! HUF_flushBits() :*
838	* Flushes the bits in the bit container @ index 0.
839	*
840	* @post bitPos will be < 8.
841	* @param kFast If kFast is set then we must know a-priori that
842	* the bit container will not overflow.
843	*/
844	FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
845	{
846	/ The upper bits of bitPos are noisy, so we must mask by 0xFF. /
847	size_t const nbBits = bitC->bitPos[`0`] & `0xFF`;
848	size_t const nbBytes = nbBits >> `3`;
849	/ The top nbBits bits of bitContainer are the ones we need. /
850	size_t const bitContainer = bitC->bitContainer[`0`] >> (HUF_BITS_IN_CONTAINER - nbBits);
851	/ Mask bitPos to account for the bytes we consumed. /
852	bitC->bitPos[`0`] &= `7`;
853	assert(nbBits > `0`);
854	assert(nbBits <= sizeof(bitC->bitContainer[`0`]) * `8`);
855	assert(bitC->ptr <= bitC->endPtr);
856	MEM_writeLEST(memPtr: bitC->ptr, val: bitContainer);
857	bitC->ptr += nbBytes;
858	assert(!kFast \|\| bitC->ptr <= bitC->endPtr);
859	if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
860	/ bitContainer doesn't need to be modified because the leftover*
861	* bits are already the top bitPos bits. And we don't care about
862	* noise in the lower values.
863	*/
864	}
865
866	/! HUF_endMark()*
867	* @returns The Huffman stream end mark: A 1-bit value = 1.
868	*/
869	static HUF_CElt HUF_endMark(void)
870	{
871	HUF_CElt endMark;
872	HUF_setNbBits(elt: &endMark, nbBits: `1`);
873	HUF_setValue(elt: &endMark, value: `1`);
874	return endMark;
875	}
876
877	/! HUF_closeCStream() :*
878	* @return Size of CStream, in bytes,
879	* or 0 if it could not fit into dstBuffer */
880	static size_t HUF_closeCStream(HUF_CStream_t* bitC)
881	{
882	HUF_addBits(bitC, elt: HUF_endMark(), / idx / `0`, / kFast / `0`);
883	HUF_flushBits(bitC, / kFast / `0`);
884	{
885	size_t const nbBits = bitC->bitPos[`0`] & `0xFF`;
886	if (bitC->ptr >= bitC->endPtr) return `0`; / overflow detected /
887	return (bitC->ptr - bitC->startPtr) + (nbBits > `0`);
888	}
889	}
890
891	FORCE_INLINE_TEMPLATE void
892	HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
893	{
894	HUF_addBits(bitC: bitCPtr, elt: CTable[symbol], idx, kFast: fast);
895	}
896
897	FORCE_INLINE_TEMPLATE void
898	HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
899	const BYTE* ip, size_t srcSize,
900	const HUF_CElt* ct,
901	int kUnroll, int kFastFlush, int kLastFast)
902	{
903	/ Join to kUnroll /
904	int n = (int)srcSize;
905	int rem = n % kUnroll;
906	if (rem > `0`) {
907	for (; rem > `0`; --rem) {
908	HUF_encodeSymbol(bitCPtr: bitC, symbol: ip[--n], CTable: ct, idx: `0`, / fast / `0`);
909	}
910	HUF_flushBits(bitC, kFast: kFastFlush);
911	}
912	assert(n % kUnroll == `0`);
913
914	/ Join to 2 * kUnroll /
915	if (n % (`2` * kUnroll)) {
916	int u;
917	for (u = `1`; u < kUnroll; ++u) {
918	HUF_encodeSymbol(bitCPtr: bitC, symbol: ip[n - u], CTable: ct, idx: `0`, fast: `1`);
919	}
920	HUF_encodeSymbol(bitCPtr: bitC, symbol: ip[n - kUnroll], CTable: ct, idx: `0`, fast: kLastFast);
921	HUF_flushBits(bitC, kFast: kFastFlush);
922	n -= kUnroll;
923	}
924	assert(n % (`2` * kUnroll) == `0`);
925
926	for (; n>`0`; n-= `2` * kUnroll) {
927	/ Encode kUnroll symbols into the bitstream @ index 0. /
928	int u;
929	for (u = `1`; u < kUnroll; ++u) {
930	HUF_encodeSymbol(bitCPtr: bitC, symbol: ip[n - u], CTable: ct, / idx / `0`, / fast / `1`);
931	}
932	HUF_encodeSymbol(bitCPtr: bitC, symbol: ip[n - kUnroll], CTable: ct, / idx / `0`, / fast / kLastFast);
933	HUF_flushBits(bitC, kFast: kFastFlush);
934	/ Encode kUnroll symbols into the bitstream @ index 1.*
935	* This allows us to start filling the bit container
936	* without any data dependencies.
937	*/
938	HUF_zeroIndex1(bitC);
939	for (u = `1`; u < kUnroll; ++u) {
940	HUF_encodeSymbol(bitCPtr: bitC, symbol: ip[n - kUnroll - u], CTable: ct, / idx / `1`, / fast / `1`);
941	}
942	HUF_encodeSymbol(bitCPtr: bitC, symbol: ip[n - kUnroll - kUnroll], CTable: ct, / idx / `1`, / fast / kLastFast);
943	/ Merge bitstream @ index 1 into the bitstream @ index 0 /
944	HUF_mergeIndex1(bitC);
945	HUF_flushBits(bitC, kFast: kFastFlush);
946	}
947	assert(n == `0`);
948
949	}
950
/*
 * Returns a tight upper bound on the output space needed by Huffman
 * with 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 *
 * The bound is (srcSize * tableLog) / 8 bytes, rounded down, plus 8.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}
960
961
962	FORCE_INLINE_TEMPLATE size_t
963	HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
964	const void* src, size_t srcSize,
965	const HUF_CElt* CTable)
966	{
967	U32 const tableLog = (U32)CTable[`0`];
968	HUF_CElt const* ct = CTable + `1`;
969	const BYTE* ip = (const BYTE*) src;
970	BYTE* const ostart = (BYTE*)dst;
971	BYTE* const oend = ostart + dstSize;
972	BYTE* op = ostart;
973	HUF_CStream_t bitC;
974
975	/ init /
976	if (dstSize < `8`) return `0`; / not enough space to compress /
977	{ size_t const initErr = HUF_initCStream(bitC: &bitC, startPtr: op, dstCapacity: (size_t)(oend-op));
978	if (HUF_isError(code: initErr)) return `0`; }
979
980	if (dstSize < HUF_tightCompressBound(srcSize, tableLog: (size_t)tableLog) \|\| tableLog > `11`)
981	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / MEM_32bits() ? `2` : `4`, / kFast / kFastFlush: `0`, / kLastFast / `0`);
982	else {
983	if (MEM_32bits()) {
984	switch (tableLog) {
985	case `11`:
986	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `2`, / kFastFlush / `1`, / kLastFast / `0`);
987	break;
988	case `10`: ZSTD_FALLTHROUGH;
989	case `9`: ZSTD_FALLTHROUGH;
990	case `8`:
991	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `2`, / kFastFlush / `1`, / kLastFast / `1`);
992	break;
993	case `7`: ZSTD_FALLTHROUGH;
994	default:
995	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `3`, / kFastFlush / `1`, / kLastFast / `1`);
996	break;
997	}
998	} else {
999	switch (tableLog) {
1000	case `11`:
1001	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `5`, / kFastFlush / `1`, / kLastFast / `0`);
1002	break;
1003	case `10`:
1004	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `5`, / kFastFlush / `1`, / kLastFast / `1`);
1005	break;
1006	case `9`:
1007	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `6`, / kFastFlush / `1`, / kLastFast / `0`);
1008	break;
1009	case `8`:
1010	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `7`, / kFastFlush / `1`, / kLastFast / `0`);
1011	break;
1012	case `7`:
1013	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `8`, / kFastFlush / `1`, / kLastFast / `0`);
1014	break;
1015	case `6`: ZSTD_FALLTHROUGH;
1016	default:
1017	HUF_compress1X_usingCTable_internal_body_loop(bitC: &bitC, ip, srcSize, ct, / kUnroll / `9`, / kFastFlush / `1`, / kLastFast / `1`);
1018	break;
1019	}
1020	}
1021	}
1022	assert(bitC.ptr <= bitC.endPtr);
1023
1024	return HUF_closeCStream(bitC: &bitC);
1025	}
1026
1027	#if DYNAMIC_BMI2
1028
1029	static BMI2_TARGET_ATTRIBUTE size_t
1030	HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
1031	const void* src, size_t srcSize,
1032	const HUF_CElt* CTable)
1033	{
1034	return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
1035	}
1036
1037	static size_t
1038	HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
1039	const void* src, size_t srcSize,
1040	const HUF_CElt* CTable)
1041	{
1042	return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
1043	}
1044
1045	static size_t
1046	HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
1047	const void* src, size_t srcSize,
1048	const HUF_CElt* CTable, const int bmi2)
1049	{
1050	if (bmi2) {
1051	return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
1052	}
1053	return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
1054	}
1055
1056	#else
1057
1058	static size_t
1059	HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
1060	const void* src, size_t srcSize,
1061	const HUF_CElt* CTable, const int bmi2)
1062	{
1063	(void)bmi2;
1064	return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
1065	}
1066
1067	#endif
1068
1069	size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1070	{
1071	return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, / bmi2 / `0`);
1072	}
1073
1074	size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
1075	{
1076	return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
1077	}
1078
1079	static size_t
1080	HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
1081	const void* src, size_t srcSize,
1082	const HUF_CElt* CTable, int bmi2)
1083	{
1084	size_t const segmentSize = (srcSize+`3`)/`4`; / first 3 segments /
1085	const BYTE* ip = (const BYTE*) src;
1086	const BYTE* const iend = ip + srcSize;
1087	BYTE* const ostart = (BYTE*) dst;
1088	BYTE* const oend = ostart + dstSize;
1089	BYTE* op = ostart;
1090
1091	if (dstSize < `6` + `1` + `1` + `1` + `8`) return `0`; / minimum space to compress successfully /
1092	if (srcSize < `12`) return `0`; / no saving possible : too small input /
1093	op += `6`; / jumpTable /
1094
1095	assert(op <= oend);
1096	{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
1097	if (cSize == `0` \|\| cSize > `65535`) return `0`;
1098	MEM_writeLE16(memPtr: ostart, val: (U16)cSize);
1099	op += cSize;
1100	}
1101
1102	ip += segmentSize;
1103	assert(op <= oend);
1104	{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
1105	if (cSize == `0` \|\| cSize > `65535`) return `0`;
1106	MEM_writeLE16(memPtr: ostart+`2`, val: (U16)cSize);
1107	op += cSize;
1108	}
1109
1110	ip += segmentSize;
1111	assert(op <= oend);
1112	{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
1113	if (cSize == `0` \|\| cSize > `65535`) return `0`;
1114	MEM_writeLE16(memPtr: ostart+`4`, val: (U16)cSize);
1115	op += cSize;
1116	}
1117
1118	ip += segmentSize;
1119	assert(op <= oend);
1120	assert(ip <= iend);
1121	{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
1122	if (cSize == `0` \|\| cSize > `65535`) return `0`;
1123	op += cSize;
1124	}
1125
1126	return (size_t)(op-ostart);
1127	}
1128
1129	size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1130	{
1131	return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, / bmi2 / `0`);
1132	}
1133
1134	size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
1135	{
1136	return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
1137	}
1138
/* Selects between the single-stream (1X) and four-stream (4X) encoders. */
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
1140
1141	static size_t HUF_compressCTable_internal(
1142	BYTE* const ostart, BYTE* op, BYTE* const oend,
1143	const void* src, size_t srcSize,
1144	HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
1145	{
1146	size_t const cSize = (nbStreams==HUF_singleStream) ?
1147	HUF_compress1X_usingCTable_internal(dst: op, dstSize: (size_t)(oend - op), src, srcSize, CTable, bmi2) :
1148	HUF_compress4X_usingCTable_internal(dst: op, dstSize: (size_t)(oend - op), src, srcSize, CTable, bmi2);
1149	if (HUF_isError(code: cSize)) { return cSize; }
1150	if (cSize==`0`) { return `0`; } / uncompressible /
1151	op += cSize;
1152	/ check compressibility /
1153	assert(op >= ostart);
1154	if ((size_t)(op-ostart) >= srcSize-`1`) { return `0`; }
1155	return (size_t)(op-ostart);
1156	}
1157
1158	typedef struct {
1159	unsigned count[HUF_SYMBOLVALUE_MAX + `1`];
1160	HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
1161	union {
1162	HUF_buildCTable_wksp_tables buildCTable_wksp;
1163	HUF_WriteCTableWksp writeCTable_wksp;
1164	U32 hist_wksp[HIST_WKSP_SIZE_U32];
1165	} wksps;
1166	} HUF_compress_tables_t;
1167
/* Pre-sampling parameters for inputs flagged as likely uncompressible:
 * SAMPLE_SIZE bytes are histogrammed at each end of the input, and the
 * pre-check only runs when the input is at least SAMPLE_SIZE * SAMPLE_RATIO bytes.
 */
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */
1170
1171	/ HUF_compress_internal() :*
1172	* `workSpace_align4` must be aligned on 4-bytes boundaries,
1173	* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
1174	static size_t
1175	HUF_compress_internal (void* dst, size_t dstSize,
1176	const void* src, size_t srcSize,
1177	unsigned maxSymbolValue, unsigned huffLog,
1178	HUF_nbStreams_e nbStreams,
1179	void* workSpace, size_t wkspSize,
1180	HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
1181	const int bmi2, unsigned suspectUncompressible)
1182	{
1183	HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workspace: workSpace, workspaceSizePtr: &wkspSize, ZSTD_ALIGNOF(size_t));
1184	BYTE* const ostart = (BYTE*)dst;
1185	BYTE* const oend = ostart + dstSize;
1186	BYTE* op = ostart;
1187
1188	HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
1189
1190	/ checks & inits /
1191	if (wkspSize < sizeof(table)) return* ERROR(workSpace_tooSmall);
1192	if (!srcSize) return `0`; / Uncompressed /
1193	if (!dstSize) return `0`; / cannot fit anything within dst budget /
1194	if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); / current block size limit /
1195	if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
1196	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
1197	if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
1198	if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
1199
1200	/ Heuristic : If old table is valid, use it for small inputs /
1201	if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
1202	return HUF_compressCTable_internal(ostart, op, oend,
1203	src, srcSize,
1204	nbStreams, CTable: oldHufTable, bmi2);
1205	}
1206
1207	/ If uncompressible data is suspected, do a smaller sampling first /
1208	DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= `2`);
1209	if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
1210	size_t largestTotal = `0`;
1211	{ unsigned maxSymbolValueBegin = maxSymbolValue;
1212	CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1213	largestTotal += largestBegin;
1214	}
1215	{ unsigned maxSymbolValueEnd = maxSymbolValue;
1216	CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1217	largestTotal += largestEnd;
1218	}
1219	if (largestTotal <= ((`2` * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> `7`)+`4`) return `0`; / heuristic : probably not compressible enough /
1220	}
1221
1222	/ Scan input and build symbol stats /
1223	{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE)src, srcSize, table->wksps.hist_wksp, sizeof*(table->wksps.hist_wksp)) );
1224	if (largest == srcSize) { ostart = ((const* BYTE)src)[`0`]; return* `1`; } / single symbol, rle /
1225	if (largest <= (srcSize >> `7`)+`4`) return `0`; / heuristic : probably not compressible enough /
1226	}
1227
1228	/ Check validity of previous table /
1229	if ( repeat
1230	&& *repeat == HUF_repeat_check
1231	&& !HUF_validateCTable(CTable: oldHufTable, count: table->count, maxSymbolValue)) {
1232	*repeat = HUF_repeat_none;
1233	}
1234	/ Heuristic : use existing table for small inputs /
1235	if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
1236	return HUF_compressCTable_internal(ostart, op, oend,
1237	src, srcSize,
1238	nbStreams, CTable: oldHufTable, bmi2);
1239	}
1240
1241	/ Build Huffman Tree /
1242	huffLog = HUF_optimalTableLog(maxTableLog: huffLog, srcSize, maxSymbolValue);
1243	{ size_t const maxBits = HUF_buildCTable_wksp(CTable: table->CTable, count: table->count,
1244	maxSymbolValue, maxNbBits: huffLog,
1245	workSpace: &table->wksps.buildCTable_wksp, wkspSize: sizeof(table->wksps.buildCTable_wksp));
1246	CHECK_F(maxBits);
1247	huffLog = (U32)maxBits;
1248	}
1249	/ Zero unused symbols in CTable, so we can check it for validity /
1250	{
1251	size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
1252	size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
1253	ZSTD_memset(table->CTable + ctableSize, `0`, unusedSize);
1254	}
1255
1256	/ Write table description header /
1257	{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
1258	&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
1259	/ Check if using previous huffman table is beneficial /
1260	if (repeat && *repeat != HUF_repeat_none) {
1261	size_t const oldSize = HUF_estimateCompressedSize(CTable: oldHufTable, count: table->count, maxSymbolValue);
1262	size_t const newSize = HUF_estimateCompressedSize(CTable: table->CTable, count: table->count, maxSymbolValue);
1263	if (oldSize <= hSize + newSize \|\| hSize + `12` >= srcSize) {
1264	return HUF_compressCTable_internal(ostart, op, oend,
1265	src, srcSize,
1266	nbStreams, CTable: oldHufTable, bmi2);
1267	} }
1268
1269	/ Use the new huffman table /
1270	if (hSize + `12ul` >= srcSize) { return `0`; }
1271	op += hSize;
1272	if (repeat) { *repeat = HUF_repeat_none; }
1273	if (oldHufTable)
1274	ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); / Save new table /
1275	}
1276	return HUF_compressCTable_internal(ostart, op, oend,
1277	src, srcSize,
1278	nbStreams, CTable: table->CTable, bmi2);
1279	}
1280
1281
1282	size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
1283	const void* src, size_t srcSize,
1284	unsigned maxSymbolValue, unsigned huffLog,
1285	void* workSpace, size_t wkspSize)
1286	{
1287	return HUF_compress_internal(dst, dstSize, src, srcSize,
1288	maxSymbolValue, huffLog, nbStreams: HUF_singleStream,
1289	workSpace, wkspSize,
1290	NULL, NULL, preferRepeat: `0`, bmi2: `0` /bmi2/, suspectUncompressible: `0`);
1291	}
1292
1293	size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
1294	const void* src, size_t srcSize,
1295	unsigned maxSymbolValue, unsigned huffLog,
1296	void* workSpace, size_t wkspSize,
1297	HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
1298	int bmi2, unsigned suspectUncompressible)
1299	{
1300	return HUF_compress_internal(dst, dstSize, src, srcSize,
1301	maxSymbolValue, huffLog, nbStreams: HUF_singleStream,
1302	workSpace, wkspSize, oldHufTable: hufTable,
1303	repeat, preferRepeat, bmi2, suspectUncompressible);
1304	}
1305
1306	/ HUF_compress4X_repeat():*
1307	* compress input using 4 streams.
1308	* provide workspace to generate compression tables */
1309	size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
1310	const void* src, size_t srcSize,
1311	unsigned maxSymbolValue, unsigned huffLog,
1312	void* workSpace, size_t wkspSize)
1313	{
1314	return HUF_compress_internal(dst, dstSize, src, srcSize,
1315	maxSymbolValue, huffLog, nbStreams: HUF_fourStreams,
1316	workSpace, wkspSize,
1317	NULL, NULL, preferRepeat: `0`, bmi2: `0` /bmi2/, suspectUncompressible: `0`);
1318	}
1319
1320	/ HUF_compress4X_repeat():*
1321	* compress input using 4 streams.
1322	* consider skipping quickly
1323	* re-use an existing huffman compression table */
1324	size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
1325	const void* src, size_t srcSize,
1326	unsigned maxSymbolValue, unsigned huffLog,
1327	void* workSpace, size_t wkspSize,
1328	HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
1329	{
1330	return HUF_compress_internal(dst, dstSize, src, srcSize,
1331	maxSymbolValue, huffLog, nbStreams: HUF_fourStreams,
1332	workSpace, wkspSize,
1333	oldHufTable: hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
1334	}
1335
1336

/* source code of linux/lib/zstd/compress/huf_compress.c */