zstd_fast.c source code [linux/lib/zstd/compress/zstd_fast.c]

1	/*
2	* Copyright (c) Yann Collet, Facebook, Inc.
3	* All rights reserved.
4	*
5	* This source code is licensed under both the BSD-style license (found in the
6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7	* in the COPYING file in the root directory of this source tree).
8	* You may select, at your option, one of the above-listed licenses.
9	*/
10
11	#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
12	#include "zstd_fast.h"
13
14
15	void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
16	const void* const end,
17	ZSTD_dictTableLoadMethod_e dtlm)
18	{
19	const ZSTD_compressionParameters* const cParams = &ms->cParams;
20	U32* const hashTable = ms->hashTable;
21	U32 const hBits = cParams->hashLog;
22	U32 const mls = cParams->minMatch;
23	const BYTE* const base = ms->window.base;
24	const BYTE* ip = base + ms->nextToUpdate;
25	const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
26	const U32 fastHashFillStep = `3`;
27
28	/ Always insert every fastHashFillStep position into the hash table.*
29	* Insert the other positions if their hash entry is empty.
30	*/
31	for ( ; ip + fastHashFillStep < iend + `2`; ip += fastHashFillStep) {
32	U32 const curr = (U32)(ip - base);
33	size_t const hash0 = ZSTD_hashPtr(p: ip, hBits, mls);
34	hashTable[hash0] = curr;
35	if (dtlm == ZSTD_dtlm_fast) continue;
36	/ Only load extra positions for ZSTD_dtlm_full /
37	{ U32 p;
38	for (p = `1`; p < fastHashFillStep; ++p) {
39	size_t const hash = ZSTD_hashPtr(p: ip + p, hBits, mls);
40	if (hashTable[hash] == `0`) { / not yet filled /
41	hashTable[hash] = curr + p;
42	} } } }
43	}
44
45
46	/*
47	* If you squint hard enough (and ignore repcodes), the search operation at any
48	* given position is broken into 4 stages:
49	*
50	* 1. Hash (map position to hash value via input read)
51	* 2. Lookup (map hash val to index via hashtable read)
52	* 3. Load (map index to value at that position via input read)
53	* 4. Compare
54	*
55	* Each of these steps involves a memory read at an address which is computed
56	* from the previous step. This means these steps must be sequenced and their
57	* latencies are cumulative.
58	*
59	* Rather than do 1->2->3->4 sequentially for a single position before moving
60	* onto the next, this implementation interleaves these operations across the
61	* next few positions:
62	*
63	* R = Repcode Read & Compare
64	* H = Hash
65	* T = Table Lookup
66	* M = Match Read & Compare
67	*
68	* Pos \| Time -->
69	* ----+-------------------
70	* N \| ... M
71	* N+1 \| ... TM
72	* N+2 \| R H T M
73	* N+3 \| H TM
74	* N+4 \| R H T M
75	* N+5 \| H ...
76	* N+6 \| R ...
77	*
78	* This is very much analogous to the pipelining of execution in a CPU. And just
79	* like a CPU, we have to dump the pipeline when we find a match (i.e., take a
80	* branch).
81	*
82	* When this happens, we throw away our current state, and do the following prep
83	* to re-enter the loop:
84	*
85	* Pos \| Time -->
86	* ----+-------------------
87	* N \| H T
88	* N+1 \| H
89	*
90	* This is also the work we do at the beginning to enter the loop initially.
91	*/
92	FORCE_INLINE_TEMPLATE size_t
93	ZSTD_compressBlock_fast_noDict_generic(
94	ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
95	void const* src, size_t srcSize,
96	U32 const mls, U32 const hasStep)
97	{
98	const ZSTD_compressionParameters* const cParams = &ms->cParams;
99	U32* const hashTable = ms->hashTable;
100	U32 const hlog = cParams->hashLog;
101	/ support stepSize of 0 /
102	size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + `1`) : `2`;
103	const BYTE* const base = ms->window.base;
104	const BYTE* const istart = (const BYTE*)src;
105	const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
106	const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, curr: endIndex, windowLog: cParams->windowLog);
107	const BYTE* const prefixStart = base + prefixStartIndex;
108	const BYTE* const iend = istart + srcSize;
109	const BYTE* const ilimit = iend - HASH_READ_SIZE;
110
111	const BYTE* anchor = istart;
112	const BYTE* ip0 = istart;
113	const BYTE* ip1;
114	const BYTE* ip2;
115	const BYTE* ip3;
116	U32 current0;
117
118	U32 rep_offset1 = rep[`0`];
119	U32 rep_offset2 = rep[`1`];
120	U32 offsetSaved = `0`;
121
122	size_t hash0; / hash for ip0 /
123	size_t hash1; / hash for ip1 /
124	U32 idx; / match idx for ip0 /
125	U32 mval; / src value at match idx /
126
127	U32 offcode;
128	const BYTE* match0;
129	size_t mLength;
130
131	/ ip0 and ip1 are always adjacent. The targetLength skipping and*
132	* uncompressibility acceleration is applied to every other position,
133	* matching the behavior of #1562. step therefore represents the gap
134	* between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
135	size_t step;
136	const BYTE* nextStep;
137	const size_t kStepIncr = (`1` << (kSearchStrength - `1`));
138
139	DEBUGLOG(`5`, "ZSTD_compressBlock_fast_generic");
140	ip0 += (ip0 == prefixStart);
141	{ U32 const curr = (U32)(ip0 - base);
142	U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, windowLog: cParams->windowLog);
143	U32 const maxRep = curr - windowLow;
144	if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = `0`;
145	if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = `0`;
146	}
147
148	/ start each op /
149	_start: / Requires: ip0 /
150
151	step = stepSize;
152	nextStep = ip0 + kStepIncr;
153
154	/ calculate positions, ip0 - anchor == 0, so we skip step calc /
155	ip1 = ip0 + `1`;
156	ip2 = ip0 + step;
157	ip3 = ip2 + `1`;
158
159	if (ip3 >= ilimit) {
160	goto _cleanup;
161	}
162
163	hash0 = ZSTD_hashPtr(p: ip0, hBits: hlog, mls);
164	hash1 = ZSTD_hashPtr(p: ip1, hBits: hlog, mls);
165
166	idx = hashTable[hash0];
167
168	do {
169	/ load repcode match for ip[2]/
170	const U32 rval = MEM_read32(memPtr: ip2 - rep_offset1);
171
172	/ write back hash table entry /
173	current0 = (U32)(ip0 - base);
174	hashTable[hash0] = current0;
175
176	/ check repcode at ip[2] /
177	if ((MEM_read32(memPtr: ip2) == rval) & (rep_offset1 > `0`)) {
178	ip0 = ip2;
179	match0 = ip0 - rep_offset1;
180	mLength = ip0[-`1`] == match0[-`1`];
181	ip0 -= mLength;
182	match0 -= mLength;
183	offcode = STORE_REPCODE_1;
184	mLength += `4`;
185	goto _match;
186	}
187
188	/ load match for ip[0] /
189	if (idx >= prefixStartIndex) {
190	mval = MEM_read32(memPtr: base + idx);
191	} else {
192	mval = MEM_read32(memPtr: ip0) ^ `1`; / guaranteed to not match. /
193	}
194
195	/ check match at ip[0] /
196	if (MEM_read32(memPtr: ip0) == mval) {
197	/ found a match! /
198	goto _offset;
199	}
200
201	/ lookup ip[1] /
202	idx = hashTable[hash1];
203
204	/ hash ip[2] /
205	hash0 = hash1;
206	hash1 = ZSTD_hashPtr(p: ip2, hBits: hlog, mls);
207
208	/ advance to next positions /
209	ip0 = ip1;
210	ip1 = ip2;
211	ip2 = ip3;
212
213	/ write back hash table entry /
214	current0 = (U32)(ip0 - base);
215	hashTable[hash0] = current0;
216
217	/ load match for ip[0] /
218	if (idx >= prefixStartIndex) {
219	mval = MEM_read32(memPtr: base + idx);
220	} else {
221	mval = MEM_read32(memPtr: ip0) ^ `1`; / guaranteed to not match. /
222	}
223
224	/ check match at ip[0] /
225	if (MEM_read32(memPtr: ip0) == mval) {
226	/ found a match! /
227	goto _offset;
228	}
229
230	/ lookup ip[1] /
231	idx = hashTable[hash1];
232
233	/ hash ip[2] /
234	hash0 = hash1;
235	hash1 = ZSTD_hashPtr(p: ip2, hBits: hlog, mls);
236
237	/ advance to next positions /
238	ip0 = ip1;
239	ip1 = ip2;
240	ip2 = ip0 + step;
241	ip3 = ip1 + step;
242
243	/ calculate step /
244	if (ip2 >= nextStep) {
245	step++;
246	PREFETCH_L1(ip1 + `64`);
247	PREFETCH_L1(ip1 + `128`);
248	nextStep += kStepIncr;
249	}
250	} while (ip3 < ilimit);
251
252	_cleanup:
253	/ Note that there are probably still a couple positions we could search.*
254	* However, it seems to be a meaningful performance hit to try to search
255	* them. So let's not. */
256
257	/ save reps for next block /
258	rep[`0`] = rep_offset1 ? rep_offset1 : offsetSaved;
259	rep[`1`] = rep_offset2 ? rep_offset2 : offsetSaved;
260
261	/ Return the last literals size /
262	return (size_t)(iend - anchor);
263
264	_offset: / Requires: ip0, idx /
265
266	/ Compute the offset code. /
267	match0 = base + idx;
268	rep_offset2 = rep_offset1;
269	rep_offset1 = (U32)(ip0-match0);
270	offcode = STORE_OFFSET(rep_offset1);
271	mLength = `4`;
272
273	/ Count the backwards match length. /
274	while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-`1`] == match0[-`1`])) {
275	ip0--;
276	match0--;
277	mLength++;
278	}
279
280	_match: / Requires: ip0, match0, offcode /
281
282	/ Count the forward length. /
283	mLength += ZSTD_count(pIn: ip0 + mLength, pMatch: match0 + mLength, pInLimit: iend);
284
285	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip0 - anchor), literals: anchor, litLimit: iend, offBase_minus1: offcode, matchLength: mLength);
286
287	ip0 += mLength;
288	anchor = ip0;
289
290	/ write next hash table entry /
291	if (ip1 < ip0) {
292	hashTable[hash1] = (U32)(ip1 - base);
293	}
294
295	/ Fill table and check for immediate repcode. /
296	if (ip0 <= ilimit) {
297	/ Fill Table /
298	assert(base+current0+`2` > istart); / check base overflow /
299	hashTable[ZSTD_hashPtr(p: base+current0+`2`, hBits: hlog, mls)] = current0+`2`; / here because current+2 could be > iend-8 /
300	hashTable[ZSTD_hashPtr(p: ip0-`2`, hBits: hlog, mls)] = (U32)(ip0-`2`-base);
301
302	if (rep_offset2 > `0`) { / rep_offset2==0 means rep_offset2 is invalidated /
303	while ( (ip0 <= ilimit) && (MEM_read32(memPtr: ip0) == MEM_read32(memPtr: ip0 - rep_offset2)) ) {
304	/ store sequence /
305	size_t const rLength = ZSTD_count(pIn: ip0+`4`, pMatch: ip0+`4`-rep_offset2, pInLimit: iend) + `4`;
306	{ U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } / swap rep_offset2 <=> rep_offset1 /
307	hashTable[ZSTD_hashPtr(p: ip0, hBits: hlog, mls)] = (U32)(ip0-base);
308	ip0 += rLength;
309	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: `0` /litLen/, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: rLength);
310	anchor = ip0;
311	continue; / faster when present (confirmed on gcc-8) ... (?) /
312	} } }
313
314	goto _start;
315	}
316
317	#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
318	static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
319	ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
320	void const* src, size_t srcSize) \
321	{ \
322	return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
323	}
324
325	ZSTD_GEN_FAST_FN(noDict, `4`, `1`)
326	ZSTD_GEN_FAST_FN(noDict, `5`, `1`)
327	ZSTD_GEN_FAST_FN(noDict, `6`, `1`)
328	ZSTD_GEN_FAST_FN(noDict, `7`, `1`)
329
330	ZSTD_GEN_FAST_FN(noDict, `4`, `0`)
331	ZSTD_GEN_FAST_FN(noDict, `5`, `0`)
332	ZSTD_GEN_FAST_FN(noDict, `6`, `0`)
333	ZSTD_GEN_FAST_FN(noDict, `7`, `0`)
334
335	size_t ZSTD_compressBlock_fast(
336	ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
337	void const* src, size_t srcSize)
338	{
339	U32 const mls = ms->cParams.minMatch;
340	assert(ms->dictMatchState == NULL);
341	if (ms->cParams.targetLength > `1`) {
342	switch(mls)
343	{
344	default: / includes case 3 /
345	case `4` :
346	return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
347	case `5` :
348	return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
349	case `6` :
350	return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
351	case `7` :
352	return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
353	}
354	} else {
355	switch(mls)
356	{
357	default: / includes case 3 /
358	case `4` :
359	return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
360	case `5` :
361	return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
362	case `6` :
363	return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
364	case `7` :
365	return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
366	}
367
368	}
369	}
370
371	FORCE_INLINE_TEMPLATE
372	size_t ZSTD_compressBlock_fast_dictMatchState_generic(
373	ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
374	void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
375	{
376	const ZSTD_compressionParameters* const cParams = &ms->cParams;
377	U32* const hashTable = ms->hashTable;
378	U32 const hlog = cParams->hashLog;
379	/ support stepSize of 0 /
380	U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
381	const BYTE* const base = ms->window.base;
382	const BYTE* const istart = (const BYTE*)src;
383	const BYTE* ip = istart;
384	const BYTE* anchor = istart;
385	const U32 prefixStartIndex = ms->window.dictLimit;
386	const BYTE* const prefixStart = base + prefixStartIndex;
387	const BYTE* const iend = istart + srcSize;
388	const BYTE* const ilimit = iend - HASH_READ_SIZE;
389	U32 offset_1=rep[`0`], offset_2=rep[`1`];
390	U32 offsetSaved = `0`;
391
392	const ZSTD_matchState_t* const dms = ms->dictMatchState;
393	const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
394	const U32* const dictHashTable = dms->hashTable;
395	const U32 dictStartIndex = dms->window.dictLimit;
396	const BYTE* const dictBase = dms->window.base;
397	const BYTE* const dictStart = dictBase + dictStartIndex;
398	const BYTE* const dictEnd = dms->window.nextSrc;
399	const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
400	const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
401	const U32 dictHLog = dictCParams->hashLog;
402
403	/ if a dictionary is still attached, it necessarily means that*
404	* it is within window size. So we just check it. */
405	const U32 maxDistance = `1U` << cParams->windowLog;
406	const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
407	assert(endIndex - prefixStartIndex <= maxDistance);
408	(void)maxDistance; (void)endIndex; / these variables are not used when assert() is disabled /
409
410	(void)hasStep; / not currently specialized on whether it's accelerated /
411
412	/ ensure there will be no underflow*
413	* when translating a dict index into a local index */
414	assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
415
416	/ init /
417	DEBUGLOG(`5`, "ZSTD_compressBlock_fast_dictMatchState_generic");
418	ip += (dictAndPrefixLength == `0`);
419	/ dictMatchState repCode checks don't currently handle repCode == 0*
420	* disabling. */
421	assert(offset_1 <= dictAndPrefixLength);
422	assert(offset_2 <= dictAndPrefixLength);
423
424	/ Main Search Loop /
425	while (ip < ilimit) { / < instead of <=, because repcode check at (ip+1) /
426	size_t mLength;
427	size_t const h = ZSTD_hashPtr(p: ip, hBits: hlog, mls);
428	U32 const curr = (U32)(ip-base);
429	U32 const matchIndex = hashTable[h];
430	const BYTE* match = base + matchIndex;
431	const U32 repIndex = curr + `1` - offset_1;
432	const BYTE* repMatch = (repIndex < prefixStartIndex) ?
433	dictBase + (repIndex - dictIndexDelta) :
434	base + repIndex;
435	hashTable[h] = curr; / update hash table /
436
437	if ( ((U32)((prefixStartIndex-`1`) - repIndex) >= `3`) / intentional underflow : ensure repIndex isn't overlapping dict + prefix /
438	&& (MEM_read32(memPtr: repMatch) == MEM_read32(memPtr: ip+`1`)) ) {
439	const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
440	mLength = ZSTD_count_2segments(ip: ip+`1`+`4`, match: repMatch+`4`, iEnd: iend, mEnd: repMatchEnd, iStart: prefixStart) + `4`;
441	ip++;
442	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: mLength);
443	} else if ( (matchIndex <= prefixStartIndex) ) {
444	size_t const dictHash = ZSTD_hashPtr(p: ip, hBits: dictHLog, mls);
445	U32 const dictMatchIndex = dictHashTable[dictHash];
446	const BYTE* dictMatch = dictBase + dictMatchIndex;
447	if (dictMatchIndex <= dictStartIndex \|\|
448	MEM_read32(memPtr: dictMatch) != MEM_read32(memPtr: ip)) {
449	assert(stepSize >= `1`);
450	ip += ((ip-anchor) >> kSearchStrength) + stepSize;
451	continue;
452	} else {
453	/ found a dict match /
454	U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
455	mLength = ZSTD_count_2segments(ip: ip+`4`, match: dictMatch+`4`, iEnd: iend, mEnd: dictEnd, iStart: prefixStart) + `4`;
456	while (((ip>anchor) & (dictMatch>dictStart))
457	&& (ip[-`1`] == dictMatch[-`1`])) {
458	ip--; dictMatch--; mLength++;
459	} / catch up /
460	offset_2 = offset_1;
461	offset_1 = offset;
462	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength);
463	}
464	} else if (MEM_read32(memPtr: match) != MEM_read32(memPtr: ip)) {
465	/ it's not a match, and we're not going to check the dictionary /
466	assert(stepSize >= `1`);
467	ip += ((ip-anchor) >> kSearchStrength) + stepSize;
468	continue;
469	} else {
470	/ found a regular match /
471	U32 const offset = (U32)(ip-match);
472	mLength = ZSTD_count(pIn: ip+`4`, pMatch: match+`4`, pInLimit: iend) + `4`;
473	while (((ip>anchor) & (match>prefixStart))
474	&& (ip[-`1`] == match[-`1`])) { ip--; match--; mLength++; } / catch up /
475	offset_2 = offset_1;
476	offset_1 = offset;
477	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength);
478	}
479
480	/ match found /
481	ip += mLength;
482	anchor = ip;
483
484	if (ip <= ilimit) {
485	/ Fill Table /
486	assert(base+curr+`2` > istart); / check base overflow /
487	hashTable[ZSTD_hashPtr(p: base+curr+`2`, hBits: hlog, mls)] = curr+`2`; / here because curr+2 could be > iend-8 /
488	hashTable[ZSTD_hashPtr(p: ip-`2`, hBits: hlog, mls)] = (U32)(ip-`2`-base);
489
490	/ check immediate repcode /
491	while (ip <= ilimit) {
492	U32 const current2 = (U32)(ip-base);
493	U32 const repIndex2 = current2 - offset_2;
494	const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
495	dictBase - dictIndexDelta + repIndex2 :
496	base + repIndex2;
497	if ( ((U32)((prefixStartIndex-`1`) - (U32)repIndex2) >= `3` / intentional overflow /)
498	&& (MEM_read32(memPtr: repMatch2) == MEM_read32(memPtr: ip)) ) {
499	const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
500	size_t const repLength2 = ZSTD_count_2segments(ip: ip+`4`, match: repMatch2+`4`, iEnd: iend, mEnd: repEnd2, iStart: prefixStart) + `4`;
501	U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; / swap offset_2 <=> offset_1 /
502	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: `0`, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: repLength2);
503	hashTable[ZSTD_hashPtr(p: ip, hBits: hlog, mls)] = current2;
504	ip += repLength2;
505	anchor = ip;
506	continue;
507	}
508	break;
509	}
510	}
511	}
512
513	/ save reps for next block /
514	rep[`0`] = offset_1 ? offset_1 : offsetSaved;
515	rep[`1`] = offset_2 ? offset_2 : offsetSaved;
516
517	/ Return the last literals size /
518	return (size_t)(iend - anchor);
519	}
520
521
522	ZSTD_GEN_FAST_FN(dictMatchState, `4`, `0`)
523	ZSTD_GEN_FAST_FN(dictMatchState, `5`, `0`)
524	ZSTD_GEN_FAST_FN(dictMatchState, `6`, `0`)
525	ZSTD_GEN_FAST_FN(dictMatchState, `7`, `0`)
526
527	size_t ZSTD_compressBlock_fast_dictMatchState(
528	ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
529	void const* src, size_t srcSize)
530	{
531	U32 const mls = ms->cParams.minMatch;
532	assert(ms->dictMatchState != NULL);
533	switch(mls)
534	{
535	default: / includes case 3 /
536	case `4` :
537	return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
538	case `5` :
539	return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
540	case `6` :
541	return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
542	case `7` :
543	return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
544	}
545	}
546
547
548	static size_t ZSTD_compressBlock_fast_extDict_generic(
549	ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
550	void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
551	{
552	const ZSTD_compressionParameters* const cParams = &ms->cParams;
553	U32* const hashTable = ms->hashTable;
554	U32 const hlog = cParams->hashLog;
555	/ support stepSize of 0 /
556	U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
557	const BYTE* const base = ms->window.base;
558	const BYTE* const dictBase = ms->window.dictBase;
559	const BYTE* const istart = (const BYTE*)src;
560	const BYTE* ip = istart;
561	const BYTE* anchor = istart;
562	const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
563	const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, curr: endIndex, windowLog: cParams->windowLog);
564	const U32 dictStartIndex = lowLimit;
565	const BYTE* const dictStart = dictBase + dictStartIndex;
566	const U32 dictLimit = ms->window.dictLimit;
567	const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
568	const BYTE* const prefixStart = base + prefixStartIndex;
569	const BYTE* const dictEnd = dictBase + prefixStartIndex;
570	const BYTE* const iend = istart + srcSize;
571	const BYTE* const ilimit = iend - `8`;
572	U32 offset_1=rep[`0`], offset_2=rep[`1`];
573
574	(void)hasStep; / not currently specialized on whether it's accelerated /
575
576	DEBUGLOG(`5`, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
577
578	/ switch to "regular" variant if extDict is invalidated due to maxDistance /
579	if (prefixStartIndex == dictStartIndex)
580	return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
581
582	/ Search Loop /
583	while (ip < ilimit) { / < instead of <=, because (ip+1) /
584	const size_t h = ZSTD_hashPtr(p: ip, hBits: hlog, mls);
585	const U32 matchIndex = hashTable[h];
586	const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
587	const BYTE* match = matchBase + matchIndex;
588	const U32 curr = (U32)(ip-base);
589	const U32 repIndex = curr + `1` - offset_1;
590	const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
591	const BYTE* const repMatch = repBase + repIndex;
592	hashTable[h] = curr; / update hash table /
593	DEBUGLOG(`7`, "offset_1 = %u , curr = %u", offset_1, curr);
594
595	if ( ( ((U32)((prefixStartIndex-`1`) - repIndex) >= `3`) / intentional underflow /
596	& (offset_1 <= curr+`1` - dictStartIndex) ) / note: we are searching at curr+1 /
597	&& (MEM_read32(memPtr: repMatch) == MEM_read32(memPtr: ip+`1`)) ) {
598	const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
599	size_t const rLength = ZSTD_count_2segments(ip: ip+`1` +`4`, match: repMatch +`4`, iEnd: iend, mEnd: repMatchEnd, iStart: prefixStart) + `4`;
600	ip++;
601	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: rLength);
602	ip += rLength;
603	anchor = ip;
604	} else {
605	if ( (matchIndex < dictStartIndex) \|\|
606	(MEM_read32(memPtr: match) != MEM_read32(memPtr: ip)) ) {
607	assert(stepSize >= `1`);
608	ip += ((ip-anchor) >> kSearchStrength) + stepSize;
609	continue;
610	}
611	{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
612	const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
613	U32 const offset = curr - matchIndex;
614	size_t mLength = ZSTD_count_2segments(ip: ip+`4`, match: match+`4`, iEnd: iend, mEnd: matchEnd, iStart: prefixStart) + `4`;
615	while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-`1`] == match[-`1`])) { ip--; match--; mLength++; } / catch up /
616	offset_2 = offset_1; offset_1 = offset; / update offset history /
617	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength);
618	ip += mLength;
619	anchor = ip;
620	} }
621
622	if (ip <= ilimit) {
623	/ Fill Table /
624	hashTable[ZSTD_hashPtr(p: base+curr+`2`, hBits: hlog, mls)] = curr+`2`;
625	hashTable[ZSTD_hashPtr(p: ip-`2`, hBits: hlog, mls)] = (U32)(ip-`2`-base);
626	/ check immediate repcode /
627	while (ip <= ilimit) {
628	U32 const current2 = (U32)(ip-base);
629	U32 const repIndex2 = current2 - offset_2;
630	const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
631	if ( (((U32)((prefixStartIndex-`1`) - repIndex2) >= `3`) & (offset_2 <= curr - dictStartIndex)) / intentional overflow /
632	&& (MEM_read32(memPtr: repMatch2) == MEM_read32(memPtr: ip)) ) {
633	const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
634	size_t const repLength2 = ZSTD_count_2segments(ip: ip+`4`, match: repMatch2+`4`, iEnd: iend, mEnd: repEnd2, iStart: prefixStart) + `4`;
635	{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } / swap offset_2 <=> offset_1 /
636	ZSTD_storeSeq(seqStorePtr: seqStore, litLength: `0` /litlen/, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: repLength2);
637	hashTable[ZSTD_hashPtr(p: ip, hBits: hlog, mls)] = current2;
638	ip += repLength2;
639	anchor = ip;
640	continue;
641	}
642	break;
643	} } }
644
645	/ save reps for next block /
646	rep[`0`] = offset_1;
647	rep[`1`] = offset_2;
648
649	/ Return the last literals size /
650	return (size_t)(iend - anchor);
651	}
652
653	ZSTD_GEN_FAST_FN(extDict, `4`, `0`)
654	ZSTD_GEN_FAST_FN(extDict, `5`, `0`)
655	ZSTD_GEN_FAST_FN(extDict, `6`, `0`)
656	ZSTD_GEN_FAST_FN(extDict, `7`, `0`)
657
658	size_t ZSTD_compressBlock_fast_extDict(
659	ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
660	void const* src, size_t srcSize)
661	{
662	U32 const mls = ms->cParams.minMatch;
663	switch(mls)
664	{
665	default: / includes case 3 /
666	case `4` :
667	return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
668	case `5` :
669	return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
670	case `6` :
671	return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
672	case `7` :
673	return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
674	}
675	}
676

source code of linux/lib/zstd/compress/zstd_fast.c