1 | /* |
2 | * Copyright (c) Yann Collet, Facebook, Inc. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | * in the COPYING file in the root directory of this source tree). |
8 | * You may select, at your option, one of the above-listed licenses. |
9 | */ |
10 | |
11 | #include "zstd_compress_internal.h" |
12 | #include "zstd_double_fast.h" |
13 | |
14 | |
15 | void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, |
16 | void const* end, ZSTD_dictTableLoadMethod_e dtlm) |
17 | { |
18 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
19 | U32* const hashLarge = ms->hashTable; |
20 | U32 const hBitsL = cParams->hashLog; |
21 | U32 const mls = cParams->minMatch; |
22 | U32* const hashSmall = ms->chainTable; |
23 | U32 const hBitsS = cParams->chainLog; |
24 | const BYTE* const base = ms->window.base; |
25 | const BYTE* ip = base + ms->nextToUpdate; |
26 | const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; |
27 | const U32 fastHashFillStep = 3; |
28 | |
29 | /* Always insert every fastHashFillStep position into the hash tables. |
30 | * Insert the other positions into the large hash table if their entry |
31 | * is empty. |
32 | */ |
33 | for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { |
34 | U32 const curr = (U32)(ip - base); |
35 | U32 i; |
36 | for (i = 0; i < fastHashFillStep; ++i) { |
37 | size_t const smHash = ZSTD_hashPtr(p: ip + i, hBits: hBitsS, mls); |
38 | size_t const lgHash = ZSTD_hashPtr(p: ip + i, hBits: hBitsL, mls: 8); |
39 | if (i == 0) |
40 | hashSmall[smHash] = curr + i; |
41 | if (i == 0 || hashLarge[lgHash] == 0) |
42 | hashLarge[lgHash] = curr + i; |
43 | /* Only load extra positions for ZSTD_dtlm_full */ |
44 | if (dtlm == ZSTD_dtlm_fast) |
45 | break; |
46 | } } |
47 | } |
48 | |
49 | |
50 | FORCE_INLINE_TEMPLATE |
51 | size_t ZSTD_compressBlock_doubleFast_noDict_generic( |
52 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
53 | void const* src, size_t srcSize, U32 const mls /* template */) |
54 | { |
55 | ZSTD_compressionParameters const* cParams = &ms->cParams; |
56 | U32* const hashLong = ms->hashTable; |
57 | const U32 hBitsL = cParams->hashLog; |
58 | U32* const hashSmall = ms->chainTable; |
59 | const U32 hBitsS = cParams->chainLog; |
60 | const BYTE* const base = ms->window.base; |
61 | const BYTE* const istart = (const BYTE*)src; |
62 | const BYTE* anchor = istart; |
63 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
64 | /* presumes that, if there is a dictionary, it must be using Attach mode */ |
65 | const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, curr: endIndex, windowLog: cParams->windowLog); |
66 | const BYTE* const prefixLowest = base + prefixLowestIndex; |
67 | const BYTE* const iend = istart + srcSize; |
68 | const BYTE* const ilimit = iend - HASH_READ_SIZE; |
69 | U32 offset_1=rep[0], offset_2=rep[1]; |
70 | U32 offsetSaved = 0; |
71 | |
72 | size_t mLength; |
73 | U32 offset; |
74 | U32 curr; |
75 | |
76 | /* how many positions to search before increasing step size */ |
77 | const size_t kStepIncr = 1 << kSearchStrength; |
78 | /* the position at which to increment the step size if no match is found */ |
79 | const BYTE* nextStep; |
80 | size_t step; /* the current step size */ |
81 | |
82 | size_t hl0; /* the long hash at ip */ |
83 | size_t hl1; /* the long hash at ip1 */ |
84 | |
85 | U32 idxl0; /* the long match index for ip */ |
86 | U32 idxl1; /* the long match index for ip1 */ |
87 | |
88 | const BYTE* matchl0; /* the long match for ip */ |
89 | const BYTE* matchs0; /* the short match for ip */ |
90 | const BYTE* matchl1; /* the long match for ip1 */ |
91 | |
92 | const BYTE* ip = istart; /* the current position */ |
93 | const BYTE* ip1; /* the next position */ |
94 | |
95 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic" ); |
96 | |
97 | /* init */ |
98 | ip += ((ip - prefixLowest) == 0); |
99 | { |
100 | U32 const current = (U32)(ip - base); |
101 | U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr: current, windowLog: cParams->windowLog); |
102 | U32 const maxRep = current - windowLow; |
103 | if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; |
104 | if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; |
105 | } |
106 | |
107 | /* Outer Loop: one iteration per match found and stored */ |
108 | while (1) { |
109 | step = 1; |
110 | nextStep = ip + kStepIncr; |
111 | ip1 = ip + step; |
112 | |
113 | if (ip1 > ilimit) { |
114 | goto _cleanup; |
115 | } |
116 | |
117 | hl0 = ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8); |
118 | idxl0 = hashLong[hl0]; |
119 | matchl0 = base + idxl0; |
120 | |
121 | /* Inner Loop: one iteration per search / position */ |
122 | do { |
123 | const size_t hs0 = ZSTD_hashPtr(p: ip, hBits: hBitsS, mls); |
124 | const U32 idxs0 = hashSmall[hs0]; |
125 | curr = (U32)(ip-base); |
126 | matchs0 = base + idxs0; |
127 | |
128 | hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ |
129 | |
130 | /* check noDict repcode */ |
131 | if ((offset_1 > 0) & (MEM_read32(memPtr: ip+1-offset_1) == MEM_read32(memPtr: ip+1))) { |
132 | mLength = ZSTD_count(pIn: ip+1+4, pMatch: ip+1+4-offset_1, pInLimit: iend) + 4; |
133 | ip++; |
134 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: mLength); |
135 | goto _match_stored; |
136 | } |
137 | |
138 | hl1 = ZSTD_hashPtr(p: ip1, hBits: hBitsL, mls: 8); |
139 | |
140 | if (idxl0 > prefixLowestIndex) { |
141 | /* check prefix long match */ |
142 | if (MEM_read64(memPtr: matchl0) == MEM_read64(memPtr: ip)) { |
143 | mLength = ZSTD_count(pIn: ip+8, pMatch: matchl0+8, pInLimit: iend) + 8; |
144 | offset = (U32)(ip-matchl0); |
145 | while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ |
146 | goto _match_found; |
147 | } |
148 | } |
149 | |
150 | idxl1 = hashLong[hl1]; |
151 | matchl1 = base + idxl1; |
152 | |
153 | if (idxs0 > prefixLowestIndex) { |
154 | /* check prefix short match */ |
155 | if (MEM_read32(memPtr: matchs0) == MEM_read32(memPtr: ip)) { |
156 | goto _search_next_long; |
157 | } |
158 | } |
159 | |
160 | if (ip1 >= nextStep) { |
161 | PREFETCH_L1(ip1 + 64); |
162 | PREFETCH_L1(ip1 + 128); |
163 | step++; |
164 | nextStep += kStepIncr; |
165 | } |
166 | ip = ip1; |
167 | ip1 += step; |
168 | |
169 | hl0 = hl1; |
170 | idxl0 = idxl1; |
171 | matchl0 = matchl1; |
172 | #if defined(__aarch64__) |
173 | PREFETCH_L1(ip+256); |
174 | #endif |
175 | } while (ip1 <= ilimit); |
176 | |
177 | _cleanup: |
178 | /* save reps for next block */ |
179 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
180 | rep[1] = offset_2 ? offset_2 : offsetSaved; |
181 | |
182 | /* Return the last literals size */ |
183 | return (size_t)(iend - anchor); |
184 | |
185 | _search_next_long: |
186 | |
187 | /* check prefix long +1 match */ |
188 | if (idxl1 > prefixLowestIndex) { |
189 | if (MEM_read64(memPtr: matchl1) == MEM_read64(memPtr: ip1)) { |
190 | ip = ip1; |
191 | mLength = ZSTD_count(pIn: ip+8, pMatch: matchl1+8, pInLimit: iend) + 8; |
192 | offset = (U32)(ip-matchl1); |
193 | while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ |
194 | goto _match_found; |
195 | } |
196 | } |
197 | |
198 | /* if no long +1 match, explore the short match we found */ |
199 | mLength = ZSTD_count(pIn: ip+4, pMatch: matchs0+4, pInLimit: iend) + 4; |
200 | offset = (U32)(ip - matchs0); |
201 | while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ |
202 | |
203 | /* fall-through */ |
204 | |
205 | _match_found: /* requires ip, offset, mLength */ |
206 | offset_2 = offset_1; |
207 | offset_1 = offset; |
208 | |
209 | if (step < 4) { |
210 | /* It is unsafe to write this value back to the hashtable when ip1 is |
211 | * greater than or equal to the new ip we will have after we're done |
212 | * processing this match. Rather than perform that test directly |
213 | * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler |
214 | * more predictable test. The minmatch even if we take a short match is |
215 | * 4 bytes, so as long as step, the distance between ip and ip1 |
216 | * (initially) is less than 4, we know ip1 < new ip. */ |
217 | hashLong[hl1] = (U32)(ip1 - base); |
218 | } |
219 | |
220 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength); |
221 | |
222 | _match_stored: |
223 | /* match found */ |
224 | ip += mLength; |
225 | anchor = ip; |
226 | |
227 | if (ip <= ilimit) { |
228 | /* Complementary insertion */ |
229 | /* done after iLimit test, as candidates could be > iend-8 */ |
230 | { U32 const indexToInsert = curr+2; |
231 | hashLong[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsL, mls: 8)] = indexToInsert; |
232 | hashLong[ZSTD_hashPtr(p: ip-2, hBits: hBitsL, mls: 8)] = (U32)(ip-2-base); |
233 | hashSmall[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsS, mls)] = indexToInsert; |
234 | hashSmall[ZSTD_hashPtr(p: ip-1, hBits: hBitsS, mls)] = (U32)(ip-1-base); |
235 | } |
236 | |
237 | /* check immediate repcode */ |
238 | while ( (ip <= ilimit) |
239 | && ( (offset_2>0) |
240 | & (MEM_read32(memPtr: ip) == MEM_read32(memPtr: ip - offset_2)) )) { |
241 | /* store sequence */ |
242 | size_t const rLength = ZSTD_count(pIn: ip+4, pMatch: ip+4-offset_2, pInLimit: iend) + 4; |
243 | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ |
244 | hashSmall[ZSTD_hashPtr(p: ip, hBits: hBitsS, mls)] = (U32)(ip-base); |
245 | hashLong[ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8)] = (U32)(ip-base); |
246 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: 0, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: rLength); |
247 | ip += rLength; |
248 | anchor = ip; |
249 | continue; /* faster when present ... (?) */ |
250 | } |
251 | } |
252 | } |
253 | } |
254 | |
255 | |
256 | FORCE_INLINE_TEMPLATE |
257 | size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( |
258 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
259 | void const* src, size_t srcSize, |
260 | U32 const mls /* template */) |
261 | { |
262 | ZSTD_compressionParameters const* cParams = &ms->cParams; |
263 | U32* const hashLong = ms->hashTable; |
264 | const U32 hBitsL = cParams->hashLog; |
265 | U32* const hashSmall = ms->chainTable; |
266 | const U32 hBitsS = cParams->chainLog; |
267 | const BYTE* const base = ms->window.base; |
268 | const BYTE* const istart = (const BYTE*)src; |
269 | const BYTE* ip = istart; |
270 | const BYTE* anchor = istart; |
271 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
272 | /* presumes that, if there is a dictionary, it must be using Attach mode */ |
273 | const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, curr: endIndex, windowLog: cParams->windowLog); |
274 | const BYTE* const prefixLowest = base + prefixLowestIndex; |
275 | const BYTE* const iend = istart + srcSize; |
276 | const BYTE* const ilimit = iend - HASH_READ_SIZE; |
277 | U32 offset_1=rep[0], offset_2=rep[1]; |
278 | U32 offsetSaved = 0; |
279 | |
280 | const ZSTD_matchState_t* const dms = ms->dictMatchState; |
281 | const ZSTD_compressionParameters* const dictCParams = &dms->cParams; |
282 | const U32* const dictHashLong = dms->hashTable; |
283 | const U32* const dictHashSmall = dms->chainTable; |
284 | const U32 dictStartIndex = dms->window.dictLimit; |
285 | const BYTE* const dictBase = dms->window.base; |
286 | const BYTE* const dictStart = dictBase + dictStartIndex; |
287 | const BYTE* const dictEnd = dms->window.nextSrc; |
288 | const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); |
289 | const U32 dictHBitsL = dictCParams->hashLog; |
290 | const U32 dictHBitsS = dictCParams->chainLog; |
291 | const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); |
292 | |
293 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic" ); |
294 | |
295 | /* if a dictionary is attached, it must be within window range */ |
296 | assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); |
297 | |
298 | /* init */ |
299 | ip += (dictAndPrefixLength == 0); |
300 | |
301 | /* dictMatchState repCode checks don't currently handle repCode == 0 |
302 | * disabling. */ |
303 | assert(offset_1 <= dictAndPrefixLength); |
304 | assert(offset_2 <= dictAndPrefixLength); |
305 | |
306 | /* Main Search Loop */ |
307 | while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ |
308 | size_t mLength; |
309 | U32 offset; |
310 | size_t const h2 = ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8); |
311 | size_t const h = ZSTD_hashPtr(p: ip, hBits: hBitsS, mls); |
312 | size_t const dictHL = ZSTD_hashPtr(p: ip, hBits: dictHBitsL, mls: 8); |
313 | size_t const dictHS = ZSTD_hashPtr(p: ip, hBits: dictHBitsS, mls); |
314 | U32 const curr = (U32)(ip-base); |
315 | U32 const matchIndexL = hashLong[h2]; |
316 | U32 matchIndexS = hashSmall[h]; |
317 | const BYTE* matchLong = base + matchIndexL; |
318 | const BYTE* match = base + matchIndexS; |
319 | const U32 repIndex = curr + 1 - offset_1; |
320 | const BYTE* repMatch = (repIndex < prefixLowestIndex) ? |
321 | dictBase + (repIndex - dictIndexDelta) : |
322 | base + repIndex; |
323 | hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ |
324 | |
325 | /* check repcode */ |
326 | if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) |
327 | && (MEM_read32(memPtr: repMatch) == MEM_read32(memPtr: ip+1)) ) { |
328 | const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; |
329 | mLength = ZSTD_count_2segments(ip: ip+1+4, match: repMatch+4, iEnd: iend, mEnd: repMatchEnd, iStart: prefixLowest) + 4; |
330 | ip++; |
331 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: mLength); |
332 | goto _match_stored; |
333 | } |
334 | |
335 | if (matchIndexL > prefixLowestIndex) { |
336 | /* check prefix long match */ |
337 | if (MEM_read64(memPtr: matchLong) == MEM_read64(memPtr: ip)) { |
338 | mLength = ZSTD_count(pIn: ip+8, pMatch: matchLong+8, pInLimit: iend) + 8; |
339 | offset = (U32)(ip-matchLong); |
340 | while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
341 | goto _match_found; |
342 | } |
343 | } else { |
344 | /* check dictMatchState long match */ |
345 | U32 const dictMatchIndexL = dictHashLong[dictHL]; |
346 | const BYTE* dictMatchL = dictBase + dictMatchIndexL; |
347 | assert(dictMatchL < dictEnd); |
348 | |
349 | if (dictMatchL > dictStart && MEM_read64(memPtr: dictMatchL) == MEM_read64(memPtr: ip)) { |
350 | mLength = ZSTD_count_2segments(ip: ip+8, match: dictMatchL+8, iEnd: iend, mEnd: dictEnd, iStart: prefixLowest) + 8; |
351 | offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); |
352 | while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ |
353 | goto _match_found; |
354 | } } |
355 | |
356 | if (matchIndexS > prefixLowestIndex) { |
357 | /* check prefix short match */ |
358 | if (MEM_read32(memPtr: match) == MEM_read32(memPtr: ip)) { |
359 | goto _search_next_long; |
360 | } |
361 | } else { |
362 | /* check dictMatchState short match */ |
363 | U32 const dictMatchIndexS = dictHashSmall[dictHS]; |
364 | match = dictBase + dictMatchIndexS; |
365 | matchIndexS = dictMatchIndexS + dictIndexDelta; |
366 | |
367 | if (match > dictStart && MEM_read32(memPtr: match) == MEM_read32(memPtr: ip)) { |
368 | goto _search_next_long; |
369 | } } |
370 | |
371 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
372 | #if defined(__aarch64__) |
373 | PREFETCH_L1(ip+256); |
374 | #endif |
375 | continue; |
376 | |
377 | _search_next_long: |
378 | |
379 | { size_t const hl3 = ZSTD_hashPtr(p: ip+1, hBits: hBitsL, mls: 8); |
380 | size_t const dictHLNext = ZSTD_hashPtr(p: ip+1, hBits: dictHBitsL, mls: 8); |
381 | U32 const matchIndexL3 = hashLong[hl3]; |
382 | const BYTE* matchL3 = base + matchIndexL3; |
383 | hashLong[hl3] = curr + 1; |
384 | |
385 | /* check prefix long +1 match */ |
386 | if (matchIndexL3 > prefixLowestIndex) { |
387 | if (MEM_read64(memPtr: matchL3) == MEM_read64(memPtr: ip+1)) { |
388 | mLength = ZSTD_count(pIn: ip+9, pMatch: matchL3+8, pInLimit: iend) + 8; |
389 | ip++; |
390 | offset = (U32)(ip-matchL3); |
391 | while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ |
392 | goto _match_found; |
393 | } |
394 | } else { |
395 | /* check dict long +1 match */ |
396 | U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; |
397 | const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; |
398 | assert(dictMatchL3 < dictEnd); |
399 | if (dictMatchL3 > dictStart && MEM_read64(memPtr: dictMatchL3) == MEM_read64(memPtr: ip+1)) { |
400 | mLength = ZSTD_count_2segments(ip: ip+1+8, match: dictMatchL3+8, iEnd: iend, mEnd: dictEnd, iStart: prefixLowest) + 8; |
401 | ip++; |
402 | offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); |
403 | while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ |
404 | goto _match_found; |
405 | } } } |
406 | |
407 | /* if no long +1 match, explore the short match we found */ |
408 | if (matchIndexS < prefixLowestIndex) { |
409 | mLength = ZSTD_count_2segments(ip: ip+4, match: match+4, iEnd: iend, mEnd: dictEnd, iStart: prefixLowest) + 4; |
410 | offset = (U32)(curr - matchIndexS); |
411 | while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
412 | } else { |
413 | mLength = ZSTD_count(pIn: ip+4, pMatch: match+4, pInLimit: iend) + 4; |
414 | offset = (U32)(ip - match); |
415 | while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
416 | } |
417 | |
418 | _match_found: |
419 | offset_2 = offset_1; |
420 | offset_1 = offset; |
421 | |
422 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength); |
423 | |
424 | _match_stored: |
425 | /* match found */ |
426 | ip += mLength; |
427 | anchor = ip; |
428 | |
429 | if (ip <= ilimit) { |
430 | /* Complementary insertion */ |
431 | /* done after iLimit test, as candidates could be > iend-8 */ |
432 | { U32 const indexToInsert = curr+2; |
433 | hashLong[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsL, mls: 8)] = indexToInsert; |
434 | hashLong[ZSTD_hashPtr(p: ip-2, hBits: hBitsL, mls: 8)] = (U32)(ip-2-base); |
435 | hashSmall[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsS, mls)] = indexToInsert; |
436 | hashSmall[ZSTD_hashPtr(p: ip-1, hBits: hBitsS, mls)] = (U32)(ip-1-base); |
437 | } |
438 | |
439 | /* check immediate repcode */ |
440 | while (ip <= ilimit) { |
441 | U32 const current2 = (U32)(ip-base); |
442 | U32 const repIndex2 = current2 - offset_2; |
443 | const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? |
444 | dictBase + repIndex2 - dictIndexDelta : |
445 | base + repIndex2; |
446 | if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) |
447 | && (MEM_read32(memPtr: repMatch2) == MEM_read32(memPtr: ip)) ) { |
448 | const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; |
449 | size_t const repLength2 = ZSTD_count_2segments(ip: ip+4, match: repMatch2+4, iEnd: iend, mEnd: repEnd2, iStart: prefixLowest) + 4; |
450 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
451 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: 0, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: repLength2); |
452 | hashSmall[ZSTD_hashPtr(p: ip, hBits: hBitsS, mls)] = current2; |
453 | hashLong[ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8)] = current2; |
454 | ip += repLength2; |
455 | anchor = ip; |
456 | continue; |
457 | } |
458 | break; |
459 | } |
460 | } |
461 | } /* while (ip < ilimit) */ |
462 | |
463 | /* save reps for next block */ |
464 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
465 | rep[1] = offset_2 ? offset_2 : offsetSaved; |
466 | |
467 | /* Return the last literals size */ |
468 | return (size_t)(iend - anchor); |
469 | } |
470 | |
/* ZSTD_GEN_DFAST_FN() :
 * Generates a static, non-template wrapper for one (dictMode, mls)
 * specialization of ZSTD_compressBlock_doubleFast_*_generic(). The mls
 * argument becomes a compile-time constant inside the generic function,
 * letting the compiler specialize the hot loop per minMatch value. */
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                    \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(         \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
            void const* src, size_t srcSize)                                     \
    {                                                                            \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

/* Instantiate the noDict specializations for minMatch 4..7 */
ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

/* Instantiate the dictMatchState specializations for minMatch 4..7 */
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
488 | |
489 | |
490 | size_t ZSTD_compressBlock_doubleFast( |
491 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
492 | void const* src, size_t srcSize) |
493 | { |
494 | const U32 mls = ms->cParams.minMatch; |
495 | switch(mls) |
496 | { |
497 | default: /* includes case 3 */ |
498 | case 4 : |
499 | return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); |
500 | case 5 : |
501 | return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); |
502 | case 6 : |
503 | return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); |
504 | case 7 : |
505 | return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); |
506 | } |
507 | } |
508 | |
509 | |
510 | size_t ZSTD_compressBlock_doubleFast_dictMatchState( |
511 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
512 | void const* src, size_t srcSize) |
513 | { |
514 | const U32 mls = ms->cParams.minMatch; |
515 | switch(mls) |
516 | { |
517 | default: /* includes case 3 */ |
518 | case 4 : |
519 | return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); |
520 | case 5 : |
521 | return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); |
522 | case 6 : |
523 | return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); |
524 | case 7 : |
525 | return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); |
526 | } |
527 | } |
528 | |
529 | |
530 | static size_t ZSTD_compressBlock_doubleFast_extDict_generic( |
531 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
532 | void const* src, size_t srcSize, |
533 | U32 const mls /* template */) |
534 | { |
535 | ZSTD_compressionParameters const* cParams = &ms->cParams; |
536 | U32* const hashLong = ms->hashTable; |
537 | U32 const hBitsL = cParams->hashLog; |
538 | U32* const hashSmall = ms->chainTable; |
539 | U32 const hBitsS = cParams->chainLog; |
540 | const BYTE* const istart = (const BYTE*)src; |
541 | const BYTE* ip = istart; |
542 | const BYTE* anchor = istart; |
543 | const BYTE* const iend = istart + srcSize; |
544 | const BYTE* const ilimit = iend - 8; |
545 | const BYTE* const base = ms->window.base; |
546 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
547 | const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, curr: endIndex, windowLog: cParams->windowLog); |
548 | const U32 dictStartIndex = lowLimit; |
549 | const U32 dictLimit = ms->window.dictLimit; |
550 | const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; |
551 | const BYTE* const prefixStart = base + prefixStartIndex; |
552 | const BYTE* const dictBase = ms->window.dictBase; |
553 | const BYTE* const dictStart = dictBase + dictStartIndex; |
554 | const BYTE* const dictEnd = dictBase + prefixStartIndex; |
555 | U32 offset_1=rep[0], offset_2=rep[1]; |
556 | |
557 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)" , srcSize); |
558 | |
559 | /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ |
560 | if (prefixStartIndex == dictStartIndex) |
561 | return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); |
562 | |
563 | /* Search Loop */ |
564 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ |
565 | const size_t hSmall = ZSTD_hashPtr(p: ip, hBits: hBitsS, mls); |
566 | const U32 matchIndex = hashSmall[hSmall]; |
567 | const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; |
568 | const BYTE* match = matchBase + matchIndex; |
569 | |
570 | const size_t hLong = ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8); |
571 | const U32 matchLongIndex = hashLong[hLong]; |
572 | const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; |
573 | const BYTE* matchLong = matchLongBase + matchLongIndex; |
574 | |
575 | const U32 curr = (U32)(ip-base); |
576 | const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ |
577 | const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; |
578 | const BYTE* const repMatch = repBase + repIndex; |
579 | size_t mLength; |
580 | hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ |
581 | |
582 | if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ |
583 | & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ |
584 | && (MEM_read32(memPtr: repMatch) == MEM_read32(memPtr: ip+1)) ) { |
585 | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
586 | mLength = ZSTD_count_2segments(ip: ip+1+4, match: repMatch+4, iEnd: iend, mEnd: repMatchEnd, iStart: prefixStart) + 4; |
587 | ip++; |
588 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: mLength); |
589 | } else { |
590 | if ((matchLongIndex > dictStartIndex) && (MEM_read64(memPtr: matchLong) == MEM_read64(memPtr: ip))) { |
591 | const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; |
592 | const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; |
593 | U32 offset; |
594 | mLength = ZSTD_count_2segments(ip: ip+8, match: matchLong+8, iEnd: iend, mEnd: matchEnd, iStart: prefixStart) + 8; |
595 | offset = curr - matchLongIndex; |
596 | while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
597 | offset_2 = offset_1; |
598 | offset_1 = offset; |
599 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength); |
600 | |
601 | } else if ((matchIndex > dictStartIndex) && (MEM_read32(memPtr: match) == MEM_read32(memPtr: ip))) { |
602 | size_t const h3 = ZSTD_hashPtr(p: ip+1, hBits: hBitsL, mls: 8); |
603 | U32 const matchIndex3 = hashLong[h3]; |
604 | const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; |
605 | const BYTE* match3 = match3Base + matchIndex3; |
606 | U32 offset; |
607 | hashLong[h3] = curr + 1; |
608 | if ( (matchIndex3 > dictStartIndex) && (MEM_read64(memPtr: match3) == MEM_read64(memPtr: ip+1)) ) { |
609 | const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; |
610 | const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; |
611 | mLength = ZSTD_count_2segments(ip: ip+9, match: match3+8, iEnd: iend, mEnd: matchEnd, iStart: prefixStart) + 8; |
612 | ip++; |
613 | offset = curr+1 - matchIndex3; |
614 | while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ |
615 | } else { |
616 | const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; |
617 | const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; |
618 | mLength = ZSTD_count_2segments(ip: ip+4, match: match+4, iEnd: iend, mEnd: matchEnd, iStart: prefixStart) + 4; |
619 | offset = curr - matchIndex; |
620 | while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
621 | } |
622 | offset_2 = offset_1; |
623 | offset_1 = offset; |
624 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength); |
625 | |
626 | } else { |
627 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
628 | continue; |
629 | } } |
630 | |
631 | /* move to next sequence start */ |
632 | ip += mLength; |
633 | anchor = ip; |
634 | |
635 | if (ip <= ilimit) { |
636 | /* Complementary insertion */ |
637 | /* done after iLimit test, as candidates could be > iend-8 */ |
638 | { U32 const indexToInsert = curr+2; |
639 | hashLong[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsL, mls: 8)] = indexToInsert; |
640 | hashLong[ZSTD_hashPtr(p: ip-2, hBits: hBitsL, mls: 8)] = (U32)(ip-2-base); |
641 | hashSmall[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsS, mls)] = indexToInsert; |
642 | hashSmall[ZSTD_hashPtr(p: ip-1, hBits: hBitsS, mls)] = (U32)(ip-1-base); |
643 | } |
644 | |
645 | /* check immediate repcode */ |
646 | while (ip <= ilimit) { |
647 | U32 const current2 = (U32)(ip-base); |
648 | U32 const repIndex2 = current2 - offset_2; |
649 | const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; |
650 | if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ |
651 | & (offset_2 <= current2 - dictStartIndex)) |
652 | && (MEM_read32(memPtr: repMatch2) == MEM_read32(memPtr: ip)) ) { |
653 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
654 | size_t const repLength2 = ZSTD_count_2segments(ip: ip+4, match: repMatch2+4, iEnd: iend, mEnd: repEnd2, iStart: prefixStart) + 4; |
655 | U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
656 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: 0, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: repLength2); |
657 | hashSmall[ZSTD_hashPtr(p: ip, hBits: hBitsS, mls)] = current2; |
658 | hashLong[ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8)] = current2; |
659 | ip += repLength2; |
660 | anchor = ip; |
661 | continue; |
662 | } |
663 | break; |
664 | } } } |
665 | |
666 | /* save reps for next block */ |
667 | rep[0] = offset_1; |
668 | rep[1] = offset_2; |
669 | |
670 | /* Return the last literals size */ |
671 | return (size_t)(iend - anchor); |
672 | } |
673 | |
/* Instantiate the extDict specializations for minMatch 4..7 */
ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)
678 | |
679 | size_t ZSTD_compressBlock_doubleFast_extDict( |
680 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
681 | void const* src, size_t srcSize) |
682 | { |
683 | U32 const mls = ms->cParams.minMatch; |
684 | switch(mls) |
685 | { |
686 | default: /* includes case 3 */ |
687 | case 4 : |
688 | return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); |
689 | case 5 : |
690 | return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); |
691 | case 6 : |
692 | return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); |
693 | case 7 : |
694 | return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); |
695 | } |
696 | } |
697 | |