1 | /* |
2 | * Copyright (c) Yann Collet, Facebook, Inc. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | * in the COPYING file in the root directory of this source tree). |
8 | * You may select, at your option, one of the above-listed licenses. |
9 | */ |
10 | |
11 | #include "zstd_compress_internal.h" |
12 | #include "zstd_double_fast.h" |
13 | |
14 | |
15 | void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, |
16 | void const* end, ZSTD_dictTableLoadMethod_e dtlm) |
17 | { |
18 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
19 | U32* const hashLarge = ms->hashTable; |
20 | U32 const hBitsL = cParams->hashLog; |
21 | U32 const mls = cParams->minMatch; |
22 | U32* const hashSmall = ms->chainTable; |
23 | U32 const hBitsS = cParams->chainLog; |
24 | const BYTE* const base = ms->window.base; |
25 | const BYTE* ip = base + ms->nextToUpdate; |
26 | const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; |
27 | const U32 fastHashFillStep = 3; |
28 | |
29 | /* Always insert every fastHashFillStep position into the hash tables. |
30 | * Insert the other positions into the large hash table if their entry |
31 | * is empty. |
32 | */ |
33 | for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { |
34 | U32 const curr = (U32)(ip - base); |
35 | U32 i; |
36 | for (i = 0; i < fastHashFillStep; ++i) { |
37 | size_t const smHash = ZSTD_hashPtr(p: ip + i, hBits: hBitsS, mls); |
38 | size_t const lgHash = ZSTD_hashPtr(p: ip + i, hBits: hBitsL, mls: 8); |
39 | if (i == 0) |
40 | hashSmall[smHash] = curr + i; |
41 | if (i == 0 || hashLarge[lgHash] == 0) |
42 | hashLarge[lgHash] = curr + i; |
43 | /* Only load extra positions for ZSTD_dtlm_full */ |
44 | if (dtlm == ZSTD_dtlm_fast) |
45 | break; |
46 | } } |
47 | } |
48 | |
49 | |
50 | FORCE_INLINE_TEMPLATE |
51 | size_t ZSTD_compressBlock_doubleFast_noDict_generic( |
52 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
53 | void const* src, size_t srcSize, U32 const mls /* template */) |
54 | { |
55 | ZSTD_compressionParameters const* cParams = &ms->cParams; |
56 | U32* const hashLong = ms->hashTable; |
57 | const U32 hBitsL = cParams->hashLog; |
58 | U32* const hashSmall = ms->chainTable; |
59 | const U32 hBitsS = cParams->chainLog; |
60 | const BYTE* const base = ms->window.base; |
61 | const BYTE* const istart = (const BYTE*)src; |
62 | const BYTE* anchor = istart; |
63 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
64 | /* presumes that, if there is a dictionary, it must be using Attach mode */ |
65 | const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, curr: endIndex, windowLog: cParams->windowLog); |
66 | const BYTE* const prefixLowest = base + prefixLowestIndex; |
67 | const BYTE* const iend = istart + srcSize; |
68 | const BYTE* const ilimit = iend - HASH_READ_SIZE; |
69 | U32 offset_1=rep[0], offset_2=rep[1]; |
70 | U32 offsetSaved = 0; |
71 | |
72 | size_t mLength; |
73 | U32 offset; |
74 | U32 curr; |
75 | |
76 | /* how many positions to search before increasing step size */ |
77 | const size_t kStepIncr = 1 << kSearchStrength; |
78 | /* the position at which to increment the step size if no match is found */ |
79 | const BYTE* nextStep; |
80 | size_t step; /* the current step size */ |
81 | |
82 | size_t hl0; /* the long hash at ip */ |
83 | size_t hl1; /* the long hash at ip1 */ |
84 | |
85 | U32 idxl0; /* the long match index for ip */ |
86 | U32 idxl1; /* the long match index for ip1 */ |
87 | |
88 | const BYTE* matchl0; /* the long match for ip */ |
89 | const BYTE* matchs0; /* the short match for ip */ |
90 | const BYTE* matchl1; /* the long match for ip1 */ |
91 | |
92 | const BYTE* ip = istart; /* the current position */ |
93 | const BYTE* ip1; /* the next position */ |
94 | |
95 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic" ); |
96 | |
97 | /* init */ |
98 | ip += ((ip - prefixLowest) == 0); |
99 | { |
100 | U32 const current = (U32)(ip - base); |
101 | U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr: current, windowLog: cParams->windowLog); |
102 | U32 const maxRep = current - windowLow; |
103 | if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; |
104 | if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; |
105 | } |
106 | |
107 | /* Outer Loop: one iteration per match found and stored */ |
108 | while (1) { |
109 | step = 1; |
110 | nextStep = ip + kStepIncr; |
111 | ip1 = ip + step; |
112 | |
113 | if (ip1 > ilimit) { |
114 | goto _cleanup; |
115 | } |
116 | |
117 | hl0 = ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8); |
118 | idxl0 = hashLong[hl0]; |
119 | matchl0 = base + idxl0; |
120 | |
121 | /* Inner Loop: one iteration per search / position */ |
122 | do { |
123 | const size_t hs0 = ZSTD_hashPtr(p: ip, hBits: hBitsS, mls); |
124 | const U32 idxs0 = hashSmall[hs0]; |
125 | curr = (U32)(ip-base); |
126 | matchs0 = base + idxs0; |
127 | |
128 | hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ |
129 | |
130 | /* check noDict repcode */ |
131 | if ((offset_1 > 0) & (MEM_read32(memPtr: ip+1-offset_1) == MEM_read32(memPtr: ip+1))) { |
132 | mLength = ZSTD_count(pIn: ip+1+4, pMatch: ip+1+4-offset_1, pInLimit: iend) + 4; |
133 | ip++; |
134 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: mLength); |
135 | goto _match_stored; |
136 | } |
137 | |
138 | hl1 = ZSTD_hashPtr(p: ip1, hBits: hBitsL, mls: 8); |
139 | |
140 | if (idxl0 > prefixLowestIndex) { |
141 | /* check prefix long match */ |
142 | if (MEM_read64(memPtr: matchl0) == MEM_read64(memPtr: ip)) { |
143 | mLength = ZSTD_count(pIn: ip+8, pMatch: matchl0+8, pInLimit: iend) + 8; |
144 | offset = (U32)(ip-matchl0); |
145 | while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ |
146 | goto _match_found; |
147 | } |
148 | } |
149 | |
150 | idxl1 = hashLong[hl1]; |
151 | matchl1 = base + idxl1; |
152 | |
153 | if (idxs0 > prefixLowestIndex) { |
154 | /* check prefix short match */ |
155 | if (MEM_read32(memPtr: matchs0) == MEM_read32(memPtr: ip)) { |
156 | goto _search_next_long; |
157 | } |
158 | } |
159 | |
160 | if (ip1 >= nextStep) { |
161 | PREFETCH_L1(ip1 + 64); |
162 | PREFETCH_L1(ip1 + 128); |
163 | step++; |
164 | nextStep += kStepIncr; |
165 | } |
166 | ip = ip1; |
167 | ip1 += step; |
168 | |
169 | hl0 = hl1; |
170 | idxl0 = idxl1; |
171 | matchl0 = matchl1; |
172 | #if defined(__aarch64__) |
173 | PREFETCH_L1(ip+256); |
174 | #endif |
175 | } while (ip1 <= ilimit); |
176 | |
177 | _cleanup: |
178 | /* save reps for next block */ |
179 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
180 | rep[1] = offset_2 ? offset_2 : offsetSaved; |
181 | |
182 | /* Return the last literals size */ |
183 | return (size_t)(iend - anchor); |
184 | |
185 | _search_next_long: |
186 | |
187 | /* check prefix long +1 match */ |
188 | if (idxl1 > prefixLowestIndex) { |
189 | if (MEM_read64(memPtr: matchl1) == MEM_read64(memPtr: ip1)) { |
190 | ip = ip1; |
191 | mLength = ZSTD_count(pIn: ip+8, pMatch: matchl1+8, pInLimit: iend) + 8; |
192 | offset = (U32)(ip-matchl1); |
193 | while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ |
194 | goto _match_found; |
195 | } |
196 | } |
197 | |
198 | /* if no long +1 match, explore the short match we found */ |
199 | mLength = ZSTD_count(pIn: ip+4, pMatch: matchs0+4, pInLimit: iend) + 4; |
200 | offset = (U32)(ip - matchs0); |
201 | while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ |
202 | |
203 | /* fall-through */ |
204 | |
205 | _match_found: /* requires ip, offset, mLength */ |
206 | offset_2 = offset_1; |
207 | offset_1 = offset; |
208 | |
209 | if (step < 4) { |
210 | /* It is unsafe to write this value back to the hashtable when ip1 is |
211 | * greater than or equal to the new ip we will have after we're done |
212 | * processing this match. Rather than perform that test directly |
213 | * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler |
214 | * more predictable test. The minmatch even if we take a short match is |
215 | * 4 bytes, so as long as step, the distance between ip and ip1 |
216 | * (initially) is less than 4, we know ip1 < new ip. */ |
217 | hashLong[hl1] = (U32)(ip1 - base); |
218 | } |
219 | |
220 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength); |
221 | |
222 | _match_stored: |
223 | /* match found */ |
224 | ip += mLength; |
225 | anchor = ip; |
226 | |
227 | if (ip <= ilimit) { |
228 | /* Complementary insertion */ |
229 | /* done after iLimit test, as candidates could be > iend-8 */ |
230 | { U32 const indexToInsert = curr+2; |
231 | hashLong[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsL, mls: 8)] = indexToInsert; |
232 | hashLong[ZSTD_hashPtr(p: ip-2, hBits: hBitsL, mls: 8)] = (U32)(ip-2-base); |
233 | hashSmall[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsS, mls)] = indexToInsert; |
234 | hashSmall[ZSTD_hashPtr(p: ip-1, hBits: hBitsS, mls)] = (U32)(ip-1-base); |
235 | } |
236 | |
237 | /* check immediate repcode */ |
238 | while ( (ip <= ilimit) |
239 | && ( (offset_2>0) |
240 | & (MEM_read32(memPtr: ip) == MEM_read32(memPtr: ip - offset_2)) )) { |
241 | /* store sequence */ |
242 | size_t const rLength = ZSTD_count(pIn: ip+4, pMatch: ip+4-offset_2, pInLimit: iend) + 4; |
243 | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ |
244 | hashSmall[ZSTD_hashPtr(p: ip, hBits: hBitsS, mls)] = (U32)(ip-base); |
245 | hashLong[ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8)] = (U32)(ip-base); |
246 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: 0, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: rLength); |
247 | ip += rLength; |
248 | anchor = ip; |
249 | continue; /* faster when present ... (?) */ |
250 | } |
251 | } |
252 | } |
253 | } |
254 | |
255 | |
256 | FORCE_INLINE_TEMPLATE |
257 | size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( |
258 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
259 | void const* src, size_t srcSize, |
260 | U32 const mls /* template */) |
261 | { |
262 | ZSTD_compressionParameters const* cParams = &ms->cParams; |
263 | U32* const hashLong = ms->hashTable; |
264 | const U32 hBitsL = cParams->hashLog; |
265 | U32* const hashSmall = ms->chainTable; |
266 | const U32 hBitsS = cParams->chainLog; |
267 | const BYTE* const base = ms->window.base; |
268 | const BYTE* const istart = (const BYTE*)src; |
269 | const BYTE* ip = istart; |
270 | const BYTE* anchor = istart; |
271 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
272 | /* presumes that, if there is a dictionary, it must be using Attach mode */ |
273 | const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, curr: endIndex, windowLog: cParams->windowLog); |
274 | const BYTE* const prefixLowest = base + prefixLowestIndex; |
275 | const BYTE* const iend = istart + srcSize; |
276 | const BYTE* const ilimit = iend - HASH_READ_SIZE; |
277 | U32 offset_1=rep[0], offset_2=rep[1]; |
278 | U32 offsetSaved = 0; |
279 | |
280 | const ZSTD_matchState_t* const dms = ms->dictMatchState; |
281 | const ZSTD_compressionParameters* const dictCParams = &dms->cParams; |
282 | const U32* const dictHashLong = dms->hashTable; |
283 | const U32* const dictHashSmall = dms->chainTable; |
284 | const U32 dictStartIndex = dms->window.dictLimit; |
285 | const BYTE* const dictBase = dms->window.base; |
286 | const BYTE* const dictStart = dictBase + dictStartIndex; |
287 | const BYTE* const dictEnd = dms->window.nextSrc; |
288 | const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); |
289 | const U32 dictHBitsL = dictCParams->hashLog; |
290 | const U32 dictHBitsS = dictCParams->chainLog; |
291 | const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); |
292 | |
293 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic" ); |
294 | |
295 | /* if a dictionary is attached, it must be within window range */ |
296 | assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); |
297 | |
298 | /* init */ |
299 | ip += (dictAndPrefixLength == 0); |
300 | |
301 | /* dictMatchState repCode checks don't currently handle repCode == 0 |
302 | * disabling. */ |
303 | assert(offset_1 <= dictAndPrefixLength); |
304 | assert(offset_2 <= dictAndPrefixLength); |
305 | |
306 | /* Main Search Loop */ |
307 | while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ |
308 | size_t mLength; |
309 | U32 offset; |
310 | size_t const h2 = ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8); |
311 | size_t const h = ZSTD_hashPtr(p: ip, hBits: hBitsS, mls); |
312 | size_t const dictHL = ZSTD_hashPtr(p: ip, hBits: dictHBitsL, mls: 8); |
313 | size_t const dictHS = ZSTD_hashPtr(p: ip, hBits: dictHBitsS, mls); |
314 | U32 const curr = (U32)(ip-base); |
315 | U32 const matchIndexL = hashLong[h2]; |
316 | U32 matchIndexS = hashSmall[h]; |
317 | const BYTE* matchLong = base + matchIndexL; |
318 | const BYTE* match = base + matchIndexS; |
319 | const U32 repIndex = curr + 1 - offset_1; |
320 | const BYTE* repMatch = (repIndex < prefixLowestIndex) ? |
321 | dictBase + (repIndex - dictIndexDelta) : |
322 | base + repIndex; |
323 | hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ |
324 | |
325 | /* check repcode */ |
326 | if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) |
327 | && (MEM_read32(memPtr: repMatch) == MEM_read32(memPtr: ip+1)) ) { |
328 | const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; |
329 | mLength = ZSTD_count_2segments(ip: ip+1+4, match: repMatch+4, iEnd: iend, mEnd: repMatchEnd, iStart: prefixLowest) + 4; |
330 | ip++; |
331 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: mLength); |
332 | goto _match_stored; |
333 | } |
334 | |
335 | if (matchIndexL > prefixLowestIndex) { |
336 | /* check prefix long match */ |
337 | if (MEM_read64(memPtr: matchLong) == MEM_read64(memPtr: ip)) { |
338 | mLength = ZSTD_count(pIn: ip+8, pMatch: matchLong+8, pInLimit: iend) + 8; |
339 | offset = (U32)(ip-matchLong); |
340 | while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
341 | goto _match_found; |
342 | } |
343 | } else { |
344 | /* check dictMatchState long match */ |
345 | U32 const dictMatchIndexL = dictHashLong[dictHL]; |
346 | const BYTE* dictMatchL = dictBase + dictMatchIndexL; |
347 | assert(dictMatchL < dictEnd); |
348 | |
349 | if (dictMatchL > dictStart && MEM_read64(memPtr: dictMatchL) == MEM_read64(memPtr: ip)) { |
350 | mLength = ZSTD_count_2segments(ip: ip+8, match: dictMatchL+8, iEnd: iend, mEnd: dictEnd, iStart: prefixLowest) + 8; |
351 | offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); |
352 | while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ |
353 | goto _match_found; |
354 | } } |
355 | |
356 | if (matchIndexS > prefixLowestIndex) { |
357 | /* check prefix short match */ |
358 | if (MEM_read32(memPtr: match) == MEM_read32(memPtr: ip)) { |
359 | goto _search_next_long; |
360 | } |
361 | } else { |
362 | /* check dictMatchState short match */ |
363 | U32 const dictMatchIndexS = dictHashSmall[dictHS]; |
364 | match = dictBase + dictMatchIndexS; |
365 | matchIndexS = dictMatchIndexS + dictIndexDelta; |
366 | |
367 | if (match > dictStart && MEM_read32(memPtr: match) == MEM_read32(memPtr: ip)) { |
368 | goto _search_next_long; |
369 | } } |
370 | |
371 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
372 | #if defined(__aarch64__) |
373 | PREFETCH_L1(ip+256); |
374 | #endif |
375 | continue; |
376 | |
377 | _search_next_long: |
378 | |
379 | { size_t const hl3 = ZSTD_hashPtr(p: ip+1, hBits: hBitsL, mls: 8); |
380 | size_t const dictHLNext = ZSTD_hashPtr(p: ip+1, hBits: dictHBitsL, mls: 8); |
381 | U32 const matchIndexL3 = hashLong[hl3]; |
382 | const BYTE* matchL3 = base + matchIndexL3; |
383 | hashLong[hl3] = curr + 1; |
384 | |
385 | /* check prefix long +1 match */ |
386 | if (matchIndexL3 > prefixLowestIndex) { |
387 | if (MEM_read64(memPtr: matchL3) == MEM_read64(memPtr: ip+1)) { |
388 | mLength = ZSTD_count(pIn: ip+9, pMatch: matchL3+8, pInLimit: iend) + 8; |
389 | ip++; |
390 | offset = (U32)(ip-matchL3); |
391 | while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ |
392 | goto _match_found; |
393 | } |
394 | } else { |
395 | /* check dict long +1 match */ |
396 | U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; |
397 | const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; |
398 | assert(dictMatchL3 < dictEnd); |
399 | if (dictMatchL3 > dictStart && MEM_read64(memPtr: dictMatchL3) == MEM_read64(memPtr: ip+1)) { |
400 | mLength = ZSTD_count_2segments(ip: ip+1+8, match: dictMatchL3+8, iEnd: iend, mEnd: dictEnd, iStart: prefixLowest) + 8; |
401 | ip++; |
402 | offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); |
403 | while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ |
404 | goto _match_found; |
405 | } } } |
406 | |
407 | /* if no long +1 match, explore the short match we found */ |
408 | if (matchIndexS < prefixLowestIndex) { |
409 | mLength = ZSTD_count_2segments(ip: ip+4, match: match+4, iEnd: iend, mEnd: dictEnd, iStart: prefixLowest) + 4; |
410 | offset = (U32)(curr - matchIndexS); |
411 | while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
412 | } else { |
413 | mLength = ZSTD_count(pIn: ip+4, pMatch: match+4, pInLimit: iend) + 4; |
414 | offset = (U32)(ip - match); |
415 | while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
416 | } |
417 | |
418 | _match_found: |
419 | offset_2 = offset_1; |
420 | offset_1 = offset; |
421 | |
422 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength); |
423 | |
424 | _match_stored: |
425 | /* match found */ |
426 | ip += mLength; |
427 | anchor = ip; |
428 | |
429 | if (ip <= ilimit) { |
430 | /* Complementary insertion */ |
431 | /* done after iLimit test, as candidates could be > iend-8 */ |
432 | { U32 const indexToInsert = curr+2; |
433 | hashLong[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsL, mls: 8)] = indexToInsert; |
434 | hashLong[ZSTD_hashPtr(p: ip-2, hBits: hBitsL, mls: 8)] = (U32)(ip-2-base); |
435 | hashSmall[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsS, mls)] = indexToInsert; |
436 | hashSmall[ZSTD_hashPtr(p: ip-1, hBits: hBitsS, mls)] = (U32)(ip-1-base); |
437 | } |
438 | |
439 | /* check immediate repcode */ |
440 | while (ip <= ilimit) { |
441 | U32 const current2 = (U32)(ip-base); |
442 | U32 const repIndex2 = current2 - offset_2; |
443 | const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? |
444 | dictBase + repIndex2 - dictIndexDelta : |
445 | base + repIndex2; |
446 | if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) |
447 | && (MEM_read32(memPtr: repMatch2) == MEM_read32(memPtr: ip)) ) { |
448 | const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; |
449 | size_t const repLength2 = ZSTD_count_2segments(ip: ip+4, match: repMatch2+4, iEnd: iend, mEnd: repEnd2, iStart: prefixLowest) + 4; |
450 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
451 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: 0, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: repLength2); |
452 | hashSmall[ZSTD_hashPtr(p: ip, hBits: hBitsS, mls)] = current2; |
453 | hashLong[ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8)] = current2; |
454 | ip += repLength2; |
455 | anchor = ip; |
456 | continue; |
457 | } |
458 | break; |
459 | } |
460 | } |
461 | } /* while (ip < ilimit) */ |
462 | |
463 | /* save reps for next block */ |
464 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
465 | rep[1] = offset_2 ? offset_2 : offsetSaved; |
466 | |
467 | /* Return the last literals size */ |
468 | return (size_t)(iend - anchor); |
469 | } |
470 | |
/* ZSTD_GEN_DFAST_FN() :
 * Generates a static, non-template wrapper for one (dictMode, mls)
 * specialization of ZSTD_compressBlock_doubleFast_*_generic(). The mls
 * argument becomes a compile-time constant inside the generic function,
 * letting the compiler specialize the hot loop per minMatch value. */
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                    \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(         \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
            void const* src, size_t srcSize)                                     \
    {                                                                            \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

/* Instantiate the noDict specializations for minMatch 4..7 */
ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

/* Instantiate the dictMatchState specializations for minMatch 4..7 */
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
488 | |
489 | |
490 | size_t ZSTD_compressBlock_doubleFast( |
491 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
492 | void const* src, size_t srcSize) |
493 | { |
494 | const U32 mls = ms->cParams.minMatch; |
495 | switch(mls) |
496 | { |
497 | default: /* includes case 3 */ |
498 | case 4 : |
499 | return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); |
500 | case 5 : |
501 | return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); |
502 | case 6 : |
503 | return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); |
504 | case 7 : |
505 | return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); |
506 | } |
507 | } |
508 | |
509 | |
510 | size_t ZSTD_compressBlock_doubleFast_dictMatchState( |
511 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
512 | void const* src, size_t srcSize) |
513 | { |
514 | const U32 mls = ms->cParams.minMatch; |
515 | switch(mls) |
516 | { |
517 | default: /* includes case 3 */ |
518 | case 4 : |
519 | return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); |
520 | case 5 : |
521 | return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); |
522 | case 6 : |
523 | return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); |
524 | case 7 : |
525 | return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); |
526 | } |
527 | } |
528 | |
529 | |
530 | static size_t ZSTD_compressBlock_doubleFast_extDict_generic( |
531 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
532 | void const* src, size_t srcSize, |
533 | U32 const mls /* template */) |
534 | { |
535 | ZSTD_compressionParameters const* cParams = &ms->cParams; |
536 | U32* const hashLong = ms->hashTable; |
537 | U32 const hBitsL = cParams->hashLog; |
538 | U32* const hashSmall = ms->chainTable; |
539 | U32 const hBitsS = cParams->chainLog; |
540 | const BYTE* const istart = (const BYTE*)src; |
541 | const BYTE* ip = istart; |
542 | const BYTE* anchor = istart; |
543 | const BYTE* const iend = istart + srcSize; |
544 | const BYTE* const ilimit = iend - 8; |
545 | const BYTE* const base = ms->window.base; |
546 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
547 | const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, curr: endIndex, windowLog: cParams->windowLog); |
548 | const U32 dictStartIndex = lowLimit; |
549 | const U32 dictLimit = ms->window.dictLimit; |
550 | const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; |
551 | const BYTE* const prefixStart = base + prefixStartIndex; |
552 | const BYTE* const dictBase = ms->window.dictBase; |
553 | const BYTE* const dictStart = dictBase + dictStartIndex; |
554 | const BYTE* const dictEnd = dictBase + prefixStartIndex; |
555 | U32 offset_1=rep[0], offset_2=rep[1]; |
556 | |
557 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)" , srcSize); |
558 | |
559 | /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ |
560 | if (prefixStartIndex == dictStartIndex) |
561 | return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); |
562 | |
563 | /* Search Loop */ |
564 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ |
565 | const size_t hSmall = ZSTD_hashPtr(p: ip, hBits: hBitsS, mls); |
566 | const U32 matchIndex = hashSmall[hSmall]; |
567 | const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; |
568 | const BYTE* match = matchBase + matchIndex; |
569 | |
570 | const size_t hLong = ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8); |
571 | const U32 matchLongIndex = hashLong[hLong]; |
572 | const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; |
573 | const BYTE* matchLong = matchLongBase + matchLongIndex; |
574 | |
575 | const U32 curr = (U32)(ip-base); |
576 | const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ |
577 | const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; |
578 | const BYTE* const repMatch = repBase + repIndex; |
579 | size_t mLength; |
580 | hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ |
581 | |
582 | if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ |
583 | & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ |
584 | && (MEM_read32(memPtr: repMatch) == MEM_read32(memPtr: ip+1)) ) { |
585 | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
586 | mLength = ZSTD_count_2segments(ip: ip+1+4, match: repMatch+4, iEnd: iend, mEnd: repMatchEnd, iStart: prefixStart) + 4; |
587 | ip++; |
588 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: mLength); |
589 | } else { |
590 | if ((matchLongIndex > dictStartIndex) && (MEM_read64(memPtr: matchLong) == MEM_read64(memPtr: ip))) { |
591 | const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; |
592 | const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; |
593 | U32 offset; |
594 | mLength = ZSTD_count_2segments(ip: ip+8, match: matchLong+8, iEnd: iend, mEnd: matchEnd, iStart: prefixStart) + 8; |
595 | offset = curr - matchLongIndex; |
596 | while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
597 | offset_2 = offset_1; |
598 | offset_1 = offset; |
599 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength); |
600 | |
601 | } else if ((matchIndex > dictStartIndex) && (MEM_read32(memPtr: match) == MEM_read32(memPtr: ip))) { |
602 | size_t const h3 = ZSTD_hashPtr(p: ip+1, hBits: hBitsL, mls: 8); |
603 | U32 const matchIndex3 = hashLong[h3]; |
604 | const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; |
605 | const BYTE* match3 = match3Base + matchIndex3; |
606 | U32 offset; |
607 | hashLong[h3] = curr + 1; |
608 | if ( (matchIndex3 > dictStartIndex) && (MEM_read64(memPtr: match3) == MEM_read64(memPtr: ip+1)) ) { |
609 | const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; |
610 | const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; |
611 | mLength = ZSTD_count_2segments(ip: ip+9, match: match3+8, iEnd: iend, mEnd: matchEnd, iStart: prefixStart) + 8; |
612 | ip++; |
613 | offset = curr+1 - matchIndex3; |
614 | while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ |
615 | } else { |
616 | const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; |
617 | const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; |
618 | mLength = ZSTD_count_2segments(ip: ip+4, match: match+4, iEnd: iend, mEnd: matchEnd, iStart: prefixStart) + 4; |
619 | offset = curr - matchIndex; |
620 | while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
621 | } |
622 | offset_2 = offset_1; |
623 | offset_1 = offset; |
624 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: (size_t)(ip-anchor), literals: anchor, litLimit: iend, STORE_OFFSET(offset), matchLength: mLength); |
625 | |
626 | } else { |
627 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
628 | continue; |
629 | } } |
630 | |
631 | /* move to next sequence start */ |
632 | ip += mLength; |
633 | anchor = ip; |
634 | |
635 | if (ip <= ilimit) { |
636 | /* Complementary insertion */ |
637 | /* done after iLimit test, as candidates could be > iend-8 */ |
638 | { U32 const indexToInsert = curr+2; |
639 | hashLong[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsL, mls: 8)] = indexToInsert; |
640 | hashLong[ZSTD_hashPtr(p: ip-2, hBits: hBitsL, mls: 8)] = (U32)(ip-2-base); |
641 | hashSmall[ZSTD_hashPtr(p: base+indexToInsert, hBits: hBitsS, mls)] = indexToInsert; |
642 | hashSmall[ZSTD_hashPtr(p: ip-1, hBits: hBitsS, mls)] = (U32)(ip-1-base); |
643 | } |
644 | |
645 | /* check immediate repcode */ |
646 | while (ip <= ilimit) { |
647 | U32 const current2 = (U32)(ip-base); |
648 | U32 const repIndex2 = current2 - offset_2; |
649 | const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; |
650 | if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ |
651 | & (offset_2 <= current2 - dictStartIndex)) |
652 | && (MEM_read32(memPtr: repMatch2) == MEM_read32(memPtr: ip)) ) { |
653 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
654 | size_t const repLength2 = ZSTD_count_2segments(ip: ip+4, match: repMatch2+4, iEnd: iend, mEnd: repEnd2, iStart: prefixStart) + 4; |
655 | U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
656 | ZSTD_storeSeq(seqStorePtr: seqStore, litLength: 0, literals: anchor, litLimit: iend, STORE_REPCODE_1, matchLength: repLength2); |
657 | hashSmall[ZSTD_hashPtr(p: ip, hBits: hBitsS, mls)] = current2; |
658 | hashLong[ZSTD_hashPtr(p: ip, hBits: hBitsL, mls: 8)] = current2; |
659 | ip += repLength2; |
660 | anchor = ip; |
661 | continue; |
662 | } |
663 | break; |
664 | } } } |
665 | |
666 | /* save reps for next block */ |
667 | rep[0] = offset_1; |
668 | rep[1] = offset_2; |
669 | |
670 | /* Return the last literals size */ |
671 | return (size_t)(iend - anchor); |
672 | } |
673 | |
/* Instantiate the extDict specializations for minMatch 4..7 */
ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)
678 | |
679 | size_t ZSTD_compressBlock_doubleFast_extDict( |
680 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
681 | void const* src, size_t srcSize) |
682 | { |
683 | U32 const mls = ms->cParams.minMatch; |
684 | switch(mls) |
685 | { |
686 | default: /* includes case 3 */ |
687 | case 4 : |
688 | return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); |
689 | case 5 : |
690 | return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); |
691 | case 6 : |
692 | return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); |
693 | case 7 : |
694 | return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); |
695 | } |
696 | } |
697 | |