| 1 | // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause |
| 2 | /* |
| 3 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * This source code is licensed under both the BSD-style license (found in the |
| 7 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 8 | * in the COPYING file in the root directory of this source tree). |
| 9 | * You may select, at your option, one of the above-listed licenses. |
| 10 | */ |
| 11 | |
| 12 | #include "../common/compiler.h" /* ZSTD_ALIGNOF */ |
| 13 | #include "../common/mem.h" /* S64 */ |
| 14 | #include "../common/zstd_deps.h" /* ZSTD_memset */ |
| 15 | #include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */ |
| 16 | #include "hist.h" /* HIST_add */ |
| 17 | #include "zstd_preSplit.h" |
| 18 | |
| 19 | |
| 20 | #define BLOCKSIZE_MIN 3500 |
| 21 | #define THRESHOLD_PENALTY_RATE 16 |
| 22 | #define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2) |
| 23 | #define THRESHOLD_PENALTY 3 |
| 24 | |
| 25 | #define HASHLENGTH 2 |
| 26 | #define HASHLOG_MAX 10 |
| 27 | #define HASHTABLESIZE (1 << HASHLOG_MAX) |
| 28 | #define HASHMASK (HASHTABLESIZE - 1) |
| 29 | #define KNUTH 0x9e3779b9 |
| 30 | |
| 31 | /* for hashLog > 8, hash 2 bytes. |
| 32 | * for hashLog == 8, just take the byte, no hashing. |
| 33 | * The speed of this method relies on compile-time constant propagation */ |
| 34 | FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog) |
| 35 | { |
| 36 | assert(hashLog >= 8); |
| 37 | if (hashLog == 8) return (U32)((const BYTE*)p)[0]; |
| 38 | assert(hashLog <= HASHLOG_MAX); |
| 39 | return (U32)(MEM_read16(memPtr: p)) * KNUTH >> (32 - hashLog); |
| 40 | } |
| 41 | |
| 42 | |
| 43 | typedef struct { |
| 44 | unsigned events[HASHTABLESIZE]; |
| 45 | size_t nbEvents; |
| 46 | } Fingerprint; |
| 47 | typedef struct { |
| 48 | Fingerprint pastEvents; |
| 49 | Fingerprint newEvents; |
| 50 | } FPStats; |
| 51 | |
| 52 | static void initStats(FPStats* fpstats) |
| 53 | { |
| 54 | ZSTD_memset(fpstats, 0, sizeof(FPStats)); |
| 55 | } |
| 56 | |
| 57 | FORCE_INLINE_TEMPLATE void |
| 58 | addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog) |
| 59 | { |
| 60 | const char* p = (const char*)src; |
| 61 | size_t limit = srcSize - HASHLENGTH + 1; |
| 62 | size_t n; |
| 63 | assert(srcSize >= HASHLENGTH); |
| 64 | for (n = 0; n < limit; n+=samplingRate) { |
| 65 | fp->events[hash2(p: p+n, hashLog)]++; |
| 66 | } |
| 67 | fp->nbEvents += limit/samplingRate; |
| 68 | } |
| 69 | |
| 70 | FORCE_INLINE_TEMPLATE void |
| 71 | recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog) |
| 72 | { |
| 73 | ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog)); |
| 74 | fp->nbEvents = 0; |
| 75 | addEvents_generic(fp, src, srcSize, samplingRate, hashLog); |
| 76 | } |
| 77 | |
| 78 | typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize); |
| 79 | |
| 80 | #define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate |
| 81 | |
| 82 | #define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \ |
| 83 | static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \ |
| 84 | { \ |
| 85 | recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \ |
| 86 | } |
| 87 | |
| 88 | ZSTD_GEN_RECORD_FINGERPRINT(1, 10) |
| 89 | ZSTD_GEN_RECORD_FINGERPRINT(5, 10) |
| 90 | ZSTD_GEN_RECORD_FINGERPRINT(11, 9) |
| 91 | ZSTD_GEN_RECORD_FINGERPRINT(43, 8) |
| 92 | |
| 93 | |
| 94 | static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); } |
| 95 | |
| 96 | static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog) |
| 97 | { |
| 98 | U64 distance = 0; |
| 99 | size_t n; |
| 100 | assert(hashLog <= HASHLOG_MAX); |
| 101 | for (n = 0; n < ((size_t)1 << hashLog); n++) { |
| 102 | distance += |
| 103 | abs64(s64: (S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents); |
| 104 | } |
| 105 | return distance; |
| 106 | } |
| 107 | |
| 108 | /* Compare newEvents with pastEvents |
| 109 | * return 1 when considered "too different" |
| 110 | */ |
| 111 | static int compareFingerprints(const Fingerprint* ref, |
| 112 | const Fingerprint* newfp, |
| 113 | int penalty, |
| 114 | unsigned hashLog) |
| 115 | { |
| 116 | assert(ref->nbEvents > 0); |
| 117 | assert(newfp->nbEvents > 0); |
| 118 | { U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents; |
| 119 | U64 deviation = fpDistance(fp1: ref, fp2: newfp, hashLog); |
| 120 | U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE; |
| 121 | return deviation >= threshold; |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp) |
| 126 | { |
| 127 | size_t n; |
| 128 | for (n = 0; n < HASHTABLESIZE; n++) { |
| 129 | acc->events[n] += newfp->events[n]; |
| 130 | } |
| 131 | acc->nbEvents += newfp->nbEvents; |
| 132 | } |
| 133 | |
| 134 | static void flushEvents(FPStats* fpstats) |
| 135 | { |
| 136 | size_t n; |
| 137 | for (n = 0; n < HASHTABLESIZE; n++) { |
| 138 | fpstats->pastEvents.events[n] = fpstats->newEvents.events[n]; |
| 139 | } |
| 140 | fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents; |
| 141 | ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents)); |
| 142 | } |
| 143 | |
| 144 | static void removeEvents(Fingerprint* acc, const Fingerprint* slice) |
| 145 | { |
| 146 | size_t n; |
| 147 | for (n = 0; n < HASHTABLESIZE; n++) { |
| 148 | assert(acc->events[n] >= slice->events[n]); |
| 149 | acc->events[n] -= slice->events[n]; |
| 150 | } |
| 151 | acc->nbEvents -= slice->nbEvents; |
| 152 | } |
| 153 | |
| 154 | #define CHUNKSIZE (8 << 10) |
| 155 | static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize, |
| 156 | int level, |
| 157 | void* workspace, size_t wkspSize) |
| 158 | { |
| 159 | static const RecordEvents_f records_fs[] = { |
| 160 | FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1) |
| 161 | }; |
| 162 | static const unsigned hashParams[] = { 8, 9, 10, 10 }; |
| 163 | const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]); |
| 164 | FPStats* const fpstats = (FPStats*)workspace; |
| 165 | const char* p = (const char*)blockStart; |
| 166 | int penalty = THRESHOLD_PENALTY; |
| 167 | size_t pos = 0; |
| 168 | assert(blockSize == (128 << 10)); |
| 169 | assert(workspace != NULL); |
| 170 | assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0); |
| 171 | ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats)); |
| 172 | assert(wkspSize >= sizeof(FPStats)); (void)wkspSize; |
| 173 | |
| 174 | initStats(fpstats); |
| 175 | record_f(&fpstats->pastEvents, p, CHUNKSIZE); |
| 176 | for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) { |
| 177 | record_f(&fpstats->newEvents, p + pos, CHUNKSIZE); |
| 178 | if (compareFingerprints(ref: &fpstats->pastEvents, newfp: &fpstats->newEvents, penalty, hashLog: hashParams[level])) { |
| 179 | return pos; |
| 180 | } else { |
| 181 | mergeEvents(acc: &fpstats->pastEvents, newfp: &fpstats->newEvents); |
| 182 | if (penalty > 0) penalty--; |
| 183 | } |
| 184 | } |
| 185 | assert(pos == blockSize); |
| 186 | return blockSize; |
| 187 | (void)flushEvents; (void)removeEvents; |
| 188 | } |
| 189 | |
| 190 | /* ZSTD_splitBlock_fromBorders(): very fast strategy : |
| 191 | * compare fingerprint from beginning and end of the block, |
| 192 | * derive from their difference if it's preferable to split in the middle, |
| 193 | * repeat the process a second time, for finer grained decision. |
| 194 | * 3 times did not brought improvements, so I stopped at 2. |
| 195 | * Benefits are good enough for a cheap heuristic. |
| 196 | * More accurate splitting saves more, but speed impact is also more perceptible. |
| 197 | * For better accuracy, use more elaborate variant *_byChunks. |
| 198 | */ |
| 199 | static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize, |
| 200 | void* workspace, size_t wkspSize) |
| 201 | { |
| 202 | #define SEGMENT_SIZE 512 |
| 203 | FPStats* const fpstats = (FPStats*)workspace; |
| 204 | Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned)); |
| 205 | assert(blockSize == (128 << 10)); |
| 206 | assert(workspace != NULL); |
| 207 | assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0); |
| 208 | ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats)); |
| 209 | assert(wkspSize >= sizeof(FPStats)); (void)wkspSize; |
| 210 | |
| 211 | initStats(fpstats); |
| 212 | HIST_add(count: fpstats->pastEvents.events, src: blockStart, SEGMENT_SIZE); |
| 213 | HIST_add(count: fpstats->newEvents.events, src: (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE); |
| 214 | fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE; |
| 215 | if (!compareFingerprints(ref: &fpstats->pastEvents, newfp: &fpstats->newEvents, penalty: 0, hashLog: 8)) |
| 216 | return blockSize; |
| 217 | |
| 218 | HIST_add(count: middleEvents->events, src: (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE); |
| 219 | middleEvents->nbEvents = SEGMENT_SIZE; |
| 220 | { U64 const distFromBegin = fpDistance(fp1: &fpstats->pastEvents, fp2: middleEvents, hashLog: 8); |
| 221 | U64 const distFromEnd = fpDistance(fp1: &fpstats->newEvents, fp2: middleEvents, hashLog: 8); |
| 222 | U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3; |
| 223 | if (abs64(s64: (S64)distFromBegin - (S64)distFromEnd) < minDistance) |
| 224 | return 64 KB; |
| 225 | return (distFromBegin > distFromEnd) ? 32 KB : 96 KB; |
| 226 | } |
| 227 | } |
| 228 | |
| 229 | size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize, |
| 230 | int level, |
| 231 | void* workspace, size_t wkspSize) |
| 232 | { |
| 233 | DEBUGLOG(6, "ZSTD_splitBlock (level=%i)" , level); |
| 234 | assert(0<=level && level<=4); |
| 235 | if (level == 0) |
| 236 | return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize); |
| 237 | /* level >= 1*/ |
| 238 | return ZSTD_splitBlock_byChunks(blockStart, blockSize, level: level-1, workspace, wkspSize); |
| 239 | } |
| 240 | |