1 | /* ****************************************************************** |
2 | * bitstream |
3 | * Part of FSE library |
4 | * Copyright (c) Yann Collet, Facebook, Inc. |
5 | * |
6 | * You can contact the author at : |
7 | * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy |
8 | * |
9 | * This source code is licensed under both the BSD-style license (found in the |
10 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
11 | * in the COPYING file in the root directory of this source tree). |
12 | * You may select, at your option, one of the above-listed licenses. |
13 | ****************************************************************** */ |
14 | #ifndef BITSTREAM_H_MODULE |
15 | #define BITSTREAM_H_MODULE |
16 | |
17 | /* |
18 | * This API consists of small unitary functions, which must be inlined for best performance. |
19 | * Since link-time-optimization is not available for all compilers, |
20 | * these functions are defined into a .h to be included. |
21 | */ |
22 | |
23 | /*-**************************************** |
24 | * Dependencies |
25 | ******************************************/ |
26 | #include "mem.h" /* unaligned access routines */ |
27 | #include "compiler.h" /* UNLIKELY() */ |
28 | #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ |
29 | #include "error_private.h" /* error codes and messages */ |
30 | |
31 | |
32 | /*========================================= |
33 | * Target specific |
34 | =========================================*/ |
35 | |
36 | #define STREAM_ACCUMULATOR_MIN_32 25 |
37 | #define STREAM_ACCUMULATOR_MIN_64 57 |
38 | #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) |
39 | |
40 | |
41 | /*-****************************************** |
42 | * bitStream encoding API (write forward) |
43 | ********************************************/ |
44 | /* bitStream can mix input from multiple sources. |
45 | * A critical property of these streams is that they encode and decode in **reverse** direction. |
46 | * So the first bit sequence you add will be the last to be read, like a LIFO stack. |
47 | */ |
48 | typedef struct { |
49 | size_t bitContainer; |
50 | unsigned bitPos; |
51 | char* startPtr; |
52 | char* ptr; |
53 | char* endPtr; |
54 | } BIT_CStream_t; |
55 | |
56 | MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); |
57 | MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); |
58 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); |
59 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); |
60 | |
61 | /* Start with initCStream, providing the size of buffer to write into. |
62 | * bitStream will never write outside of this buffer. |
63 | * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. |
64 | * |
65 | * bits are first added to a local register. |
66 | * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. |
67 | * Writing data into memory is an explicit operation, performed by the flushBits function. |
68 | * Hence keep track how many bits are potentially stored into local register to avoid register overflow. |
69 | * After a flushBits, a maximum of 7 bits might still be stored into local register. |
70 | * |
71 | * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. |
72 | * |
73 | * Last operation is to close the bitStream. |
74 | * The function returns the final size of CStream in bytes. |
75 | * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) |
76 | */ |
77 | |
78 | |
79 | /*-******************************************** |
80 | * bitStream decoding API (read backward) |
81 | **********************************************/ |
82 | typedef struct { |
83 | size_t bitContainer; |
84 | unsigned bitsConsumed; |
85 | const char* ptr; |
86 | const char* start; |
87 | const char* limitPtr; |
88 | } BIT_DStream_t; |
89 | |
90 | typedef enum { BIT_DStream_unfinished = 0, |
91 | BIT_DStream_endOfBuffer = 1, |
92 | BIT_DStream_completed = 2, |
93 | BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ |
94 | /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ |
95 | |
96 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); |
97 | MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); |
98 | MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); |
99 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); |
100 | |
101 | |
102 | /* Start by invoking BIT_initDStream(). |
103 | * A chunk of the bitStream is then stored into a local register. |
104 | * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). |
105 | * You can then retrieve bitFields stored into the local register, **in reverse order**. |
106 | * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. |
107 | * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. |
108 | * Otherwise, it can be less than that, so proceed accordingly. |
109 | * Checking if DStream has reached its end can be performed with BIT_endOfDStream(). |
110 | */ |
111 | |
112 | |
113 | /*-**************************************** |
114 | * unsafe API |
115 | ******************************************/ |
116 | MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); |
117 | /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ |
118 | |
119 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); |
120 | /* unsafe version; does not check buffer overflow */ |
121 | |
122 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); |
123 | /* faster, but works only if nbBits >= 1 */ |
124 | |
125 | |
126 | |
127 | /*-************************************************************** |
128 | * Internal functions |
129 | ****************************************************************/ |
130 | MEM_STATIC unsigned BIT_highbit32 (U32 val) |
131 | { |
132 | assert(val != 0); |
133 | { |
134 | # if (__GNUC__ >= 3) /* Use GCC Intrinsic */ |
135 | return __builtin_clz (val) ^ 31; |
136 | # else /* Software version */ |
137 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, |
138 | 11, 14, 16, 18, 22, 25, 3, 30, |
139 | 8, 12, 20, 28, 15, 17, 24, 7, |
140 | 19, 27, 23, 6, 26, 5, 4, 31 }; |
141 | U32 v = val; |
142 | v |= v >> 1; |
143 | v |= v >> 2; |
144 | v |= v >> 4; |
145 | v |= v >> 8; |
146 | v |= v >> 16; |
147 | return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; |
148 | # endif |
149 | } |
150 | } |
151 | |
152 | /*===== Local Constants =====*/ |
153 | static const unsigned BIT_mask[] = { |
154 | 0, 1, 3, 7, 0xF, 0x1F, |
155 | 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, |
156 | 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, |
157 | 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, |
158 | 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, |
159 | 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ |
160 | #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) |
161 | |
162 | /*-************************************************************** |
163 | * bitStream encoding |
164 | ****************************************************************/ |
165 | /*! BIT_initCStream() : |
166 | * `dstCapacity` must be > sizeof(size_t) |
167 | * @return : 0 if success, |
168 | * otherwise an error code (can be tested using ERR_isError()) */ |
169 | MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, |
170 | void* startPtr, size_t dstCapacity) |
171 | { |
172 | bitC->bitContainer = 0; |
173 | bitC->bitPos = 0; |
174 | bitC->startPtr = (char*)startPtr; |
175 | bitC->ptr = bitC->startPtr; |
176 | bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); |
177 | if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); |
178 | return 0; |
179 | } |
180 | |
181 | /*! BIT_addBits() : |
182 | * can add up to 31 bits into `bitC`. |
183 | * Note : does not check for register overflow ! */ |
184 | MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, |
185 | size_t value, unsigned nbBits) |
186 | { |
187 | DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); |
188 | assert(nbBits < BIT_MASK_SIZE); |
189 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
190 | bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; |
191 | bitC->bitPos += nbBits; |
192 | } |
193 | |
194 | /*! BIT_addBitsFast() : |
195 | * works only if `value` is _clean_, |
196 | * meaning all high bits above nbBits are 0 */ |
197 | MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, |
198 | size_t value, unsigned nbBits) |
199 | { |
200 | assert((value>>nbBits) == 0); |
201 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
202 | bitC->bitContainer |= value << bitC->bitPos; |
203 | bitC->bitPos += nbBits; |
204 | } |
205 | |
206 | /*! BIT_flushBitsFast() : |
207 | * assumption : bitContainer has not overflowed |
208 | * unsafe version; does not check buffer overflow */ |
209 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) |
210 | { |
211 | size_t const nbBytes = bitC->bitPos >> 3; |
212 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
213 | assert(bitC->ptr <= bitC->endPtr); |
214 | MEM_writeLEST(memPtr: bitC->ptr, val: bitC->bitContainer); |
215 | bitC->ptr += nbBytes; |
216 | bitC->bitPos &= 7; |
217 | bitC->bitContainer >>= nbBytes*8; |
218 | } |
219 | |
220 | /*! BIT_flushBits() : |
221 | * assumption : bitContainer has not overflowed |
222 | * safe version; check for buffer overflow, and prevents it. |
223 | * note : does not signal buffer overflow. |
224 | * overflow will be revealed later on using BIT_closeCStream() */ |
225 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) |
226 | { |
227 | size_t const nbBytes = bitC->bitPos >> 3; |
228 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
229 | assert(bitC->ptr <= bitC->endPtr); |
230 | MEM_writeLEST(memPtr: bitC->ptr, val: bitC->bitContainer); |
231 | bitC->ptr += nbBytes; |
232 | if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; |
233 | bitC->bitPos &= 7; |
234 | bitC->bitContainer >>= nbBytes*8; |
235 | } |
236 | |
237 | /*! BIT_closeCStream() : |
238 | * @return : size of CStream, in bytes, |
239 | * or 0 if it could not fit into dstBuffer */ |
240 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) |
241 | { |
242 | BIT_addBitsFast(bitC, value: 1, nbBits: 1); /* endMark */ |
243 | BIT_flushBits(bitC); |
244 | if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ |
245 | return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); |
246 | } |
247 | |
248 | |
249 | /*-******************************************************** |
250 | * bitStream decoding |
251 | **********************************************************/ |
252 | /*! BIT_initDStream() : |
253 | * Initialize a BIT_DStream_t. |
254 | * `bitD` : a pointer to an already allocated BIT_DStream_t structure. |
255 | * `srcSize` must be the *exact* size of the bitStream, in bytes. |
256 | * @return : size of stream (== srcSize), or an errorCode if a problem is detected |
257 | */ |
258 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) |
259 | { |
260 | if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } |
261 | |
262 | bitD->start = (const char*)srcBuffer; |
263 | bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); |
264 | |
265 | if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ |
266 | bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); |
267 | bitD->bitContainer = MEM_readLEST(memPtr: bitD->ptr); |
268 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
269 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(val: lastByte) : 0; /* ensures bitsConsumed is always set */ |
270 | if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } |
271 | } else { |
272 | bitD->ptr = bitD->start; |
273 | bitD->bitContainer = *(const BYTE*)(bitD->start); |
274 | switch(srcSize) |
275 | { |
276 | case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); |
277 | ZSTD_FALLTHROUGH; |
278 | |
279 | case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); |
280 | ZSTD_FALLTHROUGH; |
281 | |
282 | case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); |
283 | ZSTD_FALLTHROUGH; |
284 | |
285 | case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; |
286 | ZSTD_FALLTHROUGH; |
287 | |
288 | case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; |
289 | ZSTD_FALLTHROUGH; |
290 | |
291 | case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; |
292 | ZSTD_FALLTHROUGH; |
293 | |
294 | default: break; |
295 | } |
296 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
297 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(val: lastByte) : 0; |
298 | if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ |
299 | } |
300 | bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; |
301 | } |
302 | |
303 | return srcSize; |
304 | } |
305 | |
306 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) |
307 | { |
308 | return bitContainer >> start; |
309 | } |
310 | |
311 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) |
312 | { |
313 | U32 const regMask = sizeof(bitContainer)*8 - 1; |
314 | /* if start > regMask, bitstream is corrupted, and result is undefined */ |
315 | assert(nbBits < BIT_MASK_SIZE); |
316 | /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better |
317 | * than accessing memory. When bmi2 instruction is not present, we consider |
318 | * such cpus old (pre-Haswell, 2013) and their performance is not of that |
319 | * importance. |
320 | */ |
321 | #if defined(__x86_64__) || defined(_M_X86) |
322 | return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); |
323 | #else |
324 | return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; |
325 | #endif |
326 | } |
327 | |
328 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) |
329 | { |
330 | assert(nbBits < BIT_MASK_SIZE); |
331 | return bitContainer & BIT_mask[nbBits]; |
332 | } |
333 | |
334 | /*! BIT_lookBits() : |
335 | * Provides next n bits from local register. |
336 | * local register is not modified. |
337 | * On 32-bits, maxNbBits==24. |
338 | * On 64-bits, maxNbBits==56. |
339 | * @return : value extracted */ |
340 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) |
341 | { |
342 | /* arbitrate between double-shift and shift+mask */ |
343 | #if 1 |
344 | /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, |
345 | * bitstream is likely corrupted, and result is undefined */ |
346 | return BIT_getMiddleBits(bitContainer: bitD->bitContainer, start: (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); |
347 | #else |
348 | /* this code path is slower on my os-x laptop */ |
349 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; |
350 | return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); |
351 | #endif |
352 | } |
353 | |
354 | /*! BIT_lookBitsFast() : |
355 | * unsafe version; only works if nbBits >= 1 */ |
356 | MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) |
357 | { |
358 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; |
359 | assert(nbBits >= 1); |
360 | return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); |
361 | } |
362 | |
363 | MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) |
364 | { |
365 | bitD->bitsConsumed += nbBits; |
366 | } |
367 | |
368 | /*! BIT_readBits() : |
369 | * Read (consume) next n bits from local register and update. |
370 | * Pay attention to not read more than nbBits contained into local register. |
371 | * @return : extracted value. */ |
372 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) |
373 | { |
374 | size_t const value = BIT_lookBits(bitD, nbBits); |
375 | BIT_skipBits(bitD, nbBits); |
376 | return value; |
377 | } |
378 | |
379 | /*! BIT_readBitsFast() : |
380 | * unsafe version; only works only if nbBits >= 1 */ |
381 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) |
382 | { |
383 | size_t const value = BIT_lookBitsFast(bitD, nbBits); |
384 | assert(nbBits >= 1); |
385 | BIT_skipBits(bitD, nbBits); |
386 | return value; |
387 | } |
388 | |
389 | /*! BIT_reloadDStreamFast() : |
390 | * Similar to BIT_reloadDStream(), but with two differences: |
391 | * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! |
392 | * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this |
393 | * point you must use BIT_reloadDStream() to reload. |
394 | */ |
395 | MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) |
396 | { |
397 | if (UNLIKELY(bitD->ptr < bitD->limitPtr)) |
398 | return BIT_DStream_overflow; |
399 | assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); |
400 | bitD->ptr -= bitD->bitsConsumed >> 3; |
401 | bitD->bitsConsumed &= 7; |
402 | bitD->bitContainer = MEM_readLEST(memPtr: bitD->ptr); |
403 | return BIT_DStream_unfinished; |
404 | } |
405 | |
406 | /*! BIT_reloadDStream() : |
407 | * Refill `bitD` from buffer previously set in BIT_initDStream() . |
408 | * This function is safe, it guarantees it will not read beyond src buffer. |
409 | * @return : status of `BIT_DStream_t` internal register. |
410 | * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ |
411 | MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) |
412 | { |
413 | if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ |
414 | return BIT_DStream_overflow; |
415 | |
416 | if (bitD->ptr >= bitD->limitPtr) { |
417 | return BIT_reloadDStreamFast(bitD); |
418 | } |
419 | if (bitD->ptr == bitD->start) { |
420 | if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; |
421 | return BIT_DStream_completed; |
422 | } |
423 | /* start < ptr < limitPtr */ |
424 | { U32 nbBytes = bitD->bitsConsumed >> 3; |
425 | BIT_DStream_status result = BIT_DStream_unfinished; |
426 | if (bitD->ptr - nbBytes < bitD->start) { |
427 | nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ |
428 | result = BIT_DStream_endOfBuffer; |
429 | } |
430 | bitD->ptr -= nbBytes; |
431 | bitD->bitsConsumed -= nbBytes*8; |
432 | bitD->bitContainer = MEM_readLEST(memPtr: bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ |
433 | return result; |
434 | } |
435 | } |
436 | |
437 | /*! BIT_endOfDStream() : |
438 | * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). |
439 | */ |
440 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) |
441 | { |
442 | return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); |
443 | } |
444 | |
445 | |
446 | #endif /* BITSTREAM_H_MODULE */ |
447 | |