decompress_bunzip2.c source code [linux/lib/decompress_bunzip2.c]

1	/ Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).*
2
3	Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
4	which also acknowledges contributions by Mike Burrows, David Wheeler,
5	Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
6	Robert Sedgewick, and Jon L. Bentley.
7
8	This code is licensed under the LGPLv2:
9	LGPL (http://www.gnu.org/copyleft/lgpl.html
10	*/
11
12	/*
13	Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
14
15	More efficient reading of Huffman codes, a streamlined read_bunzip()
16	function, and various other tweaks. In (limited) tests, approximately
17	20% faster than bzcat on x86 and about 10% faster on arm.
18
19	Note that about 2/3 of the time is spent in read_unzip() reversing
20	the Burrows-Wheeler transformation. Much of that time is delay
21	resulting from cache misses.
22
23	I would ask that anyone benefiting from this work, especially those
24	using it in commercial products, consider making a donation to my local
25	non-profit hospice organization in the name of the woman I loved, who
26	passed away Feb. 12, 2003.
27
28	In memory of Toni W. Hagan
29
30	Hospice of Acadiana, Inc.
31	2600 Johnston St., Suite 200
32	Lafayette, LA 70503-3240
33
34	Phone (337) 232-1234 or 1-800-738-2226
35	Fax (337) 232-1297
36
37	https://www.hospiceacadiana.com/
38
39	Manuel
40	*/
41
42	/*
43	Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
44	*/
45
46
47	#ifdef STATIC
48	#define PREBOOT
49	#else
50	#include <linux/decompress/bunzip2.h>
51	#endif /* STATIC */
52
53	#include <linux/decompress/mm.h>
54	#include <linux/crc32poly.h>
55
56	#ifndef INT_MAX
57	#define INT_MAX 0x7fffffff
58	#endif
59
60	/ Constants for Huffman coding /
61	#define MAX_GROUPS 6
62	#define GROUP_SIZE 50 /* 64 would have been more efficient */
63	#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
64	#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
65	#define SYMBOL_RUNA 0
66	#define SYMBOL_RUNB 1
67
68	/ Status return values /
69	#define RETVAL_OK 0
70	#define RETVAL_LAST_BLOCK (-1)
71	#define RETVAL_NOT_BZIP_DATA (-2)
72	#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
73	#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
74	#define RETVAL_DATA_ERROR (-5)
75	#define RETVAL_OUT_OF_MEMORY (-6)
76	#define RETVAL_OBSOLETE_INPUT (-7)
77
78	/ Other housekeeping constants /
79	#define BZIP2_IOBUF_SIZE 4096
80
81	/ This is what we know about each Huffman coding group /
82	struct group_data {
83	/ We have an extra slot at the end of limit[] for a sentinel value. /
84	int limit[MAX_HUFCODE_BITS+`1`];
85	int base[MAX_HUFCODE_BITS];
86	int permute[MAX_SYMBOLS];
87	int minLen, maxLen;
88	};
89
90	/ Structure holding all the housekeeping data, including IO buffers and*
91	memory that persists between calls to bunzip /*
92	struct bunzip_data {
93	/ State for interrupting output loop /
94	int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
95	/ I/O tracking data (file handles, buffers, positions, etc.) /
96	long (fill)(void*, unsigned* long);
97	long inbufCount, inbufPos /, outbufPos/;
98	unsigned char inbuf /,outbuf/;
99	unsigned int inbufBitCount, inbufBits;
100	/ The CRC values stored in the block header and calculated from the*
101	data /*
102	unsigned int crc32Table[`256`], headerCRC, totalCRC, writeCRC;
103	/ Intermediate buffer and its size (in bytes) /
104	unsigned int *dbuf, dbufSize;
105	/ These things are a bit too big to go on the stack /
106	unsigned char selectors[`32768`]; / nSelectors = 15 bits /
107	struct group_data groups[MAX_GROUPS]; / Huffman coding tables /
108	int io_error; / non-zero if we have IO error /
109	int byteCount[`256`];
110	unsigned char symToByte[`256`], mtfSymbol[`256`];
111	};
112
113
114	/ Return the next nnn bits of input. All reads from the compressed input*
115	are done through this function. All reads are big endian /*
116	static unsigned int INIT get_bits(struct bunzip_data bd, char* bits_wanted)
117	{
118	unsigned int bits = `0`;
119
120	/ If we need to get more data from the byte buffer, do so.*
121	(Loop getting one byte at a time to enforce endianness and avoid
122	unaligned access.) /*
123	while (bd->inbufBitCount < bits_wanted) {
124	/ If we need to read more data from file into byte buffer, do*
125	so /*
126	if (bd->inbufPos == bd->inbufCount) {
127	if (bd->io_error)
128	return `0`;
129	bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
130	if (bd->inbufCount <= `0`) {
131	bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
132	return `0`;
133	}
134	bd->inbufPos = `0`;
135	}
136	/ Avoid 32-bit overflow (dump bit buffer to top of output) /
137	if (bd->inbufBitCount >= `24`) {
138	bits = bd->inbufBits&((`1` << bd->inbufBitCount)-`1`);
139	bits_wanted -= bd->inbufBitCount;
140	bits <<= bits_wanted;
141	bd->inbufBitCount = `0`;
142	}
143	/ Grab next 8 bits of input from buffer. /
144	bd->inbufBits = (bd->inbufBits << `8`)\|bd->inbuf[bd->inbufPos++];
145	bd->inbufBitCount += `8`;
146	}
147	/ Calculate result /
148	bd->inbufBitCount -= bits_wanted;
149	bits \|= (bd->inbufBits >> bd->inbufBitCount)&((`1` << bits_wanted)-`1`);
150
151	return bits;
152	}
153
154	/ Unpacks the next block and sets up for the inverse burrows-wheeler step. /
155
156	static int INIT get_next_block(struct bunzip_data *bd)
157	{
158	struct group_data *hufGroup = NULL;
159	int *base = NULL;
160	int *limit = NULL;
161	int dbufCount, nextSym, dbufSize, groupCount, selector,
162	i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
163	unsigned char uc, symToByte, mtfSymbol, *selectors;
164	unsigned int *dbuf, origPtr;
165
166	dbuf = bd->dbuf;
167	dbufSize = bd->dbufSize;
168	selectors = bd->selectors;
169	byteCount = bd->byteCount;
170	symToByte = bd->symToByte;
171	mtfSymbol = bd->mtfSymbol;
172
173	/ Read in header signature and CRC, then validate signature.*
174	(last block signature means CRC is for whole file, return now) /*
175	i = get_bits(bd, bits_wanted: `24`);
176	j = get_bits(bd, bits_wanted: `24`);
177	bd->headerCRC = get_bits(bd, bits_wanted: `32`);
178	if ((i == `0x177245`) && (j == `0x385090`))
179	return RETVAL_LAST_BLOCK;
180	if ((i != `0x314159`) \|\| (j != `0x265359`))
181	return RETVAL_NOT_BZIP_DATA;
182	/ We can add support for blockRandomised if anybody complains.*
183	There was some code for this in busybox 1.0.0-pre3, but nobody ever
184	noticed that it didn't actually work. /*
185	if (get_bits(bd, bits_wanted: `1`))
186	return RETVAL_OBSOLETE_INPUT;
187	origPtr = get_bits(bd, bits_wanted: `24`);
188	if (origPtr >= dbufSize)
189	return RETVAL_DATA_ERROR;
190	/ mapping table: if some byte values are never used (encoding things*
191	like ascii text), the compression code removes the gaps to have fewer
192	symbols to deal with, and writes a sparse bitfield indicating which
193	values were present. We make a translation table to convert the
194	symbols back to the corresponding bytes. /*
195	t = get_bits(bd, bits_wanted: `16`);
196	symTotal = `0`;
197	for (i = `0`; i < `16`; i++) {
198	if (t&(`1` << (`15`-i))) {
199	k = get_bits(bd, bits_wanted: `16`);
200	for (j = `0`; j < `16`; j++)
201	if (k&(`1` << (`15`-j)))
202	symToByte[symTotal++] = (`16`*i)+j;
203	}
204	}
205	/ How many different Huffman coding groups does this block use? /
206	groupCount = get_bits(bd, bits_wanted: `3`);
207	if (groupCount < `2` \|\| groupCount > MAX_GROUPS)
208	return RETVAL_DATA_ERROR;
209	/ nSelectors: Every GROUP_SIZE many symbols we select a new*
210	Huffman coding group. Read in the group selector list,
211	which is stored as MTF encoded bit runs. (MTF = Move To
212	Front, as each value is used it's moved to the start of the
213	list.) /*
214	nSelectors = get_bits(bd, bits_wanted: `15`);
215	if (!nSelectors)
216	return RETVAL_DATA_ERROR;
217	for (i = `0`; i < groupCount; i++)
218	mtfSymbol[i] = i;
219	for (i = `0`; i < nSelectors; i++) {
220	/ Get next value /
221	for (j = `0`; get_bits(bd, bits_wanted: `1`); j++)
222	if (j >= groupCount)
223	return RETVAL_DATA_ERROR;
224	/ Decode MTF to get the next selector /
225	uc = mtfSymbol[j];
226	for (; j; j--)
227	mtfSymbol[j] = mtfSymbol[j-`1`];
228	mtfSymbol[`0`] = selectors[i] = uc;
229	}
230	/ Read the Huffman coding tables for each group, which code*
231	for symTotal literal symbols, plus two run symbols (RUNA,
232	RUNB) /*
233	symCount = symTotal+`2`;
234	for (j = `0`; j < groupCount; j++) {
235	unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+`1`];
236	int minLen, maxLen, pp;
237	/ Read Huffman code lengths for each symbol. They're*
238	stored in a way similar to mtf; record a starting
239	value for the first symbol, and an offset from the
240	previous value for everys symbol after that.
241	(Subtracting 1 before the loop and then adding it
242	back at the end is an optimization that makes the
243	test inside the loop simpler: symbol length 0
244	becomes negative, so an unsigned inequality catches
245	it.) /*
246	t = get_bits(bd, bits_wanted: `5`)-`1`;
247	for (i = `0`; i < symCount; i++) {
248	for (;;) {
249	if (((unsigned)t) > (MAX_HUFCODE_BITS-`1`))
250	return RETVAL_DATA_ERROR;
251
252	/ If first bit is 0, stop. Else*
253	second bit indicates whether to
254	increment or decrement the value.
255	Optimization: grab 2 bits and unget
256	the second if the first was 0. /*
257
258	k = get_bits(bd, bits_wanted: `2`);
259	if (k < `2`) {
260	bd->inbufBitCount++;
261	break;
262	}
263	/ Add one if second bit 1, else*
264	* subtract 1. Avoids if/else */
265	t += (((k+`1`)&`2`)-`1`);
266	}
267	/ Correct for the initial -1, to get the*
268	* final symbol length */
269	length[i] = t+`1`;
270	}
271	/ Find largest and smallest lengths in this group /
272	minLen = maxLen = length[`0`];
273
274	for (i = `1`; i < symCount; i++) {
275	if (length[i] > maxLen)
276	maxLen = length[i];
277	else if (length[i] < minLen)
278	minLen = length[i];
279	}
280
281	/ Calculate permute[], base[], and limit[] tables from*
282	* length[].
283	*
284	* permute[] is the lookup table for converting
285	* Huffman coded symbols into decoded symbols. base[]
286	* is the amount to subtract from the value of a
287	* Huffman symbol of a given length when using
288	* permute[].
289	*
290	* limit[] indicates the largest numerical value a
291	* symbol with a given number of bits can have. This
292	* is how the Huffman codes can vary in length: each
293	* code with a value > limit[length] needs another
294	* bit.
295	*/
296	hufGroup = bd->groups+j;
297	hufGroup->minLen = minLen;
298	hufGroup->maxLen = maxLen;
299	/ Note that minLen can't be smaller than 1, so we*
300	adjust the base and limit array pointers so we're
301	not always wasting the first entry. We do this
302	again when using them (during symbol decoding)./*
303	base = hufGroup->base-`1`;
304	limit = hufGroup->limit-`1`;
305	/ Calculate permute[]. Concurrently, initialize*
306	* temp[] and limit[]. */
307	pp = `0`;
308	for (i = minLen; i <= maxLen; i++) {
309	temp[i] = limit[i] = `0`;
310	for (t = `0`; t < symCount; t++)
311	if (length[t] == i)
312	hufGroup->permute[pp++] = t;
313	}
314	/ Count symbols coded for at each bit length /
315	for (i = `0`; i < symCount; i++)
316	temp[length[i]]++;
317	/ Calculate limit[] (the largest symbol-coding value*
318	*at each bit length, which is (previous limit <<
319	*1)+symbols at this level), and base[] (number of
320	*symbols to ignore at each bit length, which is limit
321	*minus the cumulative count of symbols coded for
322	already). /
323	pp = t = `0`;
324	for (i = minLen; i < maxLen; i++) {
325	pp += temp[i];
326	/ We read the largest possible symbol size*
327	and then unget bits after determining how
328	many we need, and those extra bits could be
329	set to anything. (They're noise from
330	future symbols.) At each level we're
331	really only interested in the first few
332	bits, so here we set all the trailing
333	to-be-ignored bits to 1 so they don't
334	affect the value > limit[length]
335	comparison. /*
336	limit[i] = (pp << (maxLen - i)) - `1`;
337	pp <<= `1`;
338	base[i+`1`] = pp-(t += temp[i]);
339	}
340	limit[maxLen+`1`] = INT_MAX; / Sentinel value for*
341	* reading next sym. */
342	limit[maxLen] = pp+temp[maxLen]-`1`;
343	base[minLen] = `0`;
344	}
345	/ We've finished reading and digesting the block header. Now*
346	read this block's Huffman coded symbols from the file and
347	undo the Huffman coding and run length encoding, saving the
348	result into dbuf[dbufCount++] = uc /*
349
350	/ Initialize symbol occurrence counters and symbol Move To*
351	* Front table */
352	for (i = `0`; i < `256`; i++) {
353	byteCount[i] = `0`;
354	mtfSymbol[i] = (unsigned char)i;
355	}
356	/ Loop through compressed symbols. /
357	runPos = dbufCount = symCount = selector = `0`;
358	for (;;) {
359	/ Determine which Huffman coding group to use. /
360	if (!(symCount--)) {
361	symCount = GROUP_SIZE-`1`;
362	if (selector >= nSelectors)
363	return RETVAL_DATA_ERROR;
364	hufGroup = bd->groups+selectors[selector++];
365	base = hufGroup->base-`1`;
366	limit = hufGroup->limit-`1`;
367	}
368	/ Read next Huffman-coded symbol. /
369	/ Note: It is far cheaper to read maxLen bits and*
370	back up than it is to read minLen bits and then an
371	additional bit at a time, testing as we go.
372	Because there is a trailing last block (with file
373	CRC), there is no danger of the overread causing an
374	unexpected EOF for a valid compressed file. As a
375	further optimization, we do the read inline
376	(falling back to a call to get_bits if the buffer
377	runs dry). The following (up to got_huff_bits:) is
378	equivalent to j = get_bits(bd, hufGroup->maxLen);
379	*/
380	while (bd->inbufBitCount < hufGroup->maxLen) {
381	if (bd->inbufPos == bd->inbufCount) {
382	j = get_bits(bd, bits_wanted: hufGroup->maxLen);
383	goto got_huff_bits;
384	}
385	bd->inbufBits =
386	(bd->inbufBits << `8`)\|bd->inbuf[bd->inbufPos++];
387	bd->inbufBitCount += `8`;
388	}
389	bd->inbufBitCount -= hufGroup->maxLen;
390	j = (bd->inbufBits >> bd->inbufBitCount)&
391	((`1` << hufGroup->maxLen)-`1`);
392	got_huff_bits:
393	/ Figure how many bits are in next symbol and*
394	* unget extras */
395	i = hufGroup->minLen;
396	while (j > limit[i])
397	++i;
398	bd->inbufBitCount += (hufGroup->maxLen - i);
399	/ Huffman decode value to get nextSym (with bounds checking) /
400	if ((i > hufGroup->maxLen)
401	\|\| (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
402	>= MAX_SYMBOLS))
403	return RETVAL_DATA_ERROR;
404	nextSym = hufGroup->permute[j];
405	/ We have now decoded the symbol, which indicates*
406	either a new literal byte, or a repeated run of the
407	most recent literal byte. First, check if nextSym
408	indicates a repeated run, and if so loop collecting
409	how many times to repeat the last literal. /*
410	if (((unsigned)nextSym) <= SYMBOL_RUNB) { / RUNA or RUNB /
411	/ If this is the start of a new run, zero out*
412	* counter */
413	if (!runPos) {
414	runPos = `1`;
415	t = `0`;
416	}
417	/ Neat trick that saves 1 symbol: instead of*
418	or-ing 0 or 1 at each bit position, add 1
419	or 2 instead. For example, 1011 is 1 << 0
420	+ 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1
421	+ 1 << 2. You can make any bit pattern
422	that way using 1 less symbol than the basic
423	or 0/1 method (except all bits 0, which
424	would use no symbols, but a run of length 0
425	doesn't mean anything in this context).
426	Thus space is saved. /*
427	t += (runPos << nextSym);
428	/ +runPos if RUNA; +2runPos if RUNB /*
429
430	runPos <<= `1`;
431	continue;
432	}
433	/ When we hit the first non-run symbol after a run,*
434	we now know how many times to repeat the last
435	literal, so append that many copies to our buffer
436	of decoded symbols (dbuf) now. (The last literal
437	used is the one at the head of the mtfSymbol
438	array.) /*
439	if (runPos) {
440	runPos = `0`;
441	if (dbufCount+t >= dbufSize)
442	return RETVAL_DATA_ERROR;
443
444	uc = symToByte[mtfSymbol[`0`]];
445	byteCount[uc] += t;
446	while (t--)
447	dbuf[dbufCount++] = uc;
448	}
449	/ Is this the terminating symbol? /
450	if (nextSym > symTotal)
451	break;
452	/ At this point, nextSym indicates a new literal*
453	character. Subtract one to get the position in the
454	MTF array at which this literal is currently to be
455	found. (Note that the result can't be -1 or 0,
456	because 0 and 1 are RUNA and RUNB. But another
457	instance of the first symbol in the mtf array,
458	position 0, would have been handled as part of a
459	run above. Therefore 1 unused mtf position minus 2
460	non-literal nextSym values equals -1.) /*
461	if (dbufCount >= dbufSize)
462	return RETVAL_DATA_ERROR;
463	i = nextSym - `1`;
464	uc = mtfSymbol[i];
465	/ Adjust the MTF array. Since we typically expect to*
466	*move only a small number of symbols, and are bound
467	*by 256 in any case, using memmove here would
468	*typically be bigger and slower due to function call
469	overhead and other assorted setup costs. /
470	do {
471	mtfSymbol[i] = mtfSymbol[i-`1`];
472	} while (--i);
473	mtfSymbol[`0`] = uc;
474	uc = symToByte[uc];
475	/ We have our literal byte. Save it into dbuf. /
476	byteCount[uc]++;
477	dbuf[dbufCount++] = (unsigned int)uc;
478	}
479	/ At this point, we've read all the Huffman-coded symbols*
480	(and repeated runs) for this block from the input stream,
481	and decoded them into the intermediate buffer. There are
482	dbufCount many decoded bytes in dbuf[]. Now undo the
483	Burrows-Wheeler transform on dbuf. See
484	http://dogma.net/markn/articles/bwt/bwt.htm
485	*/
486	/ Turn byteCount into cumulative occurrence counts of 0 to n-1. /
487	j = `0`;
488	for (i = `0`; i < `256`; i++) {
489	k = j+byteCount[i];
490	byteCount[i] = j;
491	j = k;
492	}
493	/ Figure out what order dbuf would be in if we sorted it. /
494	for (i = `0`; i < dbufCount; i++) {
495	uc = (unsigned char)(dbuf[i] & `0xff`);
496	dbuf[byteCount[uc]] \|= (i << `8`);
497	byteCount[uc]++;
498	}
499	/ Decode first byte by hand to initialize "previous" byte.*
500	Note that it doesn't get output, and if the first three
501	characters are identical it doesn't qualify as a run (hence
502	writeRunCountdown = 5). /*
503	if (dbufCount) {
504	if (origPtr >= dbufCount)
505	return RETVAL_DATA_ERROR;
506	bd->writePos = dbuf[origPtr];
507	bd->writeCurrent = (unsigned char)(bd->writePos&`0xff`);
508	bd->writePos >>= `8`;
509	bd->writeRunCountdown = `5`;
510	}
511	bd->writeCount = dbufCount;
512
513	return RETVAL_OK;
514	}
515
516	/ Undo burrows-wheeler transform on intermediate buffer to produce output.*
517	If start_bunzip was initialized with out_fd =-1, then up to len bytes of
518	data are written to outbuf. Return value is number of bytes written or
519	error (all errors are negative numbers). If out_fd!=-1, outbuf and len
520	are ignored, data is written to out_fd and return is RETVAL_OK or error.
521	*/
522
523	static int INIT read_bunzip(struct bunzip_data bd, char* outbuf, int* len)
524	{
525	const unsigned int *dbuf;
526	int pos, xcurrent, previous, gotcount;
527
528	/ If last read was short due to end of file, return last block now /
529	if (bd->writeCount < `0`)
530	return bd->writeCount;
531
532	gotcount = `0`;
533	dbuf = bd->dbuf;
534	pos = bd->writePos;
535	xcurrent = bd->writeCurrent;
536
537	/ We will always have pending decoded data to write into the output*
538	buffer unless this is the very first call (in which case we haven't
539	Huffman-decoded a block into the intermediate buffer yet). /*
540
541	if (bd->writeCopies) {
542	/ Inside the loop, writeCopies means extra copies (beyond 1) /
543	--bd->writeCopies;
544	/ Loop outputting bytes /
545	for (;;) {
546	/ If the output buffer is full, snapshot*
547	* state and return */
548	if (gotcount >= len) {
549	bd->writePos = pos;
550	bd->writeCurrent = xcurrent;
551	bd->writeCopies++;
552	return len;
553	}
554	/ Write next byte into output buffer, updating CRC /
555	outbuf[gotcount++] = xcurrent;
556	bd->writeCRC = (((bd->writeCRC) << `8`)
557	^bd->crc32Table[((bd->writeCRC) >> `24`)
558	^xcurrent]);
559	/ Loop now if we're outputting multiple*
560	* copies of this byte */
561	if (bd->writeCopies) {
562	--bd->writeCopies;
563	continue;
564	}
565	decode_next_byte:
566	if (!bd->writeCount--)
567	break;
568	/ Follow sequence vector to undo*
569	* Burrows-Wheeler transform */
570	previous = xcurrent;
571	pos = dbuf[pos];
572	xcurrent = pos&`0xff`;
573	pos >>= `8`;
574	/ After 3 consecutive copies of the same*
575	byte, the 4th is a repeat count. We count
576	down from 4 instead of counting up because*
577	testing for non-zero is faster /*
578	if (--bd->writeRunCountdown) {
579	if (xcurrent != previous)
580	bd->writeRunCountdown = `4`;
581	} else {
582	/ We have a repeated run, this byte*
583	* indicates the count */
584	bd->writeCopies = xcurrent;
585	xcurrent = previous;
586	bd->writeRunCountdown = `5`;
587	/ Sometimes there are just 3 bytes*
588	* (run length 0) */
589	if (!bd->writeCopies)
590	goto decode_next_byte;
591	/ Subtract the 1 copy we'd output*
592	* anyway to get extras */
593	--bd->writeCopies;
594	}
595	}
596	/ Decompression of this block completed successfully /
597	bd->writeCRC = ~bd->writeCRC;
598	bd->totalCRC = ((bd->totalCRC << `1`) \|
599	(bd->totalCRC >> `31`)) ^ bd->writeCRC;
600	/ If this block had a CRC error, force file level CRC error. /
601	if (bd->writeCRC != bd->headerCRC) {
602	bd->totalCRC = bd->headerCRC+`1`;
603	return RETVAL_LAST_BLOCK;
604	}
605	}
606
607	/ Refill the intermediate buffer by Huffman-decoding next*
608	* block of input */
609	/ (previous is just a convenient unused temp variable here) /
610	previous = get_next_block(bd);
611	if (previous) {
612	bd->writeCount = previous;
613	return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
614	}
615	bd->writeCRC = `0xffffffffUL`;
616	pos = bd->writePos;
617	xcurrent = bd->writeCurrent;
618	goto decode_next_byte;
619	}
620
621	static long INIT nofill(void buf, unsigned* long len)
622	{
623	return -`1`;
624	}
625
626	/ Allocate the structure, read file header. If in_fd ==-1, inbuf must contain*
627	a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
628	ignored, and data is read from file handle into temporary buffer. /*
629	static int INIT start_bunzip(struct bunzip_data *bdp, void* inbuf, long* len,
630	long (fill)(void*, unsigned* long))
631	{
632	struct bunzip_data *bd;
633	unsigned int i, j, c;
634	const unsigned int BZh0 =
635	(((unsigned int)`'B'`) << `24`)+(((unsigned int)`'Z'`) << `16`)
636	+(((unsigned int)`'h'`) << `8`)+(unsigned int)`'0'`;
637
638	/ Figure out how much data to allocate /
639	i = sizeof(struct bunzip_data);
640
641	/ Allocate bunzip_data. Most fields initialize to zero. /
642	bd = *bdp = malloc(i);
643	if (!bd)
644	return RETVAL_OUT_OF_MEMORY;
645	memset(bd, `0`, sizeof(struct bunzip_data));
646	/ Setup input buffer /
647	bd->inbuf = inbuf;
648	bd->inbufCount = len;
649	if (fill != NULL)
650	bd->fill = fill;
651	else
652	bd->fill = nofill;
653
654	/ Init the CRC32 table (big endian) /
655	for (i = `0`; i < `256`; i++) {
656	c = i << `24`;
657	for (j = `8`; j; j--)
658	c = c&`0x80000000` ? (c << `1`)^(CRC32_POLY_BE) : (c << `1`);
659	bd->crc32Table[i] = c;
660	}
661
662	/ Ensure that file starts with "BZh['1'-'9']." /
663	i = get_bits(bd, bits_wanted: `32`);
664	if (((unsigned int)(i-BZh0-`1`)) >= `9`)
665	return RETVAL_NOT_BZIP_DATA;
666
667	/ Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of*
668	uncompressed data. Allocate intermediate buffer for block. /*
669	bd->dbufSize = `100000`*(i-BZh0);
670
671	bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
672	if (!bd->dbuf)
673	return RETVAL_OUT_OF_MEMORY;
674	return RETVAL_OK;
675	}
676
677	/ Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,*
678	not end of file.) /*
679	STATIC int INIT bunzip2(unsigned char buf, long* len,
680	long (fill)(void*, unsigned* long),
681	long (flush)(void*, unsigned* long),
682	unsigned char *outbuf,
683	long *pos,
684	void(error)(char* *x))
685	{
686	struct bunzip_data *bd;
687	int i = -`1`;
688	unsigned char *inbuf;
689
690	if (flush)
691	outbuf = malloc(BZIP2_IOBUF_SIZE);
692
693	if (!outbuf) {
694	error("Could not allocate output buffer");
695	return RETVAL_OUT_OF_MEMORY;
696	}
697	if (buf)
698	inbuf = buf;
699	else
700	inbuf = malloc(BZIP2_IOBUF_SIZE);
701	if (!inbuf) {
702	error("Could not allocate input buffer");
703	i = RETVAL_OUT_OF_MEMORY;
704	goto exit_0;
705	}
706	i = start_bunzip(bdp: &bd, inbuf, len, fill);
707	if (!i) {
708	for (;;) {
709	i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
710	if (i <= `0`)
711	break;
712	if (!flush)
713	outbuf += i;
714	else
715	if (i != flush(outbuf, i)) {
716	i = RETVAL_UNEXPECTED_OUTPUT_EOF;
717	break;
718	}
719	}
720	}
721	/ Check CRC and release memory /
722	if (i == RETVAL_LAST_BLOCK) {
723	if (bd->headerCRC != bd->totalCRC)
724	error("Data integrity error when decompressing.");
725	else
726	i = RETVAL_OK;
727	} else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
728	error("Compressed file ends unexpectedly");
729	}
730	if (!bd)
731	goto exit_1;
732	if (bd->dbuf)
733	large_free(bd->dbuf);
734	if (pos)
735	*pos = bd->inbufPos;
736	free(bd);
737	exit_1:
738	if (!buf)
739	free(inbuf);
740	exit_0:
741	if (flush)
742	free(outbuf);
743	return i;
744	}
745
746	#ifdef PREBOOT
747	STATIC int INIT __decompress(unsigned char buf, long* len,
748	long (fill)(void*, unsigned* long),
749	long (flush)(void*, unsigned* long),
750	unsigned char outbuf, long* olen,
751	long *pos,
752	void (error)(char* *x))
753	{
754	return bunzip2(buf, len - `4`, fill, flush, outbuf, pos, error);
755	}
756	#endif
757

source code of linux/lib/decompress_bunzip2.c