Blame - lib/decompress_bunzip2.c - kernel/msm-4.9

blob: 6dd0335ea61b296b5dfd77818e58e294fb2e805c [file] [log] [blame]

Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	1	/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
				2
				3	Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
				4	which also acknowledges contributions by Mike Burrows, David Wheeler,
				5	Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
				6	Robert Sedgewick, and Jon L. Bentley.
				7
				8	This code is licensed under the LGPLv2:
				9	LGPL (http://www.gnu.org/copyleft/lgpl.html
				10	*/
				11
				12	/*
				13	Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
				14
				15	More efficient reading of Huffman codes, a streamlined read_bunzip()
				16	function, and various other tweaks. In (limited) tests, approximately
				17	20% faster than bzcat on x86 and about 10% faster on arm.
				18
				19	Note that about 2/3 of the time is spent in read_unzip() reversing
				20	the Burrows-Wheeler transformation. Much of that time is delay
				21	resulting from cache misses.
				22
				23	I would ask that anyone benefiting from this work, especially those
				24	using it in commercial products, consider making a donation to my local
				25	non-profit hospice organization in the name of the woman I loved, who
				26	passed away Feb. 12, 2003.
				27
				28	In memory of Toni W. Hagan
				29
				30	Hospice of Acadiana, Inc.
				31	2600 Johnston St., Suite 200
				32	Lafayette, LA 70503-3240
				33
				34	Phone (337) 232-1234 or 1-800-738-2226
				35	Fax (337) 232-1297
				36
				37	http://www.hospiceacadiana.com/
				38
				39	Manuel
				40	*/
				41
				42	/*
				43	Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
				44	*/
				45
				46
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	47	#ifdef STATIC
				48	#define PREBOOT
				49	#else
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	50	#include <linux/decompress/bunzip2.h>
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	51	#endif /* STATIC */
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	52
				53	#include <linux/decompress/mm.h>
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	54
				55	#ifndef INT_MAX
				56	#define INT_MAX 0x7fffffff
				57	#endif
				58
				59	/* Constants for Huffman coding */
				60	#define MAX_GROUPS 6
				61	#define GROUP_SIZE 50 /* 64 would have been more efficient */
				62	#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
				63	#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
				64	#define SYMBOL_RUNA 0
				65	#define SYMBOL_RUNB 1
				66
				67	/* Status return values */
				68	#define RETVAL_OK 0
				69	#define RETVAL_LAST_BLOCK (-1)
				70	#define RETVAL_NOT_BZIP_DATA (-2)
				71	#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
				72	#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
				73	#define RETVAL_DATA_ERROR (-5)
				74	#define RETVAL_OUT_OF_MEMORY (-6)
				75	#define RETVAL_OBSOLETE_INPUT (-7)
				76
				77	/* Other housekeeping constants */
				78	#define BZIP2_IOBUF_SIZE 4096
				79
				80	/* This is what we know about each Huffman coding group */
				81	struct group_data {
				82	/* We have an extra slot at the end of limit[] for a sentinal value. */
				83	int limit[MAX_HUFCODE_BITS+1];
				84	int base[MAX_HUFCODE_BITS];
				85	int permute[MAX_SYMBOLS];
				86	int minLen, maxLen;
				87	};
				88
				89	/* Structure holding all the housekeeping data, including IO buffers and
				90	memory that persists between calls to bunzip */
				91	struct bunzip_data {
				92	/* State for interrupting output loop */
				93	int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
				94	/* I/O tracking data (file handles, buffers, positions, etc.) */
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	95	long (fill)(void, unsigned long);
				96	long inbufCount, inbufPos /, outbufPos/;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	97	unsigned char inbuf /,outbuf/;
				98	unsigned int inbufBitCount, inbufBits;
				99	/* The CRC values stored in the block header and calculated from the
				100	data */
				101	unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
				102	/* Intermediate buffer and its size (in bytes) */
				103	unsigned int *dbuf, dbufSize;
				104	/* These things are a bit too big to go on the stack */
				105	unsigned char selectors[32768]; /* nSelectors = 15 bits */
				106	struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
				107	int io_error; /* non-zero if we have IO error */
Prarit Bhargava	dd21e9b	2010-08-10 18:03:40 -0700	[diff] [blame]	108	int byteCount[256];
				109	unsigned char symToByte[256], mtfSymbol[256];
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	110	};
				111
				112
				113	/* Return the next nnn bits of input. All reads from the compressed input
				114	are done through this function. All reads are big endian */
				115	static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
				116	{
				117	unsigned int bits = 0;
				118
				119	/* If we need to get more data from the byte buffer, do so.
				120	(Loop getting one byte at a time to enforce endianness and avoid
				121	unaligned access.) */
				122	while (bd->inbufBitCount < bits_wanted) {
				123	/* If we need to read more data from file into byte buffer, do
				124	so */
				125	if (bd->inbufPos == bd->inbufCount) {
				126	if (bd->io_error)
				127	return 0;
				128	bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
				129	if (bd->inbufCount <= 0) {
				130	bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
				131	return 0;
				132	}
				133	bd->inbufPos = 0;
				134	}
				135	/* Avoid 32-bit overflow (dump bit buffer to top of output) */
				136	if (bd->inbufBitCount >= 24) {
				137	bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
				138	bits_wanted -= bd->inbufBitCount;
				139	bits <<= bits_wanted;
				140	bd->inbufBitCount = 0;
				141	}
				142	/* Grab next 8 bits of input from buffer. */
				143	bd->inbufBits = (bd->inbufBits << 8)\|bd->inbuf[bd->inbufPos++];
				144	bd->inbufBitCount += 8;
				145	}
				146	/* Calculate result */
				147	bd->inbufBitCount -= bits_wanted;
				148	bits \|= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
				149
				150	return bits;
				151	}
				152
				153	/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
				154
				155	static int INIT get_next_block(struct bunzip_data *bd)
				156	{
				157	struct group_data *hufGroup = NULL;
				158	int *base = NULL;
				159	int *limit = NULL;
				160	int dbufCount, nextSym, dbufSize, groupCount, selector,
Prarit Bhargava	dd21e9b	2010-08-10 18:03:40 -0700	[diff] [blame]	161	i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
				162	unsigned char uc, symToByte, mtfSymbol, *selectors;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	163	unsigned int *dbuf, origPtr;
				164
				165	dbuf = bd->dbuf;
				166	dbufSize = bd->dbufSize;
				167	selectors = bd->selectors;
Prarit Bhargava	dd21e9b	2010-08-10 18:03:40 -0700	[diff] [blame]	168	byteCount = bd->byteCount;
				169	symToByte = bd->symToByte;
				170	mtfSymbol = bd->mtfSymbol;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	171
				172	/* Read in header signature and CRC, then validate signature.
				173	(last block signature means CRC is for whole file, return now) */
				174	i = get_bits(bd, 24);
				175	j = get_bits(bd, 24);
				176	bd->headerCRC = get_bits(bd, 32);
				177	if ((i == 0x177245) && (j == 0x385090))
				178	return RETVAL_LAST_BLOCK;
				179	if ((i != 0x314159) \|\| (j != 0x265359))
				180	return RETVAL_NOT_BZIP_DATA;
				181	/* We can add support for blockRandomised if anybody complains.
				182	There was some code for this in busybox 1.0.0-pre3, but nobody ever
				183	noticed that it didn't actually work. */
				184	if (get_bits(bd, 1))
				185	return RETVAL_OBSOLETE_INPUT;
				186	origPtr = get_bits(bd, 24);
Dan Carpenter	b5c8afe	2014-12-12 16:58:05 -0800	[diff] [blame]	187	if (origPtr >= dbufSize)
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	188	return RETVAL_DATA_ERROR;
				189	/* mapping table: if some byte values are never used (encoding things
				190	like ascii text), the compression code removes the gaps to have fewer
				191	symbols to deal with, and writes a sparse bitfield indicating which
				192	values were present. We make a translation table to convert the
				193	symbols back to the corresponding bytes. */
				194	t = get_bits(bd, 16);
				195	symTotal = 0;
				196	for (i = 0; i < 16; i++) {
				197	if (t&(1 << (15-i))) {
				198	k = get_bits(bd, 16);
				199	for (j = 0; j < 16; j++)
				200	if (k&(1 << (15-j)))
				201	symToByte[symTotal++] = (16*i)+j;
				202	}
				203	}
				204	/* How many different Huffman coding groups does this block use? */
				205	groupCount = get_bits(bd, 3);
				206	if (groupCount < 2 \|\| groupCount > MAX_GROUPS)
				207	return RETVAL_DATA_ERROR;
				208	/* nSelectors: Every GROUP_SIZE many symbols we select a new
				209	Huffman coding group. Read in the group selector list,
				210	which is stored as MTF encoded bit runs. (MTF = Move To
				211	Front, as each value is used it's moved to the start of the
				212	list.) */
				213	nSelectors = get_bits(bd, 15);
				214	if (!nSelectors)
				215	return RETVAL_DATA_ERROR;
				216	for (i = 0; i < groupCount; i++)
				217	mtfSymbol[i] = i;
				218	for (i = 0; i < nSelectors; i++) {
				219	/* Get next value */
				220	for (j = 0; get_bits(bd, 1); j++)
				221	if (j >= groupCount)
				222	return RETVAL_DATA_ERROR;
				223	/* Decode MTF to get the next selector */
				224	uc = mtfSymbol[j];
				225	for (; j; j--)
				226	mtfSymbol[j] = mtfSymbol[j-1];
				227	mtfSymbol[0] = selectors[i] = uc;
				228	}
				229	/* Read the Huffman coding tables for each group, which code
				230	for symTotal literal symbols, plus two run symbols (RUNA,
				231	RUNB) */
				232	symCount = symTotal+2;
				233	for (j = 0; j < groupCount; j++) {
				234	unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
				235	int minLen, maxLen, pp;
				236	/* Read Huffman code lengths for each symbol. They're
				237	stored in a way similar to mtf; record a starting
				238	value for the first symbol, and an offset from the
				239	previous value for everys symbol after that.
				240	(Subtracting 1 before the loop and then adding it
				241	back at the end is an optimization that makes the
				242	test inside the loop simpler: symbol length 0
				243	becomes negative, so an unsigned inequality catches
				244	it.) */
				245	t = get_bits(bd, 5)-1;
				246	for (i = 0; i < symCount; i++) {
				247	for (;;) {
				248	if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
				249	return RETVAL_DATA_ERROR;
				250
				251	/* If first bit is 0, stop. Else
				252	second bit indicates whether to
				253	increment or decrement the value.
				254	Optimization: grab 2 bits and unget
				255	the second if the first was 0. */
				256
				257	k = get_bits(bd, 2);
				258	if (k < 2) {
				259	bd->inbufBitCount++;
				260	break;
				261	}
				262	/* Add one if second bit 1, else
				263	* subtract 1. Avoids if/else */
				264	t += (((k+1)&2)-1);
				265	}
				266	/* Correct for the initial -1, to get the
				267	* final symbol length */
				268	length[i] = t+1;
				269	}
				270	/* Find largest and smallest lengths in this group */
				271	minLen = maxLen = length[0];
				272
				273	for (i = 1; i < symCount; i++) {
				274	if (length[i] > maxLen)
				275	maxLen = length[i];
				276	else if (length[i] < minLen)
				277	minLen = length[i];
				278	}
				279
				280	/* Calculate permute[], base[], and limit[] tables from
				281	* length[].
				282	*
				283	* permute[] is the lookup table for converting
				284	* Huffman coded symbols into decoded symbols. base[]
				285	* is the amount to subtract from the value of a
				286	* Huffman symbol of a given length when using
				287	* permute[].
				288	*
				289	* limit[] indicates the largest numerical value a
				290	* symbol with a given number of bits can have. This
				291	* is how the Huffman codes can vary in length: each
				292	* code with a value > limit[length] needs another
				293	* bit.
				294	*/
				295	hufGroup = bd->groups+j;
				296	hufGroup->minLen = minLen;
				297	hufGroup->maxLen = maxLen;
				298	/* Note that minLen can't be smaller than 1, so we
				299	adjust the base and limit array pointers so we're
				300	not always wasting the first entry. We do this
				301	again when using them (during symbol decoding).*/
				302	base = hufGroup->base-1;
				303	limit = hufGroup->limit-1;
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	304	/* Calculate permute[]. Concurrently, initialize
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	305	* temp[] and limit[]. */
				306	pp = 0;
				307	for (i = minLen; i <= maxLen; i++) {
				308	temp[i] = limit[i] = 0;
				309	for (t = 0; t < symCount; t++)
				310	if (length[t] == i)
				311	hufGroup->permute[pp++] = t;
				312	}
				313	/* Count symbols coded for at each bit length */
				314	for (i = 0; i < symCount; i++)
				315	temp[length[i]]++;
				316	/* Calculate limit[] (the largest symbol-coding value
				317	*at each bit length, which is (previous limit <<
				318	*1)+symbols at this level), and base[] (number of
				319	*symbols to ignore at each bit length, which is limit
				320	*minus the cumulative count of symbols coded for
				321	already). /
				322	pp = t = 0;
				323	for (i = minLen; i < maxLen; i++) {
				324	pp += temp[i];
				325	/* We read the largest possible symbol size
				326	and then unget bits after determining how
				327	many we need, and those extra bits could be
				328	set to anything. (They're noise from
				329	future symbols.) At each level we're
				330	really only interested in the first few
				331	bits, so here we set all the trailing
				332	to-be-ignored bits to 1 so they don't
				333	affect the value > limit[length]
				334	comparison. */
				335	limit[i] = (pp << (maxLen - i)) - 1;
				336	pp <<= 1;
				337	base[i+1] = pp-(t += temp[i]);
				338	}
				339	limit[maxLen+1] = INT_MAX; /* Sentinal value for
				340	* reading next sym. */
				341	limit[maxLen] = pp+temp[maxLen]-1;
				342	base[minLen] = 0;
				343	}
				344	/* We've finished reading and digesting the block header. Now
				345	read this block's Huffman coded symbols from the file and
				346	undo the Huffman coding and run length encoding, saving the
				347	result into dbuf[dbufCount++] = uc */
				348
				349	/* Initialize symbol occurrence counters and symbol Move To
				350	* Front table */
				351	for (i = 0; i < 256; i++) {
				352	byteCount[i] = 0;
				353	mtfSymbol[i] = (unsigned char)i;
				354	}
				355	/* Loop through compressed symbols. */
				356	runPos = dbufCount = symCount = selector = 0;
				357	for (;;) {
				358	/* Determine which Huffman coding group to use. */
				359	if (!(symCount--)) {
				360	symCount = GROUP_SIZE-1;
				361	if (selector >= nSelectors)
				362	return RETVAL_DATA_ERROR;
				363	hufGroup = bd->groups+selectors[selector++];
				364	base = hufGroup->base-1;
				365	limit = hufGroup->limit-1;
				366	}
				367	/* Read next Huffman-coded symbol. */
				368	/* Note: It is far cheaper to read maxLen bits and
				369	back up than it is to read minLen bits and then an
				370	additional bit at a time, testing as we go.
				371	Because there is a trailing last block (with file
				372	CRC), there is no danger of the overread causing an
				373	unexpected EOF for a valid compressed file. As a
				374	further optimization, we do the read inline
				375	(falling back to a call to get_bits if the buffer
				376	runs dry). The following (up to got_huff_bits:) is
				377	equivalent to j = get_bits(bd, hufGroup->maxLen);
				378	*/
				379	while (bd->inbufBitCount < hufGroup->maxLen) {
				380	if (bd->inbufPos == bd->inbufCount) {
				381	j = get_bits(bd, hufGroup->maxLen);
				382	goto got_huff_bits;
				383	}
				384	bd->inbufBits =
				385	(bd->inbufBits << 8)\|bd->inbuf[bd->inbufPos++];
				386	bd->inbufBitCount += 8;
				387	};
				388	bd->inbufBitCount -= hufGroup->maxLen;
				389	j = (bd->inbufBits >> bd->inbufBitCount)&
				390	((1 << hufGroup->maxLen)-1);
				391	got_huff_bits:
				392	/* Figure how how many bits are in next symbol and
				393	* unget extras */
				394	i = hufGroup->minLen;
				395	while (j > limit[i])
				396	++i;
				397	bd->inbufBitCount += (hufGroup->maxLen - i);
				398	/* Huffman decode value to get nextSym (with bounds checking) */
				399	if ((i > hufGroup->maxLen)
				400	\|\| (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
				401	>= MAX_SYMBOLS))
				402	return RETVAL_DATA_ERROR;
				403	nextSym = hufGroup->permute[j];
				404	/* We have now decoded the symbol, which indicates
				405	either a new literal byte, or a repeated run of the
				406	most recent literal byte. First, check if nextSym
				407	indicates a repeated run, and if so loop collecting
				408	how many times to repeat the last literal. */
				409	if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
				410	/* If this is the start of a new run, zero out
				411	* counter */
				412	if (!runPos) {
				413	runPos = 1;
				414	t = 0;
				415	}
				416	/* Neat trick that saves 1 symbol: instead of
				417	or-ing 0 or 1 at each bit position, add 1
				418	or 2 instead. For example, 1011 is 1 << 0
				419	+ 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1
				420	+ 1 << 2. You can make any bit pattern
				421	that way using 1 less symbol than the basic
				422	or 0/1 method (except all bits 0, which
				423	would use no symbols, but a run of length 0
				424	doesn't mean anything in this context).
				425	Thus space is saved. */
				426	t += (runPos << nextSym);
				427	/* +runPos if RUNA; +2runPos if RUNB /
				428
				429	runPos <<= 1;
				430	continue;
				431	}
				432	/* When we hit the first non-run symbol after a run,
				433	we now know how many times to repeat the last
				434	literal, so append that many copies to our buffer
				435	of decoded symbols (dbuf) now. (The last literal
				436	used is the one at the head of the mtfSymbol
				437	array.) */
				438	if (runPos) {
				439	runPos = 0;
				440	if (dbufCount+t >= dbufSize)
				441	return RETVAL_DATA_ERROR;
				442
				443	uc = symToByte[mtfSymbol[0]];
				444	byteCount[uc] += t;
				445	while (t--)
				446	dbuf[dbufCount++] = uc;
				447	}
				448	/* Is this the terminating symbol? */
				449	if (nextSym > symTotal)
				450	break;
				451	/* At this point, nextSym indicates a new literal
				452	character. Subtract one to get the position in the
				453	MTF array at which this literal is currently to be
				454	found. (Note that the result can't be -1 or 0,
				455	because 0 and 1 are RUNA and RUNB. But another
				456	instance of the first symbol in the mtf array,
				457	position 0, would have been handled as part of a
				458	run above. Therefore 1 unused mtf position minus 2
				459	non-literal nextSym values equals -1.) */
				460	if (dbufCount >= dbufSize)
				461	return RETVAL_DATA_ERROR;
				462	i = nextSym - 1;
				463	uc = mtfSymbol[i];
				464	/* Adjust the MTF array. Since we typically expect to
				465	*move only a small number of symbols, and are bound
				466	*by 256 in any case, using memmove here would
				467	*typically be bigger and slower due to function call
				468	overhead and other assorted setup costs. /
				469	do {
				470	mtfSymbol[i] = mtfSymbol[i-1];
				471	} while (--i);
				472	mtfSymbol[0] = uc;
				473	uc = symToByte[uc];
				474	/* We have our literal byte. Save it into dbuf. */
				475	byteCount[uc]++;
				476	dbuf[dbufCount++] = (unsigned int)uc;
				477	}
				478	/* At this point, we've read all the Huffman-coded symbols
				479	(and repeated runs) for this block from the input stream,
				480	and decoded them into the intermediate buffer. There are
				481	dbufCount many decoded bytes in dbuf[]. Now undo the
				482	Burrows-Wheeler transform on dbuf. See
				483	http://dogma.net/markn/articles/bwt/bwt.htm
				484	*/
				485	/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
				486	j = 0;
				487	for (i = 0; i < 256; i++) {
				488	k = j+byteCount[i];
				489	byteCount[i] = j;
				490	j = k;
				491	}
				492	/* Figure out what order dbuf would be in if we sorted it. */
				493	for (i = 0; i < dbufCount; i++) {
				494	uc = (unsigned char)(dbuf[i] & 0xff);
				495	dbuf[byteCount[uc]] \|= (i << 8);
				496	byteCount[uc]++;
				497	}
				498	/* Decode first byte by hand to initialize "previous" byte.
				499	Note that it doesn't get output, and if the first three
				500	characters are identical it doesn't qualify as a run (hence
				501	writeRunCountdown = 5). */
				502	if (dbufCount) {
				503	if (origPtr >= dbufCount)
				504	return RETVAL_DATA_ERROR;
				505	bd->writePos = dbuf[origPtr];
				506	bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
				507	bd->writePos >>= 8;
				508	bd->writeRunCountdown = 5;
				509	}
				510	bd->writeCount = dbufCount;
				511
				512	return RETVAL_OK;
				513	}
				514
				515	/* Undo burrows-wheeler transform on intermediate buffer to produce output.
				516	If start_bunzip was initialized with out_fd =-1, then up to len bytes of
				517	data are written to outbuf. Return value is number of bytes written or
				518	error (all errors are negative numbers). If out_fd!=-1, outbuf and len
				519	are ignored, data is written to out_fd and return is RETVAL_OK or error.
				520	*/
				521
				522	static int INIT read_bunzip(struct bunzip_data bd, char outbuf, int len)
				523	{
				524	const unsigned int *dbuf;
				525	int pos, xcurrent, previous, gotcount;
				526
				527	/* If last read was short due to end of file, return last block now */
				528	if (bd->writeCount < 0)
				529	return bd->writeCount;
				530
				531	gotcount = 0;
				532	dbuf = bd->dbuf;
				533	pos = bd->writePos;
				534	xcurrent = bd->writeCurrent;
				535
				536	/* We will always have pending decoded data to write into the output
				537	buffer unless this is the very first call (in which case we haven't
				538	Huffman-decoded a block into the intermediate buffer yet). */
				539
				540	if (bd->writeCopies) {
				541	/* Inside the loop, writeCopies means extra copies (beyond 1) */
				542	--bd->writeCopies;
				543	/* Loop outputting bytes */
				544	for (;;) {
				545	/* If the output buffer is full, snapshot
				546	* state and return */
				547	if (gotcount >= len) {
				548	bd->writePos = pos;
				549	bd->writeCurrent = xcurrent;
				550	bd->writeCopies++;
				551	return len;
				552	}
				553	/* Write next byte into output buffer, updating CRC */
				554	outbuf[gotcount++] = xcurrent;
				555	bd->writeCRC = (((bd->writeCRC) << 8)
				556	^bd->crc32Table[((bd->writeCRC) >> 24)
				557	^xcurrent]);
				558	/* Loop now if we're outputting multiple
				559	* copies of this byte */
				560	if (bd->writeCopies) {
				561	--bd->writeCopies;
				562	continue;
				563	}
				564	decode_next_byte:
				565	if (!bd->writeCount--)
				566	break;
				567	/* Follow sequence vector to undo
				568	* Burrows-Wheeler transform */
				569	previous = xcurrent;
				570	pos = dbuf[pos];
				571	xcurrent = pos&0xff;
				572	pos >>= 8;
				573	/* After 3 consecutive copies of the same
				574	byte, the 4th is a repeat count. We count
				575	down from 4 instead *of counting up because
				576	testing for non-zero is faster */
				577	if (--bd->writeRunCountdown) {
				578	if (xcurrent != previous)
				579	bd->writeRunCountdown = 4;
				580	} else {
				581	/* We have a repeated run, this byte
				582	* indicates the count */
				583	bd->writeCopies = xcurrent;
				584	xcurrent = previous;
				585	bd->writeRunCountdown = 5;
				586	/* Sometimes there are just 3 bytes
				587	* (run length 0) */
				588	if (!bd->writeCopies)
				589	goto decode_next_byte;
				590	/* Subtract the 1 copy we'd output
				591	* anyway to get extras */
				592	--bd->writeCopies;
				593	}
				594	}
				595	/* Decompression of this block completed successfully */
				596	bd->writeCRC = ~bd->writeCRC;
				597	bd->totalCRC = ((bd->totalCRC << 1) \|
				598	(bd->totalCRC >> 31)) ^ bd->writeCRC;
				599	/* If this block had a CRC error, force file level CRC error. */
				600	if (bd->writeCRC != bd->headerCRC) {
				601	bd->totalCRC = bd->headerCRC+1;
				602	return RETVAL_LAST_BLOCK;
				603	}
				604	}
				605
				606	/* Refill the intermediate buffer by Huffman-decoding next
				607	* block of input */
				608	/* (previous is just a convenient unused temp variable here) */
				609	previous = get_next_block(bd);
				610	if (previous) {
				611	bd->writeCount = previous;
				612	return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
				613	}
				614	bd->writeCRC = 0xffffffffUL;
				615	pos = bd->writePos;
				616	xcurrent = bd->writeCurrent;
				617	goto decode_next_byte;
				618	}
				619
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	620	static long INIT nofill(void *buf, unsigned long len)
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	621	{
				622	return -1;
				623	}
				624
				625	/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain
				626	a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
				627	ignored, and data is read from file handle into temporary buffer. */
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	628	static int INIT start_bunzip(struct bunzip_data *bdp, void inbuf, long len,
				629	long (fill)(void, unsigned long))
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	630	{
				631	struct bunzip_data *bd;
				632	unsigned int i, j, c;
				633	const unsigned int BZh0 =
				634	(((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
				635	+(((unsigned int)'h') << 8)+(unsigned int)'0';
				636
				637	/* Figure out how much data to allocate */
				638	i = sizeof(struct bunzip_data);
				639
				640	/* Allocate bunzip_data. Most fields initialize to zero. */
				641	bd = *bdp = malloc(i);
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	642	if (!bd)
				643	return RETVAL_OUT_OF_MEMORY;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	644	memset(bd, 0, sizeof(struct bunzip_data));
				645	/* Setup input buffer */
				646	bd->inbuf = inbuf;
				647	bd->inbufCount = len;
				648	if (fill != NULL)
				649	bd->fill = fill;
				650	else
				651	bd->fill = nofill;
				652
				653	/* Init the CRC32 table (big endian) */
				654	for (i = 0; i < 256; i++) {
				655	c = i << 24;
				656	for (j = 8; j; j--)
				657	c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);
				658	bd->crc32Table[i] = c;
				659	}
				660
				661	/* Ensure that file starts with "BZh['1'-'9']." */
				662	i = get_bits(bd, 32);
				663	if (((unsigned int)(i-BZh0-1)) >= 9)
				664	return RETVAL_NOT_BZIP_DATA;
				665
				666	/* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
				667	uncompressed data. Allocate intermediate buffer for block. */
				668	bd->dbufSize = 100000*(i-BZh0);
				669
				670	bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	671	if (!bd->dbuf)
				672	return RETVAL_OUT_OF_MEMORY;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	673	return RETVAL_OK;
				674	}
				675
				676	/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
				677	not end of file.) */
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	678	STATIC int INIT bunzip2(unsigned char *buf, long len,
				679	long (fill)(void, unsigned long),
				680	long (flush)(void, unsigned long),
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	681	unsigned char *outbuf,
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	682	long *pos,
Lasse Collin	93685ad2	2011-01-12 17:01:14 -0800	[diff] [blame]	683	void(error)(char x))
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	684	{
				685	struct bunzip_data *bd;
				686	int i = -1;
				687	unsigned char *inbuf;
				688
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	689	if (flush)
				690	outbuf = malloc(BZIP2_IOBUF_SIZE);
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	691
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	692	if (!outbuf) {
Paul Bolle	90802ed	2011-12-05 13:00:34 +0100	[diff] [blame]	693	error("Could not allocate output buffer");
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	694	return RETVAL_OUT_OF_MEMORY;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	695	}
				696	if (buf)
				697	inbuf = buf;
				698	else
				699	inbuf = malloc(BZIP2_IOBUF_SIZE);
				700	if (!inbuf) {
Paul Bolle	90802ed	2011-12-05 13:00:34 +0100	[diff] [blame]	701	error("Could not allocate input buffer");
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	702	i = RETVAL_OUT_OF_MEMORY;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	703	goto exit_0;
				704	}
				705	i = start_bunzip(&bd, inbuf, len, fill);
				706	if (!i) {
				707	for (;;) {
				708	i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
				709	if (i <= 0)
				710	break;
				711	if (!flush)
				712	outbuf += i;
				713	else
				714	if (i != flush(outbuf, i)) {
				715	i = RETVAL_UNEXPECTED_OUTPUT_EOF;
				716	break;
				717	}
				718	}
				719	}
				720	/* Check CRC and release memory */
				721	if (i == RETVAL_LAST_BLOCK) {
				722	if (bd->headerCRC != bd->totalCRC)
				723	error("Data integrity error when decompressing.");
				724	else
				725	i = RETVAL_OK;
				726	} else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
				727	error("Compressed file ends unexpectedly");
				728	}
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	729	if (!bd)
				730	goto exit_1;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	731	if (bd->dbuf)
				732	large_free(bd->dbuf);
				733	if (pos)
				734	*pos = bd->inbufPos;
				735	free(bd);
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	736	exit_1:
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	737	if (!buf)
				738	free(inbuf);
				739	exit_0:
				740	if (flush)
				741	free(outbuf);
				742	return i;
				743	}
				744
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	745	#ifdef PREBOOT
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	746	STATIC int INIT decompress(unsigned char *buf, long len,
				747	long (fill)(void, unsigned long),
				748	long (flush)(void, unsigned long),
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	749	unsigned char *outbuf,
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	750	long *pos,
Lasse Collin	93685ad2	2011-01-12 17:01:14 -0800	[diff] [blame]	751	void(error)(char x))
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	752	{
Lasse Collin	93685ad2	2011-01-12 17:01:14 -0800	[diff] [blame]	753	return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	754	}
				755	#endif