aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Andersen <andersen@codepoet.org>2003-10-18 01:58:35 +0000
committerEric Andersen <andersen@codepoet.org>2003-10-18 01:58:35 +0000
commit0d6d88a2058d191c34d25a8709aca40311bb0c2e (patch)
tree36ceefac611aab48f725052d47bba93b4e48d1c9
parent6fe55ae93983946b266ff18c5411d3e3ff9469b4 (diff)
downloadbusybox-w32-0d6d88a2058d191c34d25a8709aca40311bb0c2e.tar.gz
busybox-w32-0d6d88a2058d191c34d25a8709aca40311bb0c2e.tar.bz2
busybox-w32-0d6d88a2058d191c34d25a8709aca40311bb0c2e.zip
Rob Landley's new micro-bunzip version 3. Rob writes:
The API for using partial writes, as described in my last message, sucked. So here's a patch against my last patch that changes things so that write_bunzip_data calls read_bunzip_data itself behind the scenes whenever necessary. So usage is now just start_bunzip(), write_bunzip_data() until it returns a negative number, and then the cleanup at the end of uncompressStream. It adds 32 bytes to the executable, but it should allow the caller (tar) to be simplified enough to compensate. Total -Os stripped exe size now 6856 bytes. Rob P.S. I attached the whole C file so you don't have to keep incremental patches straight if you don't want to. :) P.S. In the version I'm banging on now, I've simplified the license to just LGPL. I read the OSL a bit more closely and the patent termination clause would have bit IBM in their counter-suit of SCO if the code in question had been OSL instead of GPL, and I've decided I just don't want to beta-test legal code right now.
-rw-r--r--archival/libunarchive/decompress_bunzip2.c2057
1 files changed, 465 insertions, 1592 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index 0164b77e0..474186a2a 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -1,1658 +1,531 @@
1/*-- 1/* vi: set sw=4 ts=4: */
2 This file is a part of bzip2 and/or libbzip2, a program and 2/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
3 library for lossless, block-sorting data compression.
4 3
5 Copyright (C) 1996-2000 Julian R Seward. All rights reserved. 4 Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
5 which also acknowledges contributions by Mike Burrows, David Wheeler,
6 Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
7 Robert Sedgewick, and Jon L. Bentley.
6 8
7 Redistribution and use in source and binary forms, with or without 9 This code is licensed under the LGPLv2:
8 modification, are permitted provided that the following conditions 10 LGPL (http://www.gnu.org/copyleft/lgpl.html
9 are met: 11*/
10 12
11 1. Redistributions of source code must retain the above copyright 13#include <setjmp.h>
12 notice, this list of conditions and the following disclaimer.
13
14 2. The origin of this software must not be misrepresented; you must
15 not claim that you wrote the original software. If you use this
16 software in a product, an acknowledgment in the product
17 documentation would be appreciated but is not required.
18
19 3. Altered source versions must be plainly marked as such, and must
20 not be misrepresented as being the original software.
21
22 4. The name of the author may not be used to endorse or promote
23 products derived from this software without specific prior written
24 permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
27 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
30 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
32 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
38 Julian Seward, Cambridge, UK.
39 jseward@acm.org
40 bzip2/libbzip2 version 1.0 of 21 March 2000
41
42 This program is based on (at least) the work of:
43 Mike Burrows
44 David Wheeler
45 Peter Fenwick
46 Alistair Moffat
47 Radford Neal
48 Ian H. Witten
49 Robert Sedgewick
50 Jon L. Bentley
51
52 For more information on these sources, see the manual.
53--*/
54
55#include <stdlib.h>
56#include <stdio.h> 14#include <stdio.h>
15#include <stdlib.h>
57#include <string.h> 16#include <string.h>
58#include <getopt.h>
59#include <unistd.h> 17#include <unistd.h>
60 18
61#include "busybox.h" 19/* Constants for huffman coding */
62 20#define MAX_GROUPS 6
63#define MTFA_SIZE 4096 21#define GROUP_SIZE 50 /* 64 would have been more efficient */
64#define MTFL_SIZE 16 22#define MAX_HUFCODE_BITS 20 /* Longest huffman code allowed */
65#define BZ_N_GROUPS 6 23#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
66#define BZ_G_SIZE 50 24#define SYMBOL_RUNA 0
67#define BZ_MAX_ALPHA_SIZE 258 25#define SYMBOL_RUNB 1
68 26
69#define BZ_OK 0 27/* Status return values */
70#define BZ_STREAM_END 4 28#define RETVAL_OK 0
71#define BZ_SEQUENCE_ERROR (-1) 29#define RETVAL_LAST_BLOCK (-1)
72#define BZ_DATA_ERROR (-4) 30#define RETVAL_NOT_BZIP_DATA (-2)
73#define BZ_DATA_ERROR_MAGIC (-5) 31#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
74#define BZ_IO_ERROR (-6) 32#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
75#define BZ_UNEXPECTED_EOF (-7) 33#define RETVAL_DATA_ERROR (-5)
76 34#define RETVAL_OUT_OF_MEMORY (-6)
77#define BZ_RUNA 0 35#define RETVAL_OBSOLETE_INPUT (-7)
78#define BZ_RUNB 1 36
79 37/* Other housekeeping constants */
80#define BZ_MAX_UNUSED 5000 38#define IOBUF_SIZE 4096
81#define FILE_NAME_LEN 1034 39
82/*-- states for decompression. --*/ 40char *bunzip_errors[]={NULL,"Bad file checksum","Not bzip data",
83 41 "Unexpected input EOF","Unexpected output EOF","Data error",
84#define BZ_X_IDLE 1 42 "Out of memory","Obsolete (pre 0.9.5) bzip format not supported."};
85#define BZ_X_OUTPUT 2 43
86 44/* This is what we know about each huffman coding group */
87#define BZ_X_MAGIC_1 10 45struct group_data {
88#define BZ_X_MAGIC_2 11 46 int limit[MAX_HUFCODE_BITS],base[MAX_HUFCODE_BITS],permute[MAX_SYMBOLS];
89#define BZ_X_MAGIC_3 12 47 char minLen, maxLen;
90#define BZ_X_MAGIC_4 13
91#define BZ_X_BLKHDR_1 14
92#define BZ_X_BLKHDR_2 15
93#define BZ_X_BLKHDR_3 16
94#define BZ_X_BLKHDR_4 17
95#define BZ_X_BLKHDR_5 18
96#define BZ_X_BLKHDR_6 19
97#define BZ_X_BCRC_1 20
98#define BZ_X_BCRC_2 21
99#define BZ_X_BCRC_3 22
100#define BZ_X_BCRC_4 23
101#define BZ_X_RANDBIT 24
102#define BZ_X_ORIGPTR_1 25
103#define BZ_X_ORIGPTR_2 26
104#define BZ_X_ORIGPTR_3 27
105#define BZ_X_MAPPING_1 28
106#define BZ_X_MAPPING_2 29
107#define BZ_X_SELECTOR_1 30
108#define BZ_X_SELECTOR_2 31
109#define BZ_X_SELECTOR_3 32
110#define BZ_X_CODING_1 33
111#define BZ_X_CODING_2 34
112#define BZ_X_CODING_3 35
113#define BZ_X_MTF_1 36
114#define BZ_X_MTF_2 37
115#define BZ_X_MTF_3 38
116#define BZ_X_MTF_4 39
117#define BZ_X_MTF_5 40
118#define BZ_X_MTF_6 41
119#define BZ_X_ENDHDR_2 42
120#define BZ_X_ENDHDR_3 43
121#define BZ_X_ENDHDR_4 44
122#define BZ_X_ENDHDR_5 45
123#define BZ_X_ENDHDR_6 46
124#define BZ_X_CCRC_1 47
125#define BZ_X_CCRC_2 48
126#define BZ_X_CCRC_3 49
127#define BZ_X_CCRC_4 50
128
129#define BZ_MAX_CODE_LEN 23
130#define OM_TEST 3
131
132typedef struct {
133 char *next_in;
134 unsigned int avail_in;
135
136 char *next_out;
137 unsigned int avail_out;
138
139 void *state;
140
141} bz_stream;
142
143#define BZ_MAX_UNUSED 5000
144typedef struct {
145 bz_stream strm;
146 int fd;
147 unsigned char initialisedOk;
148 char buf[BZ_MAX_UNUSED];
149 int lastErr;
150 int bufN;
151} bzFile;
152
153/*-- Structure holding all the decompression-side stuff. --*/
154typedef struct {
155 /* pointer back to the struct bz_stream */
156 bz_stream* strm;
157
158 /* state indicator for this stream */
159 int state;
160
161 /* for doing the final run-length decoding */
162 unsigned char state_out_ch;
163 int state_out_len;
164 unsigned char blockRandomised;
165 int rNToGo;
166 int rTPos;
167
168 /* the buffer for bit stream reading */
169 unsigned int bsBuff;
170 int bsLive;
171
172 /* misc administratium */
173 int blockSize100k;
174 int currBlockNo;
175
176 /* for undoing the Burrows-Wheeler transform */
177 int origPtr;
178 unsigned int tPos;
179 int k0;
180 int unzftab[256];
181 int nblock_used;
182 int cftab[257];
183 int cftabCopy[257];
184
185 /* for undoing the Burrows-Wheeler transform (FAST) */
186 unsigned int *tt;
187
188 /* stored and calculated CRCs */
189 unsigned int storedBlockCRC;
190 unsigned int storedCombinedCRC;
191 unsigned int calculatedBlockCRC;
192 unsigned int calculatedCombinedCRC;
193
194 /* map of bytes used in block */
195 int nInUse;
196 unsigned char inUse[256];
197 unsigned char inUse16[16];
198 unsigned char seqToUnseq[256];
199
200 /* for decoding the MTF values */
201 unsigned char mtfa [MTFA_SIZE];
202 unsigned char selector [2 + (900000 / BZ_G_SIZE)];
203 unsigned char selectorMtf[2 + (900000 / BZ_G_SIZE)];
204 unsigned char len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
205 int mtfbase[256 / MTFL_SIZE];
206
207 int limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
208 int base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
209 int perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
210 int minLens[BZ_N_GROUPS];
211
212 /* save area for scalars in the main decompress code */
213 int save_i;
214 int save_j;
215 int save_t;
216 int save_alphaSize;
217 int save_nGroups;
218 int save_nSelectors;
219 int save_EOB;
220 int save_groupNo;
221 int save_groupPos;
222 int save_nextSym;
223 int save_nblockMAX;
224 int save_nblock;
225 int save_es;
226 int save_N;
227 int save_curr;
228 int save_zt;
229 int save_zn;
230 int save_zvec;
231 int save_zj;
232 int save_gSel;
233 int save_gMinlen;
234 int *save_gLimit;
235 int *save_gBase;
236 int *save_gPerm;
237} DState;
238
239static int BZ2_rNums[512];
240static bzFile *bzf;
241static int bzerr = BZ_OK;
242
243static const unsigned int BZ2_crc32Table[256] = {
244
245 /*-- Ugly, innit? --*/
246
247 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
248 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
249 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
250 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
251 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
252 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
253 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
254 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
255 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
256 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
257 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
258 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
259 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
260 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
261 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
262 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
263 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
264 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
265 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
266 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
267 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
268 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
269 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
270 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
271 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
272 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
273 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
274 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
275 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
276 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
277 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
278 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
279 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
280 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
281 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
282 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
283 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
284 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
285 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
286 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
287 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
288 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
289 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
290 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
291 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
292 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
293 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
294 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
295 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
296 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
297 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
298 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
299 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
300 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
301 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
302 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
303 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
304 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
305 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
306 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
307 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
308 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
309 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
310 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
311}; 48};
312 49
313static void bz_rand_udp_mask(DState *s) 50/* Structure holding all the housekeeping data, including IO buffers and
51 memory that persists between calls to bunzip */
52typedef struct {
53 /* For I/O error handling */
54 jmp_buf jmpbuf;
55 /* Input stream, input buffer, input bit buffer */
56 int in_fd,inbufCount,inbufPos;
57 unsigned char *inbuf;
58 unsigned int inbufBitCount, inbufBits;
59 /* Output buffer */
60 char outbuf[IOBUF_SIZE];
61 int outbufPos;
62 /* The CRC values stored in the block header and calculated from the data */
63 unsigned int crc32Table[256],headerCRC, dataCRC, totalCRC;
64 /* Intermediate buffer and its size (in bytes) */
65 unsigned int *dbuf, dbufSize;
66 /* State for interrupting output loop */
67 int writePos,writeRun,writeCount,writeCurrent;
68
69 /* These things are a bit too big to go on the stack */
70 unsigned char selectors[32768]; /* nSelectors=15 bits */
71 struct group_data groups[MAX_GROUPS]; /* huffman coding tables */
72} bunzip_data;
73
74/* Return the next nnn bits of input. All reads from the compressed input
75 are done through this function. All reads are big endian */
76static unsigned int get_bits(bunzip_data *bd, char bits_wanted)
314{ 77{
315 if (s->rNToGo == 0) { 78 unsigned int bits=0;
316 s->rNToGo = BZ2_rNums[s->rTPos]; 79
317 s->rTPos++; 80 /* If we need to get more data from the byte buffer, do so. (Loop getting
318 if (s->rTPos == 512) { 81 one byte at a time to enforce endianness and avoid unaligned access.) */
319 s->rTPos = 0; 82 while (bd->inbufBitCount<bits_wanted) {
83 /* If we need to read more data from file into byte buffer, do so */
84 if(bd->inbufPos==bd->inbufCount) {
85 if(!(bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE)))
86 longjmp(bd->jmpbuf,RETVAL_UNEXPECTED_INPUT_EOF);
87 bd->inbufPos=0;
320 } 88 }
321 } 89 /* Avoid 32-bit overflow (dump bit buffer to top of output) */
322 s->rNToGo--; 90 if(bd->inbufBitCount>=24) {
323} 91 bits=bd->inbufBits&((1<<bd->inbufBitCount)-1);
324 92 bits_wanted-=bd->inbufBitCount;
325static void BZ2_hbCreateDecodeTables(int *limit, int *base, int *perm, unsigned char *length, int minLen, int maxLen, int alphaSize ) 93 bits<<=bits_wanted;
326{ 94 bd->inbufBitCount=0;
327 int pp, i, j, vec;
328
329 pp = 0;
330 for (i = minLen; i <= maxLen; i++) {
331 for (j = 0; j < alphaSize; j++) {
332 if (length[j] == i) {
333 perm[pp] = j;
334 pp++;
335 }
336 } 95 }
96 /* Grab next 8 bits of input from buffer. */
97 bd->inbufBits=(bd->inbufBits<<8)|bd->inbuf[bd->inbufPos++];
98 bd->inbufBitCount+=8;
337 } 99 }
100 /* Calculate result */
101 bd->inbufBitCount-=bits_wanted;
102 bits|=(bd->inbufBits>>bd->inbufBitCount)&((1<<bits_wanted)-1);
338 103
339 for (i = 0; i < BZ_MAX_CODE_LEN; i++) { 104 return bits;
340 base[i] = 0;
341 }
342
343 for (i = 0; i < alphaSize; i++) {
344 base[length[i]+1]++;
345 }
346
347 for (i = 1; i < BZ_MAX_CODE_LEN; i++) {
348 base[i] += base[i-1];
349 }
350
351 for (i = 0; i < BZ_MAX_CODE_LEN; i++) {
352 limit[i] = 0;
353 }
354 vec = 0;
355
356 for (i = minLen; i <= maxLen; i++) {
357 vec += (base[i+1] - base[i]);
358 limit[i] = vec-1;
359 vec <<= 1;
360 }
361 for (i = minLen + 1; i <= maxLen; i++) {
362 base[i] = ((limit[i-1] + 1) << 1) - base[i];
363 }
364}
365
366
367static int get_bits(DState *s, int *vvv, char nnn)
368{
369 while (1) {
370 if (s->bsLive >= nnn) {
371 *vvv = (s->bsBuff >> (s->bsLive-nnn)) & ((1 << nnn)-1);
372 s->bsLive -= nnn;
373 break;
374 }
375 if (s->strm->avail_in == 0) {
376 return(FALSE);
377 }
378 s->bsBuff = (s->bsBuff << 8) | ((unsigned int) (*((unsigned char*)(s->strm->next_in))));
379 s->bsLive += 8;
380 s->strm->next_in++;
381 s->strm->avail_in--;
382 }
383 return(TRUE);
384} 105}
385 106
386static int bz_get_fast(DState *s) 107/* Decompress a block of text to into intermediate buffer */
387{
388 int cccc;
389 s->tPos = s->tt[s->tPos];
390 cccc = (unsigned char)(s->tPos & 0xff);
391 s->tPos >>= 8;
392 return(cccc);
393}
394 108
395/*---------------------------------------------------*/ 109extern int read_bunzip_data(bunzip_data *bd)
396static inline int BZ2_decompress(DState *s)
397{ 110{
398 int uc = 0; 111 struct group_data *hufGroup;
399 int retVal; 112 int dbufCount,nextSym,dbufSize,origPtr,groupCount,*base,*limit,selector,
400 int minLen, maxLen; 113 i,j,k,t,runPos,symCount,symTotal,nSelectors,byteCount[256];
401 114 unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
402 /* stuff that needs to be saved/restored */ 115 unsigned int *dbuf;
403 int i; 116
404 int j; 117 /* Read in header signature (borrowing mtfSymbol for temp space). */
405 int t; 118 for(i=0;i<6;i++) mtfSymbol[i]=get_bits(bd,8);
406 int alphaSize; 119 mtfSymbol[6]=0;
407 int nGroups; 120 /* Read CRC (which is stored big endian). */
408 int nSelectors; 121 bd->headerCRC=get_bits(bd,32);
409 int EOB; 122 /* Is this the last block (with CRC for file)? */
410 int groupNo; 123 if(!strcmp(mtfSymbol,"\x17\x72\x45\x38\x50\x90"))
411 int groupPos; 124 return RETVAL_LAST_BLOCK;
412 int nextSym; 125 /* If it's not a valid data block, barf. */
413 int nblockMAX; 126 if(strcmp(mtfSymbol,"\x31\x41\x59\x26\x53\x59"))
414 int nblock; 127 return RETVAL_NOT_BZIP_DATA;
415 int es; 128
416 int N; 129 dbuf=bd->dbuf;
417 int curr; 130 dbufSize=bd->dbufSize;
418 int zt; 131 selectors=bd->selectors;
419 int zn; 132 /* We can add support for blockRandomised if anybody complains. There was
420 int zvec; 133 some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
421 int zj; 134 it didn't actually work. */
422 int gSel; 135 if(get_bits(bd,1)) return RETVAL_OBSOLETE_INPUT;
423 int gMinlen; 136 if((origPtr=get_bits(bd,24)) > dbufSize) return RETVAL_DATA_ERROR;
424 int *gLimit; 137 /* mapping table: if some byte values are never used (encoding things
425 int *gBase; 138 like ascii text), the compression code removes the gaps to have fewer
426 int *gPerm; 139 symbols to deal with, and writes a sparse bitfield indicating which
427 int switch_val; 140 values were present. We make a translation table to convert the symbols
428 141 back to the corresponding bytes. */
429 int get_mtf_val_init(void) 142 t=get_bits(bd, 16);
430 { 143 memset(symToByte,0,256);
431 if (groupPos == 0) { 144 symTotal=0;
432 groupNo++; 145 for (i=0;i<16;i++) {
433 if (groupNo >= nSelectors) { 146 if(t&(1<<(15-i))) {
434 retVal = BZ_DATA_ERROR; 147 k=get_bits(bd,16);
435 return(FALSE); 148 for(j=0;j<16;j++)
436 } 149 if(k&(1<<(15-j))) symToByte[symTotal++]=(16*i)+j;
437 groupPos = BZ_G_SIZE;
438 gSel = s->selector[groupNo];
439 gMinlen = s->minLens[gSel];
440 gLimit = &(s->limit[gSel][0]);
441 gPerm = &(s->perm[gSel][0]);
442 gBase = &(s->base[gSel][0]);
443 } 150 }
444 groupPos--;
445 zn = gMinlen;
446 return(TRUE);
447 } 151 }
448 152 /* How many different huffman coding groups does this block use? */
449 if (s->state == BZ_X_MAGIC_1) { 153 groupCount=get_bits(bd,3);
450 /*initialise the save area*/ 154 if (groupCount<2 || groupCount>MAX_GROUPS) return RETVAL_DATA_ERROR;
451 s->save_i = 0; 155 /* nSelectors: Every GROUP_SIZE many symbols we select a new huffman coding
452 s->save_j = 0; 156 group. Read in the group selector list, which is stored as MTF encoded
453 s->save_t = 0; 157 bit runs. */
454 s->save_alphaSize = 0; 158 if(!(nSelectors=get_bits(bd, 15))) return RETVAL_DATA_ERROR;
455 s->save_nGroups = 0; 159 for(i=0; i<groupCount; i++) mtfSymbol[i] = i;
456 s->save_nSelectors = 0; 160 for(i=0; i<nSelectors; i++) {
457 s->save_EOB = 0; 161 /* Get next value */
458 s->save_groupNo = 0; 162 for(j=0;get_bits(bd,1);j++) if (j>=groupCount) return RETVAL_DATA_ERROR;
459 s->save_groupPos = 0; 163 /* Decode MTF to get the next selector */
460 s->save_nextSym = 0; 164 uc = mtfSymbol[j];
461 s->save_nblockMAX = 0; 165 memmove(mtfSymbol+1,mtfSymbol,j);
462 s->save_nblock = 0; 166 mtfSymbol[0]=selectors[i]=uc;
463 s->save_es = 0;
464 s->save_N = 0;
465 s->save_curr = 0;
466 s->save_zt = 0;
467 s->save_zn = 0;
468 s->save_zvec = 0;
469 s->save_zj = 0;
470 s->save_gSel = 0;
471 s->save_gMinlen = 0;
472 s->save_gLimit = NULL;
473 s->save_gBase = NULL;
474 s->save_gPerm = NULL;
475 } 167 }
476 168 /* Read the huffman coding tables for each group, which code for symTotal
477 /*restore from the save area*/ 169 literal symbols, plus two run symbols (RUNA, RUNB) */
478 i = s->save_i; 170 symCount=symTotal+2;
479 j = s->save_j; 171 for (j=0; j<groupCount; j++) {
480 t = s->save_t; 172 unsigned char length[MAX_SYMBOLS],temp[MAX_HUFCODE_BITS+1];
481 alphaSize = s->save_alphaSize; 173 int minLen, maxLen, pp;
482 nGroups = s->save_nGroups; 174 /* Read lengths */
483 nSelectors = s->save_nSelectors; 175 t=get_bits(bd, 5);
484 EOB = s->save_EOB; 176 for (i = 0; i < symCount; i++) {
485 groupNo = s->save_groupNo; 177 for(;;) {
486 groupPos = s->save_groupPos; 178 if (t < 1 || t > MAX_HUFCODE_BITS) return RETVAL_DATA_ERROR;
487 nextSym = s->save_nextSym; 179 if(!get_bits(bd, 1)) break;
488 nblockMAX = s->save_nblockMAX; 180 if(!get_bits(bd, 1)) t++;
489 nblock = s->save_nblock; 181 else t--;
490 es = s->save_es; 182 }
491 N = s->save_N; 183 length[i] = t;
492 curr = s->save_curr;
493 zt = s->save_zt;
494 zn = s->save_zn;
495 zvec = s->save_zvec;
496 zj = s->save_zj;
497 gSel = s->save_gSel;
498 gMinlen = s->save_gMinlen;
499 gLimit = s->save_gLimit;
500 gBase = s->save_gBase;
501 gPerm = s->save_gPerm;
502
503 retVal = BZ_OK;
504 switch_val = s->state;
505 switch (switch_val) {
506 case BZ_X_MAGIC_1:
507 s->state = BZ_X_MAGIC_1;
508 if (! get_bits(s, &uc, 8)) {
509 retVal = BZ_OK;
510 goto save_state_and_return;
511 }
512 if (uc != 'B') {
513 retVal = BZ_DATA_ERROR_MAGIC;
514 goto save_state_and_return;
515 }
516
517 case BZ_X_MAGIC_2:
518 s->state = BZ_X_MAGIC_2;
519 if (! get_bits(s, &uc, 8)) {
520 retVal = BZ_OK;
521 goto save_state_and_return;
522 }
523 if (uc != 'Z') {
524 retVal = BZ_DATA_ERROR_MAGIC;
525 goto save_state_and_return;
526 }
527
528 case BZ_X_MAGIC_3:
529 s->state = BZ_X_MAGIC_3;
530 if (! get_bits(s, &uc, 8)) {
531 retVal = BZ_OK;
532 goto save_state_and_return;
533 }
534 if (uc != 'h') {
535 retVal = BZ_DATA_ERROR_MAGIC;
536 goto save_state_and_return;
537 }
538
539 case BZ_X_MAGIC_4:
540 s->state = BZ_X_MAGIC_4;
541 if (! get_bits(s, &s->blockSize100k, 8)) {
542 retVal = BZ_OK;
543 goto save_state_and_return;
544 }
545 if ((s->blockSize100k < '1') || (s->blockSize100k > '9')) {
546 retVal = BZ_DATA_ERROR_MAGIC;
547 goto save_state_and_return;
548 }
549 s->blockSize100k -= '0';
550
551 s->tt = xmalloc(s->blockSize100k * 100000 * sizeof(int));
552
553 case BZ_X_BLKHDR_1:
554 s->state = BZ_X_BLKHDR_1;
555 if (! get_bits(s, &uc, 8)) {
556 retVal = BZ_OK;
557 goto save_state_and_return;
558 }
559
560 if (uc == 0x17) {
561 goto endhdr_2;
562 }
563 if (uc != 0x31) {
564 retVal = BZ_DATA_ERROR;
565 goto save_state_and_return;
566 }
567
568 case BZ_X_BLKHDR_2:
569 s->state = BZ_X_BLKHDR_2;
570 if (! get_bits(s, &uc, 8)) {
571 retVal = BZ_OK;
572 goto save_state_and_return;
573 }
574 if (uc != 0x41) {
575 retVal = BZ_DATA_ERROR;
576 goto save_state_and_return;
577 }
578
579 case BZ_X_BLKHDR_3:
580 s->state = BZ_X_BLKHDR_3;
581 if (! get_bits(s, &uc, 8)) {
582 retVal = BZ_OK;
583 goto save_state_and_return;
584 }
585 if (uc != 0x59) {
586 retVal = BZ_DATA_ERROR;
587 goto save_state_and_return;
588 }
589
590 case BZ_X_BLKHDR_4:
591 s->state = BZ_X_BLKHDR_4;
592 if (! get_bits(s, &uc, 8)) {
593 retVal = BZ_OK;
594 goto save_state_and_return;
595 }
596 if (uc != 0x26) {
597 retVal = BZ_DATA_ERROR;
598 goto save_state_and_return;
599 }
600
601 case BZ_X_BLKHDR_5:
602 s->state = BZ_X_BLKHDR_5;
603 if (! get_bits(s, &uc, 8)) {
604 retVal = BZ_OK;
605 goto save_state_and_return;
606 }
607 if (uc != 0x53) {
608 retVal = BZ_DATA_ERROR;
609 goto save_state_and_return;
610 }
611
612 case BZ_X_BLKHDR_6:
613 s->state = BZ_X_BLKHDR_6;
614 if (! get_bits(s, &uc, 8)) {
615 retVal = BZ_OK;
616 goto save_state_and_return;
617 }
618 if (uc != 0x59) {
619 retVal = BZ_DATA_ERROR;
620 goto save_state_and_return;
621 }
622
623 s->currBlockNo++;
624 s->storedBlockCRC = 0;
625
626 case BZ_X_BCRC_1:
627 s->state = BZ_X_BCRC_1;
628 if (! get_bits(s, &uc, 8)) {
629 retVal = BZ_OK;
630 goto save_state_and_return;
631 }
632 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
633
634 case BZ_X_BCRC_2:
635 s->state = BZ_X_BCRC_2;
636 if (! get_bits(s, &uc, 8)) {
637 retVal = BZ_OK;
638 goto save_state_and_return;
639 }
640 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
641
642 case BZ_X_BCRC_3:
643 s->state = BZ_X_BCRC_3;
644 if (! get_bits(s, &uc, 8)) {
645 retVal = BZ_OK;
646 goto save_state_and_return;
647 }
648 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
649
650 case BZ_X_BCRC_4:
651 s->state = BZ_X_BCRC_4;
652 if (! get_bits(s, &uc, 8)) {
653 retVal = BZ_OK;
654 goto save_state_and_return;
655 }
656 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
657
658 case BZ_X_RANDBIT:
659 s->state = BZ_X_RANDBIT;
660 {
661 int tmp = s->blockRandomised;
662 const int ret = get_bits(s, &tmp, 1);
663 s->blockRandomised = tmp;
664 if (! ret) {
665 retVal = BZ_OK;
666 goto save_state_and_return;
667 }
668 }
669
670 s->origPtr = 0;
671
672 case BZ_X_ORIGPTR_1:
673 s->state = BZ_X_ORIGPTR_1;
674 if (! get_bits(s, &uc, 8)) {
675 retVal = BZ_OK;
676 goto save_state_and_return;
677 }
678 s->origPtr = (s->origPtr << 8) | ((int)uc);
679
680 case BZ_X_ORIGPTR_2:
681 s->state = BZ_X_ORIGPTR_2;
682 if (! get_bits(s, &uc, 8)) {
683 retVal = BZ_OK;
684 goto save_state_and_return;
685 }
686 s->origPtr = (s->origPtr << 8) | ((int)uc);
687
688 case BZ_X_ORIGPTR_3:
689 s->state = BZ_X_ORIGPTR_3;
690 if (! get_bits(s, &uc, 8)) {
691 retVal = BZ_OK;
692 goto save_state_and_return;
693 }
694 s->origPtr = (s->origPtr << 8) | ((int)uc);
695
696 if (s->origPtr < 0) {
697 retVal = BZ_DATA_ERROR;
698 goto save_state_and_return;
699 }
700 if (s->origPtr > 10 + 100000*s->blockSize100k) {
701 retVal = BZ_DATA_ERROR;
702 goto save_state_and_return;
703 }
704
705 /*--- Receive the mapping table ---*/
706 case BZ_X_MAPPING_1:
707 for (i = 0; i < 16; i++) {
708 s->state = BZ_X_MAPPING_1;
709 if (! get_bits(s, &uc, 1)) {
710 retVal = BZ_OK;
711 goto save_state_and_return;
712 }
713 if (uc == 1) {
714 s->inUse16[i] = TRUE;
715 } else {
716 s->inUse16[i] = FALSE;
717 }
718 }
719
720 for (i = 0; i < 256; i++) {
721 s->inUse[i] = FALSE;
722 }
723
724 for (i = 0; i < 16; i++) {
725 if (s->inUse16[i]) {
726 for (j = 0; j < 16; j++) {
727 case BZ_X_MAPPING_2:
728 s->state = BZ_X_MAPPING_2;
729 if (! get_bits(s, &uc, 1)) {
730 retVal = BZ_OK;
731 goto save_state_and_return;
732 }
733 if (uc == 1) {
734 s->inUse[i * 16 + j] = TRUE;
735 }
736 }
737 }
738 }
739
740 s->nInUse = 0;
741 for (i = 0; i < 256; i++) {
742 if (s->inUse[i]) {
743 s->seqToUnseq[s->nInUse] = i;
744 s->nInUse++;
745 }
746 }
747 if (s->nInUse == 0) {
748 retVal = BZ_DATA_ERROR;
749 goto save_state_and_return;
750 }
751 alphaSize = s->nInUse+2;
752
753 /*--- Now the selectors ---*/
754 case BZ_X_SELECTOR_1:
755 s->state = BZ_X_SELECTOR_1;
756 if (! get_bits(s, &nGroups, 3)) {
757 retVal = BZ_OK;
758 goto save_state_and_return;
759 }
760 if (nGroups < 2 || nGroups > 6) {
761 retVal = BZ_DATA_ERROR;
762 goto save_state_and_return;
763 }
764
765 case BZ_X_SELECTOR_2:
766 s->state = BZ_X_SELECTOR_2;
767 if (! get_bits(s, &nSelectors, 15)) {
768 retVal = BZ_OK;
769 goto save_state_and_return;
770 }
771 if (nSelectors < 1) {
772 retVal = BZ_DATA_ERROR;
773 goto save_state_and_return;
774 }
775
776
777
778 for (i = 0; i < nSelectors; i++) {
779 j = 0;
780 while (1) {
781 case BZ_X_SELECTOR_3:
782 s->state = BZ_X_SELECTOR_3;
783 if (! get_bits(s, &uc, 1)) {
784 retVal = BZ_OK;
785 goto save_state_and_return;
786 }
787 if (uc == 0) {
788 break;
789 }
790 j++;
791 if (j >= nGroups) {
792 retVal = BZ_DATA_ERROR;
793 goto save_state_and_return;
794 }
795 }
796 s->selectorMtf[i] = j;
797 }
798
799 /*--- Undo the MTF values for the selectors. ---*/
800 {
801 unsigned char pos[BZ_N_GROUPS], tmp, v;
802 for (v = 0; v < nGroups; v++) {
803 pos[v] = v;
804 }
805 for (i = 0; i < nSelectors; i++) {
806 v = s->selectorMtf[i];
807 tmp = pos[v];
808 while (v > 0) {
809 pos[v] = pos[v-1];
810 v--;
811 }
812 pos[0] = tmp;
813 s->selector[i] = tmp;
814 }
815 }
816
817 /*--- Now the coding tables ---*/
818 for (t = 0; t < nGroups; t++) {
819 case BZ_X_CODING_1:
820 s->state = BZ_X_CODING_1;
821 if (! get_bits(s, &curr, 5)) {
822 retVal = BZ_OK;
823 goto save_state_and_return;
824 }
825 for (i = 0; i < alphaSize; i++) {
826 while (TRUE) {
827 if (curr < 1 || curr > 20) {
828 retVal = BZ_DATA_ERROR;
829 goto save_state_and_return;
830 }
831
832 case BZ_X_CODING_2:
833 s->state = BZ_X_CODING_2;
834 if (! get_bits(s, &uc, 1)) {
835 retVal = BZ_OK;
836 goto save_state_and_return;
837 }
838 if (uc == 0) {
839 break;
840 }
841
842 case BZ_X_CODING_3:
843 s->state = BZ_X_CODING_3;
844 if (! get_bits(s, &uc, 1)) {
845 retVal = BZ_OK;
846 goto save_state_and_return;
847 }
848 if (uc == 0) {
849 curr++;
850 } else {
851 curr--;
852 }
853 }
854 s->len[t][i] = curr;
855 }
856 } 184 }
857 185 /* Find largest and smallest lengths in this group */
858 /*--- Create the Huffman decoding tables ---*/ 186 minLen=maxLen=length[0];
859 for (t = 0; t < nGroups; t++) { 187 for(i = 1; i < symCount; i++) {
860 minLen = 32; 188 if(length[i] > maxLen) maxLen = length[i];
861 maxLen = 0; 189 else if(length[i] < minLen) minLen = length[i];
862 for (i = 0; i < alphaSize; i++) {
863 if (s->len[t][i] > maxLen) {
864 maxLen = s->len[t][i];
865 }
866 if (s->len[t][i] < minLen) {
867 minLen = s->len[t][i];
868 }
869 }
870
871 BZ2_hbCreateDecodeTables (
872 &(s->limit[t][0]),
873 &(s->base[t][0]),
874 &(s->perm[t][0]),
875 &(s->len[t][0]),
876 minLen, maxLen, alphaSize
877 );
878
879
880 s->minLens[t] = minLen;
881 } 190 }
882 191 /* Calculate permute[], base[], and limit[] tables from length[].
883 /*--- Now the MTF values ---*/ 192 *
884 193 * permute[] is the lookup table for converting huffman coded symbols
885 EOB = s->nInUse+1; 194 * into decoded symbols. base[] is the amount to subtract from the
886 nblockMAX = 100000 * s->blockSize100k; 195 * value of a huffman symbol of a given length when using permute[].
887 groupNo = -1; 196 *
888 groupPos = 0; 197 * limit[] indicates the largest numerical value a symbol with a given
889 198 * number of bits can have. It lets us know when to stop reading.
890 for (i = 0; i <= 255; i++) { 199 *
891 s->unzftab[i] = 0; 200 * To use these, keep reading bits until value<=limit[bitcount] or
201 * you've read over 20 bits (error). Then the decoded symbol
202 * equals permute[hufcode_value-base[hufcode_bitcount]].
203 */
204 hufGroup=bd->groups+j;
205 hufGroup->minLen = minLen;
206 hufGroup->maxLen = maxLen;
207 /* Note that minLen can't be smaller than 1, so we adjust the base
208 and limit array pointers so we're not always wasting the first
209 entry. We do this again when using them (during symbol decoding).*/
210 base=hufGroup->base-1;
211 limit=hufGroup->limit-1;
212 /* Calculate permute[] */
213 pp = 0;
214 for(i=minLen;i<=maxLen;i++)
215 for(t=0;t<symCount;t++)
216 if(length[t]==i) hufGroup->permute[pp++] = t;
217 /* Count cumulative symbols coded for at each bit length */
218 for (i=minLen;i<=maxLen;i++) temp[i]=limit[i]=0;
219 for (i=0;i<symCount;i++) temp[length[i]]++;
220 /* Calculate limit[] (the largest symbol-coding value at each bit
221 * length, which is (previous limit<<1)+symbols at this level), and
222 * base[] (number of symbols to ignore at each bit length, which is
223 * limit-cumulative count of symbols coded for already). */
224 pp=t=0;
225 for (i=minLen; i<maxLen; i++) {
226 pp+=temp[i];
227 limit[i]=pp-1;
228 pp<<=1;
229 base[i+1]=pp-(t+=temp[i]);
892 } 230 }
893 /*-- MTF init --*/ 231 limit[maxLen]=pp+temp[maxLen]-1;
894 { 232 base[minLen]=0;
895 int ii, jj, kk; 233 }
896 kk = MTFA_SIZE-1; 234 /* We've finished reading and digesting the block header. Now read this
897 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) { 235 block's huffman coded symbols from the file and undo the huffman coding
898 for (jj = MTFL_SIZE-1; jj >= 0; jj--) { 236 and run length encoding, saving the result into dbuf[dbufCount++]=uc */
899 s->mtfa[kk] = (unsigned char)(ii * MTFL_SIZE + jj); 237
900 kk--; 238 /* Initialize symbol occurrence counters and symbol mtf table */
901 } 239 memset(byteCount,0,256*sizeof(int));
902 s->mtfbase[ii] = kk + 1; 240 for(i=0;i<256;i++) mtfSymbol[i]=(unsigned char)i;
903 } 241 /* Loop through compressed symbols */
242 runPos=dbufCount=symCount=selector=0;
243 for(;;) {
244 /* Determine which huffman coding group to use. */
245 if(!(symCount--)) {
246 symCount=GROUP_SIZE-1;
247 if(selector>=nSelectors) return RETVAL_DATA_ERROR;
248 hufGroup=bd->groups+selectors[selector++];
249 base=hufGroup->base-1;
250 limit=hufGroup->limit-1;
904 } 251 }
905 /*-- end MTF init --*/ 252 /* Read next huffman-coded symbol */
906 253 i = hufGroup->minLen;
907 nblock = 0; 254 j=get_bits(bd, i);
908 255 for(;;) {
909 if (! get_mtf_val_init()) { 256 if (i > hufGroup->maxLen) return RETVAL_DATA_ERROR;
910 goto save_state_and_return; 257 if (j <= limit[i]) break;
258 i++;
259
260 j = (j << 1) | get_bits(bd,1);
911 } 261 }
912 case BZ_X_MTF_1: 262 /* Huffman decode nextSym (with bounds checking) */
913 s->state = BZ_X_MTF_1; 263 j-=base[i];
914 if (! get_bits(s, &zvec, zn)) { 264 if (j < 0 || j >= MAX_SYMBOLS) return RETVAL_DATA_ERROR;
915 retVal = BZ_OK; 265 nextSym = hufGroup->permute[j];
916 goto save_state_and_return; 266 /* If this is a repeated run, loop collecting data */
917 } 267 if (nextSym == SYMBOL_RUNA || nextSym == SYMBOL_RUNB) {
918 while (1) { 268 /* If this is the start of a new run, zero out counter */
919 if (zn > 20 /* the longest code */) { 269 if(!runPos) {
920 retVal = BZ_DATA_ERROR; 270 runPos = 1;
921 goto save_state_and_return; 271 t = 0;
922 } 272 }
923 if (zvec <= gLimit[zn]) { 273 /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
924 break; 274 each bit position, add 1 or 2 instead. For example,
925 } 275 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2.
926 zn++; 276 You can make any bit pattern that way using 1 less symbol than
927 277 the basic or 0/1 method (except all bits 0, which would use no
928 case BZ_X_MTF_2: 278 symbols, but a run of length 0 doesn't mean anything in this
929 s->state = BZ_X_MTF_2; 279 context). Thus space is saved. */
930 if (! get_bits(s, &zj, 1)) { 280 if (nextSym == SYMBOL_RUNA) t += runPos;
931 retVal = BZ_OK; 281 else t += 2*runPos;
932 goto save_state_and_return; 282 runPos <<= 1;
933 }
934 zvec = (zvec << 1) | zj;
935 }
936 if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
937 retVal = BZ_DATA_ERROR;
938 goto save_state_and_return;
939 }
940 nextSym = gPerm[zvec - gBase[zn]];
941
942 while (1) {
943 if (nextSym == EOB) {
944 break;
945 }
946
947 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
948 es = -1;
949 N = 1;
950 do {
951 if (nextSym == BZ_RUNA) {
952 es = es + (0+1) * N;
953 } else {
954 if (nextSym == BZ_RUNB) {
955 es = es + (1+1) * N;
956 }
957 }
958 N = N * 2;
959 if (! get_mtf_val_init()) {
960 goto save_state_and_return;
961 }
962 case BZ_X_MTF_3:
963 s->state = BZ_X_MTF_3;
964 if (! get_bits(s, &zvec, zn)) {
965 retVal = BZ_OK;
966 goto save_state_and_return;
967 }
968 while (1) {
969 if (zn > 20 /* the longest code */) {
970 retVal = BZ_DATA_ERROR;
971 goto save_state_and_return;
972 }
973 if (zvec <= gLimit[zn]) {
974 break;
975 }
976 zn++;
977
978 case BZ_X_MTF_4:
979 s->state = BZ_X_MTF_4;
980 if (! get_bits(s, &zj, 1)) {
981 retVal = BZ_OK;
982 goto save_state_and_return;
983 }
984 zvec = (zvec << 1) | zj;
985 }
986 if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
987 retVal = BZ_DATA_ERROR;
988 goto save_state_and_return;
989
990 }
991 nextSym = gPerm[zvec - gBase[zn]];
992 }
993 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
994
995 es++;
996 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
997 s->unzftab[uc] += es;
998
999 while (es > 0) {
1000 if (nblock >= nblockMAX) {
1001 retVal = BZ_DATA_ERROR;
1002 goto save_state_and_return;
1003 }
1004 s->tt[nblock] = (unsigned int)uc;
1005 nblock++;
1006 es--;
1007 }
1008 continue;
1009 } else {
1010 if (nblock >= nblockMAX) {
1011 retVal = BZ_DATA_ERROR;
1012 goto save_state_and_return;
1013 }
1014 /*-- uc = MTF ( nextSym-1 ) --*/
1015 {
1016 int ii, jj, kk, pp, lno, off;
1017 unsigned int nn;
1018 nn = (unsigned int)(nextSym - 1);
1019
1020 if (nn < MTFL_SIZE) {
1021 /* avoid general-case expense */
1022 pp = s->mtfbase[0];
1023 uc = s->mtfa[pp+nn];
1024 while (nn > 3) {
1025 int z = pp+nn;
1026 s->mtfa[(z) ] = s->mtfa[(z)-1];
1027 s->mtfa[(z)-1] = s->mtfa[(z)-2];
1028 s->mtfa[(z)-2] = s->mtfa[(z)-3];
1029 s->mtfa[(z)-3] = s->mtfa[(z)-4];
1030 nn -= 4;
1031 }
1032 while (nn > 0) {
1033 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
1034 }
1035 s->mtfa[pp] = uc;
1036 } else {
1037 /* general case */
1038 lno = nn / MTFL_SIZE;
1039 off = nn % MTFL_SIZE;
1040 pp = s->mtfbase[lno] + off;
1041 uc = s->mtfa[pp];
1042 while (pp > s->mtfbase[lno]) {
1043 s->mtfa[pp] = s->mtfa[pp-1];
1044 pp--;
1045 }
1046 s->mtfbase[lno]++;
1047 while (lno > 0) {
1048 s->mtfbase[lno]--;
1049 s->mtfa[s->mtfbase[lno]] = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
1050 lno--;
1051 }
1052 s->mtfbase[0]--;
1053 s->mtfa[s->mtfbase[0]] = uc;
1054 if (s->mtfbase[0] == 0) {
1055 kk = MTFA_SIZE-1;
1056 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
1057 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
1058 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
1059 kk--;
1060 }
1061 s->mtfbase[ii] = kk + 1;
1062 }
1063 }
1064 }
1065 }
1066 /*-- end uc = MTF ( nextSym-1 ) --*/
1067
1068 s->unzftab[s->seqToUnseq[uc]]++;
1069 s->tt[nblock] = (unsigned int)(s->seqToUnseq[uc]);
1070 nblock++;
1071
1072 if (! get_mtf_val_init()) {
1073 goto save_state_and_return;
1074 }
1075 case BZ_X_MTF_5:
1076 s->state = BZ_X_MTF_5;
1077 if (! get_bits(s, &zvec, zn)) {
1078 retVal = BZ_OK;
1079 goto save_state_and_return;
1080 }
1081 while (1) {
1082 if (zn > 20 /* the longest code */) {
1083 retVal = BZ_DATA_ERROR;
1084 goto save_state_and_return;
1085 }
1086 if (zvec <= gLimit[zn]) {
1087 break;
1088 }
1089 zn++;
1090
1091 case BZ_X_MTF_6:
1092 s->state = BZ_X_MTF_6;
1093 if (! get_bits(s, &zj, 1)) {
1094 retVal = BZ_OK;
1095 goto save_state_and_return;
1096 }
1097 zvec = (zvec << 1) | zj;
1098 }
1099 if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
1100 retVal = BZ_DATA_ERROR;
1101 goto save_state_and_return;
1102 }
1103 nextSym = gPerm[zvec - gBase[zn]];
1104 continue; 283 continue;
1105 } 284 }
285 /* When we hit the first non-run symbol after a run, we now know
286 how many times to repeat the last literal, so append that many
287 copies to our buffer of decoded symbols (dbuf) now. (The last
288 literal used is the one at the head of the mtfSymbol array.) */
289 if(runPos) {
290 runPos=0;
291 if(dbufCount+t>=dbufSize) return RETVAL_DATA_ERROR;
292
293 uc = symToByte[mtfSymbol[0]];
294 byteCount[uc] += t;
295 while(t--) dbuf[dbufCount++]=uc;
296 }
297 /* Is this the terminating symbol? */
298 if(nextSym>symTotal) break;
299 /* At this point, the symbol we just decoded indicates a new literal
300 character. Subtract one to get the position in the MTF array
301 at which this literal is currently to be found. (Note that the
302 result can't be -1 or 0, because 0 and 1 are RUNA and RUNB.
303 Another instance of the first symbol in the mtf array, position 0,
304 would have been handled as part of a run.) */
305 if(dbufCount>=dbufSize) return RETVAL_DATA_ERROR;
306 i = nextSym - 1;
307 uc = mtfSymbol[i];
308 memmove(mtfSymbol+1,mtfSymbol,i);
309 mtfSymbol[0] = uc;
310 uc=symToByte[uc];
311 /* We have our literal byte. Save it into dbuf. */
312 byteCount[uc]++;
313 dbuf[dbufCount++] = (unsigned int)uc;
1106 } 314 }
1107 315 /* At this point, we've finished reading huffman-coded symbols and
1108 /* Now we know what nblock is, we can do a better sanity 316 compressed runs from the input stream. There are dbufCount many of
1109 check on s->origPtr. 317 them in dbuf[]. Now undo the Burrows-Wheeler transform on dbuf.
1110 */ 318 See http://dogma.net/markn/articles/bwt/bwt.htm
1111 if (s->origPtr < 0 || s->origPtr >= nblock) { 319 */
1112 retVal = BZ_DATA_ERROR; 320
1113 goto save_state_and_return; 321 /* Now we know what dbufCount is, do a better sanity check on origPtr. */
322 if (origPtr<0 || origPtr>=dbufCount) return RETVAL_DATA_ERROR;
323 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
324 j=0;
325 for(i=0;i<256;i++) {
326 k=j+byteCount[i];
327 byteCount[i] = j;
328 j=k;
1114 } 329 }
1115 s->state_out_len = 0; 330 /* Figure out what order dbuf would be in if we sorted it. */
1116 s->state_out_ch = 0; 331 for (i=0;i<dbufCount;i++) {
1117 s->calculatedBlockCRC = 0xffffffffL; 332 uc = (unsigned char)(dbuf[i] & 0xff);
1118 s->state = BZ_X_OUTPUT; 333 dbuf[byteCount[uc]] |= (i << 8);
1119 334 byteCount[uc]++;
1120 /*-- Set up cftab to facilitate generation of T^(-1) --*/
1121 s->cftab[0] = 0;
1122 for (i = 1; i <= 256; i++) {
1123 s->cftab[i] = s->unzftab[i-1];
1124 } 335 }
1125 for (i = 1; i <= 256; i++) { 336 /* blockRandomised support would go here. */
1126 s->cftab[i] += s->cftab[i-1]; 337
338 /* Using i as position, j as previous character, t as current character,
339 and uc as run count */
340 bd->dataCRC = 0xffffffffL;
341 /* Decode first byte by hand to initialize "previous" byte. Note that it
342 doesn't get output, and if the first three characters are identical
343 it doesn't qualify as a run (hence uc=255, which will either wrap
344 to 1 or get reset). */
345 if(dbufCount) {
346 bd->writePos=dbuf[origPtr];
347 bd->writeCurrent=(unsigned char)(bd->writePos&0xff);
348 bd->writePos>>=8;
349 bd->writeRun=-1;
1127 } 350 }
351 bd->writeCount=dbufCount;
1128 352
1129 /*-- compute the T^(-1) vector --*/ 353 return RETVAL_OK;
1130 for (i = 0; i < nblock; i++) {
1131 uc = (unsigned char)(s->tt[i] & 0xff);
1132 s->tt[s->cftab[uc]] |= (i << 8);
1133 s->cftab[uc]++;
1134 }
1135
1136 s->tPos = s->tt[s->origPtr] >> 8;
1137 s->nblock_used = 0;
1138 if (s->blockRandomised) {
1139 s->rNToGo = 0;
1140 s->rTPos = 0;
1141 s->k0 = bz_get_fast(s);
1142
1143 s->nblock_used++;
1144 bz_rand_udp_mask(s);
1145 s->k0 ^= ((s->rNToGo == 1) ? 1 : 0);
1146 } else {
1147 s->k0 = bz_get_fast(s);
1148 s->nblock_used++;
1149 }
1150
1151 retVal = BZ_OK;
1152 goto save_state_and_return;
1153
1154endhdr_2:
1155 case BZ_X_ENDHDR_2:
1156 s->state = BZ_X_ENDHDR_2;
1157 if (! get_bits(s, &uc, 8)) {
1158 retVal = BZ_OK;
1159 goto save_state_and_return;
1160 }
1161 if (uc != 0x72) {
1162 retVal = BZ_DATA_ERROR;
1163 goto save_state_and_return;
1164 }
1165
1166 case BZ_X_ENDHDR_3:
1167 s->state = BZ_X_ENDHDR_3;
1168 if (! get_bits(s, &uc, 8)) {
1169 retVal = BZ_OK;
1170 goto save_state_and_return;
1171 }
1172 if (uc != 0x45) {
1173 retVal = BZ_DATA_ERROR;
1174 goto save_state_and_return;
1175 }
1176
1177 case BZ_X_ENDHDR_4:
1178 s->state = BZ_X_ENDHDR_4;
1179 if (! get_bits(s, &uc, 8)) {
1180 retVal = BZ_OK;
1181 goto save_state_and_return;
1182 }
1183 if (uc != 0x38) {
1184 retVal = BZ_DATA_ERROR;
1185 goto save_state_and_return;
1186 }
1187
1188 case BZ_X_ENDHDR_5:
1189 s->state = BZ_X_ENDHDR_5;
1190 if (! get_bits(s, &uc, 8)) {
1191 retVal = BZ_OK;
1192 goto save_state_and_return;
1193 }
1194 if (uc != 0x50) {
1195 retVal = BZ_DATA_ERROR;
1196 goto save_state_and_return;
1197 }
1198
1199 case BZ_X_ENDHDR_6:
1200 s->state = BZ_X_ENDHDR_6;
1201 if (! get_bits(s, &uc, 8)) {
1202 retVal = BZ_OK;
1203 goto save_state_and_return;
1204 }
1205 if (uc != 0x90) {
1206 retVal = BZ_DATA_ERROR;
1207 goto save_state_and_return;
1208 }
1209 s->storedCombinedCRC = 0;
1210
1211 case BZ_X_CCRC_1:
1212 s->state = BZ_X_CCRC_1;
1213 if (! get_bits(s, &uc, 8)) {
1214 retVal = BZ_OK;
1215 goto save_state_and_return;
1216 }
1217 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
1218 case BZ_X_CCRC_2:
1219 s->state = BZ_X_CCRC_2;
1220 if (! get_bits(s, &uc, 8)) {
1221 retVal = BZ_OK;
1222 goto save_state_and_return;
1223 }
1224 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
1225
1226 case BZ_X_CCRC_3:
1227 s->state = BZ_X_CCRC_3;
1228 if (! get_bits(s, &uc, 8)) {
1229 retVal = BZ_OK;
1230 goto save_state_and_return;
1231 }
1232 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
1233
1234 case BZ_X_CCRC_4:
1235 s->state = BZ_X_CCRC_4;
1236 if (! get_bits(s, &uc, 8)) {
1237 retVal = BZ_OK;
1238 goto save_state_and_return;
1239 }
1240 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
1241
1242 s->state = BZ_X_IDLE;
1243 retVal = BZ_STREAM_END;
1244 goto save_state_and_return;
1245
1246 }
1247
1248save_state_and_return:
1249 s->save_i = i;
1250 s->save_j = j;
1251 s->save_t = t;
1252 s->save_alphaSize = alphaSize;
1253 s->save_nGroups = nGroups;
1254 s->save_nSelectors = nSelectors;
1255 s->save_EOB = EOB;
1256 s->save_groupNo = groupNo;
1257 s->save_groupPos = groupPos;
1258 s->save_nextSym = nextSym;
1259 s->save_nblockMAX = nblockMAX;
1260 s->save_nblock = nblock;
1261 s->save_es = es;
1262 s->save_N = N;
1263 s->save_curr = curr;
1264 s->save_zt = zt;
1265 s->save_zn = zn;
1266 s->save_zvec = zvec;
1267 s->save_zj = zj;
1268 s->save_gSel = gSel;
1269 s->save_gMinlen = gMinlen;
1270 s->save_gLimit = gLimit;
1271 s->save_gBase = gBase;
1272 s->save_gPerm = gPerm;
1273
1274 return retVal;
1275} 354}
1276 355
1277extern void BZ2_bzReadClose(void) 356/* Flush output buffer to disk */
357extern void flush_bunzip_outbuf(bunzip_data *bd, int out_fd)
1278{ 358{
1279 if (bzf->initialisedOk) { 359 if(bd->outbufPos) {
1280 bz_stream *strm = &(bzf->strm); 360 if(write(out_fd, bd->outbuf, bd->outbufPos) != bd->outbufPos)
1281 DState *s; 361 longjmp(bd->jmpbuf,RETVAL_UNEXPECTED_OUTPUT_EOF);
1282 if (strm == NULL) { 362 bd->outbufPos=0;
1283 return;
1284 }
1285 s = strm->state;
1286 if ((s == NULL) || (s->strm != strm)) {
1287 return;
1288 }
1289 free(s->tt);
1290 free(strm->state);
1291 strm->state = NULL;
1292 return;
1293 } 363 }
1294 free(bzf);
1295} 364}
1296 365
1297static void unRLE_obuf_to_output_FAST(DState *s)
1298{
1299 unsigned char k1;
1300
1301 if (s->blockRandomised) {
1302 while (1) {
1303 /* try to finish existing run */
1304 while (1) {
1305 if (s->strm->avail_out == 0) {
1306 return;
1307 }
1308 if (s->state_out_len == 0) {
1309 break;
1310 }
1311 *((unsigned char *)(s->strm->next_out)) = s->state_out_ch;
1312 s->calculatedBlockCRC = (s->calculatedBlockCRC << 8) ^
1313 BZ2_crc32Table[(s->calculatedBlockCRC >> 24) ^
1314 ((unsigned char)s->state_out_ch)];
1315 s->state_out_len--;
1316 s->strm->next_out++;
1317 s->strm->avail_out--;
1318 }
1319
1320 /* can a new run be started? */
1321 if (s->nblock_used == s->save_nblock+1) {
1322 return;
1323 }
1324 s->state_out_len = 1;
1325 s->state_out_ch = s->k0;
1326 k1 = bz_get_fast(s);
1327 bz_rand_udp_mask(s);
1328 k1 ^= ((s->rNToGo == 1) ? 1 : 0);
1329 s->nblock_used++;
1330 if (s->nblock_used == s->save_nblock+1) {
1331 continue;
1332 }
1333 if (k1 != s->k0) {
1334 s->k0 = k1;
1335 continue;
1336 }
1337 366
1338 s->state_out_len = 2; 367/* Undo burrows-wheeler transform on intermediate buffer to produce output.
1339 k1 = bz_get_fast(s); 368 If !len, write up to len bytes of data to buf. Otherwise write to out_fd.
1340 bz_rand_udp_mask(s); 369 Returns len ? bytes written : RETVAL_OK. Notice all errors negative #'s. */
1341 k1 ^= ((s->rNToGo == 1) ? 1 : 0); 370extern int write_bunzip_data(bunzip_data *bd, int out_fd, char *outbuf, int len)
1342 s->nblock_used++; 371{
1343 if (s->nblock_used == s->save_nblock+1) { 372 unsigned int *dbuf=bd->dbuf;
1344 continue; 373 int count,pos,current, run,copies,outbyte,previous,gotcount=0;
1345 } 374
1346 if (k1 != s->k0) { 375 for(;;) {
1347 s->k0 = k1; 376 /* If last read was short due to end of file, return last block now */
1348 continue; 377 if(bd->writeCount<0) return bd->writeCount;
1349 } 378 /* If we need to refill dbuf, do it. */
1350 s->state_out_len = 3; 379 if(!bd->writeCount) {
1351 k1 = bz_get_fast(s); 380 int i=read_bunzip_data(bd);
1352 bz_rand_udp_mask(s); 381 if(i) {
1353 k1 ^= ((s->rNToGo == 1) ? 1 : 0); 382 if(i==RETVAL_LAST_BLOCK) {
1354 s->nblock_used++; 383 bd->writeCount=i;
1355 if (s->nblock_used == s->save_nblock+1) { 384 return gotcount;
1356 continue; 385 } else return i;
1357 }
1358 if (k1 != s->k0) {
1359 s->k0 = k1;
1360 continue;
1361 } 386 }
1362
1363 k1 = bz_get_fast(s);
1364 bz_rand_udp_mask(s);
1365 k1 ^= ((s->rNToGo == 1) ? 1 : 0);
1366 s->nblock_used++;
1367 s->state_out_len = ((int)k1) + 4;
1368 s->k0 = bz_get_fast(s);
1369 bz_rand_udp_mask(s);
1370 s->k0 ^= ((s->rNToGo == 1) ? 1 : 0);
1371 s->nblock_used++;
1372 } 387 }
1373 } else { 388 /* Loop generating output */
1374 /* restore */ 389 count=bd->writeCount;
1375 unsigned int c_calculatedBlockCRC = s->calculatedBlockCRC; 390 pos=bd->writePos;
1376 unsigned char c_state_out_ch = s->state_out_ch; 391 current=bd->writeCurrent;
1377 int c_state_out_len = s->state_out_len; 392 run=bd->writeRun;
1378 int c_nblock_used = s->nblock_used; 393 while(count) {
1379 int c_k0 = s->k0; 394 /* If somebody (like busybox tar) wants a certain number of bytes of
1380 unsigned int *c_tt = s->tt; 395 data from memory instead of written to a file, humor them */
1381 unsigned int c_tPos = s->tPos; 396 if(len && bd->outbufPos>=len) goto dataus_interruptus;
1382 char *cs_next_out = s->strm->next_out; 397 count--;
1383 unsigned int cs_avail_out = s->strm->avail_out; 398 /* Follow sequence vector to undo Burrows-Wheeler transform */
1384 /* end restore */ 399 previous=current;
1385 400 pos=dbuf[pos];
1386 int s_save_nblockPP = s->save_nblock+1; 401 current=pos&0xff;
1387 402 pos>>=8;
1388 while (1) { 403 /* Whenever we see 3 consecutive copies of the same byte,
1389 /* try to finish existing run */ 404 the 4th is a repeat count */
1390 if (c_state_out_len > 0) { 405 if(run++==3) {
1391 while (TRUE) { 406 copies=current;
1392 if (cs_avail_out == 0) { 407 outbyte=previous;
1393 goto return_notr; 408 current=-1;
1394 } 409 } else {
1395 if (c_state_out_len == 1) { 410 copies=1;
1396 break; 411 outbyte=current;
1397 }
1398 *((unsigned char *)(cs_next_out)) = c_state_out_ch;
1399 c_calculatedBlockCRC = (c_calculatedBlockCRC << 8) ^
1400 BZ2_crc32Table[(c_calculatedBlockCRC >> 24) ^
1401 ((unsigned char)c_state_out_ch)];
1402 c_state_out_len--;
1403 cs_next_out++;
1404 cs_avail_out--;
1405 }
1406s_state_out_len_eq_one:
1407 {
1408 if (cs_avail_out == 0) {
1409 c_state_out_len = 1;
1410 goto return_notr;
1411 }
1412 *((unsigned char *)(cs_next_out)) = c_state_out_ch;
1413 c_calculatedBlockCRC = (c_calculatedBlockCRC << 8) ^
1414 BZ2_crc32Table[(c_calculatedBlockCRC >> 24) ^
1415 ((unsigned char)c_state_out_ch)];
1416 cs_next_out++;
1417 cs_avail_out--;
1418 }
1419 }
1420 /* can a new run be started? */
1421 if (c_nblock_used == s_save_nblockPP) {
1422 c_state_out_len = 0; goto return_notr;
1423 }
1424 c_state_out_ch = c_k0;
1425 c_tPos = c_tt[c_tPos];
1426 k1 = (unsigned char)(c_tPos & 0xff);
1427 c_tPos >>= 8;
1428
1429 c_nblock_used++;
1430
1431 if (k1 != c_k0) {
1432 c_k0 = k1;
1433 goto s_state_out_len_eq_one;
1434 }
1435
1436 if (c_nblock_used == s_save_nblockPP) {
1437 goto s_state_out_len_eq_one;
1438 }
1439
1440 c_state_out_len = 2;
1441 c_tPos = c_tt[c_tPos];
1442 k1 = (unsigned char)(c_tPos & 0xff);
1443 c_tPos >>= 8;
1444
1445 c_nblock_used++;
1446 if (c_nblock_used == s_save_nblockPP) {
1447 continue;
1448 }
1449 if (k1 != c_k0) {
1450 c_k0 = k1;
1451 continue;
1452 }
1453
1454 c_state_out_len = 3;
1455 c_tPos = c_tt[c_tPos];
1456 k1 = (unsigned char)(c_tPos & 0xff);
1457 c_tPos >>= 8;
1458
1459 c_nblock_used++;
1460 if (c_nblock_used == s_save_nblockPP) {
1461 continue;
1462 } 412 }
1463 if (k1 != c_k0) { 413 /* Output bytes to buffer, flushing to file if necessary */
1464 c_k0 = k1; 414 while(copies--) {
1465 continue; 415 if(bd->outbufPos == IOBUF_SIZE) flush_bunzip_outbuf(bd,out_fd);
416 bd->outbuf[bd->outbufPos++] = outbyte;
417 bd->dataCRC = (bd->dataCRC << 8)
418 ^ bd->crc32Table[(bd->dataCRC >> 24) ^ outbyte];
1466 } 419 }
1467 420 if(current!=previous) run=0;
1468 c_tPos = c_tt[c_tPos];
1469 k1 = (unsigned char)(c_tPos & 0xff);
1470 c_tPos >>= 8;
1471
1472 c_nblock_used++;
1473 c_state_out_len = ((int)k1) + 4;
1474
1475 c_tPos = c_tt[c_tPos];
1476 c_k0 = (unsigned char)(c_tPos & 0xff);
1477 c_tPos >>= 8;
1478
1479 c_nblock_used++;
1480 } 421 }
1481 422 /* Decompression of this block completed successfully */
1482return_notr: 423 bd->dataCRC=~(bd->dataCRC);
1483 424 bd->totalCRC=((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->dataCRC;
1484 /* save */ 425 /* If this block had a CRC error, force file level CRC error. */
1485 s->calculatedBlockCRC = c_calculatedBlockCRC; 426 if(bd->dataCRC!=bd->headerCRC) {
1486 s->state_out_ch = c_state_out_ch; 427 bd->totalCRC=bd->headerCRC+1;
1487 s->state_out_len = c_state_out_len; 428 return RETVAL_LAST_BLOCK;
1488 s->nblock_used = c_nblock_used;
1489 s->k0 = c_k0;
1490 s->tt = c_tt;
1491 s->tPos = c_tPos;
1492 s->strm->next_out = cs_next_out;
1493 s->strm->avail_out = cs_avail_out;
1494 /* end save */
1495 }
1496}
1497static inline
1498int BZ2_bzDecompress(bz_stream *strm)
1499{
1500 DState* s;
1501 s = strm->state;
1502
1503 while (1) {
1504 if (s->state == BZ_X_IDLE) {
1505 return BZ_SEQUENCE_ERROR;
1506 } 429 }
1507 if (s->state == BZ_X_OUTPUT) { 430dataus_interruptus:
1508 unRLE_obuf_to_output_FAST(s); 431 bd->writeCount=count;
1509 if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { 432 if(len) {
1510 s->calculatedBlockCRC = ~(s->calculatedBlockCRC); 433 gotcount+=bd->outbufPos;
1511 if (s->calculatedBlockCRC != s->storedBlockCRC) { 434 memcpy(outbuf,bd->outbuf,len);
1512 return BZ_DATA_ERROR; 435 /* If we got enough data, checkpoint loop state and return */
1513 } 436 if((len-=bd->outbufPos)<1) {
1514 s->calculatedCombinedCRC = (s->calculatedCombinedCRC << 1) | (s->calculatedCombinedCRC >> 31); 437 bd->outbufPos-=len;
1515 s->calculatedCombinedCRC ^= s->calculatedBlockCRC; 438 if(bd->outbufPos)
1516 s->state = BZ_X_BLKHDR_1; 439 memmove(bd->outbuf,bd->outbuf+len,bd->outbufPos);
1517 } else { 440 bd->writePos=pos;
1518 return BZ_OK; 441 bd->writeCurrent=current;
1519 } 442 bd->writeRun=run;
1520 } 443 return gotcount;
1521 if (s->state >= BZ_X_MAGIC_1) {
1522 int r = BZ2_decompress(s);
1523 if (r == BZ_STREAM_END) {
1524 if (s->calculatedCombinedCRC != s->storedCombinedCRC) {
1525 return BZ_DATA_ERROR;
1526 }
1527 return r;
1528 }
1529 if (s->state != BZ_X_OUTPUT) {
1530 return r;
1531 } 444 }
1532 } 445 }
1533 } 446 }
1534
1535 return(0); /*NOTREACHED*/
1536} 447}
1537 448
1538extern ssize_t read_bz2(int fd, void *buf, size_t count) 449/* Allocate the structure, read file header. If !len, src_fd contains
450 filehandle to read from. Else inbuf contains data. */
451extern int start_bunzip(bunzip_data **bdp, int src_fd, char *inbuf, int len)
1539{ 452{
1540 int n, ret; 453 bunzip_data *bd;
1541 454 unsigned int i,j,c;
1542 bzerr = BZ_OK; 455
1543 if (count == 0) { 456 /* Figure out how much data to allocate */
1544 return(0); 457 i=sizeof(bunzip_data);
458 if(!len) i+=IOBUF_SIZE;
459 /* Allocate bunzip_data. Most fields initialize to zero. */
460 if(!(bd=*bdp=malloc(i))) return RETVAL_OUT_OF_MEMORY;
461 memset(bd,0,sizeof(bunzip_data));
462 if(len) {
463 bd->inbuf=inbuf;
464 bd->inbufCount=len;
465 bd->in_fd=-1;
466 } else {
467 bd->inbuf=(char *)(bd+1);
468 bd->in_fd=src_fd;
1545 } 469 }
1546 bzf->strm.avail_out = count; 470 /* Init the CRC32 table (big endian) */
1547 bzf->strm.next_out = buf; 471 for(i=0;i<256;i++) {
1548 472 c=i<<24;
1549 while (1) { 473 for(j=8;j;j--)
1550 if (bzf->strm.avail_in == 0) { 474 c=c&0x80000000 ? (c<<1)^0x04c11db7 : (c<<1);
1551 n = bb_xread(bzf->fd, bzf->buf, BZ_MAX_UNUSED); 475 bd->crc32Table[i]=c;
1552 if (n == 0) {
1553 break;
1554 }
1555 bzf->bufN = n;
1556 bzf->strm.avail_in = bzf->bufN;
1557 bzf->strm.next_in = bzf->buf;
1558 }
1559
1560 ret = BZ2_bzDecompress(&(bzf->strm));
1561
1562 if ((ret != BZ_OK) && (ret != BZ_STREAM_END)) {
1563 bb_error_msg_and_die("Error decompressing");
1564 }
1565
1566 if (ret == BZ_STREAM_END) {
1567 bzerr = BZ_STREAM_END;
1568 return(count - bzf->strm.avail_out);
1569 }
1570 if (bzf->strm.avail_out == 0) {
1571 bzerr = BZ_OK;
1572 return(count);
1573 }
1574 } 476 }
1575 return(0); 477 /* Setup for I/O error handling via longjmp */
478 i=setjmp(bd->jmpbuf);
479 if(i) return i;
480 /* Ensure that file starts with "BZh" */
481 for(i=0;i<3;i++) if(get_bits(bd,8)!="BZh"[i]) return RETVAL_NOT_BZIP_DATA;
482 /* Next byte ascii '1'-'9', indicates block size in units of 100k of
483 uncompressed data. Allocate intermediate buffer for block. */
484 i=get_bits(bd,8);
485 if (i<'1' || i>'9') return RETVAL_NOT_BZIP_DATA;
486 bd->dbufSize=100000*(i-'0');
487 if(!(bd->dbuf=malloc(bd->dbufSize * sizeof(int))))
488 return RETVAL_OUT_OF_MEMORY;
489 return RETVAL_OK;
1576} 490}
1577 491
1578extern void BZ2_bzReadOpen(int fd, void *unused, int nUnused) 492extern char *uncompressStream(int src_fd, int dst_fd)
1579{ 493{
1580 DState *s; 494 bunzip_data *bd;
1581 495 int i;
1582 bzf = xmalloc(sizeof(bzFile));
1583 bzf->initialisedOk = FALSE;
1584 bzf->fd = fd;
1585 bzf->bufN = 0;
1586
1587 s = xmalloc(sizeof(DState));
1588 s->strm = &bzf->strm;
1589 s->state = BZ_X_MAGIC_1;
1590 s->bsLive = 0;
1591 s->bsBuff = 0;
1592 s->calculatedCombinedCRC = 0;
1593 s->tt = NULL;
1594 s->currBlockNo = 0;
1595 bzf->strm.state = s;
1596 496
1597 while (nUnused > 0) { 497 if(!(i=start_bunzip(&bd,src_fd,0,0))) {
1598 bzf->buf[bzf->bufN] = *((unsigned char *)(unused)); 498 i=write_bunzip_data(bd,dst_fd,0,0);
1599 bzf->bufN++; 499 if(i==RETVAL_LAST_BLOCK && bd->headerCRC==bd->totalCRC) i=RETVAL_OK;
1600 unused = ((void *)( 1 + ((unsigned char *)(unused)) ));
1601 nUnused--;
1602 } 500 }
1603 bzf->strm.avail_in = bzf->bufN; 501 flush_bunzip_outbuf(bd,dst_fd);
1604 bzf->strm.next_in = bzf->buf; 502 if(bd->dbuf) free(bd->dbuf);
503 free(bd);
504 return bunzip_errors[-i];
505}
1605 506
1606 bzf->initialisedOk = TRUE; 507/* This new version is not yet properly integrated with tar */
508extern ssize_t read_bz2(int fd, void *buf, size_t count)
509{
510#warning FIXME
511 return(0);
512}
1607 513
514extern void BZ2_bzReadOpen(int fd, void *unused, int nUnused)
515{
516#warning FIXME
1608 return; 517 return;
1609} 518}
1610 519extern void BZ2_bzReadClose(void)
1611extern unsigned char uncompressStream(int src_fd, int dst_fd)
1612{ 520{
1613 unsigned char unused[BZ_MAX_UNUSED]; 521#warning FIXME
1614 unsigned char *unusedTmp;
1615 unsigned char obuf[5000];
1616 int nread;
1617 int nUnused;
1618 int streamNo;
1619 int i;
1620
1621 nUnused = 0;
1622 streamNo = 0;
1623
1624 while(1) {
1625 BZ2_bzReadOpen(src_fd, unused, nUnused);
1626 streamNo++;
1627
1628 while (bzerr == BZ_OK) {
1629 nread = read_bz2(src_fd, obuf, 5000);
1630 if (bzerr == BZ_DATA_ERROR_MAGIC) {
1631 bb_error_msg_and_die("invalid magic");
1632 }
1633 if (((bzerr == BZ_OK) || (bzerr == BZ_STREAM_END)) && (nread > 0)) {
1634 if (write(dst_fd, obuf, nread) != nread) {
1635 BZ2_bzReadClose();
1636 bb_perror_msg_and_die("Couldnt write to file");
1637 }
1638 }
1639 }
1640 nUnused = bzf->strm.avail_in;
1641 unusedTmp = bzf->strm.next_in;
1642
1643 for (i = 0; i < nUnused; i++) {
1644 unused[i] = unusedTmp[i];
1645 }
1646 BZ2_bzReadClose();
1647 if (nUnused == 0) {
1648 break;
1649 }
1650 }
1651
1652 close(src_fd);
1653 if (dst_fd != fileno(stdout)) {
1654 close(dst_fd);
1655 }
1656 return TRUE;
1657} 522}
1658 523
524#if 0
525/* Dumb little test thing, decompress stdin to stdout */
526int main(int argc, char *argv[])
527{
528 char *c=uncompressStream(0,1);
529 fprintf(stderr,"\n%s\n", c ? c : "Completed OK");
530}
531#endif