about summary refs log tree commit diff
path: root/archival
diff options
context:
space:
mode:
Diffstat (limited to 'archival')
-rw-r--r-- archival/libunarchive/decompress_bunzip2.c 86
1 files changed, 11 insertions, 75 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index 8f35bc5f9..e034814c0 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -66,7 +66,6 @@ struct group_data {
66 * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER 66 * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER
67 * and moved it (inbufBitCount) to offset 0. 67 * and moved it (inbufBitCount) to offset 0.
68 */ 68 */
69
70struct bunzip_data { 69struct bunzip_data {
71 /* I/O tracking data (file handles, buffers, positions, etc.) */ 70 /* I/O tracking data (file handles, buffers, positions, etc.) */
72 unsigned inbufBitCount, inbufBits; 71 unsigned inbufBitCount, inbufBits;
@@ -102,11 +101,9 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
102 101
103 /* If we need to get more data from the byte buffer, do so. (Loop getting 102 /* If we need to get more data from the byte buffer, do so. (Loop getting
104 one byte at a time to enforce endianness and avoid unaligned access.) */ 103 one byte at a time to enforce endianness and avoid unaligned access.) */
105
106 while ((int)(bd->inbufBitCount) < bits_wanted) { 104 while ((int)(bd->inbufBitCount) < bits_wanted) {
107 105
108 /* If we need to read more data from file into byte buffer, do so */ 106 /* If we need to read more data from file into byte buffer, do so */
109
110 if (bd->inbufPos == bd->inbufCount) { 107 if (bd->inbufPos == bd->inbufCount) {
111 /* if "no input fd" case: in_fd == -1, read fails, we jump */ 108 /* if "no input fd" case: in_fd == -1, read fails, we jump */
112 bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); 109 bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
@@ -116,7 +113,6 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
116 } 113 }
117 114
118 /* Avoid 32-bit overflow (dump bit buffer to top of output) */ 115 /* Avoid 32-bit overflow (dump bit buffer to top of output) */
119
120 if (bd->inbufBitCount >= 24) { 116 if (bd->inbufBitCount >= 24) {
121 bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); 117 bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1);
122 bits_wanted -= bd->inbufBitCount; 118 bits_wanted -= bd->inbufBitCount;
@@ -125,13 +121,11 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
125 } 121 }
126 122
127 /* Grab next 8 bits of input from buffer. */ 123 /* Grab next 8 bits of input from buffer. */
128
129 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; 124 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
130 bd->inbufBitCount += 8; 125 bd->inbufBitCount += 8;
131 } 126 }
132 127
133 /* Calculate result */ 128 /* Calculate result */
134
135 bd->inbufBitCount -= bits_wanted; 129 bd->inbufBitCount -= bits_wanted;
136 bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); 130 bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1);
137 131
@@ -139,7 +133,6 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
139} 133}
140 134
141/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ 135/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
142
143static int get_next_block(bunzip_data *bd) 136static int get_next_block(bunzip_data *bd)
144{ 137{
145 struct group_data *hufGroup; 138 struct group_data *hufGroup;
@@ -153,13 +146,11 @@ static int get_next_block(bunzip_data *bd)
153 selectors = bd->selectors; 146 selectors = bd->selectors;
154 147
155 /* Reset longjmp I/O error handling */ 148 /* Reset longjmp I/O error handling */
156
157 i = setjmp(bd->jmpbuf); 149 i = setjmp(bd->jmpbuf);
158 if (i) return i; 150 if (i) return i;
159 151
160 /* Read in header signature and CRC, then validate signature. 152 /* Read in header signature and CRC, then validate signature.
161 (last block signature means CRC is for whole file, return now) */ 153 (last block signature means CRC is for whole file, return now) */
162
163 i = get_bits(bd, 24); 154 i = get_bits(bd, 24);
164 j = get_bits(bd, 24); 155 j = get_bits(bd, 24);
165 bd->headerCRC = get_bits(bd, 32); 156 bd->headerCRC = get_bits(bd, 32);
@@ -169,7 +160,6 @@ static int get_next_block(bunzip_data *bd)
169 /* We can add support for blockRandomised if anybody complains. There was 160 /* We can add support for blockRandomised if anybody complains. There was
170 some code for this in busybox 1.0.0-pre3, but nobody ever noticed that 161 some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
171 it didn't actually work. */ 162 it didn't actually work. */
172
173 if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; 163 if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
174 origPtr = get_bits(bd, 24); 164 origPtr = get_bits(bd, 24);
175 if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; 165 if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR;
@@ -179,7 +169,6 @@ static int get_next_block(bunzip_data *bd)
179 symbols to deal with, and writes a sparse bitfield indicating which 169 symbols to deal with, and writes a sparse bitfield indicating which
180 values were present. We make a translation table to convert the symbols 170 values were present. We make a translation table to convert the symbols
181 back to the corresponding bytes. */ 171 back to the corresponding bytes. */
182
183 t = get_bits(bd, 16); 172 t = get_bits(bd, 16);
184 symTotal = 0; 173 symTotal = 0;
185 for (i = 0; i < 16; i++) { 174 for (i = 0; i < 16; i++) {
@@ -192,7 +181,6 @@ static int get_next_block(bunzip_data *bd)
192 } 181 }
193 182
194 /* How many different Huffman coding groups does this block use? */ 183 /* How many different Huffman coding groups does this block use? */
195
196 groupCount = get_bits(bd, 3); 184 groupCount = get_bits(bd, 3);
197 if (groupCount < 2 || groupCount > MAX_GROUPS) 185 if (groupCount < 2 || groupCount > MAX_GROUPS)
198 return RETVAL_DATA_ERROR; 186 return RETVAL_DATA_ERROR;
@@ -201,19 +189,16 @@ static int get_next_block(bunzip_data *bd)
201 group. Read in the group selector list, which is stored as MTF encoded 189 group. Read in the group selector list, which is stored as MTF encoded
202 bit runs. (MTF=Move To Front, as each value is used it's moved to the 190 bit runs. (MTF=Move To Front, as each value is used it's moved to the
203 start of the list.) */ 191 start of the list.) */
204
205 nSelectors = get_bits(bd, 15); 192 nSelectors = get_bits(bd, 15);
206 if (!nSelectors) return RETVAL_DATA_ERROR; 193 if (!nSelectors) return RETVAL_DATA_ERROR;
207 for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; 194 for (i = 0; i < groupCount; i++) mtfSymbol[i] = i;
208 for (i = 0; i < nSelectors; i++) { 195 for (i = 0; i < nSelectors; i++) {
209 196
210 /* Get next value */ 197 /* Get next value */
211
212 for (j = 0; get_bits(bd, 1); j++) 198 for (j = 0; get_bits(bd, 1); j++)
213 if (j >= groupCount) return RETVAL_DATA_ERROR; 199 if (j >= groupCount) return RETVAL_DATA_ERROR;
214 200
215 /* Decode MTF to get the next selector */ 201 /* Decode MTF to get the next selector */
216
217 uc = mtfSymbol[j]; 202 uc = mtfSymbol[j];
218 for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; 203 for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1];
219 mtfSymbol[0] = selectors[i] = uc; 204 mtfSymbol[0] = selectors[i] = uc;
@@ -221,10 +206,11 @@ static int get_next_block(bunzip_data *bd)
221 206
222 /* Read the Huffman coding tables for each group, which code for symTotal 207 /* Read the Huffman coding tables for each group, which code for symTotal
223 literal symbols, plus two run symbols (RUNA, RUNB) */ 208 literal symbols, plus two run symbols (RUNA, RUNB) */
224
225 symCount = symTotal + 2; 209 symCount = symTotal + 2;
226 for (j = 0; j < groupCount; j++) { 210 for (j = 0; j < groupCount; j++) {
227 unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; 211 unsigned char length[MAX_SYMBOLS];
212 /* 8 bits is ALMOST enough for temp[], see below */
213 unsigned temp[MAX_HUFCODE_BITS+1];
228 int minLen, maxLen, pp; 214 int minLen, maxLen, pp;
229 215
230 /* Read Huffman code lengths for each symbol. They're stored in 216 /* Read Huffman code lengths for each symbol. They're stored in
@@ -233,7 +219,6 @@ static int get_next_block(bunzip_data *bd)
233 (Subtracting 1 before the loop and then adding it back at the end is 219 (Subtracting 1 before the loop and then adding it back at the end is
234 an optimization that makes the test inside the loop simpler: symbol 220 an optimization that makes the test inside the loop simpler: symbol
235 length 0 becomes negative, so an unsigned inequality catches it.) */ 221 length 0 becomes negative, so an unsigned inequality catches it.) */
236
237 t = get_bits(bd, 5) - 1; 222 t = get_bits(bd, 5) - 1;
238 for (i = 0; i < symCount; i++) { 223 for (i = 0; i < symCount; i++) {
239 for (;;) { 224 for (;;) {
@@ -243,7 +228,6 @@ static int get_next_block(bunzip_data *bd)
243 /* If first bit is 0, stop. Else second bit indicates whether 228 /* If first bit is 0, stop. Else second bit indicates whether
244 to increment or decrement the value. Optimization: grab 2 229 to increment or decrement the value. Optimization: grab 2
245 bits and unget the second if the first was 0. */ 230 bits and unget the second if the first was 0. */
246
247 k = get_bits(bd, 2); 231 k = get_bits(bd, 2);
248 if (k < 2) { 232 if (k < 2) {
249 bd->inbufBitCount++; 233 bd->inbufBitCount++;
@@ -251,17 +235,14 @@ static int get_next_block(bunzip_data *bd)
251 } 235 }
252 236
253 /* Add one if second bit 1, else subtract 1. Avoids if/else */ 237 /* Add one if second bit 1, else subtract 1. Avoids if/else */
254
255 t += (((k+1) & 2) - 1); 238 t += (((k+1) & 2) - 1);
256 } 239 }
257 240
258 /* Correct for the initial -1, to get the final symbol length */ 241 /* Correct for the initial -1, to get the final symbol length */
259
260 length[i] = t + 1; 242 length[i] = t + 1;
261 } 243 }
262 244
263 /* Find largest and smallest lengths in this group */ 245 /* Find largest and smallest lengths in this group */
264
265 minLen = maxLen = length[0]; 246 minLen = maxLen = length[0];
266 for (i = 1; i < symCount; i++) { 247 for (i = 1; i < symCount; i++) {
267 if (length[i] > maxLen) maxLen = length[i]; 248 if (length[i] > maxLen) maxLen = length[i];
@@ -278,7 +259,6 @@ static int get_next_block(bunzip_data *bd)
278 * number of bits can have. This is how the Huffman codes can vary in 259 * number of bits can have. This is how the Huffman codes can vary in
279 * length: each code with a value>limit[length] needs another bit. 260 * length: each code with a value>limit[length] needs another bit.
280 */ 261 */
281
282 hufGroup = bd->groups + j; 262 hufGroup = bd->groups + j;
283 hufGroup->minLen = minLen; 263 hufGroup->minLen = minLen;
284 hufGroup->maxLen = maxLen; 264 hufGroup->maxLen = maxLen;
@@ -286,12 +266,10 @@ static int get_next_block(bunzip_data *bd)
286 /* Note that minLen can't be smaller than 1, so we adjust the base 266 /* Note that minLen can't be smaller than 1, so we adjust the base
287 and limit array pointers so we're not always wasting the first 267 and limit array pointers so we're not always wasting the first
288 entry. We do this again when using them (during symbol decoding).*/ 268 entry. We do this again when using them (during symbol decoding).*/
289
290 base = hufGroup->base - 1; 269 base = hufGroup->base - 1;
291 limit = hufGroup->limit - 1; 270 limit = hufGroup->limit - 1;
292 271
293 /* Calculate permute[]. Concurrently, initialize temp[] and limit[]. */ 272 /* Calculate permute[]. Concurrently, initialize temp[] and limit[]. */
294
295 pp = 0; 273 pp = 0;
296 for (i = minLen; i <= maxLen; i++) { 274 for (i = minLen; i <= maxLen; i++) {
297 temp[i] = limit[i] = 0; 275 temp[i] = limit[i] = 0;
@@ -301,14 +279,14 @@ static int get_next_block(bunzip_data *bd)
301 } 279 }
302 280
303 /* Count symbols coded for at each bit length */ 281 /* Count symbols coded for at each bit length */
304 282 /* NB: in pathological cases, temp[8] can end up being 256.
283 * That's why uint8_t is too small for temp[]. */
305 for (i = 0; i < symCount; i++) temp[length[i]]++; 284 for (i = 0; i < symCount; i++) temp[length[i]]++;
306 285
307 /* Calculate limit[] (the largest symbol-coding value at each bit 286 /* Calculate limit[] (the largest symbol-coding value at each bit
308 * length, which is (previous limit<<1)+symbols at this level), and 287 * length, which is (previous limit<<1)+symbols at this level), and
309 * base[] (number of symbols to ignore at each bit length, which is 288 * base[] (number of symbols to ignore at each bit length, which is
310 * limit minus the cumulative count of symbols coded for already). */ 289 * limit minus the cumulative count of symbols coded for already). */
311
312 pp = t = 0; 290 pp = t = 0;
313 for (i = minLen; i < maxLen; i++) { 291 for (i = minLen; i < maxLen; i++) {
314 pp += temp[i]; 292 pp += temp[i];
@@ -319,7 +297,6 @@ static int get_next_block(bunzip_data *bd)
319 each level we're really only interested in the first few bits, 297 each level we're really only interested in the first few bits,
320 so here we set all the trailing to-be-ignored bits to 1 so they 298 so here we set all the trailing to-be-ignored bits to 1 so they
321 don't affect the value>limit[length] comparison. */ 299 don't affect the value>limit[length] comparison. */
322
323 limit[i] = (pp << (maxLen - i)) - 1; 300 limit[i] = (pp << (maxLen - i)) - 1;
324 pp <<= 1; 301 pp <<= 1;
325 t += temp[i]; 302 t += temp[i];
@@ -335,7 +312,6 @@ static int get_next_block(bunzip_data *bd)
335 and run length encoding, saving the result into dbuf[dbufCount++] = uc */ 312 and run length encoding, saving the result into dbuf[dbufCount++] = uc */
336 313
337 /* Initialize symbol occurrence counters and symbol Move To Front table */ 314 /* Initialize symbol occurrence counters and symbol Move To Front table */
338
339 memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ 315 memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */
340 for (i = 0; i < 256; i++) { 316 for (i = 0; i < 256; i++) {
341 //byteCount[i] = 0; 317 //byteCount[i] = 0;
@@ -347,8 +323,7 @@ static int get_next_block(bunzip_data *bd)
347 runPos = dbufCount = selector = 0; 323 runPos = dbufCount = selector = 0;
348 for (;;) { 324 for (;;) {
349 325
350 /* fetch next Huffman coding group from list. */ 326 /* Fetch next Huffman coding group from list. */
351
352 symCount = GROUP_SIZE - 1; 327 symCount = GROUP_SIZE - 1;
353 if (selector >= nSelectors) return RETVAL_DATA_ERROR; 328 if (selector >= nSelectors) return RETVAL_DATA_ERROR;
354 hufGroup = bd->groups + selectors[selector++]; 329 hufGroup = bd->groups + selectors[selector++];
@@ -367,7 +342,6 @@ static int get_next_block(bunzip_data *bd)
367 dry). The following (up to got_huff_bits:) is equivalent to 342 dry). The following (up to got_huff_bits:) is equivalent to
368 j = get_bits(bd, hufGroup->maxLen); 343 j = get_bits(bd, hufGroup->maxLen);
369 */ 344 */
370
371 while ((int)(bd->inbufBitCount) < hufGroup->maxLen) { 345 while ((int)(bd->inbufBitCount) < hufGroup->maxLen) {
372 if (bd->inbufPos == bd->inbufCount) { 346 if (bd->inbufPos == bd->inbufCount) {
373 j = get_bits(bd, hufGroup->maxLen); 347 j = get_bits(bd, hufGroup->maxLen);
@@ -382,13 +356,11 @@ static int get_next_block(bunzip_data *bd)
382 got_huff_bits: 356 got_huff_bits:
383 357
384 /* Figure out how many bits are in next symbol and unget extras */ 358 /* Figure out how many bits are in next symbol and unget extras */
385
386 i = hufGroup->minLen; 359 i = hufGroup->minLen;
387 while (j > limit[i]) ++i; 360 while (j > limit[i]) ++i;
388 bd->inbufBitCount += (hufGroup->maxLen - i); 361 bd->inbufBitCount += (hufGroup->maxLen - i);
389 362
390 /* Huffman decode value to get nextSym (with bounds checking) */ 363 /* Huffman decode value to get nextSym (with bounds checking) */
391
392 if (i > hufGroup->maxLen) 364 if (i > hufGroup->maxLen)
393 return RETVAL_DATA_ERROR; 365 return RETVAL_DATA_ERROR;
394 j = (j >> (hufGroup->maxLen - i)) - base[i]; 366 j = (j >> (hufGroup->maxLen - i)) - base[i];
@@ -400,11 +372,9 @@ static int get_next_block(bunzip_data *bd)
400 byte, or a repeated run of the most recent literal byte. First, 372 byte, or a repeated run of the most recent literal byte. First,
401 check if nextSym indicates a repeated run, and if so loop collecting 373 check if nextSym indicates a repeated run, and if so loop collecting
402 how many times to repeat the last literal. */ 374 how many times to repeat the last literal. */
403
404 if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ 375 if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
405 376
406 /* If this is the start of a new run, zero out counter */ 377 /* If this is the start of a new run, zero out counter */
407
408 if (!runPos) { 378 if (!runPos) {
409 runPos = 1; 379 runPos = 1;
410 t = 0; 380 t = 0;
@@ -417,7 +387,6 @@ static int get_next_block(bunzip_data *bd)
417 the basic or 0/1 method (except all bits 0, which would use no 387 the basic or 0/1 method (except all bits 0, which would use no
418 symbols, but a run of length 0 doesn't mean anything in this 388 symbols, but a run of length 0 doesn't mean anything in this
419 context). Thus space is saved. */ 389 context). Thus space is saved. */
420
421 t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ 390 t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
422 if (runPos < dbufSize) runPos <<= 1; 391 if (runPos < dbufSize) runPos <<= 1;
423 goto end_of_huffman_loop; 392 goto end_of_huffman_loop;
@@ -427,7 +396,6 @@ static int get_next_block(bunzip_data *bd)
427 how many times to repeat the last literal, so append that many 396 how many times to repeat the last literal, so append that many
428 copies to our buffer of decoded symbols (dbuf) now. (The last 397 copies to our buffer of decoded symbols (dbuf) now. (The last
429 literal used is the one at the head of the mtfSymbol array.) */ 398 literal used is the one at the head of the mtfSymbol array.) */
430
431 if (runPos) { 399 if (runPos) {
432 runPos = 0; 400 runPos = 0;
433 if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; 401 if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR;
@@ -438,7 +406,6 @@ static int get_next_block(bunzip_data *bd)
438 } 406 }
439 407
440 /* Is this the terminating symbol? */ 408 /* Is this the terminating symbol? */
441
442 if (nextSym > symTotal) break; 409 if (nextSym > symTotal) break;
443 410
444 /* At this point, nextSym indicates a new literal character. Subtract 411 /* At this point, nextSym indicates a new literal character. Subtract
@@ -448,7 +415,6 @@ static int get_next_block(bunzip_data *bd)
448 first symbol in the mtf array, position 0, would have been handled 415 first symbol in the mtf array, position 0, would have been handled
449 as part of a run above. Therefore 1 unused mtf position minus 416 as part of a run above. Therefore 1 unused mtf position minus
450 2 non-literal nextSym values equals -1.) */ 417 2 non-literal nextSym values equals -1.) */
451
452 if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; 418 if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR;
453 i = nextSym - 1; 419 i = nextSym - 1;
454 uc = mtfSymbol[i]; 420 uc = mtfSymbol[i];
@@ -457,7 +423,6 @@ static int get_next_block(bunzip_data *bd)
457 * small number of symbols, and are bound by 256 in any case, using 423 * small number of symbols, and are bound by 256 in any case, using
458 * memmove here would typically be bigger and slower due to function 424 * memmove here would typically be bigger and slower due to function
459 * call overhead and other assorted setup costs. */ 425 * call overhead and other assorted setup costs. */
460
461 do { 426 do {
462 mtfSymbol[i] = mtfSymbol[i-1]; 427 mtfSymbol[i] = mtfSymbol[i-1];
463 } while (--i); 428 } while (--i);
@@ -465,13 +430,11 @@ static int get_next_block(bunzip_data *bd)
465 uc = symToByte[uc]; 430 uc = symToByte[uc];
466 431
467 /* We have our literal byte. Save it into dbuf. */ 432 /* We have our literal byte. Save it into dbuf. */
468
469 byteCount[uc]++; 433 byteCount[uc]++;
470 dbuf[dbufCount++] = (unsigned)uc; 434 dbuf[dbufCount++] = (unsigned)uc;
471 435
472 /* Skip group initialization if we're not done with this group. Done 436 /* Skip group initialization if we're not done with this group. Done
473 * this way to avoid compiler warning. */ 437 * this way to avoid compiler warning. */
474
475 end_of_huffman_loop: 438 end_of_huffman_loop:
476 if (symCount--) goto continue_this_group; 439 if (symCount--) goto continue_this_group;
477 } 440 }
@@ -484,7 +447,6 @@ static int get_next_block(bunzip_data *bd)
484 */ 447 */
485 448
486 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ 449 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
487
488 j = 0; 450 j = 0;
489 for (i = 0; i < 256; i++) { 451 for (i = 0; i < 256; i++) {
490 k = j + byteCount[i]; 452 k = j + byteCount[i];
@@ -493,7 +455,6 @@ static int get_next_block(bunzip_data *bd)
493 } 455 }
494 456
495 /* Figure out what order dbuf would be in if we sorted it. */ 457 /* Figure out what order dbuf would be in if we sorted it. */
496
497 for (i = 0; i < dbufCount; i++) { 458 for (i = 0; i < dbufCount; i++) {
498 uc = (unsigned char)(dbuf[i] & 0xff); 459 uc = (unsigned char)(dbuf[i] & 0xff);
499 dbuf[byteCount[uc]] |= (i << 8); 460 dbuf[byteCount[uc]] |= (i << 8);
@@ -503,11 +464,10 @@ static int get_next_block(bunzip_data *bd)
503 /* Decode first byte by hand to initialize "previous" byte. Note that it 464 /* Decode first byte by hand to initialize "previous" byte. Note that it
504 doesn't get output, and if the first three characters are identical 465 doesn't get output, and if the first three characters are identical
505 it doesn't qualify as a run (hence writeRunCountdown=5). */ 466 it doesn't qualify as a run (hence writeRunCountdown=5). */
506
507 if (dbufCount) { 467 if (dbufCount) {
508 if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; 468 if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
509 bd->writePos = dbuf[origPtr]; 469 bd->writePos = dbuf[origPtr];
510 bd->writeCurrent = (unsigned char)(bd->writePos & 0xff); 470 bd->writeCurrent = (unsigned char)(bd->writePos & 0xff);
511 bd->writePos >>= 8; 471 bd->writePos >>= 8;
512 bd->writeRunCountdown = 5; 472 bd->writeRunCountdown = 5;
513 } 473 }
@@ -522,7 +482,6 @@ static int get_next_block(bunzip_data *bd)
522 error (all errors are negative numbers). If out_fd!=-1, outbuf and len 482 error (all errors are negative numbers). If out_fd!=-1, outbuf and len
523 are ignored, data is written to out_fd and return is RETVAL_OK or error. 483 are ignored, data is written to out_fd and return is RETVAL_OK or error.
524*/ 484*/
525
526int read_bunzip(bunzip_data *bd, char *outbuf, int len) 485int read_bunzip(bunzip_data *bd, char *outbuf, int len)
527{ 486{
528 const unsigned *dbuf; 487 const unsigned *dbuf;
@@ -539,19 +498,15 @@ int read_bunzip(bunzip_data *bd, char *outbuf, int len)
539 /* We will always have pending decoded data to write into the output 498 /* We will always have pending decoded data to write into the output
540 buffer unless this is the very first call (in which case we haven't 499 buffer unless this is the very first call (in which case we haven't
541 Huffman-decoded a block into the intermediate buffer yet). */ 500 Huffman-decoded a block into the intermediate buffer yet). */
542
543 if (bd->writeCopies) { 501 if (bd->writeCopies) {
544 502
545 /* Inside the loop, writeCopies means extra copies (beyond 1) */ 503 /* Inside the loop, writeCopies means extra copies (beyond 1) */
546
547 --bd->writeCopies; 504 --bd->writeCopies;
548 505
549 /* Loop outputting bytes */ 506 /* Loop outputting bytes */
550
551 for (;;) { 507 for (;;) {
552 508
553 /* If the output buffer is full, snapshot state and return */ 509 /* If the output buffer is full, snapshot state and return */
554
555 if (gotcount >= len) { 510 if (gotcount >= len) {
556 bd->writePos = pos; 511 bd->writePos = pos;
557 bd->writeCurrent = current; 512 bd->writeCurrent = current;
@@ -560,13 +515,11 @@ int read_bunzip(bunzip_data *bd, char *outbuf, int len)
560 } 515 }
561 516
562 /* Write next byte into output buffer, updating CRC */ 517 /* Write next byte into output buffer, updating CRC */
563
564 outbuf[gotcount++] = current; 518 outbuf[gotcount++] = current;
565 bd->writeCRC = (bd->writeCRC << 8) 519 bd->writeCRC = (bd->writeCRC << 8)
566 ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; 520 ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current];
567 521
568 /* Loop now if we're outputting multiple copies of this byte */ 522 /* Loop now if we're outputting multiple copies of this byte */
569
570 if (bd->writeCopies) { 523 if (bd->writeCopies) {
571 --bd->writeCopies; 524 --bd->writeCopies;
572 continue; 525 continue;
@@ -582,35 +535,29 @@ int read_bunzip(bunzip_data *bd, char *outbuf, int len)
582 /* After 3 consecutive copies of the same byte, the 4th 535 /* After 3 consecutive copies of the same byte, the 4th
583 * is a repeat count. We count down from 4 instead 536 * is a repeat count. We count down from 4 instead
584 * of counting up because testing for non-zero is faster */ 537 * of counting up because testing for non-zero is faster */
585
586 if (--bd->writeRunCountdown) { 538 if (--bd->writeRunCountdown) {
587 if (current != previous) 539 if (current != previous)
588 bd->writeRunCountdown = 4; 540 bd->writeRunCountdown = 4;
589 } else { 541 } else {
590 542
591 /* We have a repeated run, this byte indicates the count */ 543 /* We have a repeated run, this byte indicates the count */
592
593 bd->writeCopies = current; 544 bd->writeCopies = current;
594 current = previous; 545 current = previous;
595 bd->writeRunCountdown = 5; 546 bd->writeRunCountdown = 5;
596 547
597 /* Sometimes there are just 3 bytes (run length 0) */ 548 /* Sometimes there are just 3 bytes (run length 0) */
598
599 if (!bd->writeCopies) goto decode_next_byte; 549 if (!bd->writeCopies) goto decode_next_byte;
600 550
601 /* Subtract the 1 copy we'd output anyway to get extras */ 551 /* Subtract the 1 copy we'd output anyway to get extras */
602
603 --bd->writeCopies; 552 --bd->writeCopies;
604 } 553 }
605 } 554 }
606 555
607 /* Decompression of this block completed successfully */ 556 /* Decompression of this block completed successfully */
608
609 bd->writeCRC = ~bd->writeCRC; 557 bd->writeCRC = ~bd->writeCRC;
610 bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; 558 bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC;
611 559
612 /* If this block had a CRC error, force file level CRC error. */ 560 /* If this block had a CRC error, force file level CRC error. */
613
614 if (bd->writeCRC != bd->headerCRC) { 561 if (bd->writeCRC != bd->headerCRC) {
615 bd->totalCRC = bd->headerCRC + 1; 562 bd->totalCRC = bd->headerCRC + 1;
616 return RETVAL_LAST_BLOCK; 563 return RETVAL_LAST_BLOCK;
@@ -619,7 +566,6 @@ int read_bunzip(bunzip_data *bd, char *outbuf, int len)
619 566
620 /* Refill the intermediate buffer by Huffman-decoding next block of input */ 567 /* Refill the intermediate buffer by Huffman-decoding next block of input */
621 /* (previous is just a convenient unused temp variable here) */ 568 /* (previous is just a convenient unused temp variable here) */
622
623 previous = get_next_block(bd); 569 previous = get_next_block(bd);
624 if (previous) { 570 if (previous) {
625 bd->writeCount = previous; 571 bd->writeCount = previous;
@@ -631,7 +577,6 @@ int read_bunzip(bunzip_data *bd, char *outbuf, int len)
631 goto decode_next_byte; 577 goto decode_next_byte;
632} 578}
633 579
634
635/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain 580/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
636 a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are 581 a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
637 ignored, and data is read from file handle into temporary buffer. */ 582 ignored, and data is read from file handle into temporary buffer. */
@@ -639,7 +584,6 @@ int read_bunzip(bunzip_data *bd, char *outbuf, int len)
639/* Because bunzip2 is used for help text unpacking, and because bb_show_usage() 584/* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
640 should work for NOFORK applets too, we must be extremely careful to not leak 585 should work for NOFORK applets too, we must be extremely careful to not leak
641 any allocations! */ 586 any allocations! */
642
643int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, 587int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
644 int len) 588 int len)
645{ 589{
@@ -650,16 +594,13 @@ int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
650 }; 594 };
651 595
652 /* Figure out how much data to allocate */ 596 /* Figure out how much data to allocate */
653
654 i = sizeof(bunzip_data); 597 i = sizeof(bunzip_data);
655 if (in_fd != -1) i += IOBUF_SIZE; 598 if (in_fd != -1) i += IOBUF_SIZE;
656 599
657 /* Allocate bunzip_data. Most fields initialize to zero. */ 600 /* Allocate bunzip_data. Most fields initialize to zero. */
658
659 bd = *bdp = xzalloc(i); 601 bd = *bdp = xzalloc(i);
660 602
661 /* Setup input buffer */ 603 /* Setup input buffer */
662
663 bd->in_fd = in_fd; 604 bd->in_fd = in_fd;
664 if (-1 == in_fd) { 605 if (-1 == in_fd) {
665 /* in this case, bd->inbuf is read-only */ 606 /* in this case, bd->inbuf is read-only */
@@ -669,22 +610,18 @@ int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
669 bd->inbuf = (unsigned char *)(bd + 1); 610 bd->inbuf = (unsigned char *)(bd + 1);
670 611
671 /* Init the CRC32 table (big endian) */ 612 /* Init the CRC32 table (big endian) */
672
673 crc32_filltable(bd->crc32Table, 1); 613 crc32_filltable(bd->crc32Table, 1);
674 614
675 /* Setup for I/O error handling via longjmp */ 615 /* Setup for I/O error handling via longjmp */
676
677 i = setjmp(bd->jmpbuf); 616 i = setjmp(bd->jmpbuf);
678 if (i) return i; 617 if (i) return i;
679 618
680 /* Ensure that file starts with "BZh['1'-'9']." */ 619 /* Ensure that file starts with "BZh['1'-'9']." */
681
682 i = get_bits(bd, 32); 620 i = get_bits(bd, 32);
683 if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; 621 if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA;
684 622
685 /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of 623 /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of
686 uncompressed data. Allocate intermediate buffer for block. */ 624 uncompressed data. Allocate intermediate buffer for block. */
687
688 bd->dbufSize = 100000 * (i - BZh0); 625 bd->dbufSize = 100000 * (i - BZh0);
689 626
690 /* Cannot use xmalloc - may leak bd in NOFORK case! */ 627 /* Cannot use xmalloc - may leak bd in NOFORK case! */
@@ -704,7 +641,6 @@ void dealloc_bunzip(bunzip_data *bd)
704 641
705 642
706/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ 643/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */
707
708USE_DESKTOP(long long) int 644USE_DESKTOP(long long) int
709unpack_bz2_stream(int src_fd, int dst_fd) 645unpack_bz2_stream(int src_fd, int dst_fd)
710{ 646{
@@ -761,9 +697,9 @@ int main(int argc, char **argv)
761 char c; 697 char c;
762 698
763 if (i < 0) 699 if (i < 0)
764 fprintf(stderr,"%s\n", bunzip_errors[-i]); 700 fprintf(stderr, "%s\n", bunzip_errors[-i]);
765 else if (read(STDIN_FILENO, &c, 1)) 701 else if (read(STDIN_FILENO, &c, 1))
766 fprintf(stderr,"Trailing garbage ignored\n"); 702 fprintf(stderr, "Trailing garbage ignored\n");
767 return -i; 703 return -i;
768} 704}
769#endif 705#endif