aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenis Vlasenko <vda.linux@googlemail.com>2008-06-28 18:10:09 +0000
committerDenis Vlasenko <vda.linux@googlemail.com>2008-06-28 18:10:09 +0000
commit86d88c09909b1089bd2b43e896895c48ce33f1b2 (patch)
tree109dcfab6ad4a3d9ee1b48d0bb0c82d38f15af2a
parenta60936da062fc569328cd643c460dcf215ed9966 (diff)
downloadbusybox-w32-86d88c09909b1089bd2b43e896895c48ce33f1b2.tar.gz
busybox-w32-86d88c09909b1089bd2b43e896895c48ce33f1b2.tar.bz2
busybox-w32-86d88c09909b1089bd2b43e896895c48ce33f1b2.zip
bunzip2: make proper fix for the problem "fixed" in rev. 22521
Thanks for Rob Landley <rob@landley.net>
-rw-r--r--archival/libunarchive/decompress_bunzip2.c93
1 files changed, 13 insertions, 80 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index 106a08b54..654dc28a9 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -66,7 +66,6 @@ struct group_data {
66 * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER 66 * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER
67 * and moved it (inbufBitCount) to offset 0. 67 * and moved it (inbufBitCount) to offset 0.
68 */ 68 */
69
70struct bunzip_data { 69struct bunzip_data {
71 /* I/O tracking data (file handles, buffers, positions, etc.) */ 70 /* I/O tracking data (file handles, buffers, positions, etc.) */
72 unsigned inbufBitCount, inbufBits; 71 unsigned inbufBitCount, inbufBits;
@@ -102,11 +101,9 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
102 101
103 /* If we need to get more data from the byte buffer, do so. (Loop getting 102 /* If we need to get more data from the byte buffer, do so. (Loop getting
104 one byte at a time to enforce endianness and avoid unaligned access.) */ 103 one byte at a time to enforce endianness and avoid unaligned access.) */
105
106 while ((int)(bd->inbufBitCount) < bits_wanted) { 104 while ((int)(bd->inbufBitCount) < bits_wanted) {
107 105
108 /* If we need to read more data from file into byte buffer, do so */ 106 /* If we need to read more data from file into byte buffer, do so */
109
110 if (bd->inbufPos == bd->inbufCount) { 107 if (bd->inbufPos == bd->inbufCount) {
111 /* if "no input fd" case: in_fd == -1, read fails, we jump */ 108 /* if "no input fd" case: in_fd == -1, read fails, we jump */
112 bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); 109 bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
@@ -116,7 +113,6 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
116 } 113 }
117 114
118 /* Avoid 32-bit overflow (dump bit buffer to top of output) */ 115 /* Avoid 32-bit overflow (dump bit buffer to top of output) */
119
120 if (bd->inbufBitCount >= 24) { 116 if (bd->inbufBitCount >= 24) {
121 bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); 117 bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1);
122 bits_wanted -= bd->inbufBitCount; 118 bits_wanted -= bd->inbufBitCount;
@@ -125,13 +121,11 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
125 } 121 }
126 122
127 /* Grab next 8 bits of input from buffer. */ 123 /* Grab next 8 bits of input from buffer. */
128
129 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; 124 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
130 bd->inbufBitCount += 8; 125 bd->inbufBitCount += 8;
131 } 126 }
132 127
133 /* Calculate result */ 128 /* Calculate result */
134
135 bd->inbufBitCount -= bits_wanted; 129 bd->inbufBitCount -= bits_wanted;
136 bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); 130 bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1);
137 131
@@ -139,29 +133,24 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
139} 133}
140 134
141/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ 135/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
142
143static int get_next_block(bunzip_data *bd) 136static int get_next_block(bunzip_data *bd)
144{ 137{
145 struct group_data *hufGroup; 138 struct group_data *hufGroup;
146 int dbufCount, nextSym, dbufSize, groupCount, *base, selector, 139 int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector,
147 i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; 140 i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
148 unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; 141 unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
149 /* limit was int* but was changed to unsigned* - grep for '[x]' 142 unsigned *dbuf, origPtr;
150 * in comment to see where it is important. -- vda */
151 unsigned *dbuf, *limit, origPtr;
152 143
153 dbuf = bd->dbuf; 144 dbuf = bd->dbuf;
154 dbufSize = bd->dbufSize; 145 dbufSize = bd->dbufSize;
155 selectors = bd->selectors; 146 selectors = bd->selectors;
156 147
157 /* Reset longjmp I/O error handling */ 148 /* Reset longjmp I/O error handling */
158
159 i = setjmp(bd->jmpbuf); 149 i = setjmp(bd->jmpbuf);
160 if (i) return i; 150 if (i) return i;
161 151
162 /* Read in header signature and CRC, then validate signature. 152 /* Read in header signature and CRC, then validate signature.
163 (last block signature means CRC is for whole file, return now) */ 153 (last block signature means CRC is for whole file, return now) */
164
165 i = get_bits(bd, 24); 154 i = get_bits(bd, 24);
166 j = get_bits(bd, 24); 155 j = get_bits(bd, 24);
167 bd->headerCRC = get_bits(bd, 32); 156 bd->headerCRC = get_bits(bd, 32);
@@ -171,7 +160,6 @@ static int get_next_block(bunzip_data *bd)
171 /* We can add support for blockRandomised if anybody complains. There was 160 /* We can add support for blockRandomised if anybody complains. There was
172 some code for this in busybox 1.0.0-pre3, but nobody ever noticed that 161 some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
173 it didn't actually work. */ 162 it didn't actually work. */
174
175 if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; 163 if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
176 origPtr = get_bits(bd, 24); 164 origPtr = get_bits(bd, 24);
177 if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; 165 if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR;
@@ -181,7 +169,6 @@ static int get_next_block(bunzip_data *bd)
181 symbols to deal with, and writes a sparse bitfield indicating which 169 symbols to deal with, and writes a sparse bitfield indicating which
182 values were present. We make a translation table to convert the symbols 170 values were present. We make a translation table to convert the symbols
183 back to the corresponding bytes. */ 171 back to the corresponding bytes. */
184
185 t = get_bits(bd, 16); 172 t = get_bits(bd, 16);
186 symTotal = 0; 173 symTotal = 0;
187 for (i = 0; i < 16; i++) { 174 for (i = 0; i < 16; i++) {
@@ -194,7 +181,6 @@ static int get_next_block(bunzip_data *bd)
194 } 181 }
195 182
196 /* How many different Huffman coding groups does this block use? */ 183 /* How many different Huffman coding groups does this block use? */
197
198 groupCount = get_bits(bd, 3); 184 groupCount = get_bits(bd, 3);
199 if (groupCount < 2 || groupCount > MAX_GROUPS) 185 if (groupCount < 2 || groupCount > MAX_GROUPS)
200 return RETVAL_DATA_ERROR; 186 return RETVAL_DATA_ERROR;
@@ -203,19 +189,16 @@ static int get_next_block(bunzip_data *bd)
203 group. Read in the group selector list, which is stored as MTF encoded 189 group. Read in the group selector list, which is stored as MTF encoded
204 bit runs. (MTF=Move To Front, as each value is used it's moved to the 190 bit runs. (MTF=Move To Front, as each value is used it's moved to the
205 start of the list.) */ 191 start of the list.) */
206
207 nSelectors = get_bits(bd, 15); 192 nSelectors = get_bits(bd, 15);
208 if (!nSelectors) return RETVAL_DATA_ERROR; 193 if (!nSelectors) return RETVAL_DATA_ERROR;
209 for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; 194 for (i = 0; i < groupCount; i++) mtfSymbol[i] = i;
210 for (i = 0; i < nSelectors; i++) { 195 for (i = 0; i < nSelectors; i++) {
211 196
212 /* Get next value */ 197 /* Get next value */
213
214 for (j = 0; get_bits(bd, 1); j++) 198 for (j = 0; get_bits(bd, 1); j++)
215 if (j >= groupCount) return RETVAL_DATA_ERROR; 199 if (j >= groupCount) return RETVAL_DATA_ERROR;
216 200
217 /* Decode MTF to get the next selector */ 201 /* Decode MTF to get the next selector */
218
219 uc = mtfSymbol[j]; 202 uc = mtfSymbol[j];
220 for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; 203 for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1];
221 mtfSymbol[0] = selectors[i] = uc; 204 mtfSymbol[0] = selectors[i] = uc;
@@ -223,10 +206,11 @@ static int get_next_block(bunzip_data *bd)
223 206
224 /* Read the Huffman coding tables for each group, which code for symTotal 207 /* Read the Huffman coding tables for each group, which code for symTotal
225 literal symbols, plus two run symbols (RUNA, RUNB) */ 208 literal symbols, plus two run symbols (RUNA, RUNB) */
226
227 symCount = symTotal + 2; 209 symCount = symTotal + 2;
228 for (j = 0; j < groupCount; j++) { 210 for (j = 0; j < groupCount; j++) {
229 unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; 211 unsigned char length[MAX_SYMBOLS];
212 /* 8 bits is ALMOST enough for temp[], see below */
213 unsigned temp[MAX_HUFCODE_BITS+1];
230 int minLen, maxLen, pp; 214 int minLen, maxLen, pp;
231 215
232 /* Read Huffman code lengths for each symbol. They're stored in 216 /* Read Huffman code lengths for each symbol. They're stored in
@@ -235,7 +219,6 @@ static int get_next_block(bunzip_data *bd)
235 (Subtracting 1 before the loop and then adding it back at the end is 219 (Subtracting 1 before the loop and then adding it back at the end is
236 an optimization that makes the test inside the loop simpler: symbol 220 an optimization that makes the test inside the loop simpler: symbol
237 length 0 becomes negative, so an unsigned inequality catches it.) */ 221 length 0 becomes negative, so an unsigned inequality catches it.) */
238
239 t = get_bits(bd, 5) - 1; 222 t = get_bits(bd, 5) - 1;
240 for (i = 0; i < symCount; i++) { 223 for (i = 0; i < symCount; i++) {
241 for (;;) { 224 for (;;) {
@@ -245,7 +228,6 @@ static int get_next_block(bunzip_data *bd)
245 /* If first bit is 0, stop. Else second bit indicates whether 228 /* If first bit is 0, stop. Else second bit indicates whether
246 to increment or decrement the value. Optimization: grab 2 229 to increment or decrement the value. Optimization: grab 2
247 bits and unget the second if the first was 0. */ 230 bits and unget the second if the first was 0. */
248
249 k = get_bits(bd, 2); 231 k = get_bits(bd, 2);
250 if (k < 2) { 232 if (k < 2) {
251 bd->inbufBitCount++; 233 bd->inbufBitCount++;
@@ -253,17 +235,14 @@ static int get_next_block(bunzip_data *bd)
253 } 235 }
254 236
255 /* Add one if second bit 1, else subtract 1. Avoids if/else */ 237 /* Add one if second bit 1, else subtract 1. Avoids if/else */
256
257 t += (((k+1) & 2) - 1); 238 t += (((k+1) & 2) - 1);
258 } 239 }
259 240
260 /* Correct for the initial -1, to get the final symbol length */ 241 /* Correct for the initial -1, to get the final symbol length */
261
262 length[i] = t + 1; 242 length[i] = t + 1;
263 } 243 }
264 244
265 /* Find largest and smallest lengths in this group */ 245 /* Find largest and smallest lengths in this group */
266
267 minLen = maxLen = length[0]; 246 minLen = maxLen = length[0];
268 for (i = 1; i < symCount; i++) { 247 for (i = 1; i < symCount; i++) {
269 if (length[i] > maxLen) maxLen = length[i]; 248 if (length[i] > maxLen) maxLen = length[i];
@@ -280,7 +259,6 @@ static int get_next_block(bunzip_data *bd)
280 * number of bits can have. This is how the Huffman codes can vary in 259 * number of bits can have. This is how the Huffman codes can vary in
281 * length: each code with a value>limit[length] needs another bit. 260 * length: each code with a value>limit[length] needs another bit.
282 */ 261 */
283
284 hufGroup = bd->groups + j; 262 hufGroup = bd->groups + j;
285 hufGroup->minLen = minLen; 263 hufGroup->minLen = minLen;
286 hufGroup->maxLen = maxLen; 264 hufGroup->maxLen = maxLen;
@@ -288,12 +266,10 @@ static int get_next_block(bunzip_data *bd)
288 /* Note that minLen can't be smaller than 1, so we adjust the base 266 /* Note that minLen can't be smaller than 1, so we adjust the base
289 and limit array pointers so we're not always wasting the first 267 and limit array pointers so we're not always wasting the first
290 entry. We do this again when using them (during symbol decoding).*/ 268 entry. We do this again when using them (during symbol decoding).*/
291
292 base = hufGroup->base - 1; 269 base = hufGroup->base - 1;
293 limit = (unsigned*)hufGroup->limit - 1; 270 limit = hufGroup->limit - 1;
294 271
295 /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ 272 /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */
296
297 pp = 0; 273 pp = 0;
298 for (i = minLen; i <= maxLen; i++) { 274 for (i = minLen; i <= maxLen; i++) {
299 temp[i] = limit[i] = 0; 275 temp[i] = limit[i] = 0;
@@ -303,14 +279,14 @@ static int get_next_block(bunzip_data *bd)
303 } 279 }
304 280
305 /* Count symbols coded for at each bit length */ 281 /* Count symbols coded for at each bit length */
306 282 /* NB: in pathological cases, temp[8] can end ip being 256.
283 * That's why uint8_t is too small for temp[]. */
307 for (i = 0; i < symCount; i++) temp[length[i]]++; 284 for (i = 0; i < symCount; i++) temp[length[i]]++;
308 285
309 /* Calculate limit[] (the largest symbol-coding value at each bit 286 /* Calculate limit[] (the largest symbol-coding value at each bit
310 * length, which is (previous limit<<1)+symbols at this level), and 287 * length, which is (previous limit<<1)+symbols at this level), and
311 * base[] (number of symbols to ignore at each bit length, which is 288 * base[] (number of symbols to ignore at each bit length, which is
312 * limit minus the cumulative count of symbols coded for already). */ 289 * limit minus the cumulative count of symbols coded for already). */
313
314 pp = t = 0; 290 pp = t = 0;
315 for (i = minLen; i < maxLen; i++) { 291 for (i = minLen; i < maxLen; i++) {
316 pp += temp[i]; 292 pp += temp[i];
@@ -321,14 +297,12 @@ static int get_next_block(bunzip_data *bd)
321 each level we're really only interested in the first few bits, 297 each level we're really only interested in the first few bits,
322 so here we set all the trailing to-be-ignored bits to 1 so they 298 so here we set all the trailing to-be-ignored bits to 1 so they
323 don't affect the value>limit[length] comparison. */ 299 don't affect the value>limit[length] comparison. */
324
325 limit[i] = (pp << (maxLen - i)) - 1; 300 limit[i] = (pp << (maxLen - i)) - 1;
326 pp <<= 1; 301 pp <<= 1;
327 t += temp[i]; 302 t += temp[i];
328 base[i+1] = pp - t; 303 base[i+1] = pp - t;
329 } 304 }
330 limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ 305 limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
331 /* [x] was observed to occasionally have -1 here: -- vda */
332 limit[maxLen] = pp + temp[maxLen] - 1; 306 limit[maxLen] = pp + temp[maxLen] - 1;
333 base[minLen] = 0; 307 base[minLen] = 0;
334 } 308 }
@@ -338,7 +312,6 @@ static int get_next_block(bunzip_data *bd)
338 and run length encoding, saving the result into dbuf[dbufCount++] = uc */ 312 and run length encoding, saving the result into dbuf[dbufCount++] = uc */
339 313
340 /* Initialize symbol occurrence counters and symbol Move To Front table */ 314 /* Initialize symbol occurrence counters and symbol Move To Front table */
341
342 memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ 315 memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */
343 for (i = 0; i < 256; i++) { 316 for (i = 0; i < 256; i++) {
344 //byteCount[i] = 0; 317 //byteCount[i] = 0;
@@ -350,13 +323,12 @@ static int get_next_block(bunzip_data *bd)
350 runPos = dbufCount = selector = 0; 323 runPos = dbufCount = selector = 0;
351 for (;;) { 324 for (;;) {
352 325
353 /* fetch next Huffman coding group from list. */ 326 /* Fetch next Huffman coding group from list. */
354
355 symCount = GROUP_SIZE - 1; 327 symCount = GROUP_SIZE - 1;
356 if (selector >= nSelectors) return RETVAL_DATA_ERROR; 328 if (selector >= nSelectors) return RETVAL_DATA_ERROR;
357 hufGroup = bd->groups + selectors[selector++]; 329 hufGroup = bd->groups + selectors[selector++];
358 base = hufGroup->base - 1; 330 base = hufGroup->base - 1;
359 limit = (unsigned*)hufGroup->limit - 1; 331 limit = hufGroup->limit - 1;
360 continue_this_group: 332 continue_this_group:
361 333
362 /* Read next Huffman-coded symbol. */ 334 /* Read next Huffman-coded symbol. */
@@ -370,7 +342,6 @@ static int get_next_block(bunzip_data *bd)
370 dry). The following (up to got_huff_bits:) is equivalent to 342 dry). The following (up to got_huff_bits:) is equivalent to
371 j = get_bits(bd, hufGroup->maxLen); 343 j = get_bits(bd, hufGroup->maxLen);
372 */ 344 */
373
374 while ((int)(bd->inbufBitCount) < hufGroup->maxLen) { 345 while ((int)(bd->inbufBitCount) < hufGroup->maxLen) {
375 if (bd->inbufPos == bd->inbufCount) { 346 if (bd->inbufPos == bd->inbufCount) {
376 j = get_bits(bd, hufGroup->maxLen); 347 j = get_bits(bd, hufGroup->maxLen);
@@ -385,13 +356,11 @@ static int get_next_block(bunzip_data *bd)
385 got_huff_bits: 356 got_huff_bits:
386 357
387 /* Figure how how many bits are in next symbol and unget extras */ 358 /* Figure how how many bits are in next symbol and unget extras */
388
389 i = hufGroup->minLen; 359 i = hufGroup->minLen;
390 while ((unsigned)j > limit[i]) ++i; 360 while (j > limit[i]) ++i;
391 bd->inbufBitCount += (hufGroup->maxLen - i); 361 bd->inbufBitCount += (hufGroup->maxLen - i);
392 362
393 /* Huffman decode value to get nextSym (with bounds checking) */ 363 /* Huffman decode value to get nextSym (with bounds checking) */
394
395 if (i > hufGroup->maxLen) 364 if (i > hufGroup->maxLen)
396 return RETVAL_DATA_ERROR; 365 return RETVAL_DATA_ERROR;
397 j = (j >> (hufGroup->maxLen - i)) - base[i]; 366 j = (j >> (hufGroup->maxLen - i)) - base[i];
@@ -403,11 +372,9 @@ static int get_next_block(bunzip_data *bd)
403 byte, or a repeated run of the most recent literal byte. First, 372 byte, or a repeated run of the most recent literal byte. First,
404 check if nextSym indicates a repeated run, and if so loop collecting 373 check if nextSym indicates a repeated run, and if so loop collecting
405 how many times to repeat the last literal. */ 374 how many times to repeat the last literal. */
406
407 if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ 375 if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
408 376
409 /* If this is the start of a new run, zero out counter */ 377 /* If this is the start of a new run, zero out counter */
410
411 if (!runPos) { 378 if (!runPos) {
412 runPos = 1; 379 runPos = 1;
413 t = 0; 380 t = 0;
@@ -420,7 +387,6 @@ static int get_next_block(bunzip_data *bd)
420 the basic or 0/1 method (except all bits 0, which would use no 387 the basic or 0/1 method (except all bits 0, which would use no
421 symbols, but a run of length 0 doesn't mean anything in this 388 symbols, but a run of length 0 doesn't mean anything in this
422 context). Thus space is saved. */ 389 context). Thus space is saved. */
423
424 t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ 390 t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
425 if (runPos < dbufSize) runPos <<= 1; 391 if (runPos < dbufSize) runPos <<= 1;
426 goto end_of_huffman_loop; 392 goto end_of_huffman_loop;
@@ -430,7 +396,6 @@ static int get_next_block(bunzip_data *bd)
430 how many times to repeat the last literal, so append that many 396 how many times to repeat the last literal, so append that many
431 copies to our buffer of decoded symbols (dbuf) now. (The last 397 copies to our buffer of decoded symbols (dbuf) now. (The last
432 literal used is the one at the head of the mtfSymbol array.) */ 398 literal used is the one at the head of the mtfSymbol array.) */
433
434 if (runPos) { 399 if (runPos) {
435 runPos = 0; 400 runPos = 0;
436 if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; 401 if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR;
@@ -441,7 +406,6 @@ static int get_next_block(bunzip_data *bd)
441 } 406 }
442 407
443 /* Is this the terminating symbol? */ 408 /* Is this the terminating symbol? */
444
445 if (nextSym > symTotal) break; 409 if (nextSym > symTotal) break;
446 410
447 /* At this point, nextSym indicates a new literal character. Subtract 411 /* At this point, nextSym indicates a new literal character. Subtract
@@ -451,7 +415,6 @@ static int get_next_block(bunzip_data *bd)
451 first symbol in the mtf array, position 0, would have been handled 415 first symbol in the mtf array, position 0, would have been handled
452 as part of a run above. Therefore 1 unused mtf position minus 416 as part of a run above. Therefore 1 unused mtf position minus
453 2 non-literal nextSym values equals -1.) */ 417 2 non-literal nextSym values equals -1.) */
454
455 if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; 418 if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR;
456 i = nextSym - 1; 419 i = nextSym - 1;
457 uc = mtfSymbol[i]; 420 uc = mtfSymbol[i];
@@ -460,7 +423,6 @@ static int get_next_block(bunzip_data *bd)
460 * small number of symbols, and are bound by 256 in any case, using 423 * small number of symbols, and are bound by 256 in any case, using
461 * memmove here would typically be bigger and slower due to function 424 * memmove here would typically be bigger and slower due to function
462 * call overhead and other assorted setup costs. */ 425 * call overhead and other assorted setup costs. */
463
464 do { 426 do {
465 mtfSymbol[i] = mtfSymbol[i-1]; 427 mtfSymbol[i] = mtfSymbol[i-1];
466 } while (--i); 428 } while (--i);
@@ -468,13 +430,11 @@ static int get_next_block(bunzip_data *bd)
468 uc = symToByte[uc]; 430 uc = symToByte[uc];
469 431
470 /* We have our literal byte. Save it into dbuf. */ 432 /* We have our literal byte. Save it into dbuf. */
471
472 byteCount[uc]++; 433 byteCount[uc]++;
473 dbuf[dbufCount++] = (unsigned)uc; 434 dbuf[dbufCount++] = (unsigned)uc;
474 435
475 /* Skip group initialization if we're not done with this group. Done 436 /* Skip group initialization if we're not done with this group. Done
476 * this way to avoid compiler warning. */ 437 * this way to avoid compiler warning. */
477
478 end_of_huffman_loop: 438 end_of_huffman_loop:
479 if (symCount--) goto continue_this_group; 439 if (symCount--) goto continue_this_group;
480 } 440 }
@@ -487,7 +447,6 @@ static int get_next_block(bunzip_data *bd)
487 */ 447 */
488 448
489 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ 449 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
490
491 j = 0; 450 j = 0;
492 for (i = 0; i < 256; i++) { 451 for (i = 0; i < 256; i++) {
493 k = j + byteCount[i]; 452 k = j + byteCount[i];
@@ -496,7 +455,6 @@ static int get_next_block(bunzip_data *bd)
496 } 455 }
497 456
498 /* Figure out what order dbuf would be in if we sorted it. */ 457 /* Figure out what order dbuf would be in if we sorted it. */
499
500 for (i = 0; i < dbufCount; i++) { 458 for (i = 0; i < dbufCount; i++) {
501 uc = (unsigned char)(dbuf[i] & 0xff); 459 uc = (unsigned char)(dbuf[i] & 0xff);
502 dbuf[byteCount[uc]] |= (i << 8); 460 dbuf[byteCount[uc]] |= (i << 8);
@@ -506,7 +464,6 @@ static int get_next_block(bunzip_data *bd)
506 /* Decode first byte by hand to initialize "previous" byte. Note that it 464 /* Decode first byte by hand to initialize "previous" byte. Note that it
507 doesn't get output, and if the first three characters are identical 465 doesn't get output, and if the first three characters are identical
508 it doesn't qualify as a run (hence writeRunCountdown=5). */ 466 it doesn't qualify as a run (hence writeRunCountdown=5). */
509
510 if (dbufCount) { 467 if (dbufCount) {
511 if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; 468 if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
512 bd->writePos = dbuf[origPtr]; 469 bd->writePos = dbuf[origPtr];
@@ -525,7 +482,6 @@ static int get_next_block(bunzip_data *bd)
525 error (all errors are negative numbers). If out_fd!=-1, outbuf and len 482 error (all errors are negative numbers). If out_fd!=-1, outbuf and len
526 are ignored, data is written to out_fd and return is RETVAL_OK or error. 483 are ignored, data is written to out_fd and return is RETVAL_OK or error.
527*/ 484*/
528
529int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) 485int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
530{ 486{
531 const unsigned *dbuf; 487 const unsigned *dbuf;
@@ -542,19 +498,15 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
542 /* We will always have pending decoded data to write into the output 498 /* We will always have pending decoded data to write into the output
543 buffer unless this is the very first call (in which case we haven't 499 buffer unless this is the very first call (in which case we haven't
544 Huffman-decoded a block into the intermediate buffer yet). */ 500 Huffman-decoded a block into the intermediate buffer yet). */
545
546 if (bd->writeCopies) { 501 if (bd->writeCopies) {
547 502
548 /* Inside the loop, writeCopies means extra copies (beyond 1) */ 503 /* Inside the loop, writeCopies means extra copies (beyond 1) */
549
550 --bd->writeCopies; 504 --bd->writeCopies;
551 505
552 /* Loop outputting bytes */ 506 /* Loop outputting bytes */
553
554 for (;;) { 507 for (;;) {
555 508
556 /* If the output buffer is full, snapshot state and return */ 509 /* If the output buffer is full, snapshot state and return */
557
558 if (gotcount >= len) { 510 if (gotcount >= len) {
559 bd->writePos = pos; 511 bd->writePos = pos;
560 bd->writeCurrent = current; 512 bd->writeCurrent = current;
@@ -563,13 +515,11 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
563 } 515 }
564 516
565 /* Write next byte into output buffer, updating CRC */ 517 /* Write next byte into output buffer, updating CRC */
566
567 outbuf[gotcount++] = current; 518 outbuf[gotcount++] = current;
568 bd->writeCRC = (bd->writeCRC << 8) 519 bd->writeCRC = (bd->writeCRC << 8)
569 ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; 520 ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current];
570 521
571 /* Loop now if we're outputting multiple copies of this byte */ 522 /* Loop now if we're outputting multiple copies of this byte */
572
573 if (bd->writeCopies) { 523 if (bd->writeCopies) {
574 --bd->writeCopies; 524 --bd->writeCopies;
575 continue; 525 continue;
@@ -585,35 +535,29 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
585 /* After 3 consecutive copies of the same byte, the 4th 535 /* After 3 consecutive copies of the same byte, the 4th
586 * is a repeat count. We count down from 4 instead 536 * is a repeat count. We count down from 4 instead
587 * of counting up because testing for non-zero is faster */ 537 * of counting up because testing for non-zero is faster */
588
589 if (--bd->writeRunCountdown) { 538 if (--bd->writeRunCountdown) {
590 if (current != previous) 539 if (current != previous)
591 bd->writeRunCountdown = 4; 540 bd->writeRunCountdown = 4;
592 } else { 541 } else {
593 542
594 /* We have a repeated run, this byte indicates the count */ 543 /* We have a repeated run, this byte indicates the count */
595
596 bd->writeCopies = current; 544 bd->writeCopies = current;
597 current = previous; 545 current = previous;
598 bd->writeRunCountdown = 5; 546 bd->writeRunCountdown = 5;
599 547
600 /* Sometimes there are just 3 bytes (run length 0) */ 548 /* Sometimes there are just 3 bytes (run length 0) */
601
602 if (!bd->writeCopies) goto decode_next_byte; 549 if (!bd->writeCopies) goto decode_next_byte;
603 550
604 /* Subtract the 1 copy we'd output anyway to get extras */ 551 /* Subtract the 1 copy we'd output anyway to get extras */
605
606 --bd->writeCopies; 552 --bd->writeCopies;
607 } 553 }
608 } 554 }
609 555
610 /* Decompression of this block completed successfully */ 556 /* Decompression of this block completed successfully */
611
612 bd->writeCRC = ~bd->writeCRC; 557 bd->writeCRC = ~bd->writeCRC;
613 bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; 558 bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC;
614 559
615 /* If this block had a CRC error, force file level CRC error. */ 560 /* If this block had a CRC error, force file level CRC error. */
616
617 if (bd->writeCRC != bd->headerCRC) { 561 if (bd->writeCRC != bd->headerCRC) {
618 bd->totalCRC = bd->headerCRC + 1; 562 bd->totalCRC = bd->headerCRC + 1;
619 return RETVAL_LAST_BLOCK; 563 return RETVAL_LAST_BLOCK;
@@ -622,7 +566,6 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
622 566
623 /* Refill the intermediate buffer by Huffman-decoding next block of input */ 567 /* Refill the intermediate buffer by Huffman-decoding next block of input */
624 /* (previous is just a convenient unused temp variable here) */ 568 /* (previous is just a convenient unused temp variable here) */
625
626 previous = get_next_block(bd); 569 previous = get_next_block(bd);
627 if (previous) { 570 if (previous) {
628 bd->writeCount = previous; 571 bd->writeCount = previous;
@@ -634,7 +577,6 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
634 goto decode_next_byte; 577 goto decode_next_byte;
635} 578}
636 579
637
638/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain 580/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
639 a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are 581 a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
640 ignored, and data is read from file handle into temporary buffer. */ 582 ignored, and data is read from file handle into temporary buffer. */
@@ -642,7 +584,6 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
642/* Because bunzip2 is used for help text unpacking, and because bb_show_usage() 584/* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
643 should work for NOFORK applets too, we must be extremely careful to not leak 585 should work for NOFORK applets too, we must be extremely careful to not leak
644 any allocations! */ 586 any allocations! */
645
646int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, 587int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
647 int len) 588 int len)
648{ 589{
@@ -653,16 +594,13 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *in
653 }; 594 };
654 595
655 /* Figure out how much data to allocate */ 596 /* Figure out how much data to allocate */
656
657 i = sizeof(bunzip_data); 597 i = sizeof(bunzip_data);
658 if (in_fd != -1) i += IOBUF_SIZE; 598 if (in_fd != -1) i += IOBUF_SIZE;
659 599
660 /* Allocate bunzip_data. Most fields initialize to zero. */ 600 /* Allocate bunzip_data. Most fields initialize to zero. */
661
662 bd = *bdp = xzalloc(i); 601 bd = *bdp = xzalloc(i);
663 602
664 /* Setup input buffer */ 603 /* Setup input buffer */
665
666 bd->in_fd = in_fd; 604 bd->in_fd = in_fd;
667 if (-1 == in_fd) { 605 if (-1 == in_fd) {
668 /* in this case, bd->inbuf is read-only */ 606 /* in this case, bd->inbuf is read-only */
@@ -672,22 +610,18 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *in
672 bd->inbuf = (unsigned char *)(bd + 1); 610 bd->inbuf = (unsigned char *)(bd + 1);
673 611
674 /* Init the CRC32 table (big endian) */ 612 /* Init the CRC32 table (big endian) */
675
676 crc32_filltable(bd->crc32Table, 1); 613 crc32_filltable(bd->crc32Table, 1);
677 614
678 /* Setup for I/O error handling via longjmp */ 615 /* Setup for I/O error handling via longjmp */
679
680 i = setjmp(bd->jmpbuf); 616 i = setjmp(bd->jmpbuf);
681 if (i) return i; 617 if (i) return i;
682 618
683 /* Ensure that file starts with "BZh['1'-'9']." */ 619 /* Ensure that file starts with "BZh['1'-'9']." */
684
685 i = get_bits(bd, 32); 620 i = get_bits(bd, 32);
686 if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; 621 if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA;
687 622
688 /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of 623 /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of
689 uncompressed data. Allocate intermediate buffer for block. */ 624 uncompressed data. Allocate intermediate buffer for block. */
690
691 bd->dbufSize = 100000 * (i - BZh0); 625 bd->dbufSize = 100000 * (i - BZh0);
692 626
693 /* Cannot use xmalloc - may leak bd in NOFORK case! */ 627 /* Cannot use xmalloc - may leak bd in NOFORK case! */
@@ -707,7 +641,6 @@ void FAST_FUNC dealloc_bunzip(bunzip_data *bd)
707 641
708 642
709/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ 643/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */
710
711USE_DESKTOP(long long) int FAST_FUNC 644USE_DESKTOP(long long) int FAST_FUNC
712unpack_bz2_stream(int src_fd, int dst_fd) 645unpack_bz2_stream(int src_fd, int dst_fd)
713{ 646{