diff options
author | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-30 00:54:10 +0200 |
---|---|---|
committer | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-30 00:54:10 +0200 |
commit | 0c576975c87c543522ab31566139c078429dff38 (patch) | |
tree | 5d9b833809c3c88f3e626cd8b36b57b41f491205 | |
parent | 5d49b72f1a3a362957171610b722ca8ec3f49734 (diff) | |
download | busybox-w32-0c576975c87c543522ab31566139c078429dff38.tar.gz busybox-w32-0c576975c87c543522ab31566139c078429dff38.tar.bz2 busybox-w32-0c576975c87c543522ab31566139c078429dff38.zip |
decompress_bunzip2: code shrink ~10 bytes
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
-rw-r--r-- | archival/libunarchive/decompress_bunzip2.c | 173 |
1 files changed, 99 insertions, 74 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c index 2fa0f898a..2d4867220 100644 --- a/archival/libunarchive/decompress_bunzip2.c +++ b/archival/libunarchive/decompress_bunzip2.c | |||
@@ -151,8 +151,8 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted) | |||
151 | static int get_next_block(bunzip_data *bd) | 151 | static int get_next_block(bunzip_data *bd) |
152 | { | 152 | { |
153 | struct group_data *hufGroup; | 153 | struct group_data *hufGroup; |
154 | int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, | 154 | int dbufCount, dbufSize, groupCount, *base, *limit, selector, |
155 | i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; | 155 | i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; |
156 | uint8_t uc, symToByte[256], mtfSymbol[256], *selectors; | 156 | uint8_t uc, symToByte[256], mtfSymbol[256], *selectors; |
157 | uint32_t *dbuf; | 157 | uint32_t *dbuf; |
158 | unsigned origPtr; | 158 | unsigned origPtr; |
@@ -161,9 +161,12 @@ static int get_next_block(bunzip_data *bd) | |||
161 | dbufSize = bd->dbufSize; | 161 | dbufSize = bd->dbufSize; |
162 | selectors = bd->selectors; | 162 | selectors = bd->selectors; |
163 | 163 | ||
164 | /* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */ | ||
165 | #if 0 | ||
164 | /* Reset longjmp I/O error handling */ | 166 | /* Reset longjmp I/O error handling */ |
165 | i = setjmp(bd->jmpbuf); | 167 | i = setjmp(bd->jmpbuf); |
166 | if (i) return i; | 168 | if (i) return i; |
169 | #endif | ||
167 | 170 | ||
168 | /* Read in header signature and CRC, then validate signature. | 171 | /* Read in header signature and CRC, then validate signature. |
169 | (last block signature means CRC is for whole file, return now) */ | 172 | (last block signature means CRC is for whole file, return now) */ |
@@ -185,16 +188,23 @@ static int get_next_block(bunzip_data *bd) | |||
185 | symbols to deal with, and writes a sparse bitfield indicating which | 188 | symbols to deal with, and writes a sparse bitfield indicating which |
186 | values were present. We make a translation table to convert the symbols | 189 | values were present. We make a translation table to convert the symbols |
187 | back to the corresponding bytes. */ | 190 | back to the corresponding bytes. */ |
188 | t = get_bits(bd, 16); | ||
189 | symTotal = 0; | 191 | symTotal = 0; |
190 | for (i = 0; i < 16; i++) { | 192 | i = 0; |
191 | if (t & (1 << (15-i))) { | 193 | t = get_bits(bd, 16); |
192 | k = get_bits(bd, 16); | 194 | do { |
193 | for (j = 0; j < 16; j++) | 195 | if (t & (1 << 15)) { |
194 | if (k & (1 << (15-j))) | 196 | unsigned inner_map = get_bits(bd, 16); |
195 | symToByte[symTotal++] = (16*i) + j; | 197 | do { |
198 | if (inner_map & (1 << 15)) | ||
199 | symToByte[symTotal++] = i; | ||
200 | inner_map <<= 1; | ||
201 | i++; | ||
202 | } while (i & 15); | ||
203 | i -= 16; | ||
196 | } | 204 | } |
197 | } | 205 | t <<= 1; |
206 | i += 16; | ||
207 | } while (i < 256); | ||
198 | 208 | ||
199 | /* How many different Huffman coding groups does this block use? */ | 209 | /* How many different Huffman coding groups does this block use? */ |
200 | groupCount = get_bits(bd, 3); | 210 | groupCount = get_bits(bd, 3); |
@@ -205,20 +215,24 @@ static int get_next_block(bunzip_data *bd) | |||
205 | group. Read in the group selector list, which is stored as MTF encoded | 215 | group. Read in the group selector list, which is stored as MTF encoded |
206 | bit runs. (MTF=Move To Front, as each value is used it's moved to the | 216 | bit runs. (MTF=Move To Front, as each value is used it's moved to the |
207 | start of the list.) */ | 217 | start of the list.) */ |
218 | for (i = 0; i < groupCount; i++) | ||
219 | mtfSymbol[i] = i; | ||
208 | nSelectors = get_bits(bd, 15); | 220 | nSelectors = get_bits(bd, 15); |
209 | if (!nSelectors) return RETVAL_DATA_ERROR; | 221 | if (!nSelectors) |
210 | for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; | 222 | return RETVAL_DATA_ERROR; |
211 | for (i = 0; i < nSelectors; i++) { | 223 | for (i = 0; i < nSelectors; i++) { |
212 | 224 | uint8_t tmp_byte; | |
213 | /* Get next value */ | 225 | /* Get next value */ |
214 | for (j = 0; get_bits(bd, 1); j++) | 226 | int n = 0; |
215 | if (j >= groupCount) return RETVAL_DATA_ERROR; | 227 | while (get_bits(bd, 1)) { |
216 | 228 | if (n >= groupCount) return RETVAL_DATA_ERROR; | |
229 | n++; | ||
230 | } | ||
217 | /* Decode MTF to get the next selector */ | 231 | /* Decode MTF to get the next selector */ |
218 | uc = mtfSymbol[j]; | 232 | tmp_byte = mtfSymbol[n]; |
219 | for (; j; j--) | 233 | while (--n >= 0) |
220 | mtfSymbol[j] = mtfSymbol[j-1]; | 234 | mtfSymbol[n + 1] = mtfSymbol[n]; |
221 | mtfSymbol[0] = selectors[i] = uc; | 235 | mtfSymbol[0] = selectors[i] = tmp_byte; |
222 | } | 236 | } |
223 | 237 | ||
224 | /* Read the Huffman coding tables for each group, which code for symTotal | 238 | /* Read the Huffman coding tables for each group, which code for symTotal |
@@ -239,20 +253,21 @@ static int get_next_block(bunzip_data *bd) | |||
239 | t = get_bits(bd, 5) - 1; | 253 | t = get_bits(bd, 5) - 1; |
240 | for (i = 0; i < symCount; i++) { | 254 | for (i = 0; i < symCount; i++) { |
241 | for (;;) { | 255 | for (;;) { |
256 | int two_bits; | ||
242 | if ((unsigned)t > (MAX_HUFCODE_BITS-1)) | 257 | if ((unsigned)t > (MAX_HUFCODE_BITS-1)) |
243 | return RETVAL_DATA_ERROR; | 258 | return RETVAL_DATA_ERROR; |
244 | 259 | ||
245 | /* If first bit is 0, stop. Else second bit indicates whether | 260 | /* If first bit is 0, stop. Else second bit indicates whether |
246 | to increment or decrement the value. Optimization: grab 2 | 261 | to increment or decrement the value. Optimization: grab 2 |
247 | bits and unget the second if the first was 0. */ | 262 | bits and unget the second if the first was 0. */ |
248 | k = get_bits(bd, 2); | 263 | two_bits = get_bits(bd, 2); |
249 | if (k < 2) { | 264 | if (two_bits < 2) { |
250 | bd->inbufBitCount++; | 265 | bd->inbufBitCount++; |
251 | break; | 266 | break; |
252 | } | 267 | } |
253 | 268 | ||
254 | /* Add one if second bit 1, else subtract 1. Avoids if/else */ | 269 | /* Add one if second bit 1, else subtract 1. Avoids if/else */ |
255 | t += (((k+1) & 2) - 1); | 270 | t += (((two_bits+1) & 2) - 1); |
256 | } | 271 | } |
257 | 272 | ||
258 | /* Correct for the initial -1, to get the final symbol length */ | 273 | /* Correct for the initial -1, to get the final symbol length */ |
@@ -282,17 +297,18 @@ static int get_next_block(bunzip_data *bd) | |||
282 | 297 | ||
283 | /* Note that minLen can't be smaller than 1, so we adjust the base | 298 | /* Note that minLen can't be smaller than 1, so we adjust the base |
284 | and limit array pointers so we're not always wasting the first | 299 | and limit array pointers so we're not always wasting the first |
285 | entry. We do this again when using them (during symbol decoding).*/ | 300 | entry. We do this again when using them (during symbol decoding). */ |
286 | base = hufGroup->base - 1; | 301 | base = hufGroup->base - 1; |
287 | limit = hufGroup->limit - 1; | 302 | limit = hufGroup->limit - 1; |
288 | 303 | ||
289 | /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ | 304 | /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ |
290 | pp = 0; | 305 | pp = 0; |
291 | for (i = minLen; i <= maxLen; i++) { | 306 | for (i = minLen; i <= maxLen; i++) { |
307 | int k; | ||
292 | temp[i] = limit[i] = 0; | 308 | temp[i] = limit[i] = 0; |
293 | for (t = 0; t < symCount; t++) | 309 | for (k = 0; k < symCount; k++) |
294 | if (length[t] == i) | 310 | if (length[k] == i) |
295 | hufGroup->permute[pp++] = t; | 311 | hufGroup->permute[pp++] = k; |
296 | } | 312 | } |
297 | 313 | ||
298 | /* Count symbols coded for at each bit length */ | 314 | /* Count symbols coded for at each bit length */ |
@@ -305,8 +321,10 @@ static int get_next_block(bunzip_data *bd) | |||
305 | * base[] (number of symbols to ignore at each bit length, which is | 321 | * base[] (number of symbols to ignore at each bit length, which is |
306 | * limit minus the cumulative count of symbols coded for already). */ | 322 | * limit minus the cumulative count of symbols coded for already). */ |
307 | pp = t = 0; | 323 | pp = t = 0; |
308 | for (i = minLen; i < maxLen; i++) { | 324 | for (i = minLen; i < maxLen;) { |
309 | pp += temp[i]; | 325 | unsigned temp_i = temp[i]; |
326 | |||
327 | pp += temp_i; | ||
310 | 328 | ||
311 | /* We read the largest possible symbol size and then unget bits | 329 | /* We read the largest possible symbol size and then unget bits |
312 | after determining how many we need, and those extra bits could | 330 | after determining how many we need, and those extra bits could |
@@ -316,8 +334,8 @@ static int get_next_block(bunzip_data *bd) | |||
316 | don't affect the value>limit[length] comparison. */ | 334 | don't affect the value>limit[length] comparison. */ |
317 | limit[i] = (pp << (maxLen - i)) - 1; | 335 | limit[i] = (pp << (maxLen - i)) - 1; |
318 | pp <<= 1; | 336 | pp <<= 1; |
319 | t += temp[i]; | 337 | t += temp_i; |
320 | base[i+1] = pp - t; | 338 | base[++i] = pp - t; |
321 | } | 339 | } |
322 | limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ | 340 | limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ |
323 | limit[maxLen] = pp + temp[maxLen] - 1; | 341 | limit[maxLen] = pp + temp[maxLen] - 1; |
@@ -329,9 +347,9 @@ static int get_next_block(bunzip_data *bd) | |||
329 | and run length encoding, saving the result into dbuf[dbufCount++] = uc */ | 347 | and run length encoding, saving the result into dbuf[dbufCount++] = uc */ |
330 | 348 | ||
331 | /* Initialize symbol occurrence counters and symbol Move To Front table */ | 349 | /* Initialize symbol occurrence counters and symbol Move To Front table */ |
332 | memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ | 350 | /*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */ |
333 | for (i = 0; i < 256; i++) { | 351 | for (i = 0; i < 256; i++) { |
334 | //byteCount[i] = 0; | 352 | byteCount[i] = 0; |
335 | mtfSymbol[i] = (uint8_t)i; | 353 | mtfSymbol[i] = (uint8_t)i; |
336 | } | 354 | } |
337 | 355 | ||
@@ -339,6 +357,7 @@ static int get_next_block(bunzip_data *bd) | |||
339 | 357 | ||
340 | runPos = dbufCount = selector = 0; | 358 | runPos = dbufCount = selector = 0; |
341 | for (;;) { | 359 | for (;;) { |
360 | int nextSym; | ||
342 | 361 | ||
343 | /* Fetch next Huffman coding group from list. */ | 362 | /* Fetch next Huffman coding group from list. */ |
344 | symCount = GROUP_SIZE - 1; | 363 | symCount = GROUP_SIZE - 1; |
@@ -346,44 +365,49 @@ static int get_next_block(bunzip_data *bd) | |||
346 | hufGroup = bd->groups + selectors[selector++]; | 365 | hufGroup = bd->groups + selectors[selector++]; |
347 | base = hufGroup->base - 1; | 366 | base = hufGroup->base - 1; |
348 | limit = hufGroup->limit - 1; | 367 | limit = hufGroup->limit - 1; |
349 | continue_this_group: | ||
350 | 368 | ||
369 | continue_this_group: | ||
351 | /* Read next Huffman-coded symbol. */ | 370 | /* Read next Huffman-coded symbol. */ |
352 | 371 | ||
353 | /* Note: It is far cheaper to read maxLen bits and back up than it is | 372 | /* Note: It is far cheaper to read maxLen bits and back up than it is |
354 | to read minLen bits and then an additional bit at a time, testing | 373 | to read minLen bits and then add additional bit at a time, testing |
355 | as we go. Because there is a trailing last block (with file CRC), | 374 | as we go. Because there is a trailing last block (with file CRC), |
356 | there is no danger of the overread causing an unexpected EOF for a | 375 | there is no danger of the overread causing an unexpected EOF for a |
357 | valid compressed file. As a further optimization, we do the read | 376 | valid compressed file. |
358 | inline (falling back to a call to get_bits if the buffer runs | ||
359 | dry). The following (up to got_huff_bits:) is equivalent to | ||
360 | j = get_bits(bd, hufGroup->maxLen); | ||
361 | */ | 377 | */ |
362 | while ((int)(bd->inbufBitCount) < hufGroup->maxLen) { | 378 | if (1) { |
363 | if (bd->inbufPos == bd->inbufCount) { | 379 | /* As a further optimization, we do the read inline |
364 | j = get_bits(bd, hufGroup->maxLen); | 380 | (falling back to a call to get_bits if the buffer runs dry). |
365 | goto got_huff_bits; | 381 | */ |
366 | } | 382 | int new_cnt; |
367 | bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; | 383 | while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) { |
368 | bd->inbufBitCount += 8; | 384 | /* bd->inbufBitCount < hufGroup->maxLen */ |
369 | }; | 385 | if (bd->inbufPos == bd->inbufCount) { |
370 | bd->inbufBitCount -= hufGroup->maxLen; | 386 | nextSym = get_bits(bd, hufGroup->maxLen); |
371 | j = (bd->inbufBits >> bd->inbufBitCount) & ((1 << hufGroup->maxLen) - 1); | 387 | goto got_huff_bits; |
372 | 388 | } | |
373 | got_huff_bits: | 389 | bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; |
374 | 390 | bd->inbufBitCount += 8; | |
375 | /* Figure how how many bits are in next symbol and unget extras */ | 391 | }; |
392 | bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */ | ||
393 | nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1); | ||
394 | got_huff_bits: ; | ||
395 | } else { /* unoptimized equivalent */ | ||
396 | nextSym = get_bits(bd, hufGroup->maxLen); | ||
397 | } | ||
398 | /* Figure how many bits are in next symbol and unget extras */ | ||
376 | i = hufGroup->minLen; | 399 | i = hufGroup->minLen; |
377 | while (j > limit[i]) ++i; | 400 | while (nextSym > limit[i]) ++i; |
378 | bd->inbufBitCount += (hufGroup->maxLen - i); | 401 | j = hufGroup->maxLen - i; |
402 | if (j < 0) | ||
403 | return RETVAL_DATA_ERROR; | ||
404 | bd->inbufBitCount += j; | ||
379 | 405 | ||
380 | /* Huffman decode value to get nextSym (with bounds checking) */ | 406 | /* Huffman decode value to get nextSym (with bounds checking) */ |
381 | if (i > hufGroup->maxLen) | 407 | nextSym = (nextSym >> j) - base[i]; |
408 | if ((unsigned)nextSym >= MAX_SYMBOLS) | ||
382 | return RETVAL_DATA_ERROR; | 409 | return RETVAL_DATA_ERROR; |
383 | j = (j >> (hufGroup->maxLen - i)) - base[i]; | 410 | nextSym = hufGroup->permute[nextSym]; |
384 | if ((unsigned)j >= MAX_SYMBOLS) | ||
385 | return RETVAL_DATA_ERROR; | ||
386 | nextSym = hufGroup->permute[j]; | ||
387 | 411 | ||
388 | /* We have now decoded the symbol, which indicates either a new literal | 412 | /* We have now decoded the symbol, which indicates either a new literal |
389 | byte, or a repeated run of the most recent literal byte. First, | 413 | byte, or a repeated run of the most recent literal byte. First, |
@@ -392,7 +416,7 @@ static int get_next_block(bunzip_data *bd) | |||
392 | if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ | 416 | if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ |
393 | 417 | ||
394 | /* If this is the start of a new run, zero out counter */ | 418 | /* If this is the start of a new run, zero out counter */ |
395 | if (!runPos) { | 419 | if (runPos == 0) { |
396 | runPos = 1; | 420 | runPos = 1; |
397 | t = 0; | 421 | t = 0; |
398 | } | 422 | } |
@@ -413,13 +437,13 @@ static int get_next_block(bunzip_data *bd) | |||
413 | how many times to repeat the last literal, so append that many | 437 | how many times to repeat the last literal, so append that many |
414 | copies to our buffer of decoded symbols (dbuf) now. (The last | 438 | copies to our buffer of decoded symbols (dbuf) now. (The last |
415 | literal used is the one at the head of the mtfSymbol array.) */ | 439 | literal used is the one at the head of the mtfSymbol array.) */ |
416 | if (runPos) { | 440 | if (runPos != 0) { |
417 | runPos = 0; | 441 | uint8_t tmp_byte; |
418 | if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; | 442 | if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; |
419 | 443 | tmp_byte = symToByte[mtfSymbol[0]]; | |
420 | uc = symToByte[mtfSymbol[0]]; | 444 | byteCount[tmp_byte] += t; |
421 | byteCount[uc] += t; | 445 | while (--t >= 0) dbuf[dbufCount++] = tmp_byte; |
422 | while (t--) dbuf[dbufCount++] = uc; | 446 | runPos = 0; |
423 | } | 447 | } |
424 | 448 | ||
425 | /* Is this the terminating symbol? */ | 449 | /* Is this the terminating symbol? */ |
@@ -448,12 +472,12 @@ static int get_next_block(bunzip_data *bd) | |||
448 | 472 | ||
449 | /* We have our literal byte. Save it into dbuf. */ | 473 | /* We have our literal byte. Save it into dbuf. */ |
450 | byteCount[uc]++; | 474 | byteCount[uc]++; |
451 | dbuf[dbufCount++] = (unsigned)uc; | 475 | dbuf[dbufCount++] = (uint32_t)uc; |
452 | 476 | ||
453 | /* Skip group initialization if we're not done with this group. Done | 477 | /* Skip group initialization if we're not done with this group. Done |
454 | * this way to avoid compiler warning. */ | 478 | * this way to avoid compiler warning. */ |
455 | end_of_huffman_loop: | 479 | end_of_huffman_loop: |
456 | if (symCount--) goto continue_this_group; | 480 | if (--symCount >= 0) goto continue_this_group; |
457 | } | 481 | } |
458 | 482 | ||
459 | /* At this point, we've read all the Huffman-coded symbols (and repeated | 483 | /* At this point, we've read all the Huffman-coded symbols (and repeated |
@@ -466,16 +490,17 @@ static int get_next_block(bunzip_data *bd) | |||
466 | /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ | 490 | /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ |
467 | j = 0; | 491 | j = 0; |
468 | for (i = 0; i < 256; i++) { | 492 | for (i = 0; i < 256; i++) { |
469 | k = j + byteCount[i]; | 493 | int tmp_count = j + byteCount[i]; |
470 | byteCount[i] = j; | 494 | byteCount[i] = j; |
471 | j = k; | 495 | j = tmp_count; |
472 | } | 496 | } |
473 | 497 | ||
474 | /* Figure out what order dbuf would be in if we sorted it. */ | 498 | /* Figure out what order dbuf would be in if we sorted it. */ |
475 | for (i = 0; i < dbufCount; i++) { | 499 | for (i = 0; i < dbufCount; i++) { |
476 | uc = (uint8_t)dbuf[i]; | 500 | uint8_t tmp_byte = (uint8_t)dbuf[i]; |
477 | dbuf[byteCount[uc]] |= (i << 8); | 501 | int tmp_count = byteCount[tmp_byte]; |
478 | byteCount[uc]++; | 502 | dbuf[tmp_count] |= (i << 8); |
503 | byteCount[tmp_byte] = tmp_count + 1; | ||
479 | } | 504 | } |
480 | 505 | ||
481 | /* Decode first byte by hand to initialize "previous" byte. Note that it | 506 | /* Decode first byte by hand to initialize "previous" byte. Note that it |