summaryrefslogtreecommitdiff
path: root/archival
diff options
context:
space:
mode:
authorEric Andersen <andersen@codepoet.org>2003-10-18 01:59:46 +0000
committerEric Andersen <andersen@codepoet.org>2003-10-18 01:59:46 +0000
commit1acfb72e71a4af7e03da772cd721144b2899c96f (patch)
tree05f0f27bf3834cf1f9401e604bfa99ed3c7d0c6a /archival
parent0d6d88a2058d191c34d25a8709aca40311bb0c2e (diff)
downloadbusybox-w32-1acfb72e71a4af7e03da772cd721144b2899c96f.tar.gz
busybox-w32-1acfb72e71a4af7e03da772cd721144b2899c96f.tar.bz2
busybox-w32-1acfb72e71a4af7e03da772cd721144b2899c96f.zip
Manuel Novoa III writes:
Hello Rob, Here's a patch to your bunzip-3.c file. Nice work btw. One minor bug fix... checking for error return when read()ing. Some size/performance optimizations as well. One instance of memset() seems unnecssary. You might want to take a look. Anyway, on my machine, decompressing linux-2.6.0-test7.tar.bz2 to /dev/null gave the following times: bunzip-3.c bzcat (system) bunzip-3.c (patched) real 0m24.420s 0m22.725s 0m20.701s user 0m23.930s 0m22.170s 0m20.180s sys 0m0.070s 0m0.080s 0m0.140s Size of the patched version is comparable (slightly larger or smaller depending on compiler flags). Manuel
Diffstat (limited to 'archival')
-rw-r--r--archival/libunarchive/decompress_bunzip2.c65
1 files changed, 43 insertions, 22 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index 474186a2a..b4bcb0b49 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -15,6 +15,7 @@
15#include <stdlib.h> 15#include <stdlib.h>
16#include <string.h> 16#include <string.h>
17#include <unistd.h> 17#include <unistd.h>
18#include <limits.h>
18 19
19/* Constants for huffman coding */ 20/* Constants for huffman coding */
20#define MAX_GROUPS 6 21#define MAX_GROUPS 6
@@ -37,13 +38,14 @@
37/* Other housekeeping constants */ 38/* Other housekeeping constants */
38#define IOBUF_SIZE 4096 39#define IOBUF_SIZE 4096
39 40
40char *bunzip_errors[]={NULL,"Bad file checksum","Not bzip data", 41static char * const bunzip_errors[]={NULL,"Bad file checksum","Not bzip data",
41 "Unexpected input EOF","Unexpected output EOF","Data error", 42 "Unexpected input EOF","Unexpected output EOF","Data error",
42 "Out of memory","Obsolete (pre 0.9.5) bzip format not supported."}; 43 "Out of memory","Obsolete (pre 0.9.5) bzip format not supported."};
43 44
44/* This is what we know about each huffman coding group */ 45/* This is what we know about each huffman coding group */
45struct group_data { 46struct group_data {
46 int limit[MAX_HUFCODE_BITS],base[MAX_HUFCODE_BITS],permute[MAX_SYMBOLS]; 47 /* We have an extra slot at the end of limit[] for a sentinal value. */
48 int limit[MAX_HUFCODE_BITS+1],base[MAX_HUFCODE_BITS],permute[MAX_SYMBOLS];
47 char minLen, maxLen; 49 char minLen, maxLen;
48}; 50};
49 51
@@ -82,7 +84,7 @@ static unsigned int get_bits(bunzip_data *bd, char bits_wanted)
82 while (bd->inbufBitCount<bits_wanted) { 84 while (bd->inbufBitCount<bits_wanted) {
83 /* If we need to read more data from file into byte buffer, do so */ 85 /* If we need to read more data from file into byte buffer, do so */
84 if(bd->inbufPos==bd->inbufCount) { 86 if(bd->inbufPos==bd->inbufCount) {
85 if(!(bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE))) 87 if((bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE)) <= 0)
86 longjmp(bd->jmpbuf,RETVAL_UNEXPECTED_INPUT_EOF); 88 longjmp(bd->jmpbuf,RETVAL_UNEXPECTED_INPUT_EOF);
87 bd->inbufPos=0; 89 bd->inbufPos=0;
88 } 90 }
@@ -104,6 +106,14 @@ static unsigned int get_bits(bunzip_data *bd, char bits_wanted)
104 return bits; 106 return bits;
105} 107}
106 108
109/* At certain times, it pays to have an optimized inline version of
110 * get_bits() which gets a single bit. */
111#define GET_A_BIT(bd) \
112 ((bd->inbufBitCount > 0) \
113 ? ((unsigned int)(((bd)->inbufBits >> --(bd)->inbufBitCount) & 1)) \
114 : get_bits((bd), 1))
115
116
107/* Decompress a block of text to into intermediate buffer */ 117/* Decompress a block of text to into intermediate buffer */
108 118
109extern int read_bunzip_data(bunzip_data *bd) 119extern int read_bunzip_data(bunzip_data *bd)
@@ -140,7 +150,10 @@ extern int read_bunzip_data(bunzip_data *bd)
140 values were present. We make a translation table to convert the symbols 150 values were present. We make a translation table to convert the symbols
141 back to the corresponding bytes. */ 151 back to the corresponding bytes. */
142 t=get_bits(bd, 16); 152 t=get_bits(bd, 16);
153#if 0
154 /* I don't believe this is necessary. Rob? */
143 memset(symToByte,0,256); 155 memset(symToByte,0,256);
156#endif
144 symTotal=0; 157 symTotal=0;
145 for (i=0;i<16;i++) { 158 for (i=0;i<16;i++) {
146 if(t&(1<<(15-i))) { 159 if(t&(1<<(15-i))) {
@@ -162,7 +175,13 @@ extern int read_bunzip_data(bunzip_data *bd)
162 for(j=0;get_bits(bd,1);j++) if (j>=groupCount) return RETVAL_DATA_ERROR; 175 for(j=0;get_bits(bd,1);j++) if (j>=groupCount) return RETVAL_DATA_ERROR;
163 /* Decode MTF to get the next selector */ 176 /* Decode MTF to get the next selector */
164 uc = mtfSymbol[j]; 177 uc = mtfSymbol[j];
165 memmove(mtfSymbol+1,mtfSymbol,j); 178 /* A very small amount of data to move, so memmove is overkill
179 * and bigger at least in my tests. */
180 k = j;
181 while (k) {
182 mtfSymbol[k] = mtfSymbol[k-1];
183 --k;
184 }
166 mtfSymbol[0]=selectors[i]=uc; 185 mtfSymbol[0]=selectors[i]=uc;
167 } 186 }
168 /* Read the huffman coding tables for each group, which code for symTotal 187 /* Read the huffman coding tables for each group, which code for symTotal
@@ -172,15 +191,15 @@ extern int read_bunzip_data(bunzip_data *bd)
172 unsigned char length[MAX_SYMBOLS],temp[MAX_HUFCODE_BITS+1]; 191 unsigned char length[MAX_SYMBOLS],temp[MAX_HUFCODE_BITS+1];
173 int minLen, maxLen, pp; 192 int minLen, maxLen, pp;
174 /* Read lengths */ 193 /* Read lengths */
175 t=get_bits(bd, 5); 194 t=get_bits(bd, 5) - 1; /* This lets us avoid a test in the loop. */
176 for (i = 0; i < symCount; i++) { 195 for (i = 0; i < symCount; i++) {
177 for(;;) { 196 for(;;) {
178 if (t < 1 || t > MAX_HUFCODE_BITS) return RETVAL_DATA_ERROR; 197 if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) return RETVAL_DATA_ERROR;
179 if(!get_bits(bd, 1)) break; 198 if(!get_bits(bd, 1)) break;
180 if(!get_bits(bd, 1)) t++; 199 /* We can avoid an if/else with a little arithmetic. */
181 else t--; 200 t += (1 - 2*get_bits(bd, 1)); /* 0 -> t++ ; 1 -> t-- */
182 } 201 }
183 length[i] = t; 202 length[i] = t + 1; /* Correct for the initial -1 adjustment. */
184 } 203 }
185 /* Find largest and smallest lengths in this group */ 204 /* Find largest and smallest lengths in this group */
186 minLen=maxLen=length[0]; 205 minLen=maxLen=length[0];
@@ -228,6 +247,7 @@ extern int read_bunzip_data(bunzip_data *bd)
228 pp<<=1; 247 pp<<=1;
229 base[i+1]=pp-(t+=temp[i]); 248 base[i+1]=pp-(t+=temp[i]);
230 } 249 }
250 limit[maxLen+1] = INT_MAX; /* Sentinal value for reading next sym. */
231 limit[maxLen]=pp+temp[maxLen]-1; 251 limit[maxLen]=pp+temp[maxLen]-1;
232 base[minLen]=0; 252 base[minLen]=0;
233 } 253 }
@@ -252,19 +272,15 @@ extern int read_bunzip_data(bunzip_data *bd)
252 /* Read next huffman-coded symbol */ 272 /* Read next huffman-coded symbol */
253 i = hufGroup->minLen; 273 i = hufGroup->minLen;
254 j=get_bits(bd, i); 274 j=get_bits(bd, i);
255 for(;;) { 275 while (j > limit[i]) { /* The sentinal allows us to avoid testing i. */
256 if (i > hufGroup->maxLen) return RETVAL_DATA_ERROR; 276 j = (j << 1) | GET_A_BIT(bd);
257 if (j <= limit[i]) break; 277 ++i;
258 i++;
259
260 j = (j << 1) | get_bits(bd,1);
261 } 278 }
262 /* Huffman decode nextSym (with bounds checking) */ 279 /* Huffman decode nextSym (with bounds checking) */
263 j-=base[i]; 280 if ((i > hufGroup->maxLen) || (((unsigned)(j-=base[i])) >= MAX_SYMBOLS)) return RETVAL_DATA_ERROR;
264 if (j < 0 || j >= MAX_SYMBOLS) return RETVAL_DATA_ERROR;
265 nextSym = hufGroup->permute[j]; 281 nextSym = hufGroup->permute[j];
266 /* If this is a repeated run, loop collecting data */ 282 /* If this is a repeated run, loop collecting data */
267 if (nextSym == SYMBOL_RUNA || nextSym == SYMBOL_RUNB) { 283 if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
268 /* If this is the start of a new run, zero out counter */ 284 /* If this is the start of a new run, zero out counter */
269 if(!runPos) { 285 if(!runPos) {
270 runPos = 1; 286 runPos = 1;
@@ -277,8 +293,7 @@ extern int read_bunzip_data(bunzip_data *bd)
277 the basic or 0/1 method (except all bits 0, which would use no 293 the basic or 0/1 method (except all bits 0, which would use no
278 symbols, but a run of length 0 doesn't mean anything in this 294 symbols, but a run of length 0 doesn't mean anything in this
279 context). Thus space is saved. */ 295 context). Thus space is saved. */
280 if (nextSym == SYMBOL_RUNA) t += runPos; 296 t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
281 else t += 2*runPos;
282 runPos <<= 1; 297 runPos <<= 1;
283 continue; 298 continue;
284 } 299 }
@@ -305,7 +320,13 @@ extern int read_bunzip_data(bunzip_data *bd)
305 if(dbufCount>=dbufSize) return RETVAL_DATA_ERROR; 320 if(dbufCount>=dbufSize) return RETVAL_DATA_ERROR;
306 i = nextSym - 1; 321 i = nextSym - 1;
307 uc = mtfSymbol[i]; 322 uc = mtfSymbol[i];
308 memmove(mtfSymbol+1,mtfSymbol,i); 323 /* Since we typically expect to move only a small number of symbols,
324 * and are bound by 256 in any case, using memmove here would
325 * typically be slower due to function call overhead and other
326 * assorted setup costs. */
327 do {
328 mtfSymbol[i] = mtfSymbol[i-1];
329 } while (--i);
309 mtfSymbol[0] = uc; 330 mtfSymbol[0] = uc;
310 uc=symToByte[uc]; 331 uc=symToByte[uc];
311 /* We have our literal byte. Save it into dbuf. */ 332 /* We have our literal byte. Save it into dbuf. */
@@ -319,7 +340,7 @@ extern int read_bunzip_data(bunzip_data *bd)
319 */ 340 */
320 341
321 /* Now we know what dbufCount is, do a better sanity check on origPtr. */ 342 /* Now we know what dbufCount is, do a better sanity check on origPtr. */
322 if (origPtr<0 || origPtr>=dbufCount) return RETVAL_DATA_ERROR; 343 if (((unsigned)origPtr)>=dbufCount) return RETVAL_DATA_ERROR;
323 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ 344 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
324 j=0; 345 j=0;
325 for(i=0;i<256;i++) { 346 for(i=0;i<256;i++) {