diff options
author | Denis Vlasenko <vda.linux@googlemail.com> | 2008-06-28 18:10:09 +0000 |
---|---|---|
committer | Denis Vlasenko <vda.linux@googlemail.com> | 2008-06-28 18:10:09 +0000 |
commit | 86d88c09909b1089bd2b43e896895c48ce33f1b2 (patch) | |
tree | 109dcfab6ad4a3d9ee1b48d0bb0c82d38f15af2a | |
parent | a60936da062fc569328cd643c460dcf215ed9966 (diff) | |
download | busybox-w32-86d88c09909b1089bd2b43e896895c48ce33f1b2.tar.gz busybox-w32-86d88c09909b1089bd2b43e896895c48ce33f1b2.tar.bz2 busybox-w32-86d88c09909b1089bd2b43e896895c48ce33f1b2.zip |
bunzip2: make proper fix for the problem "fixed" in rev. 22521
Thanks for Rob Landley <rob@landley.net>
-rw-r--r-- | archival/libunarchive/decompress_bunzip2.c | 93 |
1 files changed, 13 insertions, 80 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c index 106a08b54..654dc28a9 100644 --- a/archival/libunarchive/decompress_bunzip2.c +++ b/archival/libunarchive/decompress_bunzip2.c | |||
@@ -66,7 +66,6 @@ struct group_data { | |||
66 | * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER | 66 | * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER |
67 | * and moved it (inbufBitCount) to offset 0. | 67 | * and moved it (inbufBitCount) to offset 0. |
68 | */ | 68 | */ |
69 | |||
70 | struct bunzip_data { | 69 | struct bunzip_data { |
71 | /* I/O tracking data (file handles, buffers, positions, etc.) */ | 70 | /* I/O tracking data (file handles, buffers, positions, etc.) */ |
72 | unsigned inbufBitCount, inbufBits; | 71 | unsigned inbufBitCount, inbufBits; |
@@ -102,11 +101,9 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted) | |||
102 | 101 | ||
103 | /* If we need to get more data from the byte buffer, do so. (Loop getting | 102 | /* If we need to get more data from the byte buffer, do so. (Loop getting |
104 | one byte at a time to enforce endianness and avoid unaligned access.) */ | 103 | one byte at a time to enforce endianness and avoid unaligned access.) */ |
105 | |||
106 | while ((int)(bd->inbufBitCount) < bits_wanted) { | 104 | while ((int)(bd->inbufBitCount) < bits_wanted) { |
107 | 105 | ||
108 | /* If we need to read more data from file into byte buffer, do so */ | 106 | /* If we need to read more data from file into byte buffer, do so */ |
109 | |||
110 | if (bd->inbufPos == bd->inbufCount) { | 107 | if (bd->inbufPos == bd->inbufCount) { |
111 | /* if "no input fd" case: in_fd == -1, read fails, we jump */ | 108 | /* if "no input fd" case: in_fd == -1, read fails, we jump */ |
112 | bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); | 109 | bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); |
@@ -116,7 +113,6 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted) | |||
116 | } | 113 | } |
117 | 114 | ||
118 | /* Avoid 32-bit overflow (dump bit buffer to top of output) */ | 115 | /* Avoid 32-bit overflow (dump bit buffer to top of output) */ |
119 | |||
120 | if (bd->inbufBitCount >= 24) { | 116 | if (bd->inbufBitCount >= 24) { |
121 | bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); | 117 | bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); |
122 | bits_wanted -= bd->inbufBitCount; | 118 | bits_wanted -= bd->inbufBitCount; |
@@ -125,13 +121,11 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted) | |||
125 | } | 121 | } |
126 | 122 | ||
127 | /* Grab next 8 bits of input from buffer. */ | 123 | /* Grab next 8 bits of input from buffer. */ |
128 | |||
129 | bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; | 124 | bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; |
130 | bd->inbufBitCount += 8; | 125 | bd->inbufBitCount += 8; |
131 | } | 126 | } |
132 | 127 | ||
133 | /* Calculate result */ | 128 | /* Calculate result */ |
134 | |||
135 | bd->inbufBitCount -= bits_wanted; | 129 | bd->inbufBitCount -= bits_wanted; |
136 | bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); | 130 | bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); |
137 | 131 | ||
@@ -139,29 +133,24 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted) | |||
139 | } | 133 | } |
140 | 134 | ||
141 | /* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ | 135 | /* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ |
142 | |||
143 | static int get_next_block(bunzip_data *bd) | 136 | static int get_next_block(bunzip_data *bd) |
144 | { | 137 | { |
145 | struct group_data *hufGroup; | 138 | struct group_data *hufGroup; |
146 | int dbufCount, nextSym, dbufSize, groupCount, *base, selector, | 139 | int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, |
147 | i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; | 140 | i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; |
148 | unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; | 141 | unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; |
149 | /* limit was int* but was changed to unsigned* - grep for '[x]' | 142 | unsigned *dbuf, origPtr; |
150 | * in comment to see where it is important. -- vda */ | ||
151 | unsigned *dbuf, *limit, origPtr; | ||
152 | 143 | ||
153 | dbuf = bd->dbuf; | 144 | dbuf = bd->dbuf; |
154 | dbufSize = bd->dbufSize; | 145 | dbufSize = bd->dbufSize; |
155 | selectors = bd->selectors; | 146 | selectors = bd->selectors; |
156 | 147 | ||
157 | /* Reset longjmp I/O error handling */ | 148 | /* Reset longjmp I/O error handling */ |
158 | |||
159 | i = setjmp(bd->jmpbuf); | 149 | i = setjmp(bd->jmpbuf); |
160 | if (i) return i; | 150 | if (i) return i; |
161 | 151 | ||
162 | /* Read in header signature and CRC, then validate signature. | 152 | /* Read in header signature and CRC, then validate signature. |
163 | (last block signature means CRC is for whole file, return now) */ | 153 | (last block signature means CRC is for whole file, return now) */ |
164 | |||
165 | i = get_bits(bd, 24); | 154 | i = get_bits(bd, 24); |
166 | j = get_bits(bd, 24); | 155 | j = get_bits(bd, 24); |
167 | bd->headerCRC = get_bits(bd, 32); | 156 | bd->headerCRC = get_bits(bd, 32); |
@@ -171,7 +160,6 @@ static int get_next_block(bunzip_data *bd) | |||
171 | /* We can add support for blockRandomised if anybody complains. There was | 160 | /* We can add support for blockRandomised if anybody complains. There was |
172 | some code for this in busybox 1.0.0-pre3, but nobody ever noticed that | 161 | some code for this in busybox 1.0.0-pre3, but nobody ever noticed that |
173 | it didn't actually work. */ | 162 | it didn't actually work. */ |
174 | |||
175 | if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; | 163 | if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; |
176 | origPtr = get_bits(bd, 24); | 164 | origPtr = get_bits(bd, 24); |
177 | if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; | 165 | if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; |
@@ -181,7 +169,6 @@ static int get_next_block(bunzip_data *bd) | |||
181 | symbols to deal with, and writes a sparse bitfield indicating which | 169 | symbols to deal with, and writes a sparse bitfield indicating which |
182 | values were present. We make a translation table to convert the symbols | 170 | values were present. We make a translation table to convert the symbols |
183 | back to the corresponding bytes. */ | 171 | back to the corresponding bytes. */ |
184 | |||
185 | t = get_bits(bd, 16); | 172 | t = get_bits(bd, 16); |
186 | symTotal = 0; | 173 | symTotal = 0; |
187 | for (i = 0; i < 16; i++) { | 174 | for (i = 0; i < 16; i++) { |
@@ -194,7 +181,6 @@ static int get_next_block(bunzip_data *bd) | |||
194 | } | 181 | } |
195 | 182 | ||
196 | /* How many different Huffman coding groups does this block use? */ | 183 | /* How many different Huffman coding groups does this block use? */ |
197 | |||
198 | groupCount = get_bits(bd, 3); | 184 | groupCount = get_bits(bd, 3); |
199 | if (groupCount < 2 || groupCount > MAX_GROUPS) | 185 | if (groupCount < 2 || groupCount > MAX_GROUPS) |
200 | return RETVAL_DATA_ERROR; | 186 | return RETVAL_DATA_ERROR; |
@@ -203,19 +189,16 @@ static int get_next_block(bunzip_data *bd) | |||
203 | group. Read in the group selector list, which is stored as MTF encoded | 189 | group. Read in the group selector list, which is stored as MTF encoded |
204 | bit runs. (MTF=Move To Front, as each value is used it's moved to the | 190 | bit runs. (MTF=Move To Front, as each value is used it's moved to the |
205 | start of the list.) */ | 191 | start of the list.) */ |
206 | |||
207 | nSelectors = get_bits(bd, 15); | 192 | nSelectors = get_bits(bd, 15); |
208 | if (!nSelectors) return RETVAL_DATA_ERROR; | 193 | if (!nSelectors) return RETVAL_DATA_ERROR; |
209 | for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; | 194 | for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; |
210 | for (i = 0; i < nSelectors; i++) { | 195 | for (i = 0; i < nSelectors; i++) { |
211 | 196 | ||
212 | /* Get next value */ | 197 | /* Get next value */ |
213 | |||
214 | for (j = 0; get_bits(bd, 1); j++) | 198 | for (j = 0; get_bits(bd, 1); j++) |
215 | if (j >= groupCount) return RETVAL_DATA_ERROR; | 199 | if (j >= groupCount) return RETVAL_DATA_ERROR; |
216 | 200 | ||
217 | /* Decode MTF to get the next selector */ | 201 | /* Decode MTF to get the next selector */ |
218 | |||
219 | uc = mtfSymbol[j]; | 202 | uc = mtfSymbol[j]; |
220 | for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; | 203 | for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; |
221 | mtfSymbol[0] = selectors[i] = uc; | 204 | mtfSymbol[0] = selectors[i] = uc; |
@@ -223,10 +206,11 @@ static int get_next_block(bunzip_data *bd) | |||
223 | 206 | ||
224 | /* Read the Huffman coding tables for each group, which code for symTotal | 207 | /* Read the Huffman coding tables for each group, which code for symTotal |
225 | literal symbols, plus two run symbols (RUNA, RUNB) */ | 208 | literal symbols, plus two run symbols (RUNA, RUNB) */ |
226 | |||
227 | symCount = symTotal + 2; | 209 | symCount = symTotal + 2; |
228 | for (j = 0; j < groupCount; j++) { | 210 | for (j = 0; j < groupCount; j++) { |
229 | unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; | 211 | unsigned char length[MAX_SYMBOLS]; |
212 | /* 8 bits is ALMOST enough for temp[], see below */ | ||
213 | unsigned temp[MAX_HUFCODE_BITS+1]; | ||
230 | int minLen, maxLen, pp; | 214 | int minLen, maxLen, pp; |
231 | 215 | ||
232 | /* Read Huffman code lengths for each symbol. They're stored in | 216 | /* Read Huffman code lengths for each symbol. They're stored in |
@@ -235,7 +219,6 @@ static int get_next_block(bunzip_data *bd) | |||
235 | (Subtracting 1 before the loop and then adding it back at the end is | 219 | (Subtracting 1 before the loop and then adding it back at the end is |
236 | an optimization that makes the test inside the loop simpler: symbol | 220 | an optimization that makes the test inside the loop simpler: symbol |
237 | length 0 becomes negative, so an unsigned inequality catches it.) */ | 221 | length 0 becomes negative, so an unsigned inequality catches it.) */ |
238 | |||
239 | t = get_bits(bd, 5) - 1; | 222 | t = get_bits(bd, 5) - 1; |
240 | for (i = 0; i < symCount; i++) { | 223 | for (i = 0; i < symCount; i++) { |
241 | for (;;) { | 224 | for (;;) { |
@@ -245,7 +228,6 @@ static int get_next_block(bunzip_data *bd) | |||
245 | /* If first bit is 0, stop. Else second bit indicates whether | 228 | /* If first bit is 0, stop. Else second bit indicates whether |
246 | to increment or decrement the value. Optimization: grab 2 | 229 | to increment or decrement the value. Optimization: grab 2 |
247 | bits and unget the second if the first was 0. */ | 230 | bits and unget the second if the first was 0. */ |
248 | |||
249 | k = get_bits(bd, 2); | 231 | k = get_bits(bd, 2); |
250 | if (k < 2) { | 232 | if (k < 2) { |
251 | bd->inbufBitCount++; | 233 | bd->inbufBitCount++; |
@@ -253,17 +235,14 @@ static int get_next_block(bunzip_data *bd) | |||
253 | } | 235 | } |
254 | 236 | ||
255 | /* Add one if second bit 1, else subtract 1. Avoids if/else */ | 237 | /* Add one if second bit 1, else subtract 1. Avoids if/else */ |
256 | |||
257 | t += (((k+1) & 2) - 1); | 238 | t += (((k+1) & 2) - 1); |
258 | } | 239 | } |
259 | 240 | ||
260 | /* Correct for the initial -1, to get the final symbol length */ | 241 | /* Correct for the initial -1, to get the final symbol length */ |
261 | |||
262 | length[i] = t + 1; | 242 | length[i] = t + 1; |
263 | } | 243 | } |
264 | 244 | ||
265 | /* Find largest and smallest lengths in this group */ | 245 | /* Find largest and smallest lengths in this group */ |
266 | |||
267 | minLen = maxLen = length[0]; | 246 | minLen = maxLen = length[0]; |
268 | for (i = 1; i < symCount; i++) { | 247 | for (i = 1; i < symCount; i++) { |
269 | if (length[i] > maxLen) maxLen = length[i]; | 248 | if (length[i] > maxLen) maxLen = length[i]; |
@@ -280,7 +259,6 @@ static int get_next_block(bunzip_data *bd) | |||
280 | * number of bits can have. This is how the Huffman codes can vary in | 259 | * number of bits can have. This is how the Huffman codes can vary in |
281 | * length: each code with a value>limit[length] needs another bit. | 260 | * length: each code with a value>limit[length] needs another bit. |
282 | */ | 261 | */ |
283 | |||
284 | hufGroup = bd->groups + j; | 262 | hufGroup = bd->groups + j; |
285 | hufGroup->minLen = minLen; | 263 | hufGroup->minLen = minLen; |
286 | hufGroup->maxLen = maxLen; | 264 | hufGroup->maxLen = maxLen; |
@@ -288,12 +266,10 @@ static int get_next_block(bunzip_data *bd) | |||
288 | /* Note that minLen can't be smaller than 1, so we adjust the base | 266 | /* Note that minLen can't be smaller than 1, so we adjust the base |
289 | and limit array pointers so we're not always wasting the first | 267 | and limit array pointers so we're not always wasting the first |
290 | entry. We do this again when using them (during symbol decoding).*/ | 268 | entry. We do this again when using them (during symbol decoding).*/ |
291 | |||
292 | base = hufGroup->base - 1; | 269 | base = hufGroup->base - 1; |
293 | limit = (unsigned*)hufGroup->limit - 1; | 270 | limit = hufGroup->limit - 1; |
294 | 271 | ||
295 | /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ | 272 | /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ |
296 | |||
297 | pp = 0; | 273 | pp = 0; |
298 | for (i = minLen; i <= maxLen; i++) { | 274 | for (i = minLen; i <= maxLen; i++) { |
299 | temp[i] = limit[i] = 0; | 275 | temp[i] = limit[i] = 0; |
@@ -303,14 +279,14 @@ static int get_next_block(bunzip_data *bd) | |||
303 | } | 279 | } |
304 | 280 | ||
305 | /* Count symbols coded for at each bit length */ | 281 | /* Count symbols coded for at each bit length */ |
306 | 282 | /* NB: in pathological cases, temp[8] can end ip being 256. | |
283 | * That's why uint8_t is too small for temp[]. */ | ||
307 | for (i = 0; i < symCount; i++) temp[length[i]]++; | 284 | for (i = 0; i < symCount; i++) temp[length[i]]++; |
308 | 285 | ||
309 | /* Calculate limit[] (the largest symbol-coding value at each bit | 286 | /* Calculate limit[] (the largest symbol-coding value at each bit |
310 | * length, which is (previous limit<<1)+symbols at this level), and | 287 | * length, which is (previous limit<<1)+symbols at this level), and |
311 | * base[] (number of symbols to ignore at each bit length, which is | 288 | * base[] (number of symbols to ignore at each bit length, which is |
312 | * limit minus the cumulative count of symbols coded for already). */ | 289 | * limit minus the cumulative count of symbols coded for already). */ |
313 | |||
314 | pp = t = 0; | 290 | pp = t = 0; |
315 | for (i = minLen; i < maxLen; i++) { | 291 | for (i = minLen; i < maxLen; i++) { |
316 | pp += temp[i]; | 292 | pp += temp[i]; |
@@ -321,14 +297,12 @@ static int get_next_block(bunzip_data *bd) | |||
321 | each level we're really only interested in the first few bits, | 297 | each level we're really only interested in the first few bits, |
322 | so here we set all the trailing to-be-ignored bits to 1 so they | 298 | so here we set all the trailing to-be-ignored bits to 1 so they |
323 | don't affect the value>limit[length] comparison. */ | 299 | don't affect the value>limit[length] comparison. */ |
324 | |||
325 | limit[i] = (pp << (maxLen - i)) - 1; | 300 | limit[i] = (pp << (maxLen - i)) - 1; |
326 | pp <<= 1; | 301 | pp <<= 1; |
327 | t += temp[i]; | 302 | t += temp[i]; |
328 | base[i+1] = pp - t; | 303 | base[i+1] = pp - t; |
329 | } | 304 | } |
330 | limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ | 305 | limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ |
331 | /* [x] was observed to occasionally have -1 here: -- vda */ | ||
332 | limit[maxLen] = pp + temp[maxLen] - 1; | 306 | limit[maxLen] = pp + temp[maxLen] - 1; |
333 | base[minLen] = 0; | 307 | base[minLen] = 0; |
334 | } | 308 | } |
@@ -338,7 +312,6 @@ static int get_next_block(bunzip_data *bd) | |||
338 | and run length encoding, saving the result into dbuf[dbufCount++] = uc */ | 312 | and run length encoding, saving the result into dbuf[dbufCount++] = uc */ |
339 | 313 | ||
340 | /* Initialize symbol occurrence counters and symbol Move To Front table */ | 314 | /* Initialize symbol occurrence counters and symbol Move To Front table */ |
341 | |||
342 | memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ | 315 | memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ |
343 | for (i = 0; i < 256; i++) { | 316 | for (i = 0; i < 256; i++) { |
344 | //byteCount[i] = 0; | 317 | //byteCount[i] = 0; |
@@ -350,13 +323,12 @@ static int get_next_block(bunzip_data *bd) | |||
350 | runPos = dbufCount = selector = 0; | 323 | runPos = dbufCount = selector = 0; |
351 | for (;;) { | 324 | for (;;) { |
352 | 325 | ||
353 | /* fetch next Huffman coding group from list. */ | 326 | /* Fetch next Huffman coding group from list. */ |
354 | |||
355 | symCount = GROUP_SIZE - 1; | 327 | symCount = GROUP_SIZE - 1; |
356 | if (selector >= nSelectors) return RETVAL_DATA_ERROR; | 328 | if (selector >= nSelectors) return RETVAL_DATA_ERROR; |
357 | hufGroup = bd->groups + selectors[selector++]; | 329 | hufGroup = bd->groups + selectors[selector++]; |
358 | base = hufGroup->base - 1; | 330 | base = hufGroup->base - 1; |
359 | limit = (unsigned*)hufGroup->limit - 1; | 331 | limit = hufGroup->limit - 1; |
360 | continue_this_group: | 332 | continue_this_group: |
361 | 333 | ||
362 | /* Read next Huffman-coded symbol. */ | 334 | /* Read next Huffman-coded symbol. */ |
@@ -370,7 +342,6 @@ static int get_next_block(bunzip_data *bd) | |||
370 | dry). The following (up to got_huff_bits:) is equivalent to | 342 | dry). The following (up to got_huff_bits:) is equivalent to |
371 | j = get_bits(bd, hufGroup->maxLen); | 343 | j = get_bits(bd, hufGroup->maxLen); |
372 | */ | 344 | */ |
373 | |||
374 | while ((int)(bd->inbufBitCount) < hufGroup->maxLen) { | 345 | while ((int)(bd->inbufBitCount) < hufGroup->maxLen) { |
375 | if (bd->inbufPos == bd->inbufCount) { | 346 | if (bd->inbufPos == bd->inbufCount) { |
376 | j = get_bits(bd, hufGroup->maxLen); | 347 | j = get_bits(bd, hufGroup->maxLen); |
@@ -385,13 +356,11 @@ static int get_next_block(bunzip_data *bd) | |||
385 | got_huff_bits: | 356 | got_huff_bits: |
386 | 357 | ||
387 | /* Figure how how many bits are in next symbol and unget extras */ | 358 | /* Figure how how many bits are in next symbol and unget extras */ |
388 | |||
389 | i = hufGroup->minLen; | 359 | i = hufGroup->minLen; |
390 | while ((unsigned)j > limit[i]) ++i; | 360 | while (j > limit[i]) ++i; |
391 | bd->inbufBitCount += (hufGroup->maxLen - i); | 361 | bd->inbufBitCount += (hufGroup->maxLen - i); |
392 | 362 | ||
393 | /* Huffman decode value to get nextSym (with bounds checking) */ | 363 | /* Huffman decode value to get nextSym (with bounds checking) */ |
394 | |||
395 | if (i > hufGroup->maxLen) | 364 | if (i > hufGroup->maxLen) |
396 | return RETVAL_DATA_ERROR; | 365 | return RETVAL_DATA_ERROR; |
397 | j = (j >> (hufGroup->maxLen - i)) - base[i]; | 366 | j = (j >> (hufGroup->maxLen - i)) - base[i]; |
@@ -403,11 +372,9 @@ static int get_next_block(bunzip_data *bd) | |||
403 | byte, or a repeated run of the most recent literal byte. First, | 372 | byte, or a repeated run of the most recent literal byte. First, |
404 | check if nextSym indicates a repeated run, and if so loop collecting | 373 | check if nextSym indicates a repeated run, and if so loop collecting |
405 | how many times to repeat the last literal. */ | 374 | how many times to repeat the last literal. */ |
406 | |||
407 | if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ | 375 | if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ |
408 | 376 | ||
409 | /* If this is the start of a new run, zero out counter */ | 377 | /* If this is the start of a new run, zero out counter */ |
410 | |||
411 | if (!runPos) { | 378 | if (!runPos) { |
412 | runPos = 1; | 379 | runPos = 1; |
413 | t = 0; | 380 | t = 0; |
@@ -420,7 +387,6 @@ static int get_next_block(bunzip_data *bd) | |||
420 | the basic or 0/1 method (except all bits 0, which would use no | 387 | the basic or 0/1 method (except all bits 0, which would use no |
421 | symbols, but a run of length 0 doesn't mean anything in this | 388 | symbols, but a run of length 0 doesn't mean anything in this |
422 | context). Thus space is saved. */ | 389 | context). Thus space is saved. */ |
423 | |||
424 | t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ | 390 | t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ |
425 | if (runPos < dbufSize) runPos <<= 1; | 391 | if (runPos < dbufSize) runPos <<= 1; |
426 | goto end_of_huffman_loop; | 392 | goto end_of_huffman_loop; |
@@ -430,7 +396,6 @@ static int get_next_block(bunzip_data *bd) | |||
430 | how many times to repeat the last literal, so append that many | 396 | how many times to repeat the last literal, so append that many |
431 | copies to our buffer of decoded symbols (dbuf) now. (The last | 397 | copies to our buffer of decoded symbols (dbuf) now. (The last |
432 | literal used is the one at the head of the mtfSymbol array.) */ | 398 | literal used is the one at the head of the mtfSymbol array.) */ |
433 | |||
434 | if (runPos) { | 399 | if (runPos) { |
435 | runPos = 0; | 400 | runPos = 0; |
436 | if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; | 401 | if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; |
@@ -441,7 +406,6 @@ static int get_next_block(bunzip_data *bd) | |||
441 | } | 406 | } |
442 | 407 | ||
443 | /* Is this the terminating symbol? */ | 408 | /* Is this the terminating symbol? */ |
444 | |||
445 | if (nextSym > symTotal) break; | 409 | if (nextSym > symTotal) break; |
446 | 410 | ||
447 | /* At this point, nextSym indicates a new literal character. Subtract | 411 | /* At this point, nextSym indicates a new literal character. Subtract |
@@ -451,7 +415,6 @@ static int get_next_block(bunzip_data *bd) | |||
451 | first symbol in the mtf array, position 0, would have been handled | 415 | first symbol in the mtf array, position 0, would have been handled |
452 | as part of a run above. Therefore 1 unused mtf position minus | 416 | as part of a run above. Therefore 1 unused mtf position minus |
453 | 2 non-literal nextSym values equals -1.) */ | 417 | 2 non-literal nextSym values equals -1.) */ |
454 | |||
455 | if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; | 418 | if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; |
456 | i = nextSym - 1; | 419 | i = nextSym - 1; |
457 | uc = mtfSymbol[i]; | 420 | uc = mtfSymbol[i]; |
@@ -460,7 +423,6 @@ static int get_next_block(bunzip_data *bd) | |||
460 | * small number of symbols, and are bound by 256 in any case, using | 423 | * small number of symbols, and are bound by 256 in any case, using |
461 | * memmove here would typically be bigger and slower due to function | 424 | * memmove here would typically be bigger and slower due to function |
462 | * call overhead and other assorted setup costs. */ | 425 | * call overhead and other assorted setup costs. */ |
463 | |||
464 | do { | 426 | do { |
465 | mtfSymbol[i] = mtfSymbol[i-1]; | 427 | mtfSymbol[i] = mtfSymbol[i-1]; |
466 | } while (--i); | 428 | } while (--i); |
@@ -468,13 +430,11 @@ static int get_next_block(bunzip_data *bd) | |||
468 | uc = symToByte[uc]; | 430 | uc = symToByte[uc]; |
469 | 431 | ||
470 | /* We have our literal byte. Save it into dbuf. */ | 432 | /* We have our literal byte. Save it into dbuf. */ |
471 | |||
472 | byteCount[uc]++; | 433 | byteCount[uc]++; |
473 | dbuf[dbufCount++] = (unsigned)uc; | 434 | dbuf[dbufCount++] = (unsigned)uc; |
474 | 435 | ||
475 | /* Skip group initialization if we're not done with this group. Done | 436 | /* Skip group initialization if we're not done with this group. Done |
476 | * this way to avoid compiler warning. */ | 437 | * this way to avoid compiler warning. */ |
477 | |||
478 | end_of_huffman_loop: | 438 | end_of_huffman_loop: |
479 | if (symCount--) goto continue_this_group; | 439 | if (symCount--) goto continue_this_group; |
480 | } | 440 | } |
@@ -487,7 +447,6 @@ static int get_next_block(bunzip_data *bd) | |||
487 | */ | 447 | */ |
488 | 448 | ||
489 | /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ | 449 | /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ |
490 | |||
491 | j = 0; | 450 | j = 0; |
492 | for (i = 0; i < 256; i++) { | 451 | for (i = 0; i < 256; i++) { |
493 | k = j + byteCount[i]; | 452 | k = j + byteCount[i]; |
@@ -496,7 +455,6 @@ static int get_next_block(bunzip_data *bd) | |||
496 | } | 455 | } |
497 | 456 | ||
498 | /* Figure out what order dbuf would be in if we sorted it. */ | 457 | /* Figure out what order dbuf would be in if we sorted it. */ |
499 | |||
500 | for (i = 0; i < dbufCount; i++) { | 458 | for (i = 0; i < dbufCount; i++) { |
501 | uc = (unsigned char)(dbuf[i] & 0xff); | 459 | uc = (unsigned char)(dbuf[i] & 0xff); |
502 | dbuf[byteCount[uc]] |= (i << 8); | 460 | dbuf[byteCount[uc]] |= (i << 8); |
@@ -506,7 +464,6 @@ static int get_next_block(bunzip_data *bd) | |||
506 | /* Decode first byte by hand to initialize "previous" byte. Note that it | 464 | /* Decode first byte by hand to initialize "previous" byte. Note that it |
507 | doesn't get output, and if the first three characters are identical | 465 | doesn't get output, and if the first three characters are identical |
508 | it doesn't qualify as a run (hence writeRunCountdown=5). */ | 466 | it doesn't qualify as a run (hence writeRunCountdown=5). */ |
509 | |||
510 | if (dbufCount) { | 467 | if (dbufCount) { |
511 | if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; | 468 | if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; |
512 | bd->writePos = dbuf[origPtr]; | 469 | bd->writePos = dbuf[origPtr]; |
@@ -525,7 +482,6 @@ static int get_next_block(bunzip_data *bd) | |||
525 | error (all errors are negative numbers). If out_fd!=-1, outbuf and len | 482 | error (all errors are negative numbers). If out_fd!=-1, outbuf and len |
526 | are ignored, data is written to out_fd and return is RETVAL_OK or error. | 483 | are ignored, data is written to out_fd and return is RETVAL_OK or error. |
527 | */ | 484 | */ |
528 | |||
529 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | 485 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) |
530 | { | 486 | { |
531 | const unsigned *dbuf; | 487 | const unsigned *dbuf; |
@@ -542,19 +498,15 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
542 | /* We will always have pending decoded data to write into the output | 498 | /* We will always have pending decoded data to write into the output |
543 | buffer unless this is the very first call (in which case we haven't | 499 | buffer unless this is the very first call (in which case we haven't |
544 | Huffman-decoded a block into the intermediate buffer yet). */ | 500 | Huffman-decoded a block into the intermediate buffer yet). */ |
545 | |||
546 | if (bd->writeCopies) { | 501 | if (bd->writeCopies) { |
547 | 502 | ||
548 | /* Inside the loop, writeCopies means extra copies (beyond 1) */ | 503 | /* Inside the loop, writeCopies means extra copies (beyond 1) */ |
549 | |||
550 | --bd->writeCopies; | 504 | --bd->writeCopies; |
551 | 505 | ||
552 | /* Loop outputting bytes */ | 506 | /* Loop outputting bytes */ |
553 | |||
554 | for (;;) { | 507 | for (;;) { |
555 | 508 | ||
556 | /* If the output buffer is full, snapshot state and return */ | 509 | /* If the output buffer is full, snapshot state and return */ |
557 | |||
558 | if (gotcount >= len) { | 510 | if (gotcount >= len) { |
559 | bd->writePos = pos; | 511 | bd->writePos = pos; |
560 | bd->writeCurrent = current; | 512 | bd->writeCurrent = current; |
@@ -563,13 +515,11 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
563 | } | 515 | } |
564 | 516 | ||
565 | /* Write next byte into output buffer, updating CRC */ | 517 | /* Write next byte into output buffer, updating CRC */ |
566 | |||
567 | outbuf[gotcount++] = current; | 518 | outbuf[gotcount++] = current; |
568 | bd->writeCRC = (bd->writeCRC << 8) | 519 | bd->writeCRC = (bd->writeCRC << 8) |
569 | ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; | 520 | ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; |
570 | 521 | ||
571 | /* Loop now if we're outputting multiple copies of this byte */ | 522 | /* Loop now if we're outputting multiple copies of this byte */ |
572 | |||
573 | if (bd->writeCopies) { | 523 | if (bd->writeCopies) { |
574 | --bd->writeCopies; | 524 | --bd->writeCopies; |
575 | continue; | 525 | continue; |
@@ -585,35 +535,29 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
585 | /* After 3 consecutive copies of the same byte, the 4th | 535 | /* After 3 consecutive copies of the same byte, the 4th |
586 | * is a repeat count. We count down from 4 instead | 536 | * is a repeat count. We count down from 4 instead |
587 | * of counting up because testing for non-zero is faster */ | 537 | * of counting up because testing for non-zero is faster */ |
588 | |||
589 | if (--bd->writeRunCountdown) { | 538 | if (--bd->writeRunCountdown) { |
590 | if (current != previous) | 539 | if (current != previous) |
591 | bd->writeRunCountdown = 4; | 540 | bd->writeRunCountdown = 4; |
592 | } else { | 541 | } else { |
593 | 542 | ||
594 | /* We have a repeated run, this byte indicates the count */ | 543 | /* We have a repeated run, this byte indicates the count */ |
595 | |||
596 | bd->writeCopies = current; | 544 | bd->writeCopies = current; |
597 | current = previous; | 545 | current = previous; |
598 | bd->writeRunCountdown = 5; | 546 | bd->writeRunCountdown = 5; |
599 | 547 | ||
600 | /* Sometimes there are just 3 bytes (run length 0) */ | 548 | /* Sometimes there are just 3 bytes (run length 0) */ |
601 | |||
602 | if (!bd->writeCopies) goto decode_next_byte; | 549 | if (!bd->writeCopies) goto decode_next_byte; |
603 | 550 | ||
604 | /* Subtract the 1 copy we'd output anyway to get extras */ | 551 | /* Subtract the 1 copy we'd output anyway to get extras */ |
605 | |||
606 | --bd->writeCopies; | 552 | --bd->writeCopies; |
607 | } | 553 | } |
608 | } | 554 | } |
609 | 555 | ||
610 | /* Decompression of this block completed successfully */ | 556 | /* Decompression of this block completed successfully */ |
611 | |||
612 | bd->writeCRC = ~bd->writeCRC; | 557 | bd->writeCRC = ~bd->writeCRC; |
613 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; | 558 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; |
614 | 559 | ||
615 | /* If this block had a CRC error, force file level CRC error. */ | 560 | /* If this block had a CRC error, force file level CRC error. */ |
616 | |||
617 | if (bd->writeCRC != bd->headerCRC) { | 561 | if (bd->writeCRC != bd->headerCRC) { |
618 | bd->totalCRC = bd->headerCRC + 1; | 562 | bd->totalCRC = bd->headerCRC + 1; |
619 | return RETVAL_LAST_BLOCK; | 563 | return RETVAL_LAST_BLOCK; |
@@ -622,7 +566,6 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
622 | 566 | ||
623 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ | 567 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ |
624 | /* (previous is just a convenient unused temp variable here) */ | 568 | /* (previous is just a convenient unused temp variable here) */ |
625 | |||
626 | previous = get_next_block(bd); | 569 | previous = get_next_block(bd); |
627 | if (previous) { | 570 | if (previous) { |
628 | bd->writeCount = previous; | 571 | bd->writeCount = previous; |
@@ -634,7 +577,6 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
634 | goto decode_next_byte; | 577 | goto decode_next_byte; |
635 | } | 578 | } |
636 | 579 | ||
637 | |||
638 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain | 580 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain |
639 | a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are | 581 | a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are |
640 | ignored, and data is read from file handle into temporary buffer. */ | 582 | ignored, and data is read from file handle into temporary buffer. */ |
@@ -642,7 +584,6 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
642 | /* Because bunzip2 is used for help text unpacking, and because bb_show_usage() | 584 | /* Because bunzip2 is used for help text unpacking, and because bb_show_usage() |
643 | should work for NOFORK applets too, we must be extremely careful to not leak | 585 | should work for NOFORK applets too, we must be extremely careful to not leak |
644 | any allocations! */ | 586 | any allocations! */ |
645 | |||
646 | int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, | 587 | int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, |
647 | int len) | 588 | int len) |
648 | { | 589 | { |
@@ -653,16 +594,13 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *in | |||
653 | }; | 594 | }; |
654 | 595 | ||
655 | /* Figure out how much data to allocate */ | 596 | /* Figure out how much data to allocate */ |
656 | |||
657 | i = sizeof(bunzip_data); | 597 | i = sizeof(bunzip_data); |
658 | if (in_fd != -1) i += IOBUF_SIZE; | 598 | if (in_fd != -1) i += IOBUF_SIZE; |
659 | 599 | ||
660 | /* Allocate bunzip_data. Most fields initialize to zero. */ | 600 | /* Allocate bunzip_data. Most fields initialize to zero. */ |
661 | |||
662 | bd = *bdp = xzalloc(i); | 601 | bd = *bdp = xzalloc(i); |
663 | 602 | ||
664 | /* Setup input buffer */ | 603 | /* Setup input buffer */ |
665 | |||
666 | bd->in_fd = in_fd; | 604 | bd->in_fd = in_fd; |
667 | if (-1 == in_fd) { | 605 | if (-1 == in_fd) { |
668 | /* in this case, bd->inbuf is read-only */ | 606 | /* in this case, bd->inbuf is read-only */ |
@@ -672,22 +610,18 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *in | |||
672 | bd->inbuf = (unsigned char *)(bd + 1); | 610 | bd->inbuf = (unsigned char *)(bd + 1); |
673 | 611 | ||
674 | /* Init the CRC32 table (big endian) */ | 612 | /* Init the CRC32 table (big endian) */ |
675 | |||
676 | crc32_filltable(bd->crc32Table, 1); | 613 | crc32_filltable(bd->crc32Table, 1); |
677 | 614 | ||
678 | /* Setup for I/O error handling via longjmp */ | 615 | /* Setup for I/O error handling via longjmp */ |
679 | |||
680 | i = setjmp(bd->jmpbuf); | 616 | i = setjmp(bd->jmpbuf); |
681 | if (i) return i; | 617 | if (i) return i; |
682 | 618 | ||
683 | /* Ensure that file starts with "BZh['1'-'9']." */ | 619 | /* Ensure that file starts with "BZh['1'-'9']." */ |
684 | |||
685 | i = get_bits(bd, 32); | 620 | i = get_bits(bd, 32); |
686 | if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; | 621 | if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; |
687 | 622 | ||
688 | /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of | 623 | /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of |
689 | uncompressed data. Allocate intermediate buffer for block. */ | 624 | uncompressed data. Allocate intermediate buffer for block. */ |
690 | |||
691 | bd->dbufSize = 100000 * (i - BZh0); | 625 | bd->dbufSize = 100000 * (i - BZh0); |
692 | 626 | ||
693 | /* Cannot use xmalloc - may leak bd in NOFORK case! */ | 627 | /* Cannot use xmalloc - may leak bd in NOFORK case! */ |
@@ -707,7 +641,6 @@ void FAST_FUNC dealloc_bunzip(bunzip_data *bd) | |||
707 | 641 | ||
708 | 642 | ||
709 | /* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ | 643 | /* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ |
710 | |||
711 | USE_DESKTOP(long long) int FAST_FUNC | 644 | USE_DESKTOP(long long) int FAST_FUNC |
712 | unpack_bz2_stream(int src_fd, int dst_fd) | 645 | unpack_bz2_stream(int src_fd, int dst_fd) |
713 | { | 646 | { |