diff options
Diffstat (limited to 'compress.c')
-rw-r--r-- | compress.c | 141 |
1 files changed, 90 insertions, 51 deletions
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-1998 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-1999 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -41,9 +41,9 @@ | |||
41 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 41 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
43 | 43 | ||
44 | Julian Seward, Guildford, Surrey, UK. | 44 | Julian Seward, Cambridge, UK. |
45 | jseward@acm.org | 45 | jseward@acm.org |
46 | bzip2/libbzip2 version 0.9.0 of 28 June 1998 | 46 | bzip2/libbzip2 version 0.9.5 of 24 May 1999 |
47 | 47 | ||
48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
49 | Mike Burrows | 49 | Mike Burrows |
@@ -90,7 +90,7 @@ static | |||
90 | void bsFinishWrite ( EState* s ) | 90 | void bsFinishWrite ( EState* s ) |
91 | { | 91 | { |
92 | while (s->bsLive > 0) { | 92 | while (s->bsLive > 0) { |
93 | ((UChar*)(s->quadrant))[s->numZ] = (UChar)(s->bsBuff >> 24); | 93 | s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); |
94 | s->numZ++; | 94 | s->numZ++; |
95 | s->bsBuff <<= 8; | 95 | s->bsBuff <<= 8; |
96 | s->bsLive -= 8; | 96 | s->bsLive -= 8; |
@@ -102,7 +102,7 @@ void bsFinishWrite ( EState* s ) | |||
102 | #define bsNEEDW(nz) \ | 102 | #define bsNEEDW(nz) \ |
103 | { \ | 103 | { \ |
104 | while (s->bsLive >= 8) { \ | 104 | while (s->bsLive >= 8) { \ |
105 | ((UChar*)(s->quadrant))[s->numZ] \ | 105 | s->zbits[s->numZ] \ |
106 | = (UChar)(s->bsBuff >> 24); \ | 106 | = (UChar)(s->bsBuff >> 24); \ |
107 | s->numZ++; \ | 107 | s->numZ++; \ |
108 | s->bsBuff <<= 8; \ | 108 | s->bsBuff <<= 8; \ |
@@ -162,13 +162,39 @@ void makeMaps_e ( EState* s ) | |||
162 | static | 162 | static |
163 | void generateMTFValues ( EState* s ) | 163 | void generateMTFValues ( EState* s ) |
164 | { | 164 | { |
165 | UChar yy[256]; | 165 | UChar yy[256]; |
166 | Int32 i, j; | 166 | Int32 i, j; |
167 | UChar tmp; | 167 | UChar tmp; |
168 | UChar tmp2; | 168 | UChar tmp2; |
169 | Int32 zPend; | 169 | Int32 zPend; |
170 | Int32 wr; | 170 | Int32 wr; |
171 | Int32 EOB; | 171 | Int32 EOB; |
172 | |||
173 | /* | ||
174 | After sorting (eg, here), | ||
175 | s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, | ||
176 | and | ||
177 | ((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8] | ||
178 | holds the original block data. | ||
179 | |||
180 | The first thing to do is generate the MTF values, | ||
181 | and put them in | ||
182 | ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. | ||
183 | Because there are strictly fewer or equal MTF values | ||
184 | than block values, ptr values in this area are overwritten | ||
185 | with MTF values only when they are no longer needed. | ||
186 | |||
187 | The final compressed bitstream is generated into the | ||
188 | area starting at | ||
189 | (UChar*) (&((UInt16)s->arr2)[s->nblock]) | ||
190 | |||
191 | These storage aliases are set up in bzCompressInit(), | ||
192 | except for the last one, which is arranged in | ||
193 | compressBlock(). | ||
194 | */ | ||
195 | UInt32* ptr = s->ptr; | ||
196 | UInt16* block = s->block; | ||
197 | UInt16* mtfv = s->mtfv; | ||
172 | 198 | ||
173 | makeMaps_e ( s ); | 199 | makeMaps_e ( s ); |
174 | EOB = s->nInUse+1; | 200 | EOB = s->nInUse+1; |
@@ -183,52 +209,61 @@ void generateMTFValues ( EState* s ) | |||
183 | UChar ll_i; | 209 | UChar ll_i; |
184 | 210 | ||
185 | AssertD ( wr <= i, "generateMTFValues(1)" ); | 211 | AssertD ( wr <= i, "generateMTFValues(1)" ); |
186 | j = s->zptr[i]-1; if (j < 0) j += s->nblock; | 212 | j = ptr[i]-1; if (j < 0) j += s->nblock; |
187 | ll_i = s->unseqToSeq[s->block[j]]; | 213 | ll_i = s->unseqToSeq[block[j] >> 8]; |
188 | AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); | 214 | AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); |
189 | 215 | ||
190 | j = 0; | 216 | tmp = yy[0]; |
191 | tmp = yy[j]; | 217 | if (tmp == ll_i) { |
192 | while ( ll_i != tmp ) { | ||
193 | j++; | ||
194 | tmp2 = tmp; | ||
195 | tmp = yy[j]; | ||
196 | yy[j] = tmp2; | ||
197 | }; | ||
198 | yy[0] = tmp; | ||
199 | |||
200 | if (j == 0) { | ||
201 | zPend++; | 218 | zPend++; |
202 | } else { | 219 | } else { |
220 | tmp2 = tmp; | ||
221 | tmp = yy[1]; | ||
222 | yy[1] = tmp2; | ||
223 | j = 1; | ||
224 | while ( ll_i != tmp ) { | ||
225 | j++; | ||
226 | tmp2 = tmp; | ||
227 | tmp = yy[j]; | ||
228 | yy[j] = tmp2; | ||
229 | }; | ||
230 | yy[0] = tmp; | ||
231 | |||
203 | if (zPend > 0) { | 232 | if (zPend > 0) { |
204 | zPend--; | 233 | zPend--; |
205 | while (True) { | 234 | while (True) { |
206 | switch (zPend % 2) { | 235 | if (zPend & 1) { |
207 | case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break; | 236 | mtfv[wr] = BZ_RUNB; wr++; |
208 | case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break; | 237 | s->mtfFreq[BZ_RUNB]++; |
209 | }; | 238 | } else { |
239 | mtfv[wr] = BZ_RUNA; wr++; | ||
240 | s->mtfFreq[BZ_RUNA]++; | ||
241 | } | ||
210 | if (zPend < 2) break; | 242 | if (zPend < 2) break; |
211 | zPend = (zPend - 2) / 2; | 243 | zPend = (zPend - 2) / 2; |
212 | }; | 244 | }; |
213 | zPend = 0; | 245 | zPend = 0; |
214 | } | 246 | } |
215 | s->szptr[wr] = j+1; wr++; s->mtfFreq[j+1]++; | 247 | mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; |
216 | } | 248 | } |
217 | } | 249 | } |
218 | 250 | ||
219 | if (zPend > 0) { | 251 | if (zPend > 0) { |
220 | zPend--; | 252 | zPend--; |
221 | while (True) { | 253 | while (True) { |
222 | switch (zPend % 2) { | 254 | if (zPend & 1) { |
223 | case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break; | 255 | mtfv[wr] = BZ_RUNB; wr++; |
224 | case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break; | 256 | s->mtfFreq[BZ_RUNB]++; |
225 | }; | 257 | } else { |
258 | mtfv[wr] = BZ_RUNA; wr++; | ||
259 | s->mtfFreq[BZ_RUNA]++; | ||
260 | } | ||
226 | if (zPend < 2) break; | 261 | if (zPend < 2) break; |
227 | zPend = (zPend - 2) / 2; | 262 | zPend = (zPend - 2) / 2; |
228 | }; | 263 | }; |
229 | } | 264 | } |
230 | 265 | ||
231 | s->szptr[wr] = EOB; wr++; s->mtfFreq[EOB]++; | 266 | mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; |
232 | 267 | ||
233 | s->nMTF = wr; | 268 | s->nMTF = wr; |
234 | } | 269 | } |
@@ -259,6 +294,8 @@ void sendMTFValues ( EState* s ) | |||
259 | UInt16 cost[BZ_N_GROUPS]; | 294 | UInt16 cost[BZ_N_GROUPS]; |
260 | Int32 fave[BZ_N_GROUPS]; | 295 | Int32 fave[BZ_N_GROUPS]; |
261 | 296 | ||
297 | UInt16* mtfv = s->mtfv; | ||
298 | |||
262 | if (s->verbosity >= 3) | 299 | if (s->verbosity >= 3) |
263 | VPrintf3( " %d in block, %d after MTF & 1-2 coding, " | 300 | VPrintf3( " %d in block, %d after MTF & 1-2 coding, " |
264 | "%d+2 syms in use\n", | 301 | "%d+2 syms in use\n", |
@@ -348,7 +385,7 @@ void sendMTFValues ( EState* s ) | |||
348 | register UInt16 cost0, cost1, cost2, cost3, cost4, cost5; | 385 | register UInt16 cost0, cost1, cost2, cost3, cost4, cost5; |
349 | cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; | 386 | cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; |
350 | for (i = gs; i <= ge; i++) { | 387 | for (i = gs; i <= ge; i++) { |
351 | UInt16 icv = s->szptr[i]; | 388 | UInt16 icv = mtfv[i]; |
352 | cost0 += s->len[0][icv]; | 389 | cost0 += s->len[0][icv]; |
353 | cost1 += s->len[1][icv]; | 390 | cost1 += s->len[1][icv]; |
354 | cost2 += s->len[2][icv]; | 391 | cost2 += s->len[2][icv]; |
@@ -360,7 +397,7 @@ void sendMTFValues ( EState* s ) | |||
360 | cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; | 397 | cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; |
361 | } else { | 398 | } else { |
362 | for (i = gs; i <= ge; i++) { | 399 | for (i = gs; i <= ge; i++) { |
363 | UInt16 icv = s->szptr[i]; | 400 | UInt16 icv = mtfv[i]; |
364 | for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; | 401 | for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; |
365 | } | 402 | } |
366 | } | 403 | } |
@@ -381,7 +418,7 @@ void sendMTFValues ( EState* s ) | |||
381 | Increment the symbol frequencies for the selected table. | 418 | Increment the symbol frequencies for the selected table. |
382 | --*/ | 419 | --*/ |
383 | for (i = gs; i <= ge; i++) | 420 | for (i = gs; i <= ge; i++) |
384 | s->rfreq[bt][ s->szptr[i] ]++; | 421 | s->rfreq[bt][ mtfv[i] ]++; |
385 | 422 | ||
386 | gs = ge+1; | 423 | gs = ge+1; |
387 | } | 424 | } |
@@ -502,8 +539,8 @@ void sendMTFValues ( EState* s ) | |||
502 | for (i = gs; i <= ge; i++) { | 539 | for (i = gs; i <= ge; i++) { |
503 | AssertH ( s->selector[selCtr] < nGroups, 3006 ); | 540 | AssertH ( s->selector[selCtr] < nGroups, 3006 ); |
504 | bsW ( s, | 541 | bsW ( s, |
505 | s->len [s->selector[selCtr]] [s->szptr[i]], | 542 | s->len [s->selector[selCtr]] [mtfv[i]], |
506 | s->code [s->selector[selCtr]] [s->szptr[i]] ); | 543 | s->code [s->selector[selCtr]] [mtfv[i]] ); |
507 | } | 544 | } |
508 | 545 | ||
509 | gs = ge+1; | 546 | gs = ge+1; |
@@ -534,13 +571,15 @@ void compressBlock ( EState* s, Bool is_last_block ) | |||
534 | blockSort ( s ); | 571 | blockSort ( s ); |
535 | } | 572 | } |
536 | 573 | ||
574 | s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]); | ||
575 | |||
537 | /*-- If this is the first block, create the stream header. --*/ | 576 | /*-- If this is the first block, create the stream header. --*/ |
538 | if (s->blockNo == 1) { | 577 | if (s->blockNo == 1) { |
539 | bsInitWrite ( s ); | 578 | bsInitWrite ( s ); |
540 | bsPutUChar ( s, 'B' ); | 579 | bsPutUChar ( s, 'B' ); |
541 | bsPutUChar ( s, 'Z' ); | 580 | bsPutUChar ( s, 'Z' ); |
542 | bsPutUChar ( s, 'h' ); | 581 | bsPutUChar ( s, 'h' ); |
543 | bsPutUChar ( s, '0' + s->blockSize100k ); | 582 | bsPutUChar ( s, (UChar)('0' + s->blockSize100k) ); |
544 | } | 583 | } |
545 | 584 | ||
546 | if (s->nblock > 0) { | 585 | if (s->nblock > 0) { |
@@ -552,11 +591,16 @@ void compressBlock ( EState* s, Bool is_last_block ) | |||
552 | /*-- Now the block's CRC, so it is in a known place. --*/ | 591 | /*-- Now the block's CRC, so it is in a known place. --*/ |
553 | bsPutUInt32 ( s, s->blockCRC ); | 592 | bsPutUInt32 ( s, s->blockCRC ); |
554 | 593 | ||
555 | /*-- Now a single bit indicating randomisation. --*/ | 594 | /*-- |
556 | if (s->blockRandomised) { | 595 | Now a single bit indicating (non-)randomisation. |
557 | bsW(s,1,1); s->nBlocksRandomised++; | 596 | As of version 0.9.5, we use a better sorting algorithm |
558 | } else | 597 | which makes randomisation unnecessary. So always set |
559 | bsW(s,1,0); | 598 | the randomised bit to 'no'. Of course, the decoder |
599 | still needs to be able to handle randomised blocks | ||
600 | so as to maintain backwards compatibility with | ||
601 | older versions of bzip2. | ||
602 | --*/ | ||
603 | bsW(s,1,0); | ||
560 | 604 | ||
561 | bsW ( s, 24, s->origPtr ); | 605 | bsW ( s, 24, s->origPtr ); |
562 | generateMTFValues ( s ); | 606 | generateMTFValues ( s ); |
@@ -567,11 +611,6 @@ void compressBlock ( EState* s, Bool is_last_block ) | |||
567 | /*-- If this is the last block, add the stream trailer. --*/ | 611 | /*-- If this is the last block, add the stream trailer. --*/ |
568 | if (is_last_block) { | 612 | if (is_last_block) { |
569 | 613 | ||
570 | if (s->verbosity >= 2 && s->nBlocksRandomised > 0) | ||
571 | VPrintf2 ( " %d block%s needed randomisation\n", | ||
572 | s->nBlocksRandomised, | ||
573 | s->nBlocksRandomised == 1 ? "" : "s" ); | ||
574 | |||
575 | bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); | 614 | bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); |
576 | bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); | 615 | bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); |
577 | bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); | 616 | bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); |