aboutsummaryrefslogtreecommitdiff
path: root/compress.c
diff options
context:
space:
mode:
authorJulian Seward <jseward@acm.org>1999-09-04 22:13:13 +0200
committerJulian Seward <jseward@acm.org>1999-09-04 22:13:13 +0200
commitf93cd82a9a7094ad90fd19bbc6ccf6f4627f8060 (patch)
treec95407df5665f5a7395683f07552f2b13f2e501f /compress.c
parent977101ad5f833f5c0a574bfeea408e5301a6b052 (diff)
downloadbzip2-f93cd82a9a7094ad90fd19bbc6ccf6f4627f8060.tar.gz
bzip2-f93cd82a9a7094ad90fd19bbc6ccf6f4627f8060.tar.bz2
bzip2-f93cd82a9a7094ad90fd19bbc6ccf6f4627f8060.zip
bzip2-0.9.5dbzip2-0.9.5d
Diffstat (limited to 'compress.c')
-rw-r--r--compress.c141
1 files changed, 90 insertions, 51 deletions
diff --git a/compress.c b/compress.c
index 23abd43..7b192c3 100644
--- a/compress.c
+++ b/compress.c
@@ -8,7 +8,7 @@
8 This file is a part of bzip2 and/or libbzip2, a program and 8 This file is a part of bzip2 and/or libbzip2, a program and
9 library for lossless, block-sorting data compression. 9 library for lossless, block-sorting data compression.
10 10
11 Copyright (C) 1996-1998 Julian R Seward. All rights reserved. 11 Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
12 12
13 Redistribution and use in source and binary forms, with or without 13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions 14 modification, are permitted provided that the following conditions
@@ -41,9 +41,9 @@
41 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 43
44 Julian Seward, Guildford, Surrey, UK. 44 Julian Seward, Cambridge, UK.
45 jseward@acm.org 45 jseward@acm.org
46 bzip2/libbzip2 version 0.9.0 of 28 June 1998 46 bzip2/libbzip2 version 0.9.5 of 24 May 1999
47 47
48 This program is based on (at least) the work of: 48 This program is based on (at least) the work of:
49 Mike Burrows 49 Mike Burrows
@@ -90,7 +90,7 @@ static
90void bsFinishWrite ( EState* s ) 90void bsFinishWrite ( EState* s )
91{ 91{
92 while (s->bsLive > 0) { 92 while (s->bsLive > 0) {
93 ((UChar*)(s->quadrant))[s->numZ] = (UChar)(s->bsBuff >> 24); 93 s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
94 s->numZ++; 94 s->numZ++;
95 s->bsBuff <<= 8; 95 s->bsBuff <<= 8;
96 s->bsLive -= 8; 96 s->bsLive -= 8;
@@ -102,7 +102,7 @@ void bsFinishWrite ( EState* s )
102#define bsNEEDW(nz) \ 102#define bsNEEDW(nz) \
103{ \ 103{ \
104 while (s->bsLive >= 8) { \ 104 while (s->bsLive >= 8) { \
105 ((UChar*)(s->quadrant))[s->numZ] \ 105 s->zbits[s->numZ] \
106 = (UChar)(s->bsBuff >> 24); \ 106 = (UChar)(s->bsBuff >> 24); \
107 s->numZ++; \ 107 s->numZ++; \
108 s->bsBuff <<= 8; \ 108 s->bsBuff <<= 8; \
@@ -162,13 +162,39 @@ void makeMaps_e ( EState* s )
162static 162static
163void generateMTFValues ( EState* s ) 163void generateMTFValues ( EState* s )
164{ 164{
165 UChar yy[256]; 165 UChar yy[256];
166 Int32 i, j; 166 Int32 i, j;
167 UChar tmp; 167 UChar tmp;
168 UChar tmp2; 168 UChar tmp2;
169 Int32 zPend; 169 Int32 zPend;
170 Int32 wr; 170 Int32 wr;
171 Int32 EOB; 171 Int32 EOB;
172
173 /*
174 After sorting (eg, here),
175 s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
176 and
177 ((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8]
178 holds the original block data.
179
180 The first thing to do is generate the MTF values,
181 and put them in
182 ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
183 Because there are strictly fewer or equal MTF values
184 than block values, ptr values in this area are overwritten
185 with MTF values only when they are no longer needed.
186
187 The final compressed bitstream is generated into the
188 area starting at
189 (UChar*) (&((UInt16)s->arr2)[s->nblock])
190
191 These storage aliases are set up in bzCompressInit(),
192 except for the last one, which is arranged in
193 compressBlock().
194 */
195 UInt32* ptr = s->ptr;
196 UInt16* block = s->block;
197 UInt16* mtfv = s->mtfv;
172 198
173 makeMaps_e ( s ); 199 makeMaps_e ( s );
174 EOB = s->nInUse+1; 200 EOB = s->nInUse+1;
@@ -183,52 +209,61 @@ void generateMTFValues ( EState* s )
183 UChar ll_i; 209 UChar ll_i;
184 210
185 AssertD ( wr <= i, "generateMTFValues(1)" ); 211 AssertD ( wr <= i, "generateMTFValues(1)" );
186 j = s->zptr[i]-1; if (j < 0) j += s->nblock; 212 j = ptr[i]-1; if (j < 0) j += s->nblock;
187 ll_i = s->unseqToSeq[s->block[j]]; 213 ll_i = s->unseqToSeq[block[j] >> 8];
188 AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); 214 AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
189 215
190 j = 0; 216 tmp = yy[0];
191 tmp = yy[j]; 217 if (tmp == ll_i) {
192 while ( ll_i != tmp ) {
193 j++;
194 tmp2 = tmp;
195 tmp = yy[j];
196 yy[j] = tmp2;
197 };
198 yy[0] = tmp;
199
200 if (j == 0) {
201 zPend++; 218 zPend++;
202 } else { 219 } else {
220 tmp2 = tmp;
221 tmp = yy[1];
222 yy[1] = tmp2;
223 j = 1;
224 while ( ll_i != tmp ) {
225 j++;
226 tmp2 = tmp;
227 tmp = yy[j];
228 yy[j] = tmp2;
229 };
230 yy[0] = tmp;
231
203 if (zPend > 0) { 232 if (zPend > 0) {
204 zPend--; 233 zPend--;
205 while (True) { 234 while (True) {
206 switch (zPend % 2) { 235 if (zPend & 1) {
207 case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break; 236 mtfv[wr] = BZ_RUNB; wr++;
208 case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break; 237 s->mtfFreq[BZ_RUNB]++;
209 }; 238 } else {
239 mtfv[wr] = BZ_RUNA; wr++;
240 s->mtfFreq[BZ_RUNA]++;
241 }
210 if (zPend < 2) break; 242 if (zPend < 2) break;
211 zPend = (zPend - 2) / 2; 243 zPend = (zPend - 2) / 2;
212 }; 244 };
213 zPend = 0; 245 zPend = 0;
214 } 246 }
215 s->szptr[wr] = j+1; wr++; s->mtfFreq[j+1]++; 247 mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
216 } 248 }
217 } 249 }
218 250
219 if (zPend > 0) { 251 if (zPend > 0) {
220 zPend--; 252 zPend--;
221 while (True) { 253 while (True) {
222 switch (zPend % 2) { 254 if (zPend & 1) {
223 case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break; 255 mtfv[wr] = BZ_RUNB; wr++;
224 case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break; 256 s->mtfFreq[BZ_RUNB]++;
225 }; 257 } else {
258 mtfv[wr] = BZ_RUNA; wr++;
259 s->mtfFreq[BZ_RUNA]++;
260 }
226 if (zPend < 2) break; 261 if (zPend < 2) break;
227 zPend = (zPend - 2) / 2; 262 zPend = (zPend - 2) / 2;
228 }; 263 };
229 } 264 }
230 265
231 s->szptr[wr] = EOB; wr++; s->mtfFreq[EOB]++; 266 mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
232 267
233 s->nMTF = wr; 268 s->nMTF = wr;
234} 269}
@@ -259,6 +294,8 @@ void sendMTFValues ( EState* s )
259 UInt16 cost[BZ_N_GROUPS]; 294 UInt16 cost[BZ_N_GROUPS];
260 Int32 fave[BZ_N_GROUPS]; 295 Int32 fave[BZ_N_GROUPS];
261 296
297 UInt16* mtfv = s->mtfv;
298
262 if (s->verbosity >= 3) 299 if (s->verbosity >= 3)
263 VPrintf3( " %d in block, %d after MTF & 1-2 coding, " 300 VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
264 "%d+2 syms in use\n", 301 "%d+2 syms in use\n",
@@ -348,7 +385,7 @@ void sendMTFValues ( EState* s )
348 register UInt16 cost0, cost1, cost2, cost3, cost4, cost5; 385 register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
349 cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; 386 cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
350 for (i = gs; i <= ge; i++) { 387 for (i = gs; i <= ge; i++) {
351 UInt16 icv = s->szptr[i]; 388 UInt16 icv = mtfv[i];
352 cost0 += s->len[0][icv]; 389 cost0 += s->len[0][icv];
353 cost1 += s->len[1][icv]; 390 cost1 += s->len[1][icv];
354 cost2 += s->len[2][icv]; 391 cost2 += s->len[2][icv];
@@ -360,7 +397,7 @@ void sendMTFValues ( EState* s )
360 cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; 397 cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
361 } else { 398 } else {
362 for (i = gs; i <= ge; i++) { 399 for (i = gs; i <= ge; i++) {
363 UInt16 icv = s->szptr[i]; 400 UInt16 icv = mtfv[i];
364 for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; 401 for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
365 } 402 }
366 } 403 }
@@ -381,7 +418,7 @@ void sendMTFValues ( EState* s )
381 Increment the symbol frequencies for the selected table. 418 Increment the symbol frequencies for the selected table.
382 --*/ 419 --*/
383 for (i = gs; i <= ge; i++) 420 for (i = gs; i <= ge; i++)
384 s->rfreq[bt][ s->szptr[i] ]++; 421 s->rfreq[bt][ mtfv[i] ]++;
385 422
386 gs = ge+1; 423 gs = ge+1;
387 } 424 }
@@ -502,8 +539,8 @@ void sendMTFValues ( EState* s )
502 for (i = gs; i <= ge; i++) { 539 for (i = gs; i <= ge; i++) {
503 AssertH ( s->selector[selCtr] < nGroups, 3006 ); 540 AssertH ( s->selector[selCtr] < nGroups, 3006 );
504 bsW ( s, 541 bsW ( s,
505 s->len [s->selector[selCtr]] [s->szptr[i]], 542 s->len [s->selector[selCtr]] [mtfv[i]],
506 s->code [s->selector[selCtr]] [s->szptr[i]] ); 543 s->code [s->selector[selCtr]] [mtfv[i]] );
507 } 544 }
508 545
509 gs = ge+1; 546 gs = ge+1;
@@ -534,13 +571,15 @@ void compressBlock ( EState* s, Bool is_last_block )
534 blockSort ( s ); 571 blockSort ( s );
535 } 572 }
536 573
574 s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]);
575
537 /*-- If this is the first block, create the stream header. --*/ 576 /*-- If this is the first block, create the stream header. --*/
538 if (s->blockNo == 1) { 577 if (s->blockNo == 1) {
539 bsInitWrite ( s ); 578 bsInitWrite ( s );
540 bsPutUChar ( s, 'B' ); 579 bsPutUChar ( s, 'B' );
541 bsPutUChar ( s, 'Z' ); 580 bsPutUChar ( s, 'Z' );
542 bsPutUChar ( s, 'h' ); 581 bsPutUChar ( s, 'h' );
543 bsPutUChar ( s, '0' + s->blockSize100k ); 582 bsPutUChar ( s, (UChar)('0' + s->blockSize100k) );
544 } 583 }
545 584
546 if (s->nblock > 0) { 585 if (s->nblock > 0) {
@@ -552,11 +591,16 @@ void compressBlock ( EState* s, Bool is_last_block )
552 /*-- Now the block's CRC, so it is in a known place. --*/ 591 /*-- Now the block's CRC, so it is in a known place. --*/
553 bsPutUInt32 ( s, s->blockCRC ); 592 bsPutUInt32 ( s, s->blockCRC );
554 593
555 /*-- Now a single bit indicating randomisation. --*/ 594 /*--
556 if (s->blockRandomised) { 595 Now a single bit indicating (non-)randomisation.
557 bsW(s,1,1); s->nBlocksRandomised++; 596 As of version 0.9.5, we use a better sorting algorithm
558 } else 597 which makes randomisation unnecessary. So always set
559 bsW(s,1,0); 598 the randomised bit to 'no'. Of course, the decoder
599 still needs to be able to handle randomised blocks
600 so as to maintain backwards compatibility with
601 older versions of bzip2.
602 --*/
603 bsW(s,1,0);
560 604
561 bsW ( s, 24, s->origPtr ); 605 bsW ( s, 24, s->origPtr );
562 generateMTFValues ( s ); 606 generateMTFValues ( s );
@@ -567,11 +611,6 @@ void compressBlock ( EState* s, Bool is_last_block )
567 /*-- If this is the last block, add the stream trailer. --*/ 611 /*-- If this is the last block, add the stream trailer. --*/
568 if (is_last_block) { 612 if (is_last_block) {
569 613
570 if (s->verbosity >= 2 && s->nBlocksRandomised > 0)
571 VPrintf2 ( " %d block%s needed randomisation\n",
572 s->nBlocksRandomised,
573 s->nBlocksRandomised == 1 ? "" : "s" );
574
575 bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); 614 bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
576 bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); 615 bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
577 bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); 616 bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );