diff options
Diffstat (limited to 'compress.c')
-rw-r--r-- | compress.c | 193 |
1 files changed, 140 insertions, 53 deletions
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-1999 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -43,7 +43,7 @@ | |||
43 | 43 | ||
44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
45 | jseward@acm.org | 45 | jseward@acm.org |
46 | bzip2/libbzip2 version 0.9.5 of 24 May 1999 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
47 | 47 | ||
48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
49 | Mike Burrows | 49 | Mike Burrows |
@@ -78,7 +78,7 @@ | |||
78 | /*---------------------------------------------------*/ | 78 | /*---------------------------------------------------*/ |
79 | 79 | ||
80 | /*---------------------------------------------------*/ | 80 | /*---------------------------------------------------*/ |
81 | void bsInitWrite ( EState* s ) | 81 | void BZ2_bsInitWrite ( EState* s ) |
82 | { | 82 | { |
83 | s->bsLive = 0; | 83 | s->bsLive = 0; |
84 | s->bsBuff = 0; | 84 | s->bsBuff = 0; |
@@ -113,6 +113,7 @@ void bsFinishWrite ( EState* s ) | |||
113 | 113 | ||
114 | /*---------------------------------------------------*/ | 114 | /*---------------------------------------------------*/ |
115 | static | 115 | static |
116 | __inline__ | ||
116 | void bsW ( EState* s, Int32 n, UInt32 v ) | 117 | void bsW ( EState* s, Int32 n, UInt32 v ) |
117 | { | 118 | { |
118 | bsNEEDW ( n ); | 119 | bsNEEDW ( n ); |
@@ -164,8 +165,6 @@ void generateMTFValues ( EState* s ) | |||
164 | { | 165 | { |
165 | UChar yy[256]; | 166 | UChar yy[256]; |
166 | Int32 i, j; | 167 | Int32 i, j; |
167 | UChar tmp; | ||
168 | UChar tmp2; | ||
169 | Int32 zPend; | 168 | Int32 zPend; |
170 | Int32 wr; | 169 | Int32 wr; |
171 | Int32 EOB; | 170 | Int32 EOB; |
@@ -174,7 +173,7 @@ void generateMTFValues ( EState* s ) | |||
174 | After sorting (eg, here), | 173 | After sorting (eg, here), |
175 | s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, | 174 | s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, |
176 | and | 175 | and |
177 | ((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8] | 176 | ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] |
178 | holds the original block data. | 177 | holds the original block data. |
179 | 178 | ||
180 | The first thing to do is generate the MTF values, | 179 | The first thing to do is generate the MTF values, |
@@ -186,14 +185,14 @@ void generateMTFValues ( EState* s ) | |||
186 | 185 | ||
187 | The final compressed bitstream is generated into the | 186 | The final compressed bitstream is generated into the |
188 | area starting at | 187 | area starting at |
189 | (UChar*) (&((UInt16)s->arr2)[s->nblock]) | 188 | (UChar*) (&((UChar*)s->arr2)[s->nblock]) |
190 | 189 | ||
191 | These storage aliases are set up in bzCompressInit(), | 190 | These storage aliases are set up in bzCompressInit(), |
192 | except for the last one, which is arranged in | 191 | except for the last one, which is arranged in |
193 | compressBlock(). | 192 | compressBlock(). |
194 | */ | 193 | */ |
195 | UInt32* ptr = s->ptr; | 194 | UInt32* ptr = s->ptr; |
196 | UInt16* block = s->block; | 195 | UChar* block = s->block; |
197 | UInt16* mtfv = s->mtfv; | 196 | UInt16* mtfv = s->mtfv; |
198 | 197 | ||
199 | makeMaps_e ( s ); | 198 | makeMaps_e ( s ); |
@@ -207,27 +206,14 @@ void generateMTFValues ( EState* s ) | |||
207 | 206 | ||
208 | for (i = 0; i < s->nblock; i++) { | 207 | for (i = 0; i < s->nblock; i++) { |
209 | UChar ll_i; | 208 | UChar ll_i; |
210 | |||
211 | AssertD ( wr <= i, "generateMTFValues(1)" ); | 209 | AssertD ( wr <= i, "generateMTFValues(1)" ); |
212 | j = ptr[i]-1; if (j < 0) j += s->nblock; | 210 | j = ptr[i]-1; if (j < 0) j += s->nblock; |
213 | ll_i = s->unseqToSeq[block[j] >> 8]; | 211 | ll_i = s->unseqToSeq[block[j]]; |
214 | AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); | 212 | AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); |
215 | 213 | ||
216 | tmp = yy[0]; | 214 | if (yy[0] == ll_i) { |
217 | if (tmp == ll_i) { | ||
218 | zPend++; | 215 | zPend++; |
219 | } else { | 216 | } else { |
220 | tmp2 = tmp; | ||
221 | tmp = yy[1]; | ||
222 | yy[1] = tmp2; | ||
223 | j = 1; | ||
224 | while ( ll_i != tmp ) { | ||
225 | j++; | ||
226 | tmp2 = tmp; | ||
227 | tmp = yy[j]; | ||
228 | yy[j] = tmp2; | ||
229 | }; | ||
230 | yy[0] = tmp; | ||
231 | 217 | ||
232 | if (zPend > 0) { | 218 | if (zPend > 0) { |
233 | zPend--; | 219 | zPend--; |
@@ -244,7 +230,26 @@ void generateMTFValues ( EState* s ) | |||
244 | }; | 230 | }; |
245 | zPend = 0; | 231 | zPend = 0; |
246 | } | 232 | } |
247 | mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; | 233 | { |
234 | register UChar rtmp; | ||
235 | register UChar* ryy_j; | ||
236 | register UChar rll_i; | ||
237 | rtmp = yy[1]; | ||
238 | yy[1] = yy[0]; | ||
239 | ryy_j = &(yy[1]); | ||
240 | rll_i = ll_i; | ||
241 | while ( rll_i != rtmp ) { | ||
242 | register UChar rtmp2; | ||
243 | ryy_j++; | ||
244 | rtmp2 = rtmp; | ||
245 | rtmp = *ryy_j; | ||
246 | *ryy_j = rtmp2; | ||
247 | }; | ||
248 | yy[0] = rtmp; | ||
249 | j = ryy_j - &(yy[0]); | ||
250 | mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; | ||
251 | } | ||
252 | |||
248 | } | 253 | } |
249 | } | 254 | } |
250 | 255 | ||
@@ -261,6 +266,7 @@ void generateMTFValues ( EState* s ) | |||
261 | if (zPend < 2) break; | 266 | if (zPend < 2) break; |
262 | zPend = (zPend - 2) / 2; | 267 | zPend = (zPend - 2) / 2; |
263 | }; | 268 | }; |
269 | zPend = 0; | ||
264 | } | 270 | } |
265 | 271 | ||
266 | mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; | 272 | mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; |
@@ -365,6 +371,18 @@ void sendMTFValues ( EState* s ) | |||
365 | for (v = 0; v < alphaSize; v++) | 371 | for (v = 0; v < alphaSize; v++) |
366 | s->rfreq[t][v] = 0; | 372 | s->rfreq[t][v] = 0; |
367 | 373 | ||
374 | /*--- | ||
375 | Set up an auxiliary length table which is used to fast-track | ||
376 | the common case (nGroups == 6). | ||
377 | ---*/ | ||
378 | if (nGroups == 6) { | ||
379 | for (v = 0; v < alphaSize; v++) { | ||
380 | s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; | ||
381 | s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; | ||
382 | s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; | ||
383 | } | ||
384 | } | ||
385 | |||
368 | nSelectors = 0; | 386 | nSelectors = 0; |
369 | totc = 0; | 387 | totc = 0; |
370 | gs = 0; | 388 | gs = 0; |
@@ -381,21 +399,37 @@ void sendMTFValues ( EState* s ) | |||
381 | --*/ | 399 | --*/ |
382 | for (t = 0; t < nGroups; t++) cost[t] = 0; | 400 | for (t = 0; t < nGroups; t++) cost[t] = 0; |
383 | 401 | ||
384 | if (nGroups == 6) { | 402 | if (nGroups == 6 && 50 == ge-gs+1) { |
385 | register UInt16 cost0, cost1, cost2, cost3, cost4, cost5; | 403 | /*--- fast track the common case ---*/ |
386 | cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; | 404 | register UInt32 cost01, cost23, cost45; |
387 | for (i = gs; i <= ge; i++) { | 405 | register UInt16 icv; |
388 | UInt16 icv = mtfv[i]; | 406 | cost01 = cost23 = cost45 = 0; |
389 | cost0 += s->len[0][icv]; | 407 | |
390 | cost1 += s->len[1][icv]; | 408 | # define BZ_ITER(nn) \ |
391 | cost2 += s->len[2][icv]; | 409 | icv = mtfv[gs+(nn)]; \ |
392 | cost3 += s->len[3][icv]; | 410 | cost01 += s->len_pack[icv][0]; \ |
393 | cost4 += s->len[4][icv]; | 411 | cost23 += s->len_pack[icv][1]; \ |
394 | cost5 += s->len[5][icv]; | 412 | cost45 += s->len_pack[icv][2]; \ |
395 | } | 413 | |
396 | cost[0] = cost0; cost[1] = cost1; cost[2] = cost2; | 414 | BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); |
397 | cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; | 415 | BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); |
416 | BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); | ||
417 | BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); | ||
418 | BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); | ||
419 | BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); | ||
420 | BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); | ||
421 | BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); | ||
422 | BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); | ||
423 | BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); | ||
424 | |||
425 | # undef BZ_ITER | ||
426 | |||
427 | cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; | ||
428 | cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; | ||
429 | cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; | ||
430 | |||
398 | } else { | 431 | } else { |
432 | /*--- slow version which correctly handles all situations ---*/ | ||
399 | for (i = gs; i <= ge; i++) { | 433 | for (i = gs; i <= ge; i++) { |
400 | UInt16 icv = mtfv[i]; | 434 | UInt16 icv = mtfv[i]; |
401 | for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; | 435 | for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; |
@@ -417,8 +451,29 @@ void sendMTFValues ( EState* s ) | |||
417 | /*-- | 451 | /*-- |
418 | Increment the symbol frequencies for the selected table. | 452 | Increment the symbol frequencies for the selected table. |
419 | --*/ | 453 | --*/ |
420 | for (i = gs; i <= ge; i++) | 454 | if (nGroups == 6 && 50 == ge-gs+1) { |
421 | s->rfreq[bt][ mtfv[i] ]++; | 455 | /*--- fast track the common case ---*/ |
456 | |||
457 | # define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ | ||
458 | |||
459 | BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); | ||
460 | BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); | ||
461 | BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); | ||
462 | BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); | ||
463 | BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); | ||
464 | BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); | ||
465 | BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); | ||
466 | BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); | ||
467 | BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); | ||
468 | BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); | ||
469 | |||
470 | # undef BZ_ITUR | ||
471 | |||
472 | } else { | ||
473 | /*--- slow version which correctly handles all situations ---*/ | ||
474 | for (i = gs; i <= ge; i++) | ||
475 | s->rfreq[bt][ mtfv[i] ]++; | ||
476 | } | ||
422 | 477 | ||
423 | gs = ge+1; | 478 | gs = ge+1; |
424 | } | 479 | } |
@@ -434,8 +489,8 @@ void sendMTFValues ( EState* s ) | |||
434 | Recompute the tables based on the accumulated frequencies. | 489 | Recompute the tables based on the accumulated frequencies. |
435 | --*/ | 490 | --*/ |
436 | for (t = 0; t < nGroups; t++) | 491 | for (t = 0; t < nGroups; t++) |
437 | hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), | 492 | BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), |
438 | alphaSize, 20 ); | 493 | alphaSize, 20 ); |
439 | } | 494 | } |
440 | 495 | ||
441 | 496 | ||
@@ -474,8 +529,8 @@ void sendMTFValues ( EState* s ) | |||
474 | } | 529 | } |
475 | AssertH ( !(maxLen > 20), 3004 ); | 530 | AssertH ( !(maxLen > 20), 3004 ); |
476 | AssertH ( !(minLen < 1), 3005 ); | 531 | AssertH ( !(minLen < 1), 3005 ); |
477 | hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), | 532 | BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), |
478 | minLen, maxLen, alphaSize ); | 533 | minLen, maxLen, alphaSize ); |
479 | } | 534 | } |
480 | 535 | ||
481 | /*--- Transmit the mapping table. ---*/ | 536 | /*--- Transmit the mapping table. ---*/ |
@@ -536,13 +591,45 @@ void sendMTFValues ( EState* s ) | |||
536 | if (gs >= s->nMTF) break; | 591 | if (gs >= s->nMTF) break; |
537 | ge = gs + BZ_G_SIZE - 1; | 592 | ge = gs + BZ_G_SIZE - 1; |
538 | if (ge >= s->nMTF) ge = s->nMTF-1; | 593 | if (ge >= s->nMTF) ge = s->nMTF-1; |
539 | for (i = gs; i <= ge; i++) { | 594 | AssertH ( s->selector[selCtr] < nGroups, 3006 ); |
540 | AssertH ( s->selector[selCtr] < nGroups, 3006 ); | 595 | |
541 | bsW ( s, | 596 | if (nGroups == 6 && 50 == ge-gs+1) { |
542 | s->len [s->selector[selCtr]] [mtfv[i]], | 597 | /*--- fast track the common case ---*/ |
543 | s->code [s->selector[selCtr]] [mtfv[i]] ); | 598 | UInt16 mtfv_i; |
599 | UChar* s_len_sel_selCtr | ||
600 | = &(s->len[s->selector[selCtr]][0]); | ||
601 | Int32* s_code_sel_selCtr | ||
602 | = &(s->code[s->selector[selCtr]][0]); | ||
603 | |||
604 | # define BZ_ITAH(nn) \ | ||
605 | mtfv_i = mtfv[gs+(nn)]; \ | ||
606 | bsW ( s, \ | ||
607 | s_len_sel_selCtr[mtfv_i], \ | ||
608 | s_code_sel_selCtr[mtfv_i] ) | ||
609 | |||
610 | BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); | ||
611 | BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); | ||
612 | BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); | ||
613 | BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); | ||
614 | BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); | ||
615 | BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); | ||
616 | BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); | ||
617 | BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); | ||
618 | BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); | ||
619 | BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); | ||
620 | |||
621 | # undef BZ_ITAH | ||
622 | |||
623 | } else { | ||
624 | /*--- slow version which correctly handles all situations ---*/ | ||
625 | for (i = gs; i <= ge; i++) { | ||
626 | bsW ( s, | ||
627 | s->len [s->selector[selCtr]] [mtfv[i]], | ||
628 | s->code [s->selector[selCtr]] [mtfv[i]] ); | ||
629 | } | ||
544 | } | 630 | } |
545 | 631 | ||
632 | |||
546 | gs = ge+1; | 633 | gs = ge+1; |
547 | selCtr++; | 634 | selCtr++; |
548 | } | 635 | } |
@@ -554,7 +641,7 @@ void sendMTFValues ( EState* s ) | |||
554 | 641 | ||
555 | 642 | ||
556 | /*---------------------------------------------------*/ | 643 | /*---------------------------------------------------*/ |
557 | void compressBlock ( EState* s, Bool is_last_block ) | 644 | void BZ2_compressBlock ( EState* s, Bool is_last_block ) |
558 | { | 645 | { |
559 | if (s->nblock > 0) { | 646 | if (s->nblock > 0) { |
560 | 647 | ||
@@ -568,14 +655,14 @@ void compressBlock ( EState* s, Bool is_last_block ) | |||
568 | "combined CRC = 0x%8x, size = %d\n", | 655 | "combined CRC = 0x%8x, size = %d\n", |
569 | s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); | 656 | s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); |
570 | 657 | ||
571 | blockSort ( s ); | 658 | BZ2_blockSort ( s ); |
572 | } | 659 | } |
573 | 660 | ||
574 | s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]); | 661 | s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); |
575 | 662 | ||
576 | /*-- If this is the first block, create the stream header. --*/ | 663 | /*-- If this is the first block, create the stream header. --*/ |
577 | if (s->blockNo == 1) { | 664 | if (s->blockNo == 1) { |
578 | bsInitWrite ( s ); | 665 | BZ2_bsInitWrite ( s ); |
579 | bsPutUChar ( s, 'B' ); | 666 | bsPutUChar ( s, 'B' ); |
580 | bsPutUChar ( s, 'Z' ); | 667 | bsPutUChar ( s, 'Z' ); |
581 | bsPutUChar ( s, 'h' ); | 668 | bsPutUChar ( s, 'h' ); |