aboutsummaryrefslogtreecommitdiff
path: root/compress.c
diff options
context:
space:
mode:
authorJulian Seward <jseward@acm.org>1998-08-23 22:13:13 +0200
committerJulian Seward <jseward@acm.org>1998-08-23 22:13:13 +0200
commit977101ad5f833f5c0a574bfeea408e5301a6b052 (patch)
treefc1e8fed202869c116cbf6b8c362456042494a0a /compress.c
parent1eb67a9d8f7f05ae310bc9ef297d176f3a3f8a37 (diff)
downloadbzip2-977101ad5f833f5c0a574bfeea408e5301a6b052.tar.gz
bzip2-977101ad5f833f5c0a574bfeea408e5301a6b052.tar.bz2
bzip2-977101ad5f833f5c0a574bfeea408e5301a6b052.zip
bzip2-0.9.0cbzip2-0.9.0c
Diffstat (limited to 'compress.c')
-rw-r--r--compress.c588
1 files changed, 588 insertions, 0 deletions
diff --git a/compress.c b/compress.c
new file mode 100644
index 0000000..23abd43
--- /dev/null
+++ b/compress.c
@@ -0,0 +1,588 @@
1
2/*-------------------------------------------------------------*/
3/*--- Compression machinery (not incl block sorting) ---*/
4/*--- compress.c ---*/
5/*-------------------------------------------------------------*/
6
7/*--
8 This file is a part of bzip2 and/or libbzip2, a program and
9 library for lossless, block-sorting data compression.
10
11 Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
12
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions
15 are met:
16
17 1. Redistributions of source code must retain the above copyright
18 notice, this list of conditions and the following disclaimer.
19
20 2. The origin of this software must not be misrepresented; you must
21 not claim that you wrote the original software. If you use this
22 software in a product, an acknowledgment in the product
23 documentation would be appreciated but is not required.
24
25 3. Altered source versions must be plainly marked as such, and must
26 not be misrepresented as being the original software.
27
28 4. The name of the author may not be used to endorse or promote
29 products derived from this software without specific prior written
30 permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43
44 Julian Seward, Guildford, Surrey, UK.
45 jseward@acm.org
46 bzip2/libbzip2 version 0.9.0 of 28 June 1998
47
48 This program is based on (at least) the work of:
49 Mike Burrows
50 David Wheeler
51 Peter Fenwick
52 Alistair Moffat
53 Radford Neal
54 Ian H. Witten
55 Robert Sedgewick
56 Jon L. Bentley
57
58 For more information on these sources, see the manual.
59--*/
60
61/*--
62 CHANGES
63 ~~~~~~~
64 0.9.0 -- original version.
65
66 0.9.0a/b -- no changes in this file.
67
68 0.9.0c
69 * changed setting of nGroups in sendMTFValues() so as to
70 do a bit better on small files
71--*/
72
73#include "bzlib_private.h"
74
75
76/*---------------------------------------------------*/
77/*--- Bit stream I/O ---*/
78/*---------------------------------------------------*/
79
80/*---------------------------------------------------*/
81void bsInitWrite ( EState* s )
82{
83 s->bsLive = 0;
84 s->bsBuff = 0;
85}
86
87
88/*---------------------------------------------------*/
89static
90void bsFinishWrite ( EState* s )
91{
92 while (s->bsLive > 0) {
93 ((UChar*)(s->quadrant))[s->numZ] = (UChar)(s->bsBuff >> 24);
94 s->numZ++;
95 s->bsBuff <<= 8;
96 s->bsLive -= 8;
97 }
98}
99
100
101/*---------------------------------------------------*/
102#define bsNEEDW(nz) \
103{ \
104 while (s->bsLive >= 8) { \
105 ((UChar*)(s->quadrant))[s->numZ] \
106 = (UChar)(s->bsBuff >> 24); \
107 s->numZ++; \
108 s->bsBuff <<= 8; \
109 s->bsLive -= 8; \
110 } \
111}
112
113
114/*---------------------------------------------------*/
115static
116void bsW ( EState* s, Int32 n, UInt32 v )
117{
118 bsNEEDW ( n );
119 s->bsBuff |= (v << (32 - s->bsLive - n));
120 s->bsLive += n;
121}
122
123
124/*---------------------------------------------------*/
125static
126void bsPutUInt32 ( EState* s, UInt32 u )
127{
128 bsW ( s, 8, (u >> 24) & 0xffL );
129 bsW ( s, 8, (u >> 16) & 0xffL );
130 bsW ( s, 8, (u >> 8) & 0xffL );
131 bsW ( s, 8, u & 0xffL );
132}
133
134
135/*---------------------------------------------------*/
136static
137void bsPutUChar ( EState* s, UChar c )
138{
139 bsW( s, 8, (UInt32)c );
140}
141
142
143/*---------------------------------------------------*/
144/*--- The back end proper ---*/
145/*---------------------------------------------------*/
146
147/*---------------------------------------------------*/
148static
149void makeMaps_e ( EState* s )
150{
151 Int32 i;
152 s->nInUse = 0;
153 for (i = 0; i < 256; i++)
154 if (s->inUse[i]) {
155 s->unseqToSeq[i] = s->nInUse;
156 s->nInUse++;
157 }
158}
159
160
161/*---------------------------------------------------*/
162static
163void generateMTFValues ( EState* s )
164{
165 UChar yy[256];
166 Int32 i, j;
167 UChar tmp;
168 UChar tmp2;
169 Int32 zPend;
170 Int32 wr;
171 Int32 EOB;
172
173 makeMaps_e ( s );
174 EOB = s->nInUse+1;
175
176 for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
177
178 wr = 0;
179 zPend = 0;
180 for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
181
182 for (i = 0; i < s->nblock; i++) {
183 UChar ll_i;
184
185 AssertD ( wr <= i, "generateMTFValues(1)" );
186 j = s->zptr[i]-1; if (j < 0) j += s->nblock;
187 ll_i = s->unseqToSeq[s->block[j]];
188 AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
189
190 j = 0;
191 tmp = yy[j];
192 while ( ll_i != tmp ) {
193 j++;
194 tmp2 = tmp;
195 tmp = yy[j];
196 yy[j] = tmp2;
197 };
198 yy[0] = tmp;
199
200 if (j == 0) {
201 zPend++;
202 } else {
203 if (zPend > 0) {
204 zPend--;
205 while (True) {
206 switch (zPend % 2) {
207 case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break;
208 case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break;
209 };
210 if (zPend < 2) break;
211 zPend = (zPend - 2) / 2;
212 };
213 zPend = 0;
214 }
215 s->szptr[wr] = j+1; wr++; s->mtfFreq[j+1]++;
216 }
217 }
218
219 if (zPend > 0) {
220 zPend--;
221 while (True) {
222 switch (zPend % 2) {
223 case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break;
224 case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break;
225 };
226 if (zPend < 2) break;
227 zPend = (zPend - 2) / 2;
228 };
229 }
230
231 s->szptr[wr] = EOB; wr++; s->mtfFreq[EOB]++;
232
233 s->nMTF = wr;
234}
235
236
237/*---------------------------------------------------*/
238#define BZ_LESSER_ICOST 0
239#define BZ_GREATER_ICOST 15
240
241static
242void sendMTFValues ( EState* s )
243{
244 Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
245 Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
246 Int32 nGroups, nBytes;
247
248 /*--
249 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
250 is a global since the decoder also needs it.
251
252 Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
253 Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
254 are also globals only used in this proc.
255 Made global to keep stack frame size small.
256 --*/
257
258
259 UInt16 cost[BZ_N_GROUPS];
260 Int32 fave[BZ_N_GROUPS];
261
262 if (s->verbosity >= 3)
263 VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
264 "%d+2 syms in use\n",
265 s->nblock, s->nMTF, s->nInUse );
266
267 alphaSize = s->nInUse+2;
268 for (t = 0; t < BZ_N_GROUPS; t++)
269 for (v = 0; v < alphaSize; v++)
270 s->len[t][v] = BZ_GREATER_ICOST;
271
272 /*--- Decide how many coding tables to use ---*/
273 AssertH ( s->nMTF > 0, 3001 );
274 if (s->nMTF < 200) nGroups = 2; else
275 if (s->nMTF < 600) nGroups = 3; else
276 if (s->nMTF < 1200) nGroups = 4; else
277 if (s->nMTF < 2400) nGroups = 5; else
278 nGroups = 6;
279
280 /*--- Generate an initial set of coding tables ---*/
281 {
282 Int32 nPart, remF, tFreq, aFreq;
283
284 nPart = nGroups;
285 remF = s->nMTF;
286 gs = 0;
287 while (nPart > 0) {
288 tFreq = remF / nPart;
289 ge = gs-1;
290 aFreq = 0;
291 while (aFreq < tFreq && ge < alphaSize-1) {
292 ge++;
293 aFreq += s->mtfFreq[ge];
294 }
295
296 if (ge > gs
297 && nPart != nGroups && nPart != 1
298 && ((nGroups-nPart) % 2 == 1)) {
299 aFreq -= s->mtfFreq[ge];
300 ge--;
301 }
302
303 if (s->verbosity >= 3)
304 VPrintf5( " initial group %d, [%d .. %d], "
305 "has %d syms (%4.1f%%)\n",
306 nPart, gs, ge, aFreq,
307 (100.0 * (float)aFreq) / (float)(s->nMTF) );
308
309 for (v = 0; v < alphaSize; v++)
310 if (v >= gs && v <= ge)
311 s->len[nPart-1][v] = BZ_LESSER_ICOST; else
312 s->len[nPart-1][v] = BZ_GREATER_ICOST;
313
314 nPart--;
315 gs = ge+1;
316 remF -= aFreq;
317 }
318 }
319
320 /*---
321 Iterate up to BZ_N_ITERS times to improve the tables.
322 ---*/
323 for (iter = 0; iter < BZ_N_ITERS; iter++) {
324
325 for (t = 0; t < nGroups; t++) fave[t] = 0;
326
327 for (t = 0; t < nGroups; t++)
328 for (v = 0; v < alphaSize; v++)
329 s->rfreq[t][v] = 0;
330
331 nSelectors = 0;
332 totc = 0;
333 gs = 0;
334 while (True) {
335
336 /*--- Set group start & end marks. --*/
337 if (gs >= s->nMTF) break;
338 ge = gs + BZ_G_SIZE - 1;
339 if (ge >= s->nMTF) ge = s->nMTF-1;
340
341 /*--
342 Calculate the cost of this group as coded
343 by each of the coding tables.
344 --*/
345 for (t = 0; t < nGroups; t++) cost[t] = 0;
346
347 if (nGroups == 6) {
348 register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
349 cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
350 for (i = gs; i <= ge; i++) {
351 UInt16 icv = s->szptr[i];
352 cost0 += s->len[0][icv];
353 cost1 += s->len[1][icv];
354 cost2 += s->len[2][icv];
355 cost3 += s->len[3][icv];
356 cost4 += s->len[4][icv];
357 cost5 += s->len[5][icv];
358 }
359 cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
360 cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
361 } else {
362 for (i = gs; i <= ge; i++) {
363 UInt16 icv = s->szptr[i];
364 for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
365 }
366 }
367
368 /*--
369 Find the coding table which is best for this group,
370 and record its identity in the selector table.
371 --*/
372 bc = 999999999; bt = -1;
373 for (t = 0; t < nGroups; t++)
374 if (cost[t] < bc) { bc = cost[t]; bt = t; };
375 totc += bc;
376 fave[bt]++;
377 s->selector[nSelectors] = bt;
378 nSelectors++;
379
380 /*--
381 Increment the symbol frequencies for the selected table.
382 --*/
383 for (i = gs; i <= ge; i++)
384 s->rfreq[bt][ s->szptr[i] ]++;
385
386 gs = ge+1;
387 }
388 if (s->verbosity >= 3) {
389 VPrintf2 ( " pass %d: size is %d, grp uses are ",
390 iter+1, totc/8 );
391 for (t = 0; t < nGroups; t++)
392 VPrintf1 ( "%d ", fave[t] );
393 VPrintf0 ( "\n" );
394 }
395
396 /*--
397 Recompute the tables based on the accumulated frequencies.
398 --*/
399 for (t = 0; t < nGroups; t++)
400 hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
401 alphaSize, 20 );
402 }
403
404
405 AssertH( nGroups < 8, 3002 );
406 AssertH( nSelectors < 32768 &&
407 nSelectors <= (2 + (900000 / BZ_G_SIZE)),
408 3003 );
409
410
411 /*--- Compute MTF values for the selectors. ---*/
412 {
413 UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
414 for (i = 0; i < nGroups; i++) pos[i] = i;
415 for (i = 0; i < nSelectors; i++) {
416 ll_i = s->selector[i];
417 j = 0;
418 tmp = pos[j];
419 while ( ll_i != tmp ) {
420 j++;
421 tmp2 = tmp;
422 tmp = pos[j];
423 pos[j] = tmp2;
424 };
425 pos[0] = tmp;
426 s->selectorMtf[i] = j;
427 }
428 };
429
430 /*--- Assign actual codes for the tables. --*/
431 for (t = 0; t < nGroups; t++) {
432 minLen = 32;
433 maxLen = 0;
434 for (i = 0; i < alphaSize; i++) {
435 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
436 if (s->len[t][i] < minLen) minLen = s->len[t][i];
437 }
438 AssertH ( !(maxLen > 20), 3004 );
439 AssertH ( !(minLen < 1), 3005 );
440 hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
441 minLen, maxLen, alphaSize );
442 }
443
444 /*--- Transmit the mapping table. ---*/
445 {
446 Bool inUse16[16];
447 for (i = 0; i < 16; i++) {
448 inUse16[i] = False;
449 for (j = 0; j < 16; j++)
450 if (s->inUse[i * 16 + j]) inUse16[i] = True;
451 }
452
453 nBytes = s->numZ;
454 for (i = 0; i < 16; i++)
455 if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
456
457 for (i = 0; i < 16; i++)
458 if (inUse16[i])
459 for (j = 0; j < 16; j++) {
460 if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
461 }
462
463 if (s->verbosity >= 3)
464 VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
465 }
466
467 /*--- Now the selectors. ---*/
468 nBytes = s->numZ;
469 bsW ( s, 3, nGroups );
470 bsW ( s, 15, nSelectors );
471 for (i = 0; i < nSelectors; i++) {
472 for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
473 bsW(s,1,0);
474 }
475 if (s->verbosity >= 3)
476 VPrintf1( "selectors %d, ", s->numZ-nBytes );
477
478 /*--- Now the coding tables. ---*/
479 nBytes = s->numZ;
480
481 for (t = 0; t < nGroups; t++) {
482 Int32 curr = s->len[t][0];
483 bsW ( s, 5, curr );
484 for (i = 0; i < alphaSize; i++) {
485 while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
486 while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
487 bsW ( s, 1, 0 );
488 }
489 }
490
491 if (s->verbosity >= 3)
492 VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
493
494 /*--- And finally, the block data proper ---*/
495 nBytes = s->numZ;
496 selCtr = 0;
497 gs = 0;
498 while (True) {
499 if (gs >= s->nMTF) break;
500 ge = gs + BZ_G_SIZE - 1;
501 if (ge >= s->nMTF) ge = s->nMTF-1;
502 for (i = gs; i <= ge; i++) {
503 AssertH ( s->selector[selCtr] < nGroups, 3006 );
504 bsW ( s,
505 s->len [s->selector[selCtr]] [s->szptr[i]],
506 s->code [s->selector[selCtr]] [s->szptr[i]] );
507 }
508
509 gs = ge+1;
510 selCtr++;
511 }
512 AssertH( selCtr == nSelectors, 3007 );
513
514 if (s->verbosity >= 3)
515 VPrintf1( "codes %d\n", s->numZ-nBytes );
516}
517
518
519/*---------------------------------------------------*/
520void compressBlock ( EState* s, Bool is_last_block )
521{
522 if (s->nblock > 0) {
523
524 BZ_FINALISE_CRC ( s->blockCRC );
525 s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
526 s->combinedCRC ^= s->blockCRC;
527 if (s->blockNo > 1) s->numZ = 0;
528
529 if (s->verbosity >= 2)
530 VPrintf4( " block %d: crc = 0x%8x, "
531 "combined CRC = 0x%8x, size = %d\n",
532 s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
533
534 blockSort ( s );
535 }
536
537 /*-- If this is the first block, create the stream header. --*/
538 if (s->blockNo == 1) {
539 bsInitWrite ( s );
540 bsPutUChar ( s, 'B' );
541 bsPutUChar ( s, 'Z' );
542 bsPutUChar ( s, 'h' );
543 bsPutUChar ( s, '0' + s->blockSize100k );
544 }
545
546 if (s->nblock > 0) {
547
548 bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
549 bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
550 bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
551
552 /*-- Now the block's CRC, so it is in a known place. --*/
553 bsPutUInt32 ( s, s->blockCRC );
554
555 /*-- Now a single bit indicating randomisation. --*/
556 if (s->blockRandomised) {
557 bsW(s,1,1); s->nBlocksRandomised++;
558 } else
559 bsW(s,1,0);
560
561 bsW ( s, 24, s->origPtr );
562 generateMTFValues ( s );
563 sendMTFValues ( s );
564 }
565
566
567 /*-- If this is the last block, add the stream trailer. --*/
568 if (is_last_block) {
569
570 if (s->verbosity >= 2 && s->nBlocksRandomised > 0)
571 VPrintf2 ( " %d block%s needed randomisation\n",
572 s->nBlocksRandomised,
573 s->nBlocksRandomised == 1 ? "" : "s" );
574
575 bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
576 bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
577 bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
578 bsPutUInt32 ( s, s->combinedCRC );
579 if (s->verbosity >= 2)
580 VPrintf1( " final combined CRC = 0x%x\n ", s->combinedCRC );
581 bsFinishWrite ( s );
582 }
583}
584
585
586/*-------------------------------------------------------------*/
587/*--- end compress.c ---*/
588/*-------------------------------------------------------------*/