aboutsummaryrefslogtreecommitdiff
path: root/bzip2.c
diff options
context:
space:
mode:
authorJulian Seward <jseward@acm.org>1998-08-23 22:13:13 +0200
committerJulian Seward <jseward@acm.org>1998-08-23 22:13:13 +0200
commit977101ad5f833f5c0a574bfeea408e5301a6b052 (patch)
treefc1e8fed202869c116cbf6b8c362456042494a0a /bzip2.c
parent1eb67a9d8f7f05ae310bc9ef297d176f3a3f8a37 (diff)
downloadbzip2-977101ad5f833f5c0a574bfeea408e5301a6b052.tar.gz
bzip2-977101ad5f833f5c0a574bfeea408e5301a6b052.tar.bz2
bzip2-977101ad5f833f5c0a574bfeea408e5301a6b052.zip
bzip2-0.9.0cbzip2-0.9.0c
Diffstat (limited to 'bzip2.c')
-rw-r--r--bzip2.c3389
1 files changed, 434 insertions, 2955 deletions
diff --git a/bzip2.c b/bzip2.c
index 53ce10d..6a3ab95 100644
--- a/bzip2.c
+++ b/bzip2.c
@@ -4,28 +4,45 @@
4/*-----------------------------------------------------------*/ 4/*-----------------------------------------------------------*/
5 5
6/*-- 6/*--
7 This program is bzip2, a lossless, block-sorting data compressor, 7 This file is a part of bzip2 and/or libbzip2, a program and
8 version 0.1pl2, dated 29-Aug-1997. 8 library for lossless, block-sorting data compression.
9 9
10 Copyright (C) 1996, 1997 by Julian Seward. 10 Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
11 Guildford, Surrey, UK 11
12 email: jseward@acm.org 12 Redistribution and use in source and binary forms, with or without
13 13 modification, are permitted provided that the following conditions
14 This program is free software; you can redistribute it and/or modify 14 are met:
15 it under the terms of the GNU General Public License as published by 15
16 the Free Software Foundation; either version 2 of the License, or 16 1. Redistributions of source code must retain the above copyright
17 (at your option) any later version. 17 notice, this list of conditions and the following disclaimer.
18 18
19 This program is distributed in the hope that it will be useful, 19 2. The origin of this software must not be misrepresented; you must
20 but WITHOUT ANY WARRANTY; without even the implied warranty of 20 not claim that you wrote the original software. If you use this
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 software in a product, an acknowledgment in the product
22 GNU General Public License for more details. 22 documentation would be appreciated but is not required.
23 23
24 You should have received a copy of the GNU General Public License 24 3. Altered source versions must be plainly marked as such, and must
25 along with this program; if not, write to the Free Software 25 not be misrepresented as being the original software.
26 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 26
27 27 4. The name of the author may not be used to endorse or promote
28 The GNU General Public License is contained in the file LICENSE. 28 products derived from this software without specific prior written
29 permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
32 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
33 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
35 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
37 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
39 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
40 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
41 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42
43 Julian Seward, Guildford, Surrey, UK.
44 jseward@acm.org
45 bzip2/libbzip2 version 0.9.0c of 18 October 1998
29 46
30 This program is based on (at least) the work of: 47 This program is based on (at least) the work of:
31 Mike Burrows 48 Mike Burrows
@@ -37,21 +54,23 @@
37 Robert Sedgewick 54 Robert Sedgewick
38 Jon L. Bentley 55 Jon L. Bentley
39 56
40 For more information on these sources, see the file ALGORITHMS. 57 For more information on these sources, see the manual.
41--*/ 58--*/
42 59
60
43/*----------------------------------------------------*/ 61/*----------------------------------------------------*/
44/*--- IMPORTANT ---*/ 62/*--- IMPORTANT ---*/
45/*----------------------------------------------------*/ 63/*----------------------------------------------------*/
46 64
47/*-- 65/*--
48 WARNING: 66 WARNING:
49 This program (attempts to) compress data by performing several 67 This program and library (attempts to) compress data by
50 non-trivial transformations on it. Unless you are 100% familiar 68 performing several non-trivial transformations on it.
51 with *all* the algorithms contained herein, and with the 69 Unless you are 100% familiar with *all* the algorithms
52 consequences of modifying them, you should NOT meddle with the 70 contained herein, and with the consequences of modifying them,
53 compression or decompression machinery. Incorrect changes can 71 you should NOT meddle with the compression or decompression
54 and very likely *will* lead to disasterous loss of data. 72 machinery. Incorrect changes can and very likely *will*
73 lead to disasterous loss of data.
55 74
56 DISCLAIMER: 75 DISCLAIMER:
57 I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE 76 I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
@@ -65,18 +84,19 @@
65 of various special cases in the code which occur with very low 84 of various special cases in the code which occur with very low
66 but non-zero probability make it impossible to rule out the 85 but non-zero probability make it impossible to rule out the
67 possibility of bugs remaining in the program. DO NOT COMPRESS 86 possibility of bugs remaining in the program. DO NOT COMPRESS
68 ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE 87 ANY DATA WITH THIS PROGRAM AND/OR LIBRARY UNLESS YOU ARE PREPARED
69 POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL NOT BE RECOVERABLE. 88 TO ACCEPT THE POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL
89 NOT BE RECOVERABLE.
70 90
71 That is not to say this program is inherently unreliable. 91 That is not to say this program is inherently unreliable.
72 Indeed, I very much hope the opposite is true. bzip2 has been 92 Indeed, I very much hope the opposite is true. bzip2/libbzip2
73 carefully constructed and extensively tested. 93 has been carefully constructed and extensively tested.
74 94
75 PATENTS: 95 PATENTS:
76 To the best of my knowledge, bzip2 does not use any patented 96 To the best of my knowledge, bzip2/libbzip2 does not use any
77 algorithms. However, I do not have the resources available to 97 patented algorithms. However, I do not have the resources
78 carry out a full patent search. Therefore I cannot give any 98 available to carry out a full patent search. Therefore I cannot
79 guarantee of the above statement. 99 give any guarantee of the above statement.
80--*/ 100--*/
81 101
82 102
@@ -103,6 +123,10 @@
103--*/ 123--*/
104#define BZ_LCCWIN32 0 124#define BZ_LCCWIN32 0
105 125
126#ifdef _WIN32
127#define BZ_LCCWIN32 1
128#define BZ_UNIX 0
129#endif
106 130
107 131
108/*---------------------------------------------*/ 132/*---------------------------------------------*/
@@ -112,12 +136,10 @@
112 136
113#include <stdio.h> 137#include <stdio.h>
114#include <stdlib.h> 138#include <stdlib.h>
115#if DEBUG
116 #include <assert.h>
117#endif
118#include <string.h> 139#include <string.h>
119#include <signal.h> 140#include <signal.h>
120#include <math.h> 141#include <math.h>
142#include "bzlib.h"
121 143
122#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); } 144#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); }
123#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); } 145#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); }
@@ -130,68 +152,45 @@
130--*/ 152--*/
131 153
132#if BZ_UNIX 154#if BZ_UNIX
133 #include <sys/types.h> 155# include <sys/types.h>
134 #include <utime.h> 156# include <utime.h>
135 #include <unistd.h> 157# include <unistd.h>
136 #include <malloc.h> 158# include <sys/stat.h>
137 #include <sys/stat.h> 159# include <sys/times.h>
138 #include <sys/times.h> 160
139 161# define PATH_SEP '/'
140 #define Int32 int 162# define MY_LSTAT lstat
141 #define UInt32 unsigned int 163# define MY_S_IFREG S_ISREG
142 #define Char char 164# define MY_STAT stat
143 #define UChar unsigned char 165
144 #define Int16 short 166# define APPEND_FILESPEC(root, name) \
145 #define UInt16 unsigned short
146
147 #define PATH_SEP '/'
148 #define MY_LSTAT lstat
149 #define MY_S_IFREG S_ISREG
150 #define MY_STAT stat
151
152 #define APPEND_FILESPEC(root, name) \
153 root=snocString((root), (name)) 167 root=snocString((root), (name))
154 168
155 #define SET_BINARY_MODE(fd) /**/ 169# define SET_BINARY_MODE(fd) /**/
156 170
157 /*-- 171# ifdef __GNUC__
158 You should try very hard to persuade your C compiler 172# define NORETURN __attribute__ ((noreturn))
159 to inline the bits marked INLINE. Otherwise bzip2 will 173# else
160 run rather slowly. gcc version 2.x is recommended. 174# define NORETURN /**/
161 --*/ 175# endif
162 #ifdef __GNUC__
163 #define INLINE inline
164 #define NORETURN __attribute__ ((noreturn))
165 #else
166 #define INLINE /**/
167 #define NORETURN /**/
168 #endif
169#endif 176#endif
170 177
171 178
172 179
173#if BZ_LCCWIN32 180#if BZ_LCCWIN32
174 #include <io.h> 181# include <io.h>
175 #include <fcntl.h> 182# include <fcntl.h>
176 #include <sys\stat.h> 183# include <sys\stat.h>
177 184
178 #define Int32 int 185# define NORETURN /**/
179 #define UInt32 unsigned int 186# define PATH_SEP '\\'
180 #define Int16 short 187# define MY_LSTAT _stat
181 #define UInt16 unsigned short 188# define MY_STAT _stat
182 #define Char char 189# define MY_S_IFREG(x) ((x) & _S_IFREG)
183 #define UChar unsigned char 190
184 191# if 0
185 #define INLINE /**/
186 #define NORETURN /**/
187 #define PATH_SEP '\\'
188 #define MY_LSTAT _stat
189 #define MY_STAT _stat
190 #define MY_S_IFREG(x) ((x) & _S_IFREG)
191
192 #if 0
193 /*-- lcc-win32 seems to expand wildcards itself --*/ 192 /*-- lcc-win32 seems to expand wildcards itself --*/
194 #define APPEND_FILESPEC(root, spec) \ 193# define APPEND_FILESPEC(root, spec) \
195 do { \ 194 do { \
196 if ((spec)[0] == '-') { \ 195 if ((spec)[0] == '-') { \
197 root = snocString((root), (spec)); \ 196 root = snocString((root), (spec)); \
@@ -211,12 +210,12 @@
211 } \ 210 } \
212 } \ 211 } \
213 } while ( 0 ) 212 } while ( 0 )
214 #else 213# else
215 #define APPEND_FILESPEC(root, name) \ 214# define APPEND_FILESPEC(root, name) \
216 root = snocString ((root), (name)) 215 root = snocString ((root), (name))
217 #endif 216# endif
218 217
219 #define SET_BINARY_MODE(fd) \ 218# define SET_BINARY_MODE(fd) \
220 do { \ 219 do { \
221 int retVal = setmode ( fileno ( fd ), \ 220 int retVal = setmode ( fileno ( fd ), \
222 O_BINARY ); \ 221 O_BINARY ); \
@@ -231,111 +230,32 @@
231 Some more stuff for all platforms :-) 230 Some more stuff for all platforms :-)
232--*/ 231--*/
233 232
234#define Bool unsigned char 233typedef char Char;
235#define True 1 234typedef unsigned char Bool;
236#define False 0 235typedef unsigned char UChar;
236typedef int Int32;
237typedef unsigned int UInt32;
238typedef short Int16;
239typedef unsigned short UInt16;
240
241#define True ((Bool)1)
242#define False ((Bool)0)
237 243
238/*-- 244/*--
239 IntNative is your platform's `native' int size. 245 IntNative is your platform's `native' int size.
240 Only here to avoid probs with 64-bit platforms. 246 Only here to avoid probs with 64-bit platforms.
241--*/ 247--*/
242#define IntNative int 248typedef int IntNative;
243
244
245/*--
246 change to 1, or compile with -DDEBUG=1 to debug
247--*/
248#ifndef DEBUG
249#define DEBUG 0
250#endif
251
252
253/*---------------------------------------------------*/
254/*--- ---*/
255/*---------------------------------------------------*/
256
257/*--
258 Implementation notes, July 1997
259 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
260
261 Memory allocation
262 ~~~~~~~~~~~~~~~~~
263 All large data structures are allocated on the C heap,
264 for better or for worse. That includes the various
265 arrays of pointers, striped words, bytes, frequency
266 tables and buffers for compression and decompression.
267
268 bzip2 can operate at various block-sizes, ranging from
269 100k to 900k in 100k steps, and it allocates only as
270 much as it needs to. When compressing, we know from the
271 command-line options what the block-size is going to be,
272 so all allocation can be done at start-up; if that
273 succeeds, there can be no further allocation problems.
274
275 Decompression is more complicated. Each compressed file
276 contains, in its header, a byte indicating the block
277 size used for compression. This means bzip2 potentially
278 needs to reallocate memory for each file it deals with,
279 which in turn opens the possibility for a memory allocation
280 failure part way through a run of files, by encountering
281 a file requiring a much larger block size than all the
282 ones preceding it.
283
284 The policy is to simply give up if a memory allocation
285 failure occurs. During decompression, it would be
286 possible to move on to subsequent files in the hope that
287 some might ask for a smaller block size, but the
288 complications for doing this seem more trouble than they
289 are worth.
290
291
292 Compressed file formats
293 ~~~~~~~~~~~~~~~~~~~~~~~
294 [This is now entirely different from both 0.21, and from
295 any previous Huffman-coded variant of bzip.
296 See the associated file bzip2.txt for details.]
297
298
299 Error conditions
300 ~~~~~~~~~~~~~~~~
301 Dealing with error conditions is the least satisfactory
302 aspect of bzip2. The policy is to try and leave the
303 filesystem in a consistent state, then quit, even if it
304 means not processing some of the files mentioned in the
305 command line. `A consistent state' means that a file
306 exists either in its compressed or uncompressed form,
307 but not both. This boils down to the rule `delete the
308 output file if an error condition occurs, leaving the
309 input intact'. Input files are only deleted when we can
310 be pretty sure the output file has been written and
311 closed successfully.
312
313 Errors are a dog because there's so many things to
314 deal with. The following can happen mid-file, and
315 require cleaning up.
316
317 internal `panics' -- indicating a bug
318 corrupted or inconsistent compressed file
319 can't allocate enough memory to decompress this file
320 I/O error reading/writing/opening/closing
321 signal catches -- Control-C, SIGTERM, SIGHUP.
322
323 Other conditions, primarily pertaining to file names,
324 can be checked in-between files, which makes dealing
325 with them easier.
326--*/
327
328 249
329 250
330/*---------------------------------------------------*/ 251/*---------------------------------------------------*/
331/*--- Misc (file handling) data decls ---*/ 252/*--- Misc (file handling) data decls ---*/
332/*---------------------------------------------------*/ 253/*---------------------------------------------------*/
333 254
334UInt32 bytesIn, bytesOut;
335Int32 verbosity; 255Int32 verbosity;
336Bool keepInputFiles, smallMode, testFailsExist; 256Bool keepInputFiles, smallMode;
337UInt32 globalCrc; 257Bool forceOverwrite, testFailsExist;
338Int32 numFileNames, numFilesProcessed; 258Int32 numFileNames, numFilesProcessed, blockSize100k;
339 259
340 260
341/*-- source modes; F==file, I==stdin, O==stdout --*/ 261/*-- source modes; F==file, I==stdin, O==stdout --*/
@@ -351,2691 +271,304 @@ Int32 numFileNames, numFilesProcessed;
351Int32 opMode; 271Int32 opMode;
352Int32 srcMode; 272Int32 srcMode;
353 273
274#define FILE_NAME_LEN 1034
354 275
355Int32 longestFileName; 276Int32 longestFileName;
356Char inName[1024]; 277Char inName[FILE_NAME_LEN];
357Char outName[1024]; 278Char outName[FILE_NAME_LEN];
358Char *progName; 279Char *progName;
359Char progNameReally[1024]; 280Char progNameReally[FILE_NAME_LEN];
360FILE *outputHandleJustInCase; 281FILE *outputHandleJustInCase;
361 282Int32 workFactor;
362void panic ( Char* ) NORETURN; 283
363void ioError ( void ) NORETURN; 284void panic ( Char* ) NORETURN;
364void compressOutOfMemory ( Int32, Int32 ) NORETURN; 285void ioError ( void ) NORETURN;
365void uncompressOutOfMemory ( Int32, Int32 ) NORETURN; 286void outOfMemory ( void ) NORETURN;
366void blockOverrun ( void ) NORETURN; 287void blockOverrun ( void ) NORETURN;
367void badBlockHeader ( void ) NORETURN; 288void badBlockHeader ( void ) NORETURN;
368void badBGLengths ( void ) NORETURN; 289void badBGLengths ( void ) NORETURN;
369void crcError ( UInt32, UInt32 ) NORETURN; 290void crcError ( void ) NORETURN;
370void bitStreamEOF ( void ) NORETURN; 291void bitStreamEOF ( void ) NORETURN;
371void cleanUpAndFail ( Int32 ) NORETURN; 292void cleanUpAndFail ( Int32 ) NORETURN;
372void compressedStreamEOF ( void ) NORETURN; 293void compressedStreamEOF ( void ) NORETURN;
373 294
295void copyFileName ( Char*, Char* );
374void* myMalloc ( Int32 ); 296void* myMalloc ( Int32 );
375 297
376 298
377 299
378/*---------------------------------------------------*/ 300/*---------------------------------------------------*/
379/*--- Data decls for the front end ---*/ 301/*--- Processing of complete files and streams ---*/
380/*---------------------------------------------------*/
381
382/*--
383 The overshoot bytes allow us to avoid most of
384 the cost of pointer renormalisation during
385 comparison of rotations in sorting.
386 The figure of 20 is derived as follows:
387 qSort3 allows an overshoot of up to 10.
388 It then calls simpleSort, which calls
389 fullGtU, also with max overshoot 10.
390 fullGtU does up to 10 comparisons without
391 renormalising, giving 10+10 == 20.
392--*/
393#define NUM_OVERSHOOT_BYTES 20
394
395/*--
396 These are the main data structures for
397 the Burrows-Wheeler transform.
398--*/
399
400/*--
401 Pointers to compression and decompression
402 structures. Set by
403 allocateCompressStructures and
404 setDecompressStructureSizes
405
406 The structures are always set to be suitable
407 for a block of size 100000 * blockSize100k.
408--*/
409UChar *block; /*-- compress --*/
410UInt16 *quadrant; /*-- compress --*/
411Int32 *zptr; /*-- compress --*/
412UInt16 *szptr; /*-- overlays zptr ---*/
413Int32 *ftab; /*-- compress --*/
414
415UInt16 *ll16; /*-- small decompress --*/
416UChar *ll4; /*-- small decompress --*/
417
418Int32 *tt; /*-- fast decompress --*/
419UChar *ll8; /*-- fast decompress --*/
420
421
422/*--
423 freq table collected to save a pass over the data
424 during decompression.
425--*/
426Int32 unzftab[256];
427
428
429/*--
430 index of the last char in the block, so
431 the block size == last + 1.
432--*/
433Int32 last;
434
435
436/*--
437 index in zptr[] of original string after sorting.
438--*/
439Int32 origPtr;
440
441
442/*--
443 always: in the range 0 .. 9.
444 The current block size is 100000 * this number.
445--*/
446Int32 blockSize100k;
447
448
449/*--
450 Used when sorting. If too many long comparisons
451 happen, we stop sorting, randomise the block
452 slightly, and try again.
453--*/
454
455Int32 workFactor;
456Int32 workDone;
457Int32 workLimit;
458Bool blockRandomised;
459Bool firstAttempt;
460Int32 nBlocksRandomised;
461
462
463
464/*---------------------------------------------------*/
465/*--- Data decls for the back end ---*/
466/*---------------------------------------------------*/
467
468#define MAX_ALPHA_SIZE 258
469#define MAX_CODE_LEN 23
470
471#define RUNA 0
472#define RUNB 1
473
474#define N_GROUPS 6
475#define G_SIZE 50
476#define N_ITERS 4
477
478#define MAX_SELECTORS (2 + (900000 / G_SIZE))
479
480Bool inUse[256];
481Int32 nInUse;
482
483UChar seqToUnseq[256];
484UChar unseqToSeq[256];
485
486UChar selector [MAX_SELECTORS];
487UChar selectorMtf[MAX_SELECTORS];
488
489Int32 nMTF;
490
491Int32 mtfFreq[MAX_ALPHA_SIZE];
492
493UChar len [N_GROUPS][MAX_ALPHA_SIZE];
494
495/*-- decompress only --*/
496Int32 limit [N_GROUPS][MAX_ALPHA_SIZE];
497Int32 base [N_GROUPS][MAX_ALPHA_SIZE];
498Int32 perm [N_GROUPS][MAX_ALPHA_SIZE];
499Int32 minLens[N_GROUPS];
500
501/*-- compress only --*/
502Int32 code [N_GROUPS][MAX_ALPHA_SIZE];
503Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE];
504
505
506/*---------------------------------------------------*/
507/*--- 32-bit CRC grunge ---*/
508/*---------------------------------------------------*/
509
510/*--
511 I think this is an implementation of the AUTODIN-II,
512 Ethernet & FDDI 32-bit CRC standard. Vaguely derived
513 from code by Rob Warnock, in Section 51 of the
514 comp.compression FAQ.
515--*/
516
517UInt32 crc32Table[256] = {
518
519 /*-- Ugly, innit? --*/
520
521 0x00000000UL, 0x04c11db7UL, 0x09823b6eUL, 0x0d4326d9UL,
522 0x130476dcUL, 0x17c56b6bUL, 0x1a864db2UL, 0x1e475005UL,
523 0x2608edb8UL, 0x22c9f00fUL, 0x2f8ad6d6UL, 0x2b4bcb61UL,
524 0x350c9b64UL, 0x31cd86d3UL, 0x3c8ea00aUL, 0x384fbdbdUL,
525 0x4c11db70UL, 0x48d0c6c7UL, 0x4593e01eUL, 0x4152fda9UL,
526 0x5f15adacUL, 0x5bd4b01bUL, 0x569796c2UL, 0x52568b75UL,
527 0x6a1936c8UL, 0x6ed82b7fUL, 0x639b0da6UL, 0x675a1011UL,
528 0x791d4014UL, 0x7ddc5da3UL, 0x709f7b7aUL, 0x745e66cdUL,
529 0x9823b6e0UL, 0x9ce2ab57UL, 0x91a18d8eUL, 0x95609039UL,
530 0x8b27c03cUL, 0x8fe6dd8bUL, 0x82a5fb52UL, 0x8664e6e5UL,
531 0xbe2b5b58UL, 0xbaea46efUL, 0xb7a96036UL, 0xb3687d81UL,
532 0xad2f2d84UL, 0xa9ee3033UL, 0xa4ad16eaUL, 0xa06c0b5dUL,
533 0xd4326d90UL, 0xd0f37027UL, 0xddb056feUL, 0xd9714b49UL,
534 0xc7361b4cUL, 0xc3f706fbUL, 0xceb42022UL, 0xca753d95UL,
535 0xf23a8028UL, 0xf6fb9d9fUL, 0xfbb8bb46UL, 0xff79a6f1UL,
536 0xe13ef6f4UL, 0xe5ffeb43UL, 0xe8bccd9aUL, 0xec7dd02dUL,
537 0x34867077UL, 0x30476dc0UL, 0x3d044b19UL, 0x39c556aeUL,
538 0x278206abUL, 0x23431b1cUL, 0x2e003dc5UL, 0x2ac12072UL,
539 0x128e9dcfUL, 0x164f8078UL, 0x1b0ca6a1UL, 0x1fcdbb16UL,
540 0x018aeb13UL, 0x054bf6a4UL, 0x0808d07dUL, 0x0cc9cdcaUL,
541 0x7897ab07UL, 0x7c56b6b0UL, 0x71159069UL, 0x75d48ddeUL,
542 0x6b93dddbUL, 0x6f52c06cUL, 0x6211e6b5UL, 0x66d0fb02UL,
543 0x5e9f46bfUL, 0x5a5e5b08UL, 0x571d7dd1UL, 0x53dc6066UL,
544 0x4d9b3063UL, 0x495a2dd4UL, 0x44190b0dUL, 0x40d816baUL,
545 0xaca5c697UL, 0xa864db20UL, 0xa527fdf9UL, 0xa1e6e04eUL,
546 0xbfa1b04bUL, 0xbb60adfcUL, 0xb6238b25UL, 0xb2e29692UL,
547 0x8aad2b2fUL, 0x8e6c3698UL, 0x832f1041UL, 0x87ee0df6UL,
548 0x99a95df3UL, 0x9d684044UL, 0x902b669dUL, 0x94ea7b2aUL,
549 0xe0b41de7UL, 0xe4750050UL, 0xe9362689UL, 0xedf73b3eUL,
550 0xf3b06b3bUL, 0xf771768cUL, 0xfa325055UL, 0xfef34de2UL,
551 0xc6bcf05fUL, 0xc27dede8UL, 0xcf3ecb31UL, 0xcbffd686UL,
552 0xd5b88683UL, 0xd1799b34UL, 0xdc3abdedUL, 0xd8fba05aUL,
553 0x690ce0eeUL, 0x6dcdfd59UL, 0x608edb80UL, 0x644fc637UL,
554 0x7a089632UL, 0x7ec98b85UL, 0x738aad5cUL, 0x774bb0ebUL,
555 0x4f040d56UL, 0x4bc510e1UL, 0x46863638UL, 0x42472b8fUL,
556 0x5c007b8aUL, 0x58c1663dUL, 0x558240e4UL, 0x51435d53UL,
557 0x251d3b9eUL, 0x21dc2629UL, 0x2c9f00f0UL, 0x285e1d47UL,
558 0x36194d42UL, 0x32d850f5UL, 0x3f9b762cUL, 0x3b5a6b9bUL,
559 0x0315d626UL, 0x07d4cb91UL, 0x0a97ed48UL, 0x0e56f0ffUL,
560 0x1011a0faUL, 0x14d0bd4dUL, 0x19939b94UL, 0x1d528623UL,
561 0xf12f560eUL, 0xf5ee4bb9UL, 0xf8ad6d60UL, 0xfc6c70d7UL,
562 0xe22b20d2UL, 0xe6ea3d65UL, 0xeba91bbcUL, 0xef68060bUL,
563 0xd727bbb6UL, 0xd3e6a601UL, 0xdea580d8UL, 0xda649d6fUL,
564 0xc423cd6aUL, 0xc0e2d0ddUL, 0xcda1f604UL, 0xc960ebb3UL,
565 0xbd3e8d7eUL, 0xb9ff90c9UL, 0xb4bcb610UL, 0xb07daba7UL,
566 0xae3afba2UL, 0xaafbe615UL, 0xa7b8c0ccUL, 0xa379dd7bUL,
567 0x9b3660c6UL, 0x9ff77d71UL, 0x92b45ba8UL, 0x9675461fUL,
568 0x8832161aUL, 0x8cf30badUL, 0x81b02d74UL, 0x857130c3UL,
569 0x5d8a9099UL, 0x594b8d2eUL, 0x5408abf7UL, 0x50c9b640UL,
570 0x4e8ee645UL, 0x4a4ffbf2UL, 0x470cdd2bUL, 0x43cdc09cUL,
571 0x7b827d21UL, 0x7f436096UL, 0x7200464fUL, 0x76c15bf8UL,
572 0x68860bfdUL, 0x6c47164aUL, 0x61043093UL, 0x65c52d24UL,
573 0x119b4be9UL, 0x155a565eUL, 0x18197087UL, 0x1cd86d30UL,
574 0x029f3d35UL, 0x065e2082UL, 0x0b1d065bUL, 0x0fdc1becUL,
575 0x3793a651UL, 0x3352bbe6UL, 0x3e119d3fUL, 0x3ad08088UL,
576 0x2497d08dUL, 0x2056cd3aUL, 0x2d15ebe3UL, 0x29d4f654UL,
577 0xc5a92679UL, 0xc1683bceUL, 0xcc2b1d17UL, 0xc8ea00a0UL,
578 0xd6ad50a5UL, 0xd26c4d12UL, 0xdf2f6bcbUL, 0xdbee767cUL,
579 0xe3a1cbc1UL, 0xe760d676UL, 0xea23f0afUL, 0xeee2ed18UL,
580 0xf0a5bd1dUL, 0xf464a0aaUL, 0xf9278673UL, 0xfde69bc4UL,
581 0x89b8fd09UL, 0x8d79e0beUL, 0x803ac667UL, 0x84fbdbd0UL,
582 0x9abc8bd5UL, 0x9e7d9662UL, 0x933eb0bbUL, 0x97ffad0cUL,
583 0xafb010b1UL, 0xab710d06UL, 0xa6322bdfUL, 0xa2f33668UL,
584 0xbcb4666dUL, 0xb8757bdaUL, 0xb5365d03UL, 0xb1f740b4UL
585};
586
587
588/*---------------------------------------------*/
589void initialiseCRC ( void )
590{
591 globalCrc = 0xffffffffUL;
592}
593
594
595/*---------------------------------------------*/
596UInt32 getFinalCRC ( void )
597{
598 return ~globalCrc;
599}
600
601
602/*---------------------------------------------*/
603UInt32 getGlobalCRC ( void )
604{
605 return globalCrc;
606}
607
608
609/*---------------------------------------------*/
610void setGlobalCRC ( UInt32 newCrc )
611{
612 globalCrc = newCrc;
613}
614
615
616/*---------------------------------------------*/
617#define UPDATE_CRC(crcVar,cha) \
618{ \
619 crcVar = (crcVar << 8) ^ \
620 crc32Table[(crcVar >> 24) ^ \
621 ((UChar)cha)]; \
622}
623
624
625/*---------------------------------------------------*/
626/*--- Bit stream I/O ---*/
627/*---------------------------------------------------*/ 302/*---------------------------------------------------*/
628 303
629
630UInt32 bsBuff;
631Int32 bsLive;
632FILE* bsStream;
633Bool bsWriting;
634
635
636/*---------------------------------------------*/
637void bsSetStream ( FILE* f, Bool wr )
638{
639 if (bsStream != NULL) panic ( "bsSetStream" );
640 bsStream = f;
641 bsLive = 0;
642 bsBuff = 0;
643 bytesOut = 0;
644 bytesIn = 0;
645 bsWriting = wr;
646}
647
648
649/*---------------------------------------------*/
650void bsFinishedWithStream ( void )
651{
652 if (bsWriting)
653 while (bsLive > 0) {
654 fputc ( (UChar)(bsBuff >> 24), bsStream );
655 bsBuff <<= 8;
656 bsLive -= 8;
657 bytesOut++;
658 }
659 bsStream = NULL;
660}
661
662
663/*---------------------------------------------*/
664#define bsNEEDR(nz) \
665{ \
666 while (bsLive < nz) { \
667 Int32 zzi = fgetc ( bsStream ); \
668 if (zzi == EOF) compressedStreamEOF(); \
669 bsBuff = (bsBuff << 8) | (zzi & 0xffL); \
670 bsLive += 8; \
671 } \
672}
673
674
675/*---------------------------------------------*/
676#define bsNEEDW(nz) \
677{ \
678 while (bsLive >= 8) { \
679 fputc ( (UChar)(bsBuff >> 24), \
680 bsStream ); \
681 bsBuff <<= 8; \
682 bsLive -= 8; \
683 bytesOut++; \
684 } \
685}
686
687
688/*---------------------------------------------*/
689#define bsR1(vz) \
690{ \
691 bsNEEDR(1); \
692 vz = (bsBuff >> (bsLive-1)) & 1; \
693 bsLive--; \
694}
695
696
697/*---------------------------------------------*/
698INLINE UInt32 bsR ( Int32 n )
699{
700 UInt32 v;
701 bsNEEDR ( n );
702 v = (bsBuff >> (bsLive-n)) & ((1 << n)-1);
703 bsLive -= n;
704 return v;
705}
706
707
708/*---------------------------------------------*/
709INLINE void bsW ( Int32 n, UInt32 v )
710{
711 bsNEEDW ( n );
712 bsBuff |= (v << (32 - bsLive - n));
713 bsLive += n;
714}
715
716
717/*---------------------------------------------*/
718UChar bsGetUChar ( void )
719{
720 return (UChar)bsR(8);
721}
722
723
724/*---------------------------------------------*/
725void bsPutUChar ( UChar c )
726{
727 bsW(8, (UInt32)c );
728}
729
730
731/*---------------------------------------------*/ 304/*---------------------------------------------*/
732Int32 bsGetUInt32 ( void ) 305Bool myfeof ( FILE* f )
733{ 306{
734 UInt32 u; 307 Int32 c = fgetc ( f );
735 u = 0; 308 if (c == EOF) return True;
736 u = (u << 8) | bsR(8); 309 ungetc ( c, f );
737 u = (u << 8) | bsR(8); 310 return False;
738 u = (u << 8) | bsR(8);
739 u = (u << 8) | bsR(8);
740 return u;
741}
742
743
744/*---------------------------------------------*/
745UInt32 bsGetIntVS ( UInt32 numBits )
746{
747 return (UInt32)bsR(numBits);
748}
749
750
751/*---------------------------------------------*/
752UInt32 bsGetInt32 ( void )
753{
754 return (Int32)bsGetUInt32();
755}
756
757
758/*---------------------------------------------*/
759void bsPutUInt32 ( UInt32 u )
760{
761 bsW ( 8, (u >> 24) & 0xffL );
762 bsW ( 8, (u >> 16) & 0xffL );
763 bsW ( 8, (u >> 8) & 0xffL );
764 bsW ( 8, u & 0xffL );
765}
766
767
768/*---------------------------------------------*/
769void bsPutInt32 ( Int32 c )
770{
771 bsPutUInt32 ( (UInt32)c );
772} 311}
773 312
774 313
775/*---------------------------------------------*/ 314/*---------------------------------------------*/
776void bsPutIntVS ( Int32 numBits, UInt32 c ) 315void compressStream ( FILE *stream, FILE *zStream )
777{ 316{
778 bsW ( numBits, c ); 317 BZFILE* bzf = NULL;
779} 318 UChar ibuf[5000];
780 319 Int32 nIbuf;
781 320 UInt32 nbytes_in, nbytes_out;
782/*---------------------------------------------------*/ 321 Int32 bzerr, bzerr_dummy, ret;
783/*--- Huffman coding low-level stuff ---*/
784/*---------------------------------------------------*/
785
786#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
787#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
788#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
789
790#define ADDWEIGHTS(zw1,zw2) \
791 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
792 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
793
794#define UPHEAP(z) \
795{ \
796 Int32 zz, tmp; \
797 zz = z; tmp = heap[zz]; \
798 while (weight[tmp] < weight[heap[zz >> 1]]) { \
799 heap[zz] = heap[zz >> 1]; \
800 zz >>= 1; \
801 } \
802 heap[zz] = tmp; \
803}
804
805#define DOWNHEAP(z) \
806{ \
807 Int32 zz, yy, tmp; \
808 zz = z; tmp = heap[zz]; \
809 while (True) { \
810 yy = zz << 1; \
811 if (yy > nHeap) break; \
812 if (yy < nHeap && \
813 weight[heap[yy+1]] < weight[heap[yy]]) \
814 yy++; \
815 if (weight[tmp] < weight[heap[yy]]) break; \
816 heap[zz] = heap[yy]; \
817 zz = yy; \
818 } \
819 heap[zz] = tmp; \
820}
821 322
323 SET_BINARY_MODE(stream);
324 SET_BINARY_MODE(zStream);
822 325
823/*---------------------------------------------*/ 326 if (ferror(stream)) goto errhandler_io;
824void hbMakeCodeLengths ( UChar *len, 327 if (ferror(zStream)) goto errhandler_io;
825 Int32 *freq,
826 Int32 alphaSize,
827 Int32 maxLen )
828{
829 /*--
830 Nodes and heap entries run from 1. Entry 0
831 for both the heap and nodes is a sentinel.
832 --*/
833 Int32 nNodes, nHeap, n1, n2, i, j, k;
834 Bool tooLong;
835 328
836 Int32 heap [ MAX_ALPHA_SIZE + 2 ]; 329 bzf = bzWriteOpen ( &bzerr, zStream,
837 Int32 weight [ MAX_ALPHA_SIZE * 2 ]; 330 blockSize100k, verbosity, workFactor );
838 Int32 parent [ MAX_ALPHA_SIZE * 2 ]; 331 if (bzerr != BZ_OK) goto errhandler;
839 332
840 for (i = 0; i < alphaSize; i++) 333 if (verbosity >= 2) fprintf ( stderr, "\n" );
841 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
842 334
843 while (True) { 335 while (True) {
844 336
845 nNodes = alphaSize; 337 if (myfeof(stream)) break;
846 nHeap = 0; 338 nIbuf = fread ( ibuf, sizeof(UChar), 5000, stream );
847 339 if (ferror(stream)) goto errhandler_io;
848 heap[0] = 0; 340 if (nIbuf > 0) bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf );
849 weight[0] = 0; 341 if (bzerr != BZ_OK) goto errhandler;
850 parent[0] = -2;
851
852 for (i = 1; i <= alphaSize; i++) {
853 parent[i] = -1;
854 nHeap++;
855 heap[nHeap] = i;
856 UPHEAP(nHeap);
857 }
858 if (!(nHeap < (MAX_ALPHA_SIZE+2)))
859 panic ( "hbMakeCodeLengths(1)" );
860
861 while (nHeap > 1) {
862 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
863 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
864 nNodes++;
865 parent[n1] = parent[n2] = nNodes;
866 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
867 parent[nNodes] = -1;
868 nHeap++;
869 heap[nHeap] = nNodes;
870 UPHEAP(nHeap);
871 }
872 if (!(nNodes < (MAX_ALPHA_SIZE * 2)))
873 panic ( "hbMakeCodeLengths(2)" );
874
875 tooLong = False;
876 for (i = 1; i <= alphaSize; i++) {
877 j = 0;
878 k = i;
879 while (parent[k] >= 0) { k = parent[k]; j++; }
880 len[i-1] = j;
881 if (j > maxLen) tooLong = True;
882 }
883
884 if (! tooLong) break;
885 342
886 for (i = 1; i < alphaSize; i++) {
887 j = weight[i] >> 8;
888 j = 1 + (j / 2);
889 weight[i] = j << 8;
890 }
891 } 343 }
892}
893
894 344
895/*---------------------------------------------*/ 345 bzWriteClose ( &bzerr, bzf, 0, &nbytes_in, &nbytes_out );
896void hbAssignCodes ( Int32 *code, 346 if (bzerr != BZ_OK) goto errhandler;
897 UChar *length,
898 Int32 minLen,
899 Int32 maxLen,
900 Int32 alphaSize )
901{
902 Int32 n, vec, i;
903 347
904 vec = 0; 348 if (ferror(zStream)) goto errhandler_io;
905 for (n = minLen; n <= maxLen; n++) { 349 ret = fflush ( zStream );
906 for (i = 0; i < alphaSize; i++) 350 if (ret == EOF) goto errhandler_io;
907 if (length[i] == n) { code[i] = vec; vec++; }; 351 if (zStream != stdout) {
908 vec <<= 1; 352 ret = fclose ( zStream );
353 if (ret == EOF) goto errhandler_io;
909 } 354 }
910} 355 if (ferror(stream)) goto errhandler_io;
911 356 ret = fclose ( stream );
912 357 if (ret == EOF) goto errhandler_io;
913/*---------------------------------------------*/
914void hbCreateDecodeTables ( Int32 *limit,
915 Int32 *base,
916 Int32 *perm,
917 UChar *length,
918 Int32 minLen,
919 Int32 maxLen,
920 Int32 alphaSize )
921{
922 Int32 pp, i, j, vec;
923
924 pp = 0;
925 for (i = minLen; i <= maxLen; i++)
926 for (j = 0; j < alphaSize; j++)
927 if (length[j] == i) { perm[pp] = j; pp++; };
928
929 for (i = 0; i < MAX_CODE_LEN; i++) base[i] = 0;
930 for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
931 358
932 for (i = 1; i < MAX_CODE_LEN; i++) base[i] += base[i-1]; 359 if (nbytes_in == 0) nbytes_in = 1;
933 360
934 for (i = 0; i < MAX_CODE_LEN; i++) limit[i] = 0; 361 if (verbosity >= 1)
935 vec = 0; 362 fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
936 363 "%5.2f%% saved, %d in, %d out.\n",
937 for (i = minLen; i <= maxLen; i++) { 364 (float)nbytes_in / (float)nbytes_out,
938 vec += (base[i+1] - base[i]); 365 (8.0 * (float)nbytes_out) / (float)nbytes_in,
939 limit[i] = vec-1; 366 100.0 * (1.0 - (float)nbytes_out / (float)nbytes_in),
940 vec <<= 1; 367 nbytes_in,
941 } 368 nbytes_out
942 for (i = minLen + 1; i <= maxLen; i++) 369 );
943 base[i] = ((limit[i-1] + 1) << 1) - base[i];
944}
945
946
947
948/*---------------------------------------------------*/
949/*--- Undoing the reversible transformation ---*/
950/*---------------------------------------------------*/
951
952/*---------------------------------------------*/
953#define SET_LL4(i,n) \
954 { if (((i) & 0x1) == 0) \
955 ll4[(i) >> 1] = (ll4[(i) >> 1] & 0xf0) | (n); else \
956 ll4[(i) >> 1] = (ll4[(i) >> 1] & 0x0f) | ((n) << 4); \
957 }
958
959#define GET_LL4(i) \
960 (((UInt32)(ll4[(i) >> 1])) >> (((i) << 2) & 0x4) & 0xF)
961
962#define SET_LL(i,n) \
963 { ll16[i] = (UInt16)(n & 0x0000ffff); \
964 SET_LL4(i, n >> 16); \
965 }
966
967#define GET_LL(i) \
968 (((UInt32)ll16[i]) | (GET_LL4(i) << 16))
969
970
971/*---------------------------------------------*/
972/*--
973 Manage memory for compression/decompression.
974 When compressing, a single block size applies to
975 all files processed, and that's set when the
976 program starts. But when decompressing, each file
977 processed could have been compressed with a
978 different block size, so we may have to free
979 and reallocate on a per-file basis.
980
981 A call with argument of zero means
982 `free up everything.' And a value of zero for
983 blockSize100k means no memory is currently allocated.
984--*/
985 370
371 return;
986 372
987/*---------------------------------------------*/ 373 errhandler:
988void allocateCompressStructures ( void ) 374 bzWriteClose ( &bzerr_dummy, bzf, 1, &nbytes_in, &nbytes_out );
989{ 375 switch (bzerr) {
990 Int32 n = 100000 * blockSize100k; 376 case BZ_MEM_ERROR:
991 block = malloc ( (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) ); 377 outOfMemory ();
992 quadrant = malloc ( (n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) ); 378 case BZ_IO_ERROR:
993 zptr = malloc ( n * sizeof(Int32) ); 379 errhandler_io:
994 ftab = malloc ( 65537 * sizeof(Int32) ); 380 ioError(); break;
995 381 default:
996 if (block == NULL || quadrant == NULL || 382 panic ( "compress:unexpected error" );
997 zptr == NULL || ftab == NULL) {
998 Int32 totalDraw
999 = (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) +
1000 (n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) +
1001 n * sizeof(Int32) +
1002 65537 * sizeof(Int32);
1003
1004 compressOutOfMemory ( totalDraw, n );
1005 } 383 }
1006 384
1007 /*-- 385 panic ( "compress:end" );
1008 Since we want valid indexes for block of 386 /*notreached*/
1009 -1 to n + NUM_OVERSHOOT_BYTES - 1
1010 inclusive.
1011 --*/
1012 block++;
1013
1014 /*--
1015 The back end needs a place to store the MTF values
1016 whilst it calculates the coding tables. We could
1017 put them in the zptr array. However, these values
1018 will fit in a short, so we overlay szptr at the
1019 start of zptr, in the hope of reducing the number
1020 of cache misses induced by the multiple traversals
1021 of the MTF values when calculating coding tables.
1022 Seems to improve compression speed by about 1%.
1023 --*/
1024 szptr = (UInt16*)zptr;
1025}
1026
1027
1028/*---------------------------------------------*/
1029void setDecompressStructureSizes ( Int32 newSize100k )
1030{
1031 if (! (0 <= newSize100k && newSize100k <= 9 &&
1032 0 <= blockSize100k && blockSize100k <= 9))
1033 panic ( "setDecompressStructureSizes" );
1034
1035 if (newSize100k == blockSize100k) return;
1036
1037 blockSize100k = newSize100k;
1038
1039 if (ll16 != NULL) free ( ll16 );
1040 if (ll4 != NULL) free ( ll4 );
1041 if (ll8 != NULL) free ( ll8 );
1042 if (tt != NULL) free ( tt );
1043
1044 if (newSize100k == 0) return;
1045
1046 if (smallMode) {
1047
1048 Int32 n = 100000 * newSize100k;
1049 ll16 = malloc ( n * sizeof(UInt16) );
1050 ll4 = malloc ( ((n+1) >> 1) * sizeof(UChar) );
1051
1052 if (ll4 == NULL || ll16 == NULL) {
1053 Int32 totalDraw
1054 = n * sizeof(Int16) + ((n+1) >> 1) * sizeof(UChar);
1055 uncompressOutOfMemory ( totalDraw, n );
1056 }
1057
1058 } else {
1059
1060 Int32 n = 100000 * newSize100k;
1061 ll8 = malloc ( n * sizeof(UChar) );
1062 tt = malloc ( n * sizeof(Int32) );
1063
1064 if (ll8 == NULL || tt == NULL) {
1065 Int32 totalDraw
1066 = n * sizeof(UChar) + n * sizeof(UInt32);
1067 uncompressOutOfMemory ( totalDraw, n );
1068 }
1069
1070 }
1071} 387}
1072 388
1073 389
1074 390
1075/*---------------------------------------------------*/
1076/*--- The new back end ---*/
1077/*---------------------------------------------------*/
1078
1079/*---------------------------------------------*/
1080void makeMaps ( void )
1081{
1082 Int32 i;
1083 nInUse = 0;
1084 for (i = 0; i < 256; i++)
1085 if (inUse[i]) {
1086 seqToUnseq[nInUse] = i;
1087 unseqToSeq[i] = nInUse;
1088 nInUse++;
1089 }
1090}
1091
1092
1093/*---------------------------------------------*/ 391/*---------------------------------------------*/
1094void generateMTFValues ( void ) 392Bool uncompressStream ( FILE *zStream, FILE *stream )
1095{
1096 UChar yy[256];
1097 Int32 i, j;
1098 UChar tmp;
1099 UChar tmp2;
1100 Int32 zPend;
1101 Int32 wr;
1102 Int32 EOB;
1103
1104 makeMaps();
1105 EOB = nInUse+1;
1106
1107 for (i = 0; i <= EOB; i++) mtfFreq[i] = 0;
1108
1109 wr = 0;
1110 zPend = 0;
1111 for (i = 0; i < nInUse; i++) yy[i] = (UChar) i;
1112
1113
1114 for (i = 0; i <= last; i++) {
1115 UChar ll_i;
1116
1117 #if DEBUG
1118 assert (wr <= i);
1119 #endif
1120
1121 ll_i = unseqToSeq[block[zptr[i] - 1]];
1122 #if DEBUG
1123 assert (ll_i < nInUse);
1124 #endif
1125
1126 j = 0;
1127 tmp = yy[j];
1128 while ( ll_i != tmp ) {
1129 j++;
1130 tmp2 = tmp;
1131 tmp = yy[j];
1132 yy[j] = tmp2;
1133 };
1134 yy[0] = tmp;
1135
1136 if (j == 0) {
1137 zPend++;
1138 } else {
1139 if (zPend > 0) {
1140 zPend--;
1141 while (True) {
1142 switch (zPend % 2) {
1143 case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
1144 case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
1145 };
1146 if (zPend < 2) break;
1147 zPend = (zPend - 2) / 2;
1148 };
1149 zPend = 0;
1150 }
1151 szptr[wr] = j+1; wr++; mtfFreq[j+1]++;
1152 }
1153 }
1154
1155 if (zPend > 0) {
1156 zPend--;
1157 while (True) {
1158 switch (zPend % 2) {
1159 case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
1160 case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
1161 };
1162 if (zPend < 2) break;
1163 zPend = (zPend - 2) / 2;
1164 };
1165 }
1166
1167 szptr[wr] = EOB; wr++; mtfFreq[EOB]++;
1168
1169 nMTF = wr;
1170}
1171
1172
1173/*---------------------------------------------*/
1174#define LESSER_ICOST 0
1175#define GREATER_ICOST 15
1176
1177void sendMTFValues ( void )
1178{ 393{
1179 Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; 394 BZFILE* bzf = NULL;
1180 Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; 395 Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i;
1181 Int32 nGroups, nBytes; 396 UChar obuf[5000];
1182 397 UChar unused[BZ_MAX_UNUSED];
1183 /*-- 398 Int32 nUnused;
1184 UChar len [N_GROUPS][MAX_ALPHA_SIZE]; 399 UChar* unusedTmp;
1185 is a global since the decoder also needs it.
1186
1187 Int32 code[N_GROUPS][MAX_ALPHA_SIZE];
1188 Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE];
1189 are also globals only used in this proc.
1190 Made global to keep stack frame size small.
1191 --*/
1192
1193
1194 UInt16 cost[N_GROUPS];
1195 Int32 fave[N_GROUPS];
1196
1197 if (verbosity >= 3)
1198 fprintf ( stderr,
1199 " %d in block, %d after MTF & 1-2 coding, %d+2 syms in use\n",
1200 last+1, nMTF, nInUse );
1201
1202 alphaSize = nInUse+2;
1203 for (t = 0; t < N_GROUPS; t++)
1204 for (v = 0; v < alphaSize; v++)
1205 len[t][v] = GREATER_ICOST;
1206
1207 /*--- Decide how many coding tables to use ---*/
1208 if (nMTF <= 0) panic ( "sendMTFValues(0)" );
1209 if (nMTF < 200) nGroups = 2; else
1210 if (nMTF < 800) nGroups = 4; else
1211 nGroups = 6;
1212
1213 /*--- Generate an initial set of coding tables ---*/
1214 {
1215 Int32 nPart, remF, tFreq, aFreq;
1216
1217 nPart = nGroups;
1218 remF = nMTF;
1219 gs = 0;
1220 while (nPart > 0) {
1221 tFreq = remF / nPart;
1222 ge = gs-1;
1223 aFreq = 0;
1224 while (aFreq < tFreq && ge < alphaSize-1) {
1225 ge++;
1226 aFreq += mtfFreq[ge];
1227 }
1228
1229 if (ge > gs
1230 && nPart != nGroups && nPart != 1
1231 && ((nGroups-nPart) % 2 == 1)) {
1232 aFreq -= mtfFreq[ge];
1233 ge--;
1234 }
1235 400
1236 if (verbosity >= 3) 401 nUnused = 0;
1237 fprintf ( stderr, 402 streamNo = 0;
1238 " initial group %d, [%d .. %d], has %d syms (%4.1f%%)\n",
1239 nPart, gs, ge, aFreq,
1240 (100.0 * (float)aFreq) / (float)nMTF );
1241
1242 for (v = 0; v < alphaSize; v++)
1243 if (v >= gs && v <= ge)
1244 len[nPart-1][v] = LESSER_ICOST; else
1245 len[nPart-1][v] = GREATER_ICOST;
1246
1247 nPart--;
1248 gs = ge+1;
1249 remF -= aFreq;
1250 }
1251 }
1252
1253 /*---
1254 Iterate up to N_ITERS times to improve the tables.
1255 ---*/
1256 for (iter = 0; iter < N_ITERS; iter++) {
1257
1258 for (t = 0; t < nGroups; t++) fave[t] = 0;
1259
1260 for (t = 0; t < nGroups; t++)
1261 for (v = 0; v < alphaSize; v++)
1262 rfreq[t][v] = 0;
1263
1264 nSelectors = 0;
1265 totc = 0;
1266 gs = 0;
1267 while (True) {
1268
1269 /*--- Set group start & end marks. --*/
1270 if (gs >= nMTF) break;
1271 ge = gs + G_SIZE - 1;
1272 if (ge >= nMTF) ge = nMTF-1;
1273
1274 /*--
1275 Calculate the cost of this group as coded
1276 by each of the coding tables.
1277 --*/
1278 for (t = 0; t < nGroups; t++) cost[t] = 0;
1279
1280 if (nGroups == 6) {
1281 register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
1282 cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
1283 for (i = gs; i <= ge; i++) {
1284 UInt16 icv = szptr[i];
1285 cost0 += len[0][icv];
1286 cost1 += len[1][icv];
1287 cost2 += len[2][icv];
1288 cost3 += len[3][icv];
1289 cost4 += len[4][icv];
1290 cost5 += len[5][icv];
1291 }
1292 cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
1293 cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
1294 } else {
1295 for (i = gs; i <= ge; i++) {
1296 UInt16 icv = szptr[i];
1297 for (t = 0; t < nGroups; t++) cost[t] += len[t][icv];
1298 }
1299 }
1300
1301 /*--
1302 Find the coding table which is best for this group,
1303 and record its identity in the selector table.
1304 --*/
1305 bc = 999999999; bt = -1;
1306 for (t = 0; t < nGroups; t++)
1307 if (cost[t] < bc) { bc = cost[t]; bt = t; };
1308 totc += bc;
1309 fave[bt]++;
1310 selector[nSelectors] = bt;
1311 nSelectors++;
1312
1313 /*--
1314 Increment the symbol frequencies for the selected table.
1315 --*/
1316 for (i = gs; i <= ge; i++)
1317 rfreq[bt][ szptr[i] ]++;
1318
1319 gs = ge+1;
1320 }
1321 if (verbosity >= 3) {
1322 fprintf ( stderr,
1323 " pass %d: size is %d, grp uses are ",
1324 iter+1, totc/8 );
1325 for (t = 0; t < nGroups; t++)
1326 fprintf ( stderr, "%d ", fave[t] );
1327 fprintf ( stderr, "\n" );
1328 }
1329
1330 /*--
1331 Recompute the tables based on the accumulated frequencies.
1332 --*/
1333 for (t = 0; t < nGroups; t++)
1334 hbMakeCodeLengths ( &len[t][0], &rfreq[t][0], alphaSize, 20 );
1335 }
1336 403
404 SET_BINARY_MODE(stream);
405 SET_BINARY_MODE(zStream);
1337 406
1338 if (!(nGroups < 8)) panic ( "sendMTFValues(1)" ); 407 if (ferror(stream)) goto errhandler_io;
1339 if (!(nSelectors < 32768 && 408 if (ferror(zStream)) goto errhandler_io;
1340 nSelectors <= (2 + (900000 / G_SIZE))))
1341 panic ( "sendMTFValues(2)" );
1342
1343
1344 /*--- Compute MTF values for the selectors. ---*/
1345 {
1346 UChar pos[N_GROUPS], ll_i, tmp2, tmp;
1347 for (i = 0; i < nGroups; i++) pos[i] = i;
1348 for (i = 0; i < nSelectors; i++) {
1349 ll_i = selector[i];
1350 j = 0;
1351 tmp = pos[j];
1352 while ( ll_i != tmp ) {
1353 j++;
1354 tmp2 = tmp;
1355 tmp = pos[j];
1356 pos[j] = tmp2;
1357 };
1358 pos[0] = tmp;
1359 selectorMtf[i] = j;
1360 }
1361 };
1362
1363 /*--- Assign actual codes for the tables. --*/
1364 for (t = 0; t < nGroups; t++) {
1365 minLen = 32;
1366 maxLen = 0;
1367 for (i = 0; i < alphaSize; i++) {
1368 if (len[t][i] > maxLen) maxLen = len[t][i];
1369 if (len[t][i] < minLen) minLen = len[t][i];
1370 }
1371 if (maxLen > 20) panic ( "sendMTFValues(3)" );
1372 if (minLen < 1) panic ( "sendMTFValues(4)" );
1373 hbAssignCodes ( &code[t][0], &len[t][0],
1374 minLen, maxLen, alphaSize );
1375 }
1376
1377 /*--- Transmit the mapping table. ---*/
1378 {
1379 Bool inUse16[16];
1380 for (i = 0; i < 16; i++) {
1381 inUse16[i] = False;
1382 for (j = 0; j < 16; j++)
1383 if (inUse[i * 16 + j]) inUse16[i] = True;
1384 }
1385
1386 nBytes = bytesOut;
1387 for (i = 0; i < 16; i++)
1388 if (inUse16[i]) bsW(1,1); else bsW(1,0);
1389
1390 for (i = 0; i < 16; i++)
1391 if (inUse16[i])
1392 for (j = 0; j < 16; j++)
1393 if (inUse[i * 16 + j]) bsW(1,1); else bsW(1,0);
1394
1395 if (verbosity >= 3)
1396 fprintf ( stderr, " bytes: mapping %d, ", bytesOut-nBytes );
1397 }
1398
1399 /*--- Now the selectors. ---*/
1400 nBytes = bytesOut;
1401 bsW ( 3, nGroups );
1402 bsW ( 15, nSelectors );
1403 for (i = 0; i < nSelectors; i++) {
1404 for (j = 0; j < selectorMtf[i]; j++) bsW(1,1);
1405 bsW(1,0);
1406 }
1407 if (verbosity >= 3)
1408 fprintf ( stderr, "selectors %d, ", bytesOut-nBytes );
1409
1410 /*--- Now the coding tables. ---*/
1411 nBytes = bytesOut;
1412
1413 for (t = 0; t < nGroups; t++) {
1414 Int32 curr = len[t][0];
1415 bsW ( 5, curr );
1416 for (i = 0; i < alphaSize; i++) {
1417 while (curr < len[t][i]) { bsW(2,2); curr++; /* 10 */ };
1418 while (curr > len[t][i]) { bsW(2,3); curr--; /* 11 */ };
1419 bsW ( 1, 0 );
1420 }
1421 }
1422
1423 if (verbosity >= 3)
1424 fprintf ( stderr, "code lengths %d, ", bytesOut-nBytes );
1425 409
1426 /*--- And finally, the block data proper ---*/
1427 nBytes = bytesOut;
1428 selCtr = 0;
1429 gs = 0;
1430 while (True) { 410 while (True) {
1431 if (gs >= nMTF) break;
1432 ge = gs + G_SIZE - 1;
1433 if (ge >= nMTF) ge = nMTF-1;
1434 for (i = gs; i <= ge; i++) {
1435 #if DEBUG
1436 assert (selector[selCtr] < nGroups);
1437 #endif
1438 bsW ( len [selector[selCtr]] [szptr[i]],
1439 code [selector[selCtr]] [szptr[i]] );
1440 }
1441 411
1442 gs = ge+1; 412 bzf = bzReadOpen (
1443 selCtr++; 413 &bzerr, zStream, verbosity,
1444 } 414 (int)smallMode, unused, nUnused
1445 if (!(selCtr == nSelectors)) panic ( "sendMTFValues(5)" ); 415 );
1446 416 if (bzf == NULL || bzerr != BZ_OK) goto errhandler;
1447 if (verbosity >= 3) 417 streamNo++;
1448 fprintf ( stderr, "codes %d\n", bytesOut-nBytes ); 418
1449} 419 while (bzerr == BZ_OK) {
1450 420 nread = bzRead ( &bzerr, bzf, obuf, 5000 );
1451 421 if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler;
1452/*---------------------------------------------*/ 422 if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0)
1453void moveToFrontCodeAndSend ( void ) 423 fwrite ( obuf, sizeof(UChar), nread, stream );
1454{ 424 if (ferror(stream)) goto errhandler_io;
1455 bsPutIntVS ( 24, origPtr );
1456 generateMTFValues();
1457 sendMTFValues();
1458}
1459
1460
1461/*---------------------------------------------*/
1462void recvDecodingTables ( void )
1463{
1464 Int32 i, j, t, nGroups, nSelectors, alphaSize;
1465 Int32 minLen, maxLen;
1466 Bool inUse16[16];
1467
1468 /*--- Receive the mapping table ---*/
1469 for (i = 0; i < 16; i++)
1470 if (bsR(1) == 1)
1471 inUse16[i] = True; else
1472 inUse16[i] = False;
1473
1474 for (i = 0; i < 256; i++) inUse[i] = False;
1475
1476 for (i = 0; i < 16; i++)
1477 if (inUse16[i])
1478 for (j = 0; j < 16; j++)
1479 if (bsR(1) == 1) inUse[i * 16 + j] = True;
1480
1481 makeMaps();
1482 alphaSize = nInUse+2;
1483
1484 /*--- Now the selectors ---*/
1485 nGroups = bsR ( 3 );
1486 nSelectors = bsR ( 15 );
1487 for (i = 0; i < nSelectors; i++) {
1488 j = 0;
1489 while (bsR(1) == 1) j++;
1490 selectorMtf[i] = j;
1491 }
1492
1493 /*--- Undo the MTF values for the selectors. ---*/
1494 {
1495 UChar pos[N_GROUPS], tmp, v;
1496 for (v = 0; v < nGroups; v++) pos[v] = v;
1497
1498 for (i = 0; i < nSelectors; i++) {
1499 v = selectorMtf[i];
1500 tmp = pos[v];
1501 while (v > 0) { pos[v] = pos[v-1]; v--; }
1502 pos[0] = tmp;
1503 selector[i] = tmp;
1504 } 425 }
1505 } 426 if (bzerr != BZ_STREAM_END) goto errhandler;
1506
1507 /*--- Now the coding tables ---*/
1508 for (t = 0; t < nGroups; t++) {
1509 Int32 curr = bsR ( 5 );
1510 for (i = 0; i < alphaSize; i++) {
1511 while (bsR(1) == 1) {
1512 if (bsR(1) == 0) curr++; else curr--;
1513 }
1514 len[t][i] = curr;
1515 }
1516 }
1517
1518 /*--- Create the Huffman decoding tables ---*/
1519 for (t = 0; t < nGroups; t++) {
1520 minLen = 32;
1521 maxLen = 0;
1522 for (i = 0; i < alphaSize; i++) {
1523 if (len[t][i] > maxLen) maxLen = len[t][i];
1524 if (len[t][i] < minLen) minLen = len[t][i];
1525 }
1526 hbCreateDecodeTables (
1527 &limit[t][0], &base[t][0], &perm[t][0], &len[t][0],
1528 minLen, maxLen, alphaSize
1529 );
1530 minLens[t] = minLen;
1531 }
1532}
1533
1534
1535/*---------------------------------------------*/
1536#define GET_MTF_VAL(lval) \
1537{ \
1538 Int32 zt, zn, zvec, zj; \
1539 if (groupPos == 0) { \
1540 groupNo++; \
1541 groupPos = G_SIZE; \
1542 } \
1543 groupPos--; \
1544 zt = selector[groupNo]; \
1545 zn = minLens[zt]; \
1546 zvec = bsR ( zn ); \
1547 while (zvec > limit[zt][zn]) { \
1548 zn++; bsR1(zj); \
1549 zvec = (zvec << 1) | zj; \
1550 }; \
1551 lval = perm[zt][zvec - base[zt][zn]]; \
1552}
1553
1554
1555/*---------------------------------------------*/
1556void getAndMoveToFrontDecode ( void )
1557{
1558 UChar yy[256];
1559 Int32 i, j, nextSym, limitLast;
1560 Int32 EOB, groupNo, groupPos;
1561
1562 limitLast = 100000 * blockSize100k;
1563 origPtr = bsGetIntVS ( 24 );
1564
1565 recvDecodingTables();
1566 EOB = nInUse+1;
1567 groupNo = -1;
1568 groupPos = 0;
1569
1570 /*--
1571 Setting up the unzftab entries here is not strictly
1572 necessary, but it does save having to do it later
1573 in a separate pass, and so saves a block's worth of
1574 cache misses.
1575 --*/
1576 for (i = 0; i <= 255; i++) unzftab[i] = 0;
1577
1578 for (i = 0; i <= 255; i++) yy[i] = (UChar) i;
1579
1580 last = -1;
1581 427
1582 GET_MTF_VAL(nextSym); 428 bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
429 if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" );
1583 430
1584 while (True) { 431 for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i];
1585
1586 if (nextSym == EOB) break;
1587
1588 if (nextSym == RUNA || nextSym == RUNB) {
1589 UChar ch;
1590 Int32 s = -1;
1591 Int32 N = 1;
1592 do {
1593 if (nextSym == RUNA) s = s + (0+1) * N; else
1594 if (nextSym == RUNB) s = s + (1+1) * N;
1595 N = N * 2;
1596 GET_MTF_VAL(nextSym);
1597 }
1598 while (nextSym == RUNA || nextSym == RUNB);
1599 432
1600 s++; 433 bzReadClose ( &bzerr, bzf );
1601 ch = seqToUnseq[yy[0]]; 434 if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" );
1602 unzftab[ch] += s;
1603 435
1604 if (smallMode) 436 if (nUnused == 0 && myfeof(zStream)) break;
1605 while (s > 0) {
1606 last++;
1607 ll16[last] = ch;
1608 s--;
1609 }
1610 else
1611 while (s > 0) {
1612 last++;
1613 ll8[last] = ch;
1614 s--;
1615 };
1616
1617 if (last >= limitLast) blockOverrun();
1618 continue;
1619
1620 } else {
1621
1622 UChar tmp;
1623 last++; if (last >= limitLast) blockOverrun();
1624
1625 tmp = yy[nextSym-1];
1626 unzftab[seqToUnseq[tmp]]++;
1627 if (smallMode)
1628 ll16[last] = seqToUnseq[tmp]; else
1629 ll8[last] = seqToUnseq[tmp];
1630
1631 /*--
1632 This loop is hammered during decompression,
1633 hence the unrolling.
1634
1635 for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
1636 --*/
1637
1638 j = nextSym-1;
1639 for (; j > 3; j -= 4) {
1640 yy[j] = yy[j-1];
1641 yy[j-1] = yy[j-2];
1642 yy[j-2] = yy[j-3];
1643 yy[j-3] = yy[j-4];
1644 }
1645 for (; j > 0; j--) yy[j] = yy[j-1];
1646 437
1647 yy[0] = tmp;
1648 GET_MTF_VAL(nextSym);
1649 continue;
1650 }
1651 } 438 }
1652}
1653
1654
1655/*---------------------------------------------------*/
1656/*--- Block-sorting machinery ---*/
1657/*---------------------------------------------------*/
1658 439
1659/*---------------------------------------------*/ 440 if (ferror(zStream)) goto errhandler_io;
1660/*-- 441 ret = fclose ( zStream );
1661 Compare two strings in block. We assume (see 442 if (ret == EOF) goto errhandler_io;
1662 discussion above) that i1 and i2 have a max
1663 offset of 10 on entry, and that the first
1664 bytes of both block and quadrant have been
1665 copied into the "overshoot area", ie
1666 into the subscript range
1667 [last+1 .. last+NUM_OVERSHOOT_BYTES].
1668--*/
1669INLINE Bool fullGtU ( Int32 i1, Int32 i2 )
1670{
1671 Int32 k;
1672 UChar c1, c2;
1673 UInt16 s1, s2;
1674
1675 #if DEBUG
1676 /*--
1677 shellsort shouldn't ask to compare
1678 something with itself.
1679 --*/
1680 assert (i1 != i2);
1681 #endif
1682
1683 c1 = block[i1];
1684 c2 = block[i2];
1685 if (c1 != c2) return (c1 > c2);
1686 i1++; i2++;
1687
1688 c1 = block[i1];
1689 c2 = block[i2];
1690 if (c1 != c2) return (c1 > c2);
1691 i1++; i2++;
1692
1693 c1 = block[i1];
1694 c2 = block[i2];
1695 if (c1 != c2) return (c1 > c2);
1696 i1++; i2++;
1697
1698 c1 = block[i1];
1699 c2 = block[i2];
1700 if (c1 != c2) return (c1 > c2);
1701 i1++; i2++;
1702
1703 c1 = block[i1];
1704 c2 = block[i2];
1705 if (c1 != c2) return (c1 > c2);
1706 i1++; i2++;
1707
1708 c1 = block[i1];
1709 c2 = block[i2];
1710 if (c1 != c2) return (c1 > c2);
1711 i1++; i2++;
1712
1713 k = last + 1;
1714
1715 do {
1716
1717 c1 = block[i1];
1718 c2 = block[i2];
1719 if (c1 != c2) return (c1 > c2);
1720 s1 = quadrant[i1];
1721 s2 = quadrant[i2];
1722 if (s1 != s2) return (s1 > s2);
1723 i1++; i2++;
1724
1725 c1 = block[i1];
1726 c2 = block[i2];
1727 if (c1 != c2) return (c1 > c2);
1728 s1 = quadrant[i1];
1729 s2 = quadrant[i2];
1730 if (s1 != s2) return (s1 > s2);
1731 i1++; i2++;
1732
1733 c1 = block[i1];
1734 c2 = block[i2];
1735 if (c1 != c2) return (c1 > c2);
1736 s1 = quadrant[i1];
1737 s2 = quadrant[i2];
1738 if (s1 != s2) return (s1 > s2);
1739 i1++; i2++;
1740
1741 c1 = block[i1];
1742 c2 = block[i2];
1743 if (c1 != c2) return (c1 > c2);
1744 s1 = quadrant[i1];
1745 s2 = quadrant[i2];
1746 if (s1 != s2) return (s1 > s2);
1747 i1++; i2++;
1748
1749 if (i1 > last) { i1 -= last; i1--; };
1750 if (i2 > last) { i2 -= last; i2--; };
1751
1752 k -= 4;
1753 workDone++;
1754 }
1755 while (k >= 0);
1756 443
1757 return False; 444 if (ferror(stream)) goto errhandler_io;
1758} 445 ret = fflush ( stream );
1759 446 if (ret != 0) goto errhandler_io;
1760/*---------------------------------------------*/ 447 if (stream != stdout) {
1761/*-- 448 ret = fclose ( stream );
1762 Knuth's increments seem to work better 449 if (ret == EOF) goto errhandler_io;
1763 than Incerpi-Sedgewick here. Possibly
1764 because the number of elems to sort is
1765 usually small, typically <= 20.
1766--*/
1767Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
1768 9841, 29524, 88573, 265720,
1769 797161, 2391484 };
1770
1771void simpleSort ( Int32 lo, Int32 hi, Int32 d )
1772{
1773 Int32 i, j, h, bigN, hp;
1774 Int32 v;
1775
1776 bigN = hi - lo + 1;
1777 if (bigN < 2) return;
1778
1779 hp = 0;
1780 while (incs[hp] < bigN) hp++;
1781 hp--;
1782
1783 for (; hp >= 0; hp--) {
1784 h = incs[hp];
1785 if (verbosity >= 5)
1786 fprintf ( stderr, " shell increment %d\n", h );
1787
1788 i = lo + h;
1789 while (True) {
1790
1791 /*-- copy 1 --*/
1792 if (i > hi) break;
1793 v = zptr[i];
1794 j = i;
1795 while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1796 zptr[j] = zptr[j-h];
1797 j = j - h;
1798 if (j <= (lo + h - 1)) break;
1799 }
1800 zptr[j] = v;
1801 i++;
1802
1803 /*-- copy 2 --*/
1804 if (i > hi) break;
1805 v = zptr[i];
1806 j = i;
1807 while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1808 zptr[j] = zptr[j-h];
1809 j = j - h;
1810 if (j <= (lo + h - 1)) break;
1811 }
1812 zptr[j] = v;
1813 i++;
1814
1815 /*-- copy 3 --*/
1816 if (i > hi) break;
1817 v = zptr[i];
1818 j = i;
1819 while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1820 zptr[j] = zptr[j-h];
1821 j = j - h;
1822 if (j <= (lo + h - 1)) break;
1823 }
1824 zptr[j] = v;
1825 i++;
1826
1827 if (workDone > workLimit && firstAttempt) return;
1828 }
1829 }
1830}
1831
1832
1833/*---------------------------------------------*/
1834/*--
1835 The following is an implementation of
1836 an elegant 3-way quicksort for strings,
1837 described in a paper "Fast Algorithms for
1838 Sorting and Searching Strings", by Robert
1839 Sedgewick and Jon L. Bentley.
1840--*/
1841
1842#define swap(lv1, lv2) \
1843 { Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; }
1844
1845INLINE void vswap ( Int32 p1, Int32 p2, Int32 n )
1846{
1847 while (n > 0) {
1848 swap(zptr[p1], zptr[p2]);
1849 p1++; p2++; n--;
1850 }
1851}
1852
1853INLINE UChar med3 ( UChar a, UChar b, UChar c )
1854{
1855 UChar t;
1856 if (a > b) { t = a; a = b; b = t; };
1857 if (b > c) { t = b; b = c; c = t; };
1858 if (a > b) b = a;
1859 return b;
1860}
1861
1862
1863#define min(a,b) ((a) < (b)) ? (a) : (b)
1864
1865typedef
1866 struct { Int32 ll; Int32 hh; Int32 dd; }
1867 StackElem;
1868
1869#define push(lz,hz,dz) { stack[sp].ll = lz; \
1870 stack[sp].hh = hz; \
1871 stack[sp].dd = dz; \
1872 sp++; }
1873
1874#define pop(lz,hz,dz) { sp--; \
1875 lz = stack[sp].ll; \
1876 hz = stack[sp].hh; \
1877 dz = stack[sp].dd; }
1878
1879#define SMALL_THRESH 20
1880#define DEPTH_THRESH 10
1881
1882/*--
1883 If you are ever unlucky/improbable enough
1884 to get a stack overflow whilst sorting,
1885 increase the following constant and try
1886 again. In practice I have never seen the
1887 stack go above 27 elems, so the following
1888 limit seems very generous.
1889--*/
1890#define QSORT_STACK_SIZE 1000
1891
1892
1893void qSort3 ( Int32 loSt, Int32 hiSt, Int32 dSt )
1894{
1895 Int32 unLo, unHi, ltLo, gtHi, med, n, m;
1896 Int32 sp, lo, hi, d;
1897 StackElem stack[QSORT_STACK_SIZE];
1898
1899 sp = 0;
1900 push ( loSt, hiSt, dSt );
1901
1902 while (sp > 0) {
1903
1904 if (sp >= QSORT_STACK_SIZE) panic ( "stack overflow in qSort3" );
1905
1906 pop ( lo, hi, d );
1907
1908 if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
1909 simpleSort ( lo, hi, d );
1910 if (workDone > workLimit && firstAttempt) return;
1911 continue;
1912 }
1913
1914 med = med3 ( block[zptr[ lo ]+d],
1915 block[zptr[ hi ]+d],
1916 block[zptr[ (lo+hi)>>1 ]+d] );
1917
1918 unLo = ltLo = lo;
1919 unHi = gtHi = hi;
1920
1921 while (True) {
1922 while (True) {
1923 if (unLo > unHi) break;
1924 n = ((Int32)block[zptr[unLo]+d]) - med;
1925 if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; };
1926 if (n > 0) break;
1927 unLo++;
1928 }
1929 while (True) {
1930 if (unLo > unHi) break;
1931 n = ((Int32)block[zptr[unHi]+d]) - med;
1932 if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; };
1933 if (n < 0) break;
1934 unHi--;
1935 }
1936 if (unLo > unHi) break;
1937 swap(zptr[unLo], zptr[unHi]); unLo++; unHi--;
1938 }
1939 #if DEBUG
1940 assert (unHi == unLo-1);
1941 #endif
1942
1943 if (gtHi < ltLo) {
1944 push(lo, hi, d+1 );
1945 continue;
1946 }
1947
1948 n = min(ltLo-lo, unLo-ltLo); vswap(lo, unLo-n, n);
1949 m = min(hi-gtHi, gtHi-unHi); vswap(unLo, hi-m+1, m);
1950
1951 n = lo + unLo - ltLo - 1;
1952 m = hi - (gtHi - unHi) + 1;
1953
1954 push ( lo, n, d );
1955 push ( n+1, m-1, d+1 );
1956 push ( m, hi, d );
1957 }
1958}
1959
1960
1961/*---------------------------------------------*/
1962
1963#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
1964
1965#define SETMASK (1 << 21)
1966#define CLEARMASK (~(SETMASK))
1967
1968void sortIt ( void )
1969{
1970 Int32 i, j, ss, sb;
1971 Int32 runningOrder[256];
1972 Int32 copy[256];
1973 Bool bigDone[256];
1974 UChar c1, c2;
1975 Int32 numQSorted;
1976
1977 /*--
1978 In the various block-sized structures, live data runs
1979 from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
1980 set up the overshoot area for block.
1981 --*/
1982
1983 if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
1984 for (i = 0; i < NUM_OVERSHOOT_BYTES; i++)
1985 block[last+i+1] = block[i % (last+1)];
1986 for (i = 0; i <= last+NUM_OVERSHOOT_BYTES; i++)
1987 quadrant[i] = 0;
1988
1989 block[-1] = block[last];
1990
1991 if (last < 4000) {
1992
1993 /*--
1994 Use simpleSort(), since the full sorting mechanism
1995 has quite a large constant overhead.
1996 --*/
1997 if (verbosity >= 4) fprintf ( stderr, " simpleSort ...\n" );
1998 for (i = 0; i <= last; i++) zptr[i] = i;
1999 firstAttempt = False;
2000 workDone = workLimit = 0;
2001 simpleSort ( 0, last, 0 );
2002 if (verbosity >= 4) fprintf ( stderr, " simpleSort done.\n" );
2003
2004 } else {
2005
2006 numQSorted = 0;
2007 for (i = 0; i <= 255; i++) bigDone[i] = False;
2008
2009 if (verbosity >= 4) fprintf ( stderr, " bucket sorting ...\n" );
2010
2011 for (i = 0; i <= 65536; i++) ftab[i] = 0;
2012
2013 c1 = block[-1];
2014 for (i = 0; i <= last; i++) {
2015 c2 = block[i];
2016 ftab[(c1 << 8) + c2]++;
2017 c1 = c2;
2018 }
2019
2020 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
2021
2022 c1 = block[0];
2023 for (i = 0; i < last; i++) {
2024 c2 = block[i+1];
2025 j = (c1 << 8) + c2;
2026 c1 = c2;
2027 ftab[j]--;
2028 zptr[ftab[j]] = i;
2029 }
2030 j = (block[last] << 8) + block[0];
2031 ftab[j]--;
2032 zptr[ftab[j]] = last;
2033
2034 /*--
2035 Now ftab contains the first loc of every small bucket.
2036 Calculate the running order, from smallest to largest
2037 big bucket.
2038 --*/
2039
2040 for (i = 0; i <= 255; i++) runningOrder[i] = i;
2041
2042 {
2043 Int32 vv;
2044 Int32 h = 1;
2045 do h = 3 * h + 1; while (h <= 256);
2046 do {
2047 h = h / 3;
2048 for (i = h; i <= 255; i++) {
2049 vv = runningOrder[i];
2050 j = i;
2051 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
2052 runningOrder[j] = runningOrder[j-h];
2053 j = j - h;
2054 if (j <= (h - 1)) goto zero;
2055 }
2056 zero:
2057 runningOrder[j] = vv;
2058 }
2059 } while (h != 1);
2060 }
2061
2062 /*--
2063 The main sorting loop.
2064 --*/
2065
2066 for (i = 0; i <= 255; i++) {
2067
2068 /*--
2069 Process big buckets, starting with the least full.
2070 --*/
2071 ss = runningOrder[i];
2072
2073 /*--
2074 Complete the big bucket [ss] by quicksorting
2075 any unsorted small buckets [ss, j]. Hopefully
2076 previous pointer-scanning phases have already
2077 completed many of the small buckets [ss, j], so
2078 we don't have to sort them at all.
2079 --*/
2080 for (j = 0; j <= 255; j++) {
2081 sb = (ss << 8) + j;
2082 if ( ! (ftab[sb] & SETMASK) ) {
2083 Int32 lo = ftab[sb] & CLEARMASK;
2084 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
2085 if (hi > lo) {
2086 if (verbosity >= 4)
2087 fprintf ( stderr,
2088 " qsort [0x%x, 0x%x] done %d this %d\n",
2089 ss, j, numQSorted, hi - lo + 1 );
2090 qSort3 ( lo, hi, 2 );
2091 numQSorted += ( hi - lo + 1 );
2092 if (workDone > workLimit && firstAttempt) return;
2093 }
2094 ftab[sb] |= SETMASK;
2095 }
2096 }
2097
2098 /*--
2099 The ss big bucket is now done. Record this fact,
2100 and update the quadrant descriptors. Remember to
2101 update quadrants in the overshoot area too, if
2102 necessary. The "if (i < 255)" test merely skips
2103 this updating for the last bucket processed, since
2104 updating for the last bucket is pointless.
2105 --*/
2106 bigDone[ss] = True;
2107
2108 if (i < 255) {
2109 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
2110 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
2111 Int32 shifts = 0;
2112
2113 while ((bbSize >> shifts) > 65534) shifts++;
2114
2115 for (j = 0; j < bbSize; j++) {
2116 Int32 a2update = zptr[bbStart + j];
2117 UInt16 qVal = (UInt16)(j >> shifts);
2118 quadrant[a2update] = qVal;
2119 if (a2update < NUM_OVERSHOOT_BYTES)
2120 quadrant[a2update + last + 1] = qVal;
2121 }
2122
2123 if (! ( ((bbSize-1) >> shifts) <= 65535 )) panic ( "sortIt" );
2124 }
2125
2126 /*--
2127 Now scan this big bucket so as to synthesise the
2128 sorted order for small buckets [t, ss] for all t != ss.
2129 --*/
2130 for (j = 0; j <= 255; j++)
2131 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
2132
2133 for (j = ftab[ss << 8] & CLEARMASK;
2134 j < (ftab[(ss+1) << 8] & CLEARMASK);
2135 j++) {
2136 c1 = block[zptr[j]-1];
2137 if ( ! bigDone[c1] ) {
2138 zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
2139 copy[c1] ++;
2140 }
2141 }
2142
2143 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
2144 }
2145 if (verbosity >= 4)
2146 fprintf ( stderr, " %d pointers, %d sorted, %d scanned\n",
2147 last+1, numQSorted, (last+1) - numQSorted );
2148 }
2149}
2150
2151
2152/*---------------------------------------------------*/
2153/*--- Stuff for randomising repetitive blocks ---*/
2154/*---------------------------------------------------*/
2155
2156/*---------------------------------------------*/
2157Int32 rNums[512] = {
2158 619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
2159 985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
2160 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
2161 419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
2162 878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
2163 862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
2164 150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
2165 170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
2166 73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
2167 909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
2168 641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
2169 161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
2170 382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
2171 98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
2172 227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
2173 469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
2174 184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
2175 715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
2176 951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
2177 652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
2178 645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
2179 609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
2180 653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
2181 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
2182 170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
2183 857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
2184 669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
2185 944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
2186 344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
2187 897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
2188 433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
2189 686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
2190 946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
2191 978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
2192 680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
2193 707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
2194 297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
2195 134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
2196 343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
2197 140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
2198 170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
2199 369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
2200 804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
2201 896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
2202 661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
2203 768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
2204 61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
2205 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
2206 780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
2207 920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
2208 645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
2209 936, 638
2210};
2211
2212
2213#define RAND_DECLS \
2214 Int32 rNToGo = 0; \
2215 Int32 rTPos = 0; \
2216
2217#define RAND_MASK ((rNToGo == 1) ? 1 : 0)
2218
2219#define RAND_UPD_MASK \
2220 if (rNToGo == 0) { \
2221 rNToGo = rNums[rTPos]; \
2222 rTPos++; if (rTPos == 512) rTPos = 0; \
2223 } \
2224 rNToGo--;
2225
2226
2227
2228/*---------------------------------------------------*/
2229/*--- The Reversible Transformation (tm) ---*/
2230/*---------------------------------------------------*/
2231
2232/*---------------------------------------------*/
2233void randomiseBlock ( void )
2234{
2235 Int32 i;
2236 RAND_DECLS;
2237 for (i = 0; i < 256; i++) inUse[i] = False;
2238
2239 for (i = 0; i <= last; i++) {
2240 RAND_UPD_MASK;
2241 block[i] ^= RAND_MASK;
2242 inUse[block[i]] = True;
2243 }
2244}
2245
2246
2247/*---------------------------------------------*/
2248void doReversibleTransformation ( void )
2249{
2250 Int32 i;
2251
2252 if (verbosity >= 2) fprintf ( stderr, "\n" );
2253
2254 workLimit = workFactor * last;
2255 workDone = 0;
2256 blockRandomised = False;
2257 firstAttempt = True;
2258
2259 sortIt ();
2260
2261 if (verbosity >= 3)
2262 fprintf ( stderr, " %d work, %d block, ratio %5.2f\n",
2263 workDone, last, (float)workDone / (float)(last) );
2264
2265 if (workDone > workLimit && firstAttempt) {
2266 if (verbosity >= 2)
2267 fprintf ( stderr, " sorting aborted; randomising block\n" );
2268 randomiseBlock ();
2269 workLimit = workDone = 0;
2270 blockRandomised = True;
2271 firstAttempt = False;
2272 sortIt();
2273 if (verbosity >= 3)
2274 fprintf ( stderr, " %d work, %d block, ratio %f\n",
2275 workDone, last, (float)workDone / (float)(last) );
2276 }
2277
2278 origPtr = -1;
2279 for (i = 0; i <= last; i++)
2280 if (zptr[i] == 0)
2281 { origPtr = i; break; };
2282
2283 if (origPtr == -1) panic ( "doReversibleTransformation" );
2284}
2285
2286
2287/*---------------------------------------------*/
2288
2289INLINE Int32 indexIntoF ( Int32 indx, Int32 *cftab )
2290{
2291 Int32 nb, na, mid;
2292 nb = 0;
2293 na = 256;
2294 do {
2295 mid = (nb + na) >> 1;
2296 if (indx >= cftab[mid]) nb = mid; else na = mid;
2297 }
2298 while (na - nb != 1);
2299 return nb;
2300}
2301
2302
2303#define GET_SMALL(cccc) \
2304 \
2305 cccc = indexIntoF ( tPos, cftab ); \
2306 tPos = GET_LL(tPos);
2307
2308
2309void undoReversibleTransformation_small ( FILE* dst )
2310{
2311 Int32 cftab[257], cftabAlso[257];
2312 Int32 i, j, tmp, tPos;
2313 UChar ch;
2314
2315 /*--
2316 We assume here that the global array unzftab will
2317 already be holding the frequency counts for
2318 ll8[0 .. last].
2319 --*/
2320
2321 /*-- Set up cftab to facilitate generation of indexIntoF --*/
2322 cftab[0] = 0;
2323 for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
2324 for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
2325
2326 /*-- Make a copy of it, used in generation of T --*/
2327 for (i = 0; i <= 256; i++) cftabAlso[i] = cftab[i];
2328
2329 /*-- compute the T vector --*/
2330 for (i = 0; i <= last; i++) {
2331 ch = (UChar)ll16[i];
2332 SET_LL(i, cftabAlso[ch]);
2333 cftabAlso[ch]++;
2334 }
2335
2336 /*--
2337 Compute T^(-1) by pointer reversal on T. This is rather
2338 subtle, in that, if the original block was two or more
2339 (in general, N) concatenated copies of the same thing,
2340 the T vector will consist of N cycles, each of length
2341 blocksize / N, and decoding will involve traversing one
2342 of these cycles N times. Which particular cycle doesn't
2343 matter -- they are all equivalent. The tricky part is to
2344 make sure that the pointer reversal creates a correct
2345 reversed cycle for us to traverse. So, the code below
2346 simply reverses whatever cycle origPtr happens to fall into,
2347 without regard to the cycle length. That gives one reversed
2348 cycle, which for normal blocks, is the entire block-size long.
2349 For repeated blocks, it will be interspersed with the other
2350 N-1 non-reversed cycles. Providing that the F-subscripting
2351 phase which follows starts at origPtr, all then works ok.
2352 --*/
2353 i = origPtr;
2354 j = GET_LL(i);
2355 do {
2356 tmp = GET_LL(j);
2357 SET_LL(j, i);
2358 i = j;
2359 j = tmp;
2360 }
2361 while (i != origPtr);
2362
2363 /*--
2364 We recreate the original by subscripting F through T^(-1).
2365 The run-length-decoder below requires characters incrementally,
2366 so tPos is set to a starting value, and is updated by
2367 the GET_SMALL macro.
2368 --*/
2369 tPos = origPtr;
2370
2371 /*-------------------------------------------------*/
2372 /*--
2373 This is pretty much a verbatim copy of the
2374 run-length decoder present in the distribution
2375 bzip-0.21; it has to be here to avoid creating
2376 block[] as an intermediary structure. As in 0.21,
2377 this code derives from some sent to me by
2378 Christian von Roques.
2379
2380 It allows dst==NULL, so as to support the test (-t)
2381 option without slowing down the fast decompression
2382 code.
2383 --*/
2384 {
2385 IntNative retVal;
2386 Int32 i2, count, chPrev, ch2;
2387 UInt32 localCrc;
2388
2389 count = 0;
2390 i2 = 0;
2391 ch2 = 256; /*-- not a char and not EOF --*/
2392 localCrc = getGlobalCRC();
2393
2394 {
2395 RAND_DECLS;
2396 while ( i2 <= last ) {
2397 chPrev = ch2;
2398 GET_SMALL(ch2);
2399 if (blockRandomised) {
2400 RAND_UPD_MASK;
2401 ch2 ^= (UInt32)RAND_MASK;
2402 }
2403 i2++;
2404
2405 if (dst)
2406 retVal = putc ( ch2, dst );
2407
2408 UPDATE_CRC ( localCrc, (UChar)ch2 );
2409
2410 if (ch2 != chPrev) {
2411 count = 1;
2412 } else {
2413 count++;
2414 if (count >= 4) {
2415 Int32 j2;
2416 UChar z;
2417 GET_SMALL(z);
2418 if (blockRandomised) {
2419 RAND_UPD_MASK;
2420 z ^= RAND_MASK;
2421 }
2422 for (j2 = 0; j2 < (Int32)z; j2++) {
2423 if (dst) retVal = putc (ch2, dst);
2424 UPDATE_CRC ( localCrc, (UChar)ch2 );
2425 }
2426 i2++;
2427 count = 0;
2428 }
2429 }
2430 }
2431 }
2432
2433 setGlobalCRC ( localCrc );
2434 }
2435 /*-- end of the in-line run-length-decoder. --*/
2436}
2437#undef GET_SMALL
2438
2439
2440/*---------------------------------------------*/
2441
2442#define GET_FAST(cccc) \
2443 \
2444 cccc = ll8[tPos]; \
2445 tPos = tt[tPos];
2446
2447
2448void undoReversibleTransformation_fast ( FILE* dst )
2449{
2450 Int32 cftab[257];
2451 Int32 i, tPos;
2452 UChar ch;
2453
2454 /*--
2455 We assume here that the global array unzftab will
2456 already be holding the frequency counts for
2457 ll8[0 .. last].
2458 --*/
2459
2460 /*-- Set up cftab to facilitate generation of T^(-1) --*/
2461 cftab[0] = 0;
2462 for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
2463 for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
2464
2465 /*-- compute the T^(-1) vector --*/
2466 for (i = 0; i <= last; i++) {
2467 ch = (UChar)ll8[i];
2468 tt[cftab[ch]] = i;
2469 cftab[ch]++;
2470 } 450 }
451 if (verbosity >= 2) fprintf ( stderr, "\n " );
452 return True;
2471 453
2472 /*-- 454 errhandler:
2473 We recreate the original by subscripting L through T^(-1). 455 bzReadClose ( &bzerr_dummy, bzf );
2474 The run-length-decoder below requires characters incrementally, 456 switch (bzerr) {
2475 so tPos is set to a starting value, and is updated by 457 case BZ_IO_ERROR:
2476 the GET_FAST macro. 458 errhandler_io:
2477 --*/ 459 ioError(); break;
2478 tPos = tt[origPtr]; 460 case BZ_DATA_ERROR:
2479 461 crcError();
2480 /*-------------------------------------------------*/ 462 case BZ_MEM_ERROR:
2481 /*-- 463 outOfMemory();
2482 This is pretty much a verbatim copy of the 464 case BZ_UNEXPECTED_EOF:
2483 run-length decoder present in the distribution 465 compressedStreamEOF();
2484 bzip-0.21; it has to be here to avoid creating 466 case BZ_DATA_ERROR_MAGIC:
2485 block[] as an intermediary structure. As in 0.21, 467 if (streamNo == 1) {
2486 this code derives from some sent to me by 468 return False;
2487 Christian von Roques. 469 } else {
2488 --*/ 470 fprintf ( stderr,
2489 { 471 "\n%s: %s: trailing garbage after EOF ignored\n",
2490 IntNative retVal; 472 progName, inName );
2491 Int32 i2, count, chPrev, ch2; 473 return True;
2492 UInt32 localCrc;
2493
2494 count = 0;
2495 i2 = 0;
2496 ch2 = 256; /*-- not a char and not EOF --*/
2497 localCrc = getGlobalCRC();
2498
2499 if (blockRandomised) {
2500 RAND_DECLS;
2501 while ( i2 <= last ) {
2502 chPrev = ch2;
2503 GET_FAST(ch2);
2504 RAND_UPD_MASK;
2505 ch2 ^= (UInt32)RAND_MASK;
2506 i2++;
2507
2508 retVal = putc ( ch2, dst );
2509 UPDATE_CRC ( localCrc, (UChar)ch2 );
2510
2511 if (ch2 != chPrev) {
2512 count = 1;
2513 } else {
2514 count++;
2515 if (count >= 4) {
2516 Int32 j2;
2517 UChar z;
2518 GET_FAST(z);
2519 RAND_UPD_MASK;
2520 z ^= RAND_MASK;
2521 for (j2 = 0; j2 < (Int32)z; j2++) {
2522 retVal = putc (ch2, dst);
2523 UPDATE_CRC ( localCrc, (UChar)ch2 );
2524 }
2525 i2++;
2526 count = 0;
2527 }
2528 }
2529 }
2530
2531 } else {
2532
2533 while ( i2 <= last ) {
2534 chPrev = ch2;
2535 GET_FAST(ch2);
2536 i2++;
2537
2538 retVal = putc ( ch2, dst );
2539 UPDATE_CRC ( localCrc, (UChar)ch2 );
2540
2541 if (ch2 != chPrev) {
2542 count = 1;
2543 } else {
2544 count++;
2545 if (count >= 4) {
2546 Int32 j2;
2547 UChar z;
2548 GET_FAST(z);
2549 for (j2 = 0; j2 < (Int32)z; j2++) {
2550 retVal = putc (ch2, dst);
2551 UPDATE_CRC ( localCrc, (UChar)ch2 );
2552 }
2553 i2++;
2554 count = 0;
2555 }
2556 }
2557 } 474 }
2558 475 default:
2559 } /*-- if (blockRandomised) --*/ 476 panic ( "decompress:unexpected error" );
2560
2561 setGlobalCRC ( localCrc );
2562 }
2563 /*-- end of the in-line run-length-decoder. --*/
2564}
2565#undef GET_FAST
2566
2567
2568/*---------------------------------------------------*/
2569/*--- The block loader and RLEr ---*/
2570/*---------------------------------------------------*/
2571
2572/*---------------------------------------------*/
2573/* Top 16: run length, 1 to 255.
2574* Lower 16: the char, or MY_EOF for EOF.
2575*/
2576
2577#define MY_EOF 257
2578
2579INLINE Int32 getRLEpair ( FILE* src )
2580{
2581 Int32 runLength;
2582 IntNative ch, chLatest;
2583
2584 ch = getc ( src );
2585
2586 /*--- Because I have no idea what kind of a value EOF is. ---*/
2587 if (ch == EOF) {
2588 ERROR_IF_NOT_ZERO ( ferror(src));
2589 return (1 << 16) | MY_EOF;
2590 }
2591
2592 runLength = 0;
2593 do {
2594 chLatest = getc ( src );
2595 runLength++;
2596 bytesIn++;
2597 }
2598 while (ch == chLatest && runLength < 255);
2599
2600 if ( chLatest != EOF ) {
2601 if ( ungetc ( chLatest, src ) == EOF )
2602 panic ( "getRLEpair: ungetc failed" );
2603 } else {
2604 ERROR_IF_NOT_ZERO ( ferror(src) );
2605 }
2606
2607 /*--- Conditional is just a speedup hack. ---*/
2608 if (runLength == 1) {
2609 UPDATE_CRC ( globalCrc, (UChar)ch );
2610 return (1 << 16) | ch;
2611 } else {
2612 Int32 i;
2613 for (i = 1; i <= runLength; i++)
2614 UPDATE_CRC ( globalCrc, (UChar)ch );
2615 return (runLength << 16) | ch;
2616 } 477 }
2617}
2618 478
2619 479 panic ( "decompress:end" );
2620/*---------------------------------------------*/ 480 return True; /*notreached*/
2621void loadAndRLEsource ( FILE* src )
2622{
2623 Int32 ch, allowableBlockSize, i;
2624
2625 last = -1;
2626 ch = 0;
2627
2628 for (i = 0; i < 256; i++) inUse[i] = False;
2629
2630 /*--- 20 is just a paranoia constant ---*/
2631 allowableBlockSize = 100000 * blockSize100k - 20;
2632
2633 while (last < allowableBlockSize && ch != MY_EOF) {
2634 Int32 rlePair, runLen;
2635 rlePair = getRLEpair ( src );
2636 ch = rlePair & 0xFFFF;
2637 runLen = (UInt32)rlePair >> 16;
2638
2639 #if DEBUG
2640 assert (runLen >= 1 && runLen <= 255);
2641 #endif
2642
2643 if (ch != MY_EOF) {
2644 inUse[ch] = True;
2645 switch (runLen) {
2646 case 1:
2647 last++; block[last] = (UChar)ch; break;
2648 case 2:
2649 last++; block[last] = (UChar)ch;
2650 last++; block[last] = (UChar)ch; break;
2651 case 3:
2652 last++; block[last] = (UChar)ch;
2653 last++; block[last] = (UChar)ch;
2654 last++; block[last] = (UChar)ch; break;
2655 default:
2656 inUse[runLen-4] = True;
2657 last++; block[last] = (UChar)ch;
2658 last++; block[last] = (UChar)ch;
2659 last++; block[last] = (UChar)ch;
2660 last++; block[last] = (UChar)ch;
2661 last++; block[last] = (UChar)(runLen-4); break;
2662 }
2663 }
2664 }
2665} 481}
2666 482
2667 483
2668/*---------------------------------------------------*/
2669/*--- Processing of complete files and streams ---*/
2670/*---------------------------------------------------*/
2671
2672/*---------------------------------------------*/ 484/*---------------------------------------------*/
2673void compressStream ( FILE *stream, FILE *zStream ) 485Bool testStream ( FILE *zStream )
2674{ 486{
2675 IntNative retVal; 487 BZFILE* bzf = NULL;
2676 UInt32 blockCRC, combinedCRC; 488 Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i;
2677 Int32 blockNo; 489 UChar obuf[5000];
490 UChar unused[BZ_MAX_UNUSED];
491 Int32 nUnused;
492 UChar* unusedTmp;
2678 493
2679 blockNo = 0; 494 nUnused = 0;
2680 bytesIn = 0; 495 streamNo = 0;
2681 bytesOut = 0;
2682 nBlocksRandomised = 0;
2683 496
2684 SET_BINARY_MODE(stream);
2685 SET_BINARY_MODE(zStream); 497 SET_BINARY_MODE(zStream);
2686 498 if (ferror(zStream)) goto errhandler_io;
2687 ERROR_IF_NOT_ZERO ( ferror(stream) );
2688 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2689
2690 bsSetStream ( zStream, True );
2691
2692 /*--- Write `magic' bytes B and Z,
2693 then h indicating file-format == huffmanised,
2694 followed by a digit indicating blockSize100k.
2695 ---*/
2696 bsPutUChar ( 'B' );
2697 bsPutUChar ( 'Z' );
2698 bsPutUChar ( 'h' );
2699 bsPutUChar ( '0' + blockSize100k );
2700
2701 combinedCRC = 0;
2702
2703 if (verbosity >= 2) fprintf ( stderr, "\n" );
2704 499
2705 while (True) { 500 while (True) {
2706 501
2707 blockNo++; 502 bzf = bzReadOpen (
2708 initialiseCRC (); 503 &bzerr, zStream, verbosity,
2709 loadAndRLEsource ( stream ); 504 (int)smallMode, unused, nUnused
2710 ERROR_IF_NOT_ZERO ( ferror(stream) ); 505 );
2711 if (last == -1) break; 506 if (bzf == NULL || bzerr != BZ_OK) goto errhandler;
2712 507 streamNo++;
2713 blockCRC = getFinalCRC ();
2714 combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
2715 combinedCRC ^= blockCRC;
2716
2717 if (verbosity >= 2)
2718 fprintf ( stderr, " block %d: crc = 0x%8x, combined CRC = 0x%8x, size = %d",
2719 blockNo, blockCRC, combinedCRC, last+1 );
2720
2721 /*-- sort the block and establish posn of original string --*/
2722 doReversibleTransformation ();
2723
2724 /*--
2725 A 6-byte block header, the value chosen arbitrarily
2726 as 0x314159265359 :-). A 32 bit value does not really
2727 give a strong enough guarantee that the value will not
2728 appear by chance in the compressed datastream. Worst-case
2729 probability of this event, for a 900k block, is about
2730 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
2731 For a compressed file of size 100Gb -- about 100000 blocks --
2732 only a 48-bit marker will do. NB: normal compression/
2733 decompression do *not* rely on these statistical properties.
2734 They are only important when trying to recover blocks from
2735 damaged files.
2736 --*/
2737 bsPutUChar ( 0x31 ); bsPutUChar ( 0x41 );
2738 bsPutUChar ( 0x59 ); bsPutUChar ( 0x26 );
2739 bsPutUChar ( 0x53 ); bsPutUChar ( 0x59 );
2740
2741 /*-- Now the block's CRC, so it is in a known place. --*/
2742 bsPutUInt32 ( blockCRC );
2743
2744 /*-- Now a single bit indicating randomisation. --*/
2745 if (blockRandomised) {
2746 bsW(1,1); nBlocksRandomised++;
2747 } else
2748 bsW(1,0);
2749
2750 /*-- Finally, block's contents proper. --*/
2751 moveToFrontCodeAndSend ();
2752
2753 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2754 }
2755
2756 if (verbosity >= 2 && nBlocksRandomised > 0)
2757 fprintf ( stderr, " %d block%s needed randomisation\n",
2758 nBlocksRandomised,
2759 nBlocksRandomised == 1 ? "" : "s" );
2760
2761 /*--
2762 Now another magic 48-bit number, 0x177245385090, to
2763 indicate the end of the last block. (sqrt(pi), if
2764 you want to know. I did want to use e, but it contains
2765 too much repetition -- 27 18 28 18 28 46 -- for me
2766 to feel statistically comfortable. Call me paranoid.)
2767 --*/
2768
2769 bsPutUChar ( 0x17 ); bsPutUChar ( 0x72 );
2770 bsPutUChar ( 0x45 ); bsPutUChar ( 0x38 );
2771 bsPutUChar ( 0x50 ); bsPutUChar ( 0x90 );
2772
2773 bsPutUInt32 ( combinedCRC );
2774 if (verbosity >= 2)
2775 fprintf ( stderr, " final combined CRC = 0x%x\n ", combinedCRC );
2776 508
2777 /*-- Close the files in an utterly paranoid way. --*/ 509 while (bzerr == BZ_OK) {
2778 bsFinishedWithStream (); 510 nread = bzRead ( &bzerr, bzf, obuf, 5000 );
2779 511 if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler;
2780 ERROR_IF_NOT_ZERO ( ferror(zStream) ); 512 }
2781 retVal = fflush ( zStream ); 513 if (bzerr != BZ_STREAM_END) goto errhandler;
2782 ERROR_IF_EOF ( retVal );
2783 retVal = fclose ( zStream );
2784 ERROR_IF_EOF ( retVal );
2785
2786 ERROR_IF_NOT_ZERO ( ferror(stream) );
2787 retVal = fclose ( stream );
2788 ERROR_IF_EOF ( retVal );
2789
2790 if (bytesIn == 0) bytesIn = 1;
2791 if (bytesOut == 0) bytesOut = 1;
2792 514
2793 if (verbosity >= 1) 515 bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
2794 fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, " 516 if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" );
2795 "%5.2f%% saved, %d in, %d out.\n",
2796 (float)bytesIn / (float)bytesOut,
2797 (8.0 * (float)bytesOut) / (float)bytesIn,
2798 100.0 * (1.0 - (float)bytesOut / (float)bytesIn),
2799 bytesIn,
2800 bytesOut
2801 );
2802}
2803 517
518 for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i];
2804 519
2805/*---------------------------------------------*/ 520 bzReadClose ( &bzerr, bzf );
2806Bool uncompressStream ( FILE *zStream, FILE *stream ) 521 if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" );
2807{ 522 if (nUnused == 0 && myfeof(zStream)) break;
2808 UChar magic1, magic2, magic3, magic4;
2809 UChar magic5, magic6;
2810 UInt32 storedBlockCRC, storedCombinedCRC;
2811 UInt32 computedBlockCRC, computedCombinedCRC;
2812 Int32 currBlockNo;
2813 IntNative retVal;
2814 523
2815 SET_BINARY_MODE(stream);
2816 SET_BINARY_MODE(zStream);
2817
2818 ERROR_IF_NOT_ZERO ( ferror(stream) );
2819 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2820
2821 bsSetStream ( zStream, False );
2822
2823 /*--
2824 A bad magic number is `recoverable from';
2825 return with False so the caller skips the file.
2826 --*/
2827 magic1 = bsGetUChar ();
2828 magic2 = bsGetUChar ();
2829 magic3 = bsGetUChar ();
2830 magic4 = bsGetUChar ();
2831 if (magic1 != 'B' ||
2832 magic2 != 'Z' ||
2833 magic3 != 'h' ||
2834 magic4 < '1' ||
2835 magic4 > '9') {
2836 bsFinishedWithStream();
2837 retVal = fclose ( stream );
2838 ERROR_IF_EOF ( retVal );
2839 return False;
2840 } 524 }
2841 525
2842 setDecompressStructureSizes ( magic4 - '0' ); 526 if (ferror(zStream)) goto errhandler_io;
2843 computedCombinedCRC = 0; 527 ret = fclose ( zStream );
2844 528 if (ret == EOF) goto errhandler_io;
2845 if (verbosity >= 2) fprintf ( stderr, "\n " );
2846 currBlockNo = 0;
2847
2848 while (True) {
2849 magic1 = bsGetUChar ();
2850 magic2 = bsGetUChar ();
2851 magic3 = bsGetUChar ();
2852 magic4 = bsGetUChar ();
2853 magic5 = bsGetUChar ();
2854 magic6 = bsGetUChar ();
2855 if (magic1 == 0x17 && magic2 == 0x72 &&
2856 magic3 == 0x45 && magic4 == 0x38 &&
2857 magic5 == 0x50 && magic6 == 0x90) break;
2858
2859 if (magic1 != 0x31 || magic2 != 0x41 ||
2860 magic3 != 0x59 || magic4 != 0x26 ||
2861 magic5 != 0x53 || magic6 != 0x59) badBlockHeader();
2862
2863 storedBlockCRC = bsGetUInt32 ();
2864
2865 if (bsR(1) == 1)
2866 blockRandomised = True; else
2867 blockRandomised = False;
2868
2869 currBlockNo++;
2870 if (verbosity >= 2)
2871 fprintf ( stderr, "[%d: huff+mtf ", currBlockNo );
2872 getAndMoveToFrontDecode ();
2873 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2874
2875 initialiseCRC();
2876 if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
2877 if (smallMode)
2878 undoReversibleTransformation_small ( stream );
2879 else
2880 undoReversibleTransformation_fast ( stream );
2881
2882 ERROR_IF_NOT_ZERO ( ferror(stream) );
2883
2884 computedBlockCRC = getFinalCRC();
2885 if (verbosity >= 3)
2886 fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
2887 if (verbosity >= 2) fprintf ( stderr, "] " );
2888
2889 /*-- A bad CRC is considered a fatal error. --*/
2890 if (storedBlockCRC != computedBlockCRC)
2891 crcError ( storedBlockCRC, computedBlockCRC );
2892
2893 computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
2894 computedCombinedCRC ^= computedBlockCRC;
2895 };
2896 529
2897 if (verbosity >= 2) fprintf ( stderr, "\n " ); 530 if (verbosity >= 2) fprintf ( stderr, "\n " );
2898
2899 storedCombinedCRC = bsGetUInt32 ();
2900 if (verbosity >= 2)
2901 fprintf ( stderr,
2902 "combined CRCs: stored = 0x%x, computed = 0x%x\n ",
2903 storedCombinedCRC, computedCombinedCRC );
2904 if (storedCombinedCRC != computedCombinedCRC)
2905 crcError ( storedCombinedCRC, computedCombinedCRC );
2906
2907
2908 bsFinishedWithStream ();
2909 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2910 retVal = fclose ( zStream );
2911 ERROR_IF_EOF ( retVal );
2912
2913 ERROR_IF_NOT_ZERO ( ferror(stream) );
2914 retVal = fflush ( stream );
2915 ERROR_IF_NOT_ZERO ( retVal );
2916 if (stream != stdout) {
2917 retVal = fclose ( stream );
2918 ERROR_IF_EOF ( retVal );
2919 }
2920 return True; 531 return True;
2921}
2922
2923
2924/*---------------------------------------------*/
2925Bool testStream ( FILE *zStream )
2926{
2927 UChar magic1, magic2, magic3, magic4;
2928 UChar magic5, magic6;
2929 UInt32 storedBlockCRC, storedCombinedCRC;
2930 UInt32 computedBlockCRC, computedCombinedCRC;
2931 Int32 currBlockNo;
2932 IntNative retVal;
2933
2934 SET_BINARY_MODE(zStream);
2935 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2936
2937 bsSetStream ( zStream, False );
2938
2939 magic1 = bsGetUChar ();
2940 magic2 = bsGetUChar ();
2941 magic3 = bsGetUChar ();
2942 magic4 = bsGetUChar ();
2943 if (magic1 != 'B' ||
2944 magic2 != 'Z' ||
2945 magic3 != 'h' ||
2946 magic4 < '1' ||
2947 magic4 > '9') {
2948 bsFinishedWithStream();
2949 fclose ( zStream );
2950 fprintf ( stderr, "\n%s: bad magic number (ie, not created by bzip2)\n",
2951 inName );
2952 return False;
2953 }
2954 532
2955 smallMode = True; 533 errhandler:
2956 setDecompressStructureSizes ( magic4 - '0' ); 534 bzReadClose ( &bzerr_dummy, bzf );
2957 computedCombinedCRC = 0; 535 switch (bzerr) {
2958 536 case BZ_IO_ERROR:
2959 if (verbosity >= 2) fprintf ( stderr, "\n" ); 537 errhandler_io:
2960 currBlockNo = 0; 538 ioError(); break;
2961 539 case BZ_DATA_ERROR:
2962 while (True) {
2963 magic1 = bsGetUChar ();
2964 magic2 = bsGetUChar ();
2965 magic3 = bsGetUChar ();
2966 magic4 = bsGetUChar ();
2967 magic5 = bsGetUChar ();
2968 magic6 = bsGetUChar ();
2969 if (magic1 == 0x17 && magic2 == 0x72 &&
2970 magic3 == 0x45 && magic4 == 0x38 &&
2971 magic5 == 0x50 && magic6 == 0x90) break;
2972
2973 currBlockNo++;
2974 if (magic1 != 0x31 || magic2 != 0x41 ||
2975 magic3 != 0x59 || magic4 != 0x26 ||
2976 magic5 != 0x53 || magic6 != 0x59) {
2977 bsFinishedWithStream();
2978 fclose ( zStream );
2979 fprintf ( stderr, 540 fprintf ( stderr,
2980 "\n%s, block %d: bad header (not == 0x314159265359)\n", 541 "\n%s: data integrity (CRC) error in data\n",
2981 inName, currBlockNo ); 542 inName );
2982 return False; 543 return False;
2983 } 544 case BZ_MEM_ERROR:
2984 storedBlockCRC = bsGetUInt32 (); 545 outOfMemory();
2985 546 case BZ_UNEXPECTED_EOF:
2986 if (bsR(1) == 1) 547 fprintf ( stderr,
2987 blockRandomised = True; else 548 "\n%s: file ends unexpectedly\n",
2988 blockRandomised = False; 549 inName );
2989
2990 if (verbosity >= 2)
2991 fprintf ( stderr, " block [%d: huff+mtf ", currBlockNo );
2992 getAndMoveToFrontDecode ();
2993 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2994
2995 initialiseCRC();
2996 if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
2997 undoReversibleTransformation_small ( NULL );
2998
2999 computedBlockCRC = getFinalCRC();
3000 if (verbosity >= 3)
3001 fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
3002 if (verbosity >= 2) fprintf ( stderr, "] " );
3003
3004 if (storedBlockCRC != computedBlockCRC) {
3005 bsFinishedWithStream();
3006 fclose ( zStream );
3007 fprintf ( stderr, "\n%s, block %d: computed CRC does not match stored one\n",
3008 inName, currBlockNo );
3009 return False; 550 return False;
3010 } 551 case BZ_DATA_ERROR_MAGIC:
3011 552 if (streamNo == 1) {
3012 if (verbosity >= 2) fprintf ( stderr, "ok\n" ); 553 fprintf ( stderr,
3013 computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31); 554 "\n%s: bad magic number (ie, not created by bzip2)\n",
3014 computedCombinedCRC ^= computedBlockCRC; 555 inName );
3015 }; 556 return False;
3016 557 } else {
3017 storedCombinedCRC = bsGetUInt32 (); 558 fprintf ( stderr,
3018 if (verbosity >= 2) 559 "\n%s: %s: trailing garbage after EOF ignored\n",
3019 fprintf ( stderr, 560 progName, inName );
3020 " combined CRCs: stored = 0x%x, computed = 0x%x\n ", 561 return True;
3021 storedCombinedCRC, computedCombinedCRC ); 562 }
3022 if (storedCombinedCRC != computedCombinedCRC) { 563 default:
3023 bsFinishedWithStream(); 564 panic ( "test:unexpected error" );
3024 fclose ( zStream );
3025 fprintf ( stderr, "\n%s: computed CRC does not match stored one\n",
3026 inName );
3027 return False;
3028 } 565 }
3029 566
3030 bsFinishedWithStream (); 567 panic ( "test:end" );
3031 ERROR_IF_NOT_ZERO ( ferror(zStream) ); 568 return True; /*notreached*/
3032 retVal = fclose ( zStream );
3033 ERROR_IF_EOF ( retVal );
3034 return True;
3035} 569}
3036 570
3037 571
3038
3039/*---------------------------------------------------*/ 572/*---------------------------------------------------*/
3040/*--- Error [non-] handling grunge ---*/ 573/*--- Error [non-] handling grunge ---*/
3041/*---------------------------------------------------*/ 574/*---------------------------------------------------*/
@@ -3059,8 +592,7 @@ void showFileNames ( void )
3059 fprintf ( 592 fprintf (
3060 stderr, 593 stderr,
3061 "\tInput file = %s, output file = %s\n", 594 "\tInput file = %s, output file = %s\n",
3062 inName==NULL ? "(null)" : inName, 595 inName, outName
3063 outName==NULL ? "(null)" : outName
3064 ); 596 );
3065} 597}
3066 598
@@ -3072,8 +604,7 @@ void cleanUpAndFail ( Int32 ec )
3072 604
3073 if ( srcMode == SM_F2F && opMode != OM_TEST ) { 605 if ( srcMode == SM_F2F && opMode != OM_TEST ) {
3074 fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n", 606 fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
3075 progName, 607 progName, outName );
3076 outName==NULL ? "(null)" : outName );
3077 if (outputHandleJustInCase != NULL) 608 if (outputHandleJustInCase != NULL)
3078 fclose ( outputHandleJustInCase ); 609 fclose ( outputHandleJustInCase );
3079 retVal = remove ( outName ); 610 retVal = remove ( outName );
@@ -3108,11 +639,10 @@ void panic ( Char* s )
3108 639
3109 640
3110/*---------------------------------------------*/ 641/*---------------------------------------------*/
3111void badBGLengths ( void ) 642void crcError ()
3112{ 643{
3113 fprintf ( stderr, 644 fprintf ( stderr,
3114 "\n%s: error when reading background model code lengths,\n" 645 "\n%s: Data integrity error when decompressing.\n",
3115 "\twhich probably means the compressed file is corrupted.\n",
3116 progName ); 646 progName );
3117 showFileNames(); 647 showFileNames();
3118 cadvise(); 648 cadvise();
@@ -3121,19 +651,6 @@ void badBGLengths ( void )
3121 651
3122 652
3123/*---------------------------------------------*/ 653/*---------------------------------------------*/
3124void crcError ( UInt32 crcStored, UInt32 crcComputed )
3125{
3126 fprintf ( stderr,
3127 "\n%s: Data integrity error when decompressing.\n"
3128 "\tStored CRC = 0x%x, computed CRC = 0x%x\n",
3129 progName, crcStored, crcComputed );
3130 showFileNames();
3131 cadvise();
3132 cleanUpAndFail( 2 );
3133}
3134
3135
3136/*---------------------------------------------*/
3137void compressedStreamEOF ( void ) 654void compressedStreamEOF ( void )
3138{ 655{
3139 fprintf ( stderr, 656 fprintf ( stderr,
@@ -3160,46 +677,6 @@ void ioError ( )
3160 677
3161 678
3162/*---------------------------------------------*/ 679/*---------------------------------------------*/
3163void blockOverrun ()
3164{
3165 fprintf ( stderr,
3166 "\n%s: block overrun during decompression,\n"
3167 "\twhich probably means the compressed file\n"
3168 "\tis corrupted.\n",
3169 progName );
3170 showFileNames();
3171 cadvise();
3172 cleanUpAndFail( 2 );
3173}
3174
3175
3176/*---------------------------------------------*/
3177void badBlockHeader ()
3178{
3179 fprintf ( stderr,
3180 "\n%s: bad block header in the compressed file,\n"
3181 "\twhich probably means it is corrupted.\n",
3182 progName );
3183 showFileNames();
3184 cadvise();
3185 cleanUpAndFail( 2 );
3186}
3187
3188
3189/*---------------------------------------------*/
3190void bitStreamEOF ()
3191{
3192 fprintf ( stderr,
3193 "\n%s: read past the end of compressed data,\n"
3194 "\twhich probably means it is corrupted.\n",
3195 progName );
3196 showFileNames();
3197 cadvise();
3198 cleanUpAndFail( 2 );
3199}
3200
3201
3202/*---------------------------------------------*/
3203void mySignalCatcher ( IntNative n ) 680void mySignalCatcher ( IntNative n )
3204{ 681{
3205 fprintf ( stderr, 682 fprintf ( stderr,
@@ -3233,27 +710,11 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n )
3233 710
3234 711
3235/*---------------------------------------------*/ 712/*---------------------------------------------*/
3236void uncompressOutOfMemory ( Int32 draw, Int32 blockSize ) 713void outOfMemory ( void )
3237{
3238 fprintf ( stderr,
3239 "\n%s: Can't allocate enough memory for decompression.\n"
3240 "\tRequested %d bytes for a block size of %d.\n"
3241 "\tTry selecting space-economic decompress (with flag -s)\n"
3242 "\tand failing that, find a machine with more memory.\n",
3243 progName, draw, blockSize );
3244 showFileNames();
3245 cleanUpAndFail(1);
3246}
3247
3248
3249/*---------------------------------------------*/
3250void compressOutOfMemory ( Int32 draw, Int32 blockSize )
3251{ 714{
3252 fprintf ( stderr, 715 fprintf ( stderr,
3253 "\n%s: Can't allocate enough memory for compression.\n" 716 "\n%s: couldn't allocate enough memory\n",
3254 "\tRequested %d bytes for a block size of %d.\n" 717 progName );
3255 "\tTry selecting a small block size (with flag -s).\n",
3256 progName, draw, blockSize );
3257 showFileNames(); 718 showFileNames();
3258 cleanUpAndFail(1); 719 cleanUpAndFail(1);
3259} 720}
@@ -3274,6 +735,24 @@ void pad ( Char *s )
3274 735
3275 736
3276/*---------------------------------------------*/ 737/*---------------------------------------------*/
738void copyFileName ( Char* to, Char* from )
739{
740 if ( strlen(from) > FILE_NAME_LEN-10 ) {
741 fprintf (
742 stderr,
743 "bzip2: file name\n`%s'\nis suspiciously (> 1024 chars) long.\n"
744 "Try using a reasonable file name instead. Sorry! :)\n",
745 from
746 );
747 exit(1);
748 }
749
750 strncpy(to,from,FILE_NAME_LEN-10);
751 to[FILE_NAME_LEN-10]='\0';
752}
753
754
755/*---------------------------------------------*/
3277Bool fileExists ( Char* name ) 756Bool fileExists ( Char* name )
3278{ 757{
3279 FILE *tmp = fopen ( name, "rb" ); 758 FILE *tmp = fopen ( name, "rb" );
@@ -3287,7 +766,7 @@ Bool fileExists ( Char* name )
3287/*-- 766/*--
3288 if in doubt, return True 767 if in doubt, return True
3289--*/ 768--*/
3290Bool notABogStandardFile ( Char* name ) 769Bool notAStandardFile ( Char* name )
3291{ 770{
3292 IntNative i; 771 IntNative i;
3293 struct MY_STAT statBuf; 772 struct MY_STAT statBuf;
@@ -3300,9 +779,9 @@ Bool notABogStandardFile ( Char* name )
3300 779
3301 780
3302/*---------------------------------------------*/ 781/*---------------------------------------------*/
3303void copyDateAndPermissions ( Char *srcName, Char *dstName ) 782void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName )
3304{ 783{
3305 #if BZ_UNIX 784#if BZ_UNIX
3306 IntNative retVal; 785 IntNative retVal;
3307 struct MY_STAT statBuf; 786 struct MY_STAT statBuf;
3308 struct utimbuf uTimBuf; 787 struct utimbuf uTimBuf;
@@ -3314,13 +793,34 @@ void copyDateAndPermissions ( Char *srcName, Char *dstName )
3314 793
3315 retVal = chmod ( dstName, statBuf.st_mode ); 794 retVal = chmod ( dstName, statBuf.st_mode );
3316 ERROR_IF_NOT_ZERO ( retVal ); 795 ERROR_IF_NOT_ZERO ( retVal );
796 /* Not sure if this is really portable or not. Causes
797 problems on my x86-Linux Redhat 5.0 box. Decided
798 to omit it from 0.9.0. JRS, 27 June 98. If you
799 understand Unix file semantics and portability issues
800 well enough to fix this properly, drop me a line
801 at jseward@acm.org.
802 retVal = chown ( dstName, statBuf.st_uid, statBuf.st_gid );
803 ERROR_IF_NOT_ZERO ( retVal );
804 */
3317 retVal = utime ( dstName, &uTimBuf ); 805 retVal = utime ( dstName, &uTimBuf );
3318 ERROR_IF_NOT_ZERO ( retVal ); 806 ERROR_IF_NOT_ZERO ( retVal );
3319 #endif 807#endif
3320} 808}
3321 809
3322 810
3323/*---------------------------------------------*/ 811/*---------------------------------------------*/
812void setInterimPermissions ( Char *dstName )
813{
814#if BZ_UNIX
815 IntNative retVal;
816 retVal = chmod ( dstName, S_IRUSR | S_IWUSR );
817 ERROR_IF_NOT_ZERO ( retVal );
818#endif
819}
820
821
822
823/*---------------------------------------------*/
3324Bool endsInBz2 ( Char* name ) 824Bool endsInBz2 ( Char* name )
3325{ 825{
3326 Int32 n = strlen ( name ); 826 Int32 n = strlen ( name );
@@ -3353,13 +853,13 @@ void compress ( Char *name )
3353 panic ( "compress: bad modes\n" ); 853 panic ( "compress: bad modes\n" );
3354 854
3355 switch (srcMode) { 855 switch (srcMode) {
3356 case SM_I2O: strcpy ( inName, "(stdin)" ); 856 case SM_I2O: copyFileName ( inName, "(stdin)" );
3357 strcpy ( outName, "(stdout)" ); break; 857 copyFileName ( outName, "(stdout)" ); break;
3358 case SM_F2F: strcpy ( inName, name ); 858 case SM_F2F: copyFileName ( inName, name );
3359 strcpy ( outName, name ); 859 copyFileName ( outName, name );
3360 strcat ( outName, ".bz2" ); break; 860 strcat ( outName, ".bz2" ); break;
3361 case SM_F2O: strcpy ( inName, name ); 861 case SM_F2O: copyFileName ( inName, name );
3362 strcpy ( outName, "(stdout)" ); break; 862 copyFileName ( outName, "(stdout)" ); break;
3363 } 863 }
3364 864
3365 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { 865 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
@@ -3377,12 +877,12 @@ void compress ( Char *name )
3377 progName, inName ); 877 progName, inName );
3378 return; 878 return;
3379 } 879 }
3380 if ( srcMode != SM_I2O && notABogStandardFile ( inName )) { 880 if ( srcMode != SM_I2O && notAStandardFile ( inName )) {
3381 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n", 881 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3382 progName, inName ); 882 progName, inName );
3383 return; 883 return;
3384 } 884 }
3385 if ( srcMode == SM_F2F && fileExists ( outName ) ) { 885 if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) {
3386 fprintf ( stderr, "%s: Output file %s already exists, skipping.\n", 886 fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
3387 progName, outName ); 887 progName, outName );
3388 return; 888 return;
@@ -3434,6 +934,7 @@ void compress ( Char *name )
3434 progName, inName ); 934 progName, inName );
3435 return; 935 return;
3436 }; 936 };
937 setInterimPermissions ( outName );
3437 break; 938 break;
3438 939
3439 default: 940 default:
@@ -3454,7 +955,7 @@ void compress ( Char *name )
3454 955
3455 /*--- If there was an I/O error, we won't get here. ---*/ 956 /*--- If there was an I/O error, we won't get here. ---*/
3456 if ( srcMode == SM_F2F ) { 957 if ( srcMode == SM_F2F ) {
3457 copyDateAndPermissions ( inName, outName ); 958 copyDatePermissionsAndOwner ( inName, outName );
3458 if ( !keepInputFiles ) { 959 if ( !keepInputFiles ) {
3459 IntNative retVal = remove ( inName ); 960 IntNative retVal = remove ( inName );
3460 ERROR_IF_NOT_ZERO ( retVal ); 961 ERROR_IF_NOT_ZERO ( retVal );
@@ -3474,15 +975,15 @@ void uncompress ( Char *name )
3474 panic ( "uncompress: bad modes\n" ); 975 panic ( "uncompress: bad modes\n" );
3475 976
3476 switch (srcMode) { 977 switch (srcMode) {
3477 case SM_I2O: strcpy ( inName, "(stdin)" ); 978 case SM_I2O: copyFileName ( inName, "(stdin)" );
3478 strcpy ( outName, "(stdout)" ); break; 979 copyFileName ( outName, "(stdout)" ); break;
3479 case SM_F2F: strcpy ( inName, name ); 980 case SM_F2F: copyFileName ( inName, name );
3480 strcpy ( outName, name ); 981 copyFileName ( outName, name );
3481 if (endsInBz2 ( outName )) 982 if (endsInBz2 ( outName ))
3482 outName [ strlen ( outName ) - 4 ] = '\0'; 983 outName [ strlen ( outName ) - 4 ] = '\0';
3483 break; 984 break;
3484 case SM_F2O: strcpy ( inName, name ); 985 case SM_F2O: copyFileName ( inName, name );
3485 strcpy ( outName, "(stdout)" ); break; 986 copyFileName ( outName, "(stdout)" ); break;
3486 } 987 }
3487 988
3488 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { 989 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
@@ -3501,12 +1002,12 @@ void uncompress ( Char *name )
3501 progName, inName ); 1002 progName, inName );
3502 return; 1003 return;
3503 } 1004 }
3504 if ( srcMode != SM_I2O && notABogStandardFile ( inName )) { 1005 if ( srcMode != SM_I2O && notAStandardFile ( inName )) {
3505 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n", 1006 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3506 progName, inName ); 1007 progName, inName );
3507 return; 1008 return;
3508 } 1009 }
3509 if ( srcMode == SM_F2F && fileExists ( outName ) ) { 1010 if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) {
3510 fprintf ( stderr, "%s: Output file %s already exists, skipping.\n", 1011 fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
3511 progName, outName ); 1012 progName, outName );
3512 return; 1013 return;
@@ -3550,6 +1051,7 @@ void uncompress ( Char *name )
3550 progName, inName ); 1051 progName, inName );
3551 return; 1052 return;
3552 }; 1053 };
1054 setInterimPermissions ( outName );
3553 break; 1055 break;
3554 1056
3555 default: 1057 default:
@@ -3571,7 +1073,7 @@ void uncompress ( Char *name )
3571 /*--- If there was an I/O error, we won't get here. ---*/ 1073 /*--- If there was an I/O error, we won't get here. ---*/
3572 if ( magicNumberOK ) { 1074 if ( magicNumberOK ) {
3573 if ( srcMode == SM_F2F ) { 1075 if ( srcMode == SM_F2F ) {
3574 copyDateAndPermissions ( inName, outName ); 1076 copyDatePermissionsAndOwner ( inName, outName );
3575 if ( !keepInputFiles ) { 1077 if ( !keepInputFiles ) {
3576 IntNative retVal = remove ( inName ); 1078 IntNative retVal = remove ( inName );
3577 ERROR_IF_NOT_ZERO ( retVal ); 1079 ERROR_IF_NOT_ZERO ( retVal );
@@ -3607,11 +1109,11 @@ void testf ( Char *name )
3607 if (name == NULL && srcMode != SM_I2O) 1109 if (name == NULL && srcMode != SM_I2O)
3608 panic ( "testf: bad modes\n" ); 1110 panic ( "testf: bad modes\n" );
3609 1111
3610 strcpy ( outName, "(none)" ); 1112 copyFileName ( outName, "(none)" );
3611 switch (srcMode) { 1113 switch (srcMode) {
3612 case SM_I2O: strcpy ( inName, "(stdin)" ); break; 1114 case SM_I2O: copyFileName ( inName, "(stdin)" ); break;
3613 case SM_F2F: strcpy ( inName, name ); break; 1115 case SM_F2F: copyFileName ( inName, name ); break;
3614 case SM_F2O: strcpy ( inName, name ); break; 1116 case SM_F2O: copyFileName ( inName, name ); break;
3615 } 1117 }
3616 1118
3617 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { 1119 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
@@ -3630,7 +1132,7 @@ void testf ( Char *name )
3630 progName, inName ); 1132 progName, inName );
3631 return; 1133 return;
3632 } 1134 }
3633 if ( srcMode != SM_I2O && notABogStandardFile ( inName )) { 1135 if ( srcMode != SM_I2O && notAStandardFile ( inName )) {
3634 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n", 1136 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3635 progName, inName ); 1137 progName, inName );
3636 return; 1138 return;
@@ -3684,25 +1186,18 @@ void license ( void )
3684 fprintf ( stderr, 1186 fprintf ( stderr,
3685 1187
3686 "bzip2, a block-sorting file compressor. " 1188 "bzip2, a block-sorting file compressor. "
3687 "Version 0.1pl2, 29-Aug-97.\n" 1189 "Version 0.9.0c, 18-Oct-98.\n"
3688 " \n" 1190 " \n"
3689 " Copyright (C) 1996, 1997 by Julian Seward.\n" 1191 " Copyright (C) 1996, 1997, 1998 by Julian Seward.\n"
3690 " \n" 1192 " \n"
3691 " This program is free software; you can redistribute it and/or modify\n" 1193 " This program is free software; you can redistribute it and/or modify\n"
3692 " it under the terms of the GNU General Public License as published by\n" 1194 " it under the terms set out in the LICENSE file, which is included\n"
3693 " the Free Software Foundation; either version 2 of the License, or\n" 1195 " in the bzip2-0.9.0c source distribution.\n"
3694 " (at your option) any later version.\n"
3695 " \n" 1196 " \n"
3696 " This program is distributed in the hope that it will be useful,\n" 1197 " This program is distributed in the hope that it will be useful,\n"
3697 " but WITHOUT ANY WARRANTY; without even the implied warranty of\n" 1198 " but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
3698 " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" 1199 " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
3699 " GNU General Public License for more details.\n" 1200 " LICENSE file for more details.\n"
3700 " \n"
3701 " You should have received a copy of the GNU General Public License\n"
3702 " along with this program; if not, write to the Free Software\n"
3703 " Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"
3704 " \n"
3705 " The GNU General Public License is contained in the file LICENSE.\n"
3706 " \n" 1201 " \n"
3707 ); 1202 );
3708} 1203}
@@ -3714,16 +1209,17 @@ void usage ( Char *fullProgName )
3714 fprintf ( 1209 fprintf (
3715 stderr, 1210 stderr,
3716 "bzip2, a block-sorting file compressor. " 1211 "bzip2, a block-sorting file compressor. "
3717 "Version 0.1pl2, 29-Aug-97.\n" 1212 "Version 0.9.0c, 18-Oct-98.\n"
3718 "\n usage: %s [flags and input files in any order]\n" 1213 "\n usage: %s [flags and input files in any order]\n"
3719 "\n" 1214 "\n"
3720 " -h --help print this message\n" 1215 " -h --help print this message\n"
3721 " -d --decompress force decompression\n" 1216 " -d --decompress force decompression\n"
3722 " -f --compress force compression\n" 1217 " -z --compress force compression\n"
1218 " -k --keep keep (don't delete) input files\n"
1219 " -f --force overwrite existing output filess\n"
3723 " -t --test test compressed file integrity\n" 1220 " -t --test test compressed file integrity\n"
3724 " -c --stdout output to standard out\n" 1221 " -c --stdout output to standard out\n"
3725 " -v --verbose be verbose (a 2nd -v gives more)\n" 1222 " -v --verbose be verbose (a 2nd -v gives more)\n"
3726 " -k --keep keep (don't delete) input files\n"
3727 " -L --license display software version & license\n" 1223 " -L --license display software version & license\n"
3728 " -V --version display software version & license\n" 1224 " -V --version display software version & license\n"
3729 " -s --small use less memory (at most 2500k)\n" 1225 " -s --small use less memory (at most 2500k)\n"
@@ -3731,15 +1227,16 @@ void usage ( Char *fullProgName )
3731 " --repetitive-fast compress repetitive blocks faster\n" 1227 " --repetitive-fast compress repetitive blocks faster\n"
3732 " --repetitive-best compress repetitive blocks better\n" 1228 " --repetitive-best compress repetitive blocks better\n"
3733 "\n" 1229 "\n"
3734 " If invoked as `bzip2', the default action is to compress.\n" 1230 " If invoked as `bzip2', default action is to compress.\n"
3735 " as `bunzip2', the default action is to decompress.\n" 1231 " as `bunzip2', default action is to decompress.\n"
1232 " as `bz2cat', default action is to decompress to stdout.\n"
3736 "\n" 1233 "\n"
3737 " If no file names are given, bzip2 compresses or decompresses\n" 1234 " If no file names are given, bzip2 compresses or decompresses\n"
3738 " from standard input to standard output. You can combine\n" 1235 " from standard input to standard output. You can combine\n"
3739 " flags, so `-v -4' means the same as -v4 or -4v, &c.\n" 1236 " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
3740 #if BZ_UNIX 1237#if BZ_UNIX
3741 "\n" 1238 "\n"
3742 #endif 1239#endif
3743 , 1240 ,
3744 1241
3745 fullProgName 1242 fullProgName
@@ -3776,14 +1273,7 @@ void *myMalloc ( Int32 n )
3776 void* p; 1273 void* p;
3777 1274
3778 p = malloc ( (size_t)n ); 1275 p = malloc ( (size_t)n );
3779 if (p == NULL) { 1276 if (p == NULL) outOfMemory ();
3780 fprintf (
3781 stderr,
3782 "%s: `malloc' failed on request for %d bytes.\n",
3783 progName, n
3784 );
3785 exit ( 1 );
3786 }
3787 return p; 1277 return p;
3788} 1278}
3789 1279
@@ -3817,7 +1307,6 @@ Cell *snocString ( Cell *root, Char *name )
3817} 1307}
3818 1308
3819 1309
3820
3821/*---------------------------------------------*/ 1310/*---------------------------------------------*/
3822#define ISFLAG(s) (strcmp(aa->name, (s))==0) 1311#define ISFLAG(s) (strcmp(aa->name, (s))==0)
3823 1312
@@ -3829,11 +1318,6 @@ IntNative main ( IntNative argc, Char *argv[] )
3829 Cell *argList; 1318 Cell *argList;
3830 Cell *aa; 1319 Cell *aa;
3831 1320
3832
3833 #if DEBUG
3834 fprintf ( stderr, "bzip2: *** compiled with debugging ON ***\n" );
3835 #endif
3836
3837 /*-- Be really really really paranoid :-) --*/ 1321 /*-- Be really really really paranoid :-) --*/
3838 if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 || 1322 if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
3839 sizeof(Int16) != 2 || sizeof(UInt16) != 2 || 1323 sizeof(Int16) != 2 || sizeof(UInt16) != 2 ||
@@ -3844,7 +1328,7 @@ IntNative main ( IntNative argc, Char *argv[] )
3844 "\tof 4, 2 and 1 bytes to run properly, and they don't.\n" 1328 "\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
3845 "\tProbably you can fix this by defining them correctly,\n" 1329 "\tProbably you can fix this by defining them correctly,\n"
3846 "\tand recompiling. Bye!\n" ); 1330 "\tand recompiling. Bye!\n" );
3847 exit(1); 1331 exit(3);
3848 } 1332 }
3849 1333
3850 1334
@@ -3852,35 +1336,28 @@ IntNative main ( IntNative argc, Char *argv[] )
3852 signal (SIGINT, mySignalCatcher); 1336 signal (SIGINT, mySignalCatcher);
3853 signal (SIGTERM, mySignalCatcher); 1337 signal (SIGTERM, mySignalCatcher);
3854 signal (SIGSEGV, mySIGSEGVorSIGBUScatcher); 1338 signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
3855 #if BZ_UNIX 1339#if BZ_UNIX
3856 signal (SIGHUP, mySignalCatcher); 1340 signal (SIGHUP, mySignalCatcher);
3857 signal (SIGBUS, mySIGSEGVorSIGBUScatcher); 1341 signal (SIGBUS, mySIGSEGVorSIGBUScatcher);
3858 #endif 1342#endif
3859 1343
3860 1344
3861 /*-- Initialise --*/ 1345 /*-- Initialise --*/
3862 outputHandleJustInCase = NULL; 1346 outputHandleJustInCase = NULL;
3863 ftab = NULL;
3864 ll4 = NULL;
3865 ll16 = NULL;
3866 ll8 = NULL;
3867 tt = NULL;
3868 block = NULL;
3869 zptr = NULL;
3870 smallMode = False; 1347 smallMode = False;
3871 keepInputFiles = False; 1348 keepInputFiles = False;
1349 forceOverwrite = False;
3872 verbosity = 0; 1350 verbosity = 0;
3873 blockSize100k = 9; 1351 blockSize100k = 9;
3874 testFailsExist = False; 1352 testFailsExist = False;
3875 bsStream = NULL;
3876 numFileNames = 0; 1353 numFileNames = 0;
3877 numFilesProcessed = 0; 1354 numFilesProcessed = 0;
3878 workFactor = 30; 1355 workFactor = 30;
3879 1356
3880 strcpy ( inName, "(none)" ); 1357 copyFileName ( inName, "(none)" );
3881 strcpy ( outName, "(none)" ); 1358 copyFileName ( outName, "(none)" );
3882 1359
3883 strcpy ( progNameReally, argv[0] ); 1360 copyFileName ( progNameReally, argv[0] );
3884 progName = &progNameReally[0]; 1361 progName = &progNameReally[0];
3885 for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++) 1362 for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++)
3886 if (*tmp == PATH_SEP) progName = tmp + 1; 1363 if (*tmp == PATH_SEP) progName = tmp + 1;
@@ -3903,20 +1380,26 @@ IntNative main ( IntNative argc, Char *argv[] )
3903 } 1380 }
3904 1381
3905 1382
3906 /*-- Determine what to do (compress/uncompress/test). --*/ 1383 /*-- Determine source modes; flag handling may change this too. --*/
1384 if (numFileNames == 0)
1385 srcMode = SM_I2O; else srcMode = SM_F2F;
1386
1387
1388 /*-- Determine what to do (compress/uncompress/test/cat). --*/
3907 /*-- Note that subsequent flag handling may change this. --*/ 1389 /*-- Note that subsequent flag handling may change this. --*/
3908 opMode = OM_Z; 1390 opMode = OM_Z;
3909 1391
3910 if ( (strcmp ( "bunzip2", progName ) == 0) || 1392 if ( (strstr ( progName, "unzip" ) != 0) ||
3911 (strcmp ( "BUNZIP2", progName ) == 0) || 1393 (strstr ( progName, "UNZIP" ) != 0) )
3912 (strcmp ( "bunzip2.exe", progName ) == 0) ||
3913 (strcmp ( "BUNZIP2.EXE", progName ) == 0) )
3914 opMode = OM_UNZ; 1394 opMode = OM_UNZ;
3915 1395
3916 1396 if ( (strstr ( progName, "z2cat" ) != 0) ||
3917 /*-- Determine source modes; flag handling may change this too. --*/ 1397 (strstr ( progName, "Z2CAT" ) != 0) ||
3918 if (numFileNames == 0) 1398 (strstr ( progName, "zcat" ) != 0) ||
3919 srcMode = SM_I2O; else srcMode = SM_F2F; 1399 (strstr ( progName, "ZCAT" ) != 0) ) {
1400 opMode = OM_UNZ;
1401 srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O;
1402 }
3920 1403
3921 1404
3922 /*-- Look at the flags. --*/ 1405 /*-- Look at the flags. --*/
@@ -3926,7 +1409,8 @@ IntNative main ( IntNative argc, Char *argv[] )
3926 switch (aa->name[j]) { 1409 switch (aa->name[j]) {
3927 case 'c': srcMode = SM_F2O; break; 1410 case 'c': srcMode = SM_F2O; break;
3928 case 'd': opMode = OM_UNZ; break; 1411 case 'd': opMode = OM_UNZ; break;
3929 case 'f': opMode = OM_Z; break; 1412 case 'z': opMode = OM_Z; break;
1413 case 'f': forceOverwrite = True; break;
3930 case 't': opMode = OM_TEST; break; 1414 case 't': opMode = OM_TEST; break;
3931 case 'k': keepInputFiles = True; break; 1415 case 'k': keepInputFiles = True; break;
3932 case 's': smallMode = True; break; 1416 case 's': smallMode = True; break;
@@ -3957,6 +1441,7 @@ IntNative main ( IntNative argc, Char *argv[] )
3957 if (ISFLAG("--stdout")) srcMode = SM_F2O; else 1441 if (ISFLAG("--stdout")) srcMode = SM_F2O; else
3958 if (ISFLAG("--decompress")) opMode = OM_UNZ; else 1442 if (ISFLAG("--decompress")) opMode = OM_UNZ; else
3959 if (ISFLAG("--compress")) opMode = OM_Z; else 1443 if (ISFLAG("--compress")) opMode = OM_Z; else
1444 if (ISFLAG("--force")) forceOverwrite = True; else
3960 if (ISFLAG("--test")) opMode = OM_TEST; else 1445 if (ISFLAG("--test")) opMode = OM_TEST; else
3961 if (ISFLAG("--keep")) keepInputFiles = True; else 1446 if (ISFLAG("--keep")) keepInputFiles = True; else
3962 if (ISFLAG("--small")) smallMode = True; else 1447 if (ISFLAG("--small")) smallMode = True; else
@@ -3974,14 +1459,9 @@ IntNative main ( IntNative argc, Char *argv[] )
3974 } 1459 }
3975 } 1460 }
3976 1461
1462 if (verbosity > 4) verbosity = 4;
3977 if (opMode == OM_Z && smallMode) blockSize100k = 2; 1463 if (opMode == OM_Z && smallMode) blockSize100k = 2;
3978 1464
3979 if (opMode == OM_Z && srcMode == SM_F2O && numFileNames > 1) {
3980 fprintf ( stderr, "%s: I won't compress multiple files to stdout.\n",
3981 progName );
3982 exit ( 1 );
3983 }
3984
3985 if (srcMode == SM_F2O && numFileNames == 0) { 1465 if (srcMode == SM_F2O && numFileNames == 0) {
3986 fprintf ( stderr, "%s: -c expects at least one filename.\n", 1466 fprintf ( stderr, "%s: -c expects at least one filename.\n",
3987 progName ); 1467 progName );
@@ -3997,7 +1477,6 @@ IntNative main ( IntNative argc, Char *argv[] )
3997 if (opMode != OM_Z) blockSize100k = 0; 1477 if (opMode != OM_Z) blockSize100k = 0;
3998 1478
3999 if (opMode == OM_Z) { 1479 if (opMode == OM_Z) {
4000 allocateCompressStructures();
4001 if (srcMode == SM_I2O) 1480 if (srcMode == SM_I2O)
4002 compress ( NULL ); 1481 compress ( NULL );
4003 else 1482 else