From 977101ad5f833f5c0a574bfeea408e5301a6b052 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Sun, 23 Aug 1998 22:13:13 +0200 Subject: bzip2-0.9.0c --- bzip2.c | 3521 +++++++++------------------------------------------------------ 1 file changed, 500 insertions(+), 3021 deletions(-) (limited to 'bzip2.c') diff --git a/bzip2.c b/bzip2.c index 53ce10d..6a3ab95 100644 --- a/bzip2.c +++ b/bzip2.c @@ -4,28 +4,45 @@ /*-----------------------------------------------------------*/ /*-- - This program is bzip2, a lossless, block-sorting data compressor, - version 0.1pl2, dated 29-Aug-1997. - - Copyright (C) 1996, 1997 by Julian Seward. - Guildford, Surrey, UK - email: jseward@acm.org - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - The GNU General Public License is contained in the file LICENSE. + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-1998 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Guildford, Surrey, UK. + jseward@acm.org + bzip2/libbzip2 version 0.9.0c of 18 October 1998 This program is based on (at least) the work of: Mike Burrows @@ -37,21 +54,23 @@ Robert Sedgewick Jon L. Bentley - For more information on these sources, see the file ALGORITHMS. + For more information on these sources, see the manual. --*/ + /*----------------------------------------------------*/ /*--- IMPORTANT ---*/ /*----------------------------------------------------*/ /*-- WARNING: - This program (attempts to) compress data by performing several - non-trivial transformations on it. Unless you are 100% familiar - with *all* the algorithms contained herein, and with the - consequences of modifying them, you should NOT meddle with the - compression or decompression machinery. Incorrect changes can - and very likely *will* lead to disasterous loss of data. + This program and library (attempts to) compress data by + performing several non-trivial transformations on it. + Unless you are 100% familiar with *all* the algorithms + contained herein, and with the consequences of modifying them, + you should NOT meddle with the compression or decompression + machinery. Incorrect changes can and very likely *will* + lead to disasterous loss of data. DISCLAIMER: I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE @@ -65,18 +84,19 @@ of various special cases in the code which occur with very low but non-zero probability make it impossible to rule out the possibility of bugs remaining in the program. DO NOT COMPRESS - ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE - POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL NOT BE RECOVERABLE. + ANY DATA WITH THIS PROGRAM AND/OR LIBRARY UNLESS YOU ARE PREPARED + TO ACCEPT THE POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL + NOT BE RECOVERABLE. That is not to say this program is inherently unreliable. - Indeed, I very much hope the opposite is true. bzip2 has been - carefully constructed and extensively tested. + Indeed, I very much hope the opposite is true. bzip2/libbzip2 + has been carefully constructed and extensively tested. PATENTS: - To the best of my knowledge, bzip2 does not use any patented - algorithms. However, I do not have the resources available to - carry out a full patent search. Therefore I cannot give any - guarantee of the above statement. + To the best of my knowledge, bzip2/libbzip2 does not use any + patented algorithms. However, I do not have the resources + available to carry out a full patent search. Therefore I cannot + give any guarantee of the above statement. --*/ @@ -103,6 +123,10 @@ --*/ #define BZ_LCCWIN32 0 +#ifdef _WIN32 +#define BZ_LCCWIN32 1 +#define BZ_UNIX 0 +#endif /*---------------------------------------------*/ @@ -112,12 +136,10 @@ #include #include -#if DEBUG - #include -#endif #include #include #include +#include "bzlib.h" #define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); } #define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); } @@ -130,2910 +152,421 @@ --*/ #if BZ_UNIX - #include - #include - #include - #include - #include - #include - - #define Int32 int - #define UInt32 unsigned int - #define Char char - #define UChar unsigned char - #define Int16 short - #define UInt16 unsigned short - - #define PATH_SEP '/' - #define MY_LSTAT lstat - #define MY_S_IFREG S_ISREG - #define MY_STAT stat - - #define APPEND_FILESPEC(root, name) \ +# include +# include +# include +# include +# include + +# define PATH_SEP '/' +# define MY_LSTAT lstat +# define MY_S_IFREG S_ISREG +# define MY_STAT stat + +# define APPEND_FILESPEC(root, name) \ root=snocString((root), (name)) - #define SET_BINARY_MODE(fd) /**/ - - /*-- - You should try very hard to persuade your C compiler - to inline the bits marked INLINE. Otherwise bzip2 will - run rather slowly. gcc version 2.x is recommended. - --*/ - #ifdef __GNUC__ - #define INLINE inline - #define NORETURN __attribute__ ((noreturn)) - #else - #define INLINE /**/ - #define NORETURN /**/ - #endif -#endif - - - -#if BZ_LCCWIN32 - #include - #include - #include - - #define Int32 int - #define UInt32 unsigned int - #define Int16 short - #define UInt16 unsigned short - #define Char char - #define UChar unsigned char - - #define INLINE /**/ - #define NORETURN /**/ - #define PATH_SEP '\\' - #define MY_LSTAT _stat - #define MY_STAT _stat - #define MY_S_IFREG(x) ((x) & _S_IFREG) - - #if 0 - /*-- lcc-win32 seems to expand wildcards itself --*/ - #define APPEND_FILESPEC(root, spec) \ - do { \ - if ((spec)[0] == '-') { \ - root = snocString((root), (spec)); \ - } else { \ - struct _finddata_t c_file; \ - long hFile; \ - hFile = _findfirst((spec), &c_file); \ - if ( hFile == -1L ) { \ - root = snocString ((root), (spec)); \ - } else { \ - int anInt = 0; \ - while ( anInt == 0 ) { \ - root = snocString((root), \ - &c_file.name[0]); \ - anInt = _findnext(hFile, &c_file); \ - } \ - } \ - } \ - } while ( 0 ) - #else - #define APPEND_FILESPEC(root, name) \ - root = snocString ((root), (name)) - #endif - - #define SET_BINARY_MODE(fd) \ - do { \ - int retVal = setmode ( fileno ( fd ), \ - O_BINARY ); \ - ERROR_IF_MINUS_ONE ( retVal ); \ - } while ( 0 ) - -#endif - - -/*---------------------------------------------*/ -/*-- - Some more stuff for all platforms :-) ---*/ - -#define Bool unsigned char -#define True 1 -#define False 0 - -/*-- - IntNative is your platform's `native' int size. - Only here to avoid probs with 64-bit platforms. ---*/ -#define IntNative int - - -/*-- - change to 1, or compile with -DDEBUG=1 to debug ---*/ -#ifndef DEBUG -#define DEBUG 0 -#endif - - -/*---------------------------------------------------*/ -/*--- ---*/ -/*---------------------------------------------------*/ - -/*-- - Implementation notes, July 1997 - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Memory allocation - ~~~~~~~~~~~~~~~~~ - All large data structures are allocated on the C heap, - for better or for worse. That includes the various - arrays of pointers, striped words, bytes, frequency - tables and buffers for compression and decompression. - - bzip2 can operate at various block-sizes, ranging from - 100k to 900k in 100k steps, and it allocates only as - much as it needs to. When compressing, we know from the - command-line options what the block-size is going to be, - so all allocation can be done at start-up; if that - succeeds, there can be no further allocation problems. - - Decompression is more complicated. Each compressed file - contains, in its header, a byte indicating the block - size used for compression. This means bzip2 potentially - needs to reallocate memory for each file it deals with, - which in turn opens the possibility for a memory allocation - failure part way through a run of files, by encountering - a file requiring a much larger block size than all the - ones preceding it. - - The policy is to simply give up if a memory allocation - failure occurs. During decompression, it would be - possible to move on to subsequent files in the hope that - some might ask for a smaller block size, but the - complications for doing this seem more trouble than they - are worth. - - - Compressed file formats - ~~~~~~~~~~~~~~~~~~~~~~~ - [This is now entirely different from both 0.21, and from - any previous Huffman-coded variant of bzip. - See the associated file bzip2.txt for details.] - - - Error conditions - ~~~~~~~~~~~~~~~~ - Dealing with error conditions is the least satisfactory - aspect of bzip2. The policy is to try and leave the - filesystem in a consistent state, then quit, even if it - means not processing some of the files mentioned in the - command line. `A consistent state' means that a file - exists either in its compressed or uncompressed form, - but not both. This boils down to the rule `delete the - output file if an error condition occurs, leaving the - input intact'. Input files are only deleted when we can - be pretty sure the output file has been written and - closed successfully. - - Errors are a dog because there's so many things to - deal with. The following can happen mid-file, and - require cleaning up. - - internal `panics' -- indicating a bug - corrupted or inconsistent compressed file - can't allocate enough memory to decompress this file - I/O error reading/writing/opening/closing - signal catches -- Control-C, SIGTERM, SIGHUP. - - Other conditions, primarily pertaining to file names, - can be checked in-between files, which makes dealing - with them easier. ---*/ - - - -/*---------------------------------------------------*/ -/*--- Misc (file handling) data decls ---*/ -/*---------------------------------------------------*/ - -UInt32 bytesIn, bytesOut; -Int32 verbosity; -Bool keepInputFiles, smallMode, testFailsExist; -UInt32 globalCrc; -Int32 numFileNames, numFilesProcessed; - - -/*-- source modes; F==file, I==stdin, O==stdout --*/ -#define SM_I2O 1 -#define SM_F2O 2 -#define SM_F2F 3 - -/*-- operation modes --*/ -#define OM_Z 1 -#define OM_UNZ 2 -#define OM_TEST 3 - -Int32 opMode; -Int32 srcMode; - - -Int32 longestFileName; -Char inName[1024]; -Char outName[1024]; -Char *progName; -Char progNameReally[1024]; -FILE *outputHandleJustInCase; - -void panic ( Char* ) NORETURN; -void ioError ( void ) NORETURN; -void compressOutOfMemory ( Int32, Int32 ) NORETURN; -void uncompressOutOfMemory ( Int32, Int32 ) NORETURN; -void blockOverrun ( void ) NORETURN; -void badBlockHeader ( void ) NORETURN; -void badBGLengths ( void ) NORETURN; -void crcError ( UInt32, UInt32 ) NORETURN; -void bitStreamEOF ( void ) NORETURN; -void cleanUpAndFail ( Int32 ) NORETURN; -void compressedStreamEOF ( void ) NORETURN; - -void* myMalloc ( Int32 ); - - - -/*---------------------------------------------------*/ -/*--- Data decls for the front end ---*/ -/*---------------------------------------------------*/ - -/*-- - The overshoot bytes allow us to avoid most of - the cost of pointer renormalisation during - comparison of rotations in sorting. - The figure of 20 is derived as follows: - qSort3 allows an overshoot of up to 10. - It then calls simpleSort, which calls - fullGtU, also with max overshoot 10. - fullGtU does up to 10 comparisons without - renormalising, giving 10+10 == 20. ---*/ -#define NUM_OVERSHOOT_BYTES 20 - -/*-- - These are the main data structures for - the Burrows-Wheeler transform. ---*/ - -/*-- - Pointers to compression and decompression - structures. Set by - allocateCompressStructures and - setDecompressStructureSizes - - The structures are always set to be suitable - for a block of size 100000 * blockSize100k. ---*/ -UChar *block; /*-- compress --*/ -UInt16 *quadrant; /*-- compress --*/ -Int32 *zptr; /*-- compress --*/ -UInt16 *szptr; /*-- overlays zptr ---*/ -Int32 *ftab; /*-- compress --*/ - -UInt16 *ll16; /*-- small decompress --*/ -UChar *ll4; /*-- small decompress --*/ - -Int32 *tt; /*-- fast decompress --*/ -UChar *ll8; /*-- fast decompress --*/ - - -/*-- - freq table collected to save a pass over the data - during decompression. ---*/ -Int32 unzftab[256]; - - -/*-- - index of the last char in the block, so - the block size == last + 1. ---*/ -Int32 last; - - -/*-- - index in zptr[] of original string after sorting. ---*/ -Int32 origPtr; - - -/*-- - always: in the range 0 .. 9. - The current block size is 100000 * this number. ---*/ -Int32 blockSize100k; - - -/*-- - Used when sorting. If too many long comparisons - happen, we stop sorting, randomise the block - slightly, and try again. ---*/ - -Int32 workFactor; -Int32 workDone; -Int32 workLimit; -Bool blockRandomised; -Bool firstAttempt; -Int32 nBlocksRandomised; - - - -/*---------------------------------------------------*/ -/*--- Data decls for the back end ---*/ -/*---------------------------------------------------*/ - -#define MAX_ALPHA_SIZE 258 -#define MAX_CODE_LEN 23 - -#define RUNA 0 -#define RUNB 1 - -#define N_GROUPS 6 -#define G_SIZE 50 -#define N_ITERS 4 - -#define MAX_SELECTORS (2 + (900000 / G_SIZE)) - -Bool inUse[256]; -Int32 nInUse; - -UChar seqToUnseq[256]; -UChar unseqToSeq[256]; - -UChar selector [MAX_SELECTORS]; -UChar selectorMtf[MAX_SELECTORS]; - -Int32 nMTF; - -Int32 mtfFreq[MAX_ALPHA_SIZE]; - -UChar len [N_GROUPS][MAX_ALPHA_SIZE]; - -/*-- decompress only --*/ -Int32 limit [N_GROUPS][MAX_ALPHA_SIZE]; -Int32 base [N_GROUPS][MAX_ALPHA_SIZE]; -Int32 perm [N_GROUPS][MAX_ALPHA_SIZE]; -Int32 minLens[N_GROUPS]; - -/*-- compress only --*/ -Int32 code [N_GROUPS][MAX_ALPHA_SIZE]; -Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE]; - - -/*---------------------------------------------------*/ -/*--- 32-bit CRC grunge ---*/ -/*---------------------------------------------------*/ - -/*-- - I think this is an implementation of the AUTODIN-II, - Ethernet & FDDI 32-bit CRC standard. Vaguely derived - from code by Rob Warnock, in Section 51 of the - comp.compression FAQ. ---*/ - -UInt32 crc32Table[256] = { - - /*-- Ugly, innit? --*/ - - 0x00000000UL, 0x04c11db7UL, 0x09823b6eUL, 0x0d4326d9UL, - 0x130476dcUL, 0x17c56b6bUL, 0x1a864db2UL, 0x1e475005UL, - 0x2608edb8UL, 0x22c9f00fUL, 0x2f8ad6d6UL, 0x2b4bcb61UL, - 0x350c9b64UL, 0x31cd86d3UL, 0x3c8ea00aUL, 0x384fbdbdUL, - 0x4c11db70UL, 0x48d0c6c7UL, 0x4593e01eUL, 0x4152fda9UL, - 0x5f15adacUL, 0x5bd4b01bUL, 0x569796c2UL, 0x52568b75UL, - 0x6a1936c8UL, 0x6ed82b7fUL, 0x639b0da6UL, 0x675a1011UL, - 0x791d4014UL, 0x7ddc5da3UL, 0x709f7b7aUL, 0x745e66cdUL, - 0x9823b6e0UL, 0x9ce2ab57UL, 0x91a18d8eUL, 0x95609039UL, - 0x8b27c03cUL, 0x8fe6dd8bUL, 0x82a5fb52UL, 0x8664e6e5UL, - 0xbe2b5b58UL, 0xbaea46efUL, 0xb7a96036UL, 0xb3687d81UL, - 0xad2f2d84UL, 0xa9ee3033UL, 0xa4ad16eaUL, 0xa06c0b5dUL, - 0xd4326d90UL, 0xd0f37027UL, 0xddb056feUL, 0xd9714b49UL, - 0xc7361b4cUL, 0xc3f706fbUL, 0xceb42022UL, 0xca753d95UL, - 0xf23a8028UL, 0xf6fb9d9fUL, 0xfbb8bb46UL, 0xff79a6f1UL, - 0xe13ef6f4UL, 0xe5ffeb43UL, 0xe8bccd9aUL, 0xec7dd02dUL, - 0x34867077UL, 0x30476dc0UL, 0x3d044b19UL, 0x39c556aeUL, - 0x278206abUL, 0x23431b1cUL, 0x2e003dc5UL, 0x2ac12072UL, - 0x128e9dcfUL, 0x164f8078UL, 0x1b0ca6a1UL, 0x1fcdbb16UL, - 0x018aeb13UL, 0x054bf6a4UL, 0x0808d07dUL, 0x0cc9cdcaUL, - 0x7897ab07UL, 0x7c56b6b0UL, 0x71159069UL, 0x75d48ddeUL, - 0x6b93dddbUL, 0x6f52c06cUL, 0x6211e6b5UL, 0x66d0fb02UL, - 0x5e9f46bfUL, 0x5a5e5b08UL, 0x571d7dd1UL, 0x53dc6066UL, - 0x4d9b3063UL, 0x495a2dd4UL, 0x44190b0dUL, 0x40d816baUL, - 0xaca5c697UL, 0xa864db20UL, 0xa527fdf9UL, 0xa1e6e04eUL, - 0xbfa1b04bUL, 0xbb60adfcUL, 0xb6238b25UL, 0xb2e29692UL, - 0x8aad2b2fUL, 0x8e6c3698UL, 0x832f1041UL, 0x87ee0df6UL, - 0x99a95df3UL, 0x9d684044UL, 0x902b669dUL, 0x94ea7b2aUL, - 0xe0b41de7UL, 0xe4750050UL, 0xe9362689UL, 0xedf73b3eUL, - 0xf3b06b3bUL, 0xf771768cUL, 0xfa325055UL, 0xfef34de2UL, - 0xc6bcf05fUL, 0xc27dede8UL, 0xcf3ecb31UL, 0xcbffd686UL, - 0xd5b88683UL, 0xd1799b34UL, 0xdc3abdedUL, 0xd8fba05aUL, - 0x690ce0eeUL, 0x6dcdfd59UL, 0x608edb80UL, 0x644fc637UL, - 0x7a089632UL, 0x7ec98b85UL, 0x738aad5cUL, 0x774bb0ebUL, - 0x4f040d56UL, 0x4bc510e1UL, 0x46863638UL, 0x42472b8fUL, - 0x5c007b8aUL, 0x58c1663dUL, 0x558240e4UL, 0x51435d53UL, - 0x251d3b9eUL, 0x21dc2629UL, 0x2c9f00f0UL, 0x285e1d47UL, - 0x36194d42UL, 0x32d850f5UL, 0x3f9b762cUL, 0x3b5a6b9bUL, - 0x0315d626UL, 0x07d4cb91UL, 0x0a97ed48UL, 0x0e56f0ffUL, - 0x1011a0faUL, 0x14d0bd4dUL, 0x19939b94UL, 0x1d528623UL, - 0xf12f560eUL, 0xf5ee4bb9UL, 0xf8ad6d60UL, 0xfc6c70d7UL, - 0xe22b20d2UL, 0xe6ea3d65UL, 0xeba91bbcUL, 0xef68060bUL, - 0xd727bbb6UL, 0xd3e6a601UL, 0xdea580d8UL, 0xda649d6fUL, - 0xc423cd6aUL, 0xc0e2d0ddUL, 0xcda1f604UL, 0xc960ebb3UL, - 0xbd3e8d7eUL, 0xb9ff90c9UL, 0xb4bcb610UL, 0xb07daba7UL, - 0xae3afba2UL, 0xaafbe615UL, 0xa7b8c0ccUL, 0xa379dd7bUL, - 0x9b3660c6UL, 0x9ff77d71UL, 0x92b45ba8UL, 0x9675461fUL, - 0x8832161aUL, 0x8cf30badUL, 0x81b02d74UL, 0x857130c3UL, - 0x5d8a9099UL, 0x594b8d2eUL, 0x5408abf7UL, 0x50c9b640UL, - 0x4e8ee645UL, 0x4a4ffbf2UL, 0x470cdd2bUL, 0x43cdc09cUL, - 0x7b827d21UL, 0x7f436096UL, 0x7200464fUL, 0x76c15bf8UL, - 0x68860bfdUL, 0x6c47164aUL, 0x61043093UL, 0x65c52d24UL, - 0x119b4be9UL, 0x155a565eUL, 0x18197087UL, 0x1cd86d30UL, - 0x029f3d35UL, 0x065e2082UL, 0x0b1d065bUL, 0x0fdc1becUL, - 0x3793a651UL, 0x3352bbe6UL, 0x3e119d3fUL, 0x3ad08088UL, - 0x2497d08dUL, 0x2056cd3aUL, 0x2d15ebe3UL, 0x29d4f654UL, - 0xc5a92679UL, 0xc1683bceUL, 0xcc2b1d17UL, 0xc8ea00a0UL, - 0xd6ad50a5UL, 0xd26c4d12UL, 0xdf2f6bcbUL, 0xdbee767cUL, - 0xe3a1cbc1UL, 0xe760d676UL, 0xea23f0afUL, 0xeee2ed18UL, - 0xf0a5bd1dUL, 0xf464a0aaUL, 0xf9278673UL, 0xfde69bc4UL, - 0x89b8fd09UL, 0x8d79e0beUL, 0x803ac667UL, 0x84fbdbd0UL, - 0x9abc8bd5UL, 0x9e7d9662UL, 0x933eb0bbUL, 0x97ffad0cUL, - 0xafb010b1UL, 0xab710d06UL, 0xa6322bdfUL, 0xa2f33668UL, - 0xbcb4666dUL, 0xb8757bdaUL, 0xb5365d03UL, 0xb1f740b4UL -}; - - -/*---------------------------------------------*/ -void initialiseCRC ( void ) -{ - globalCrc = 0xffffffffUL; -} - - -/*---------------------------------------------*/ -UInt32 getFinalCRC ( void ) -{ - return ~globalCrc; -} - - -/*---------------------------------------------*/ -UInt32 getGlobalCRC ( void ) -{ - return globalCrc; -} - - -/*---------------------------------------------*/ -void setGlobalCRC ( UInt32 newCrc ) -{ - globalCrc = newCrc; -} - - -/*---------------------------------------------*/ -#define UPDATE_CRC(crcVar,cha) \ -{ \ - crcVar = (crcVar << 8) ^ \ - crc32Table[(crcVar >> 24) ^ \ - ((UChar)cha)]; \ -} - - -/*---------------------------------------------------*/ -/*--- Bit stream I/O ---*/ -/*---------------------------------------------------*/ - - -UInt32 bsBuff; -Int32 bsLive; -FILE* bsStream; -Bool bsWriting; - - -/*---------------------------------------------*/ -void bsSetStream ( FILE* f, Bool wr ) -{ - if (bsStream != NULL) panic ( "bsSetStream" ); - bsStream = f; - bsLive = 0; - bsBuff = 0; - bytesOut = 0; - bytesIn = 0; - bsWriting = wr; -} - - -/*---------------------------------------------*/ -void bsFinishedWithStream ( void ) -{ - if (bsWriting) - while (bsLive > 0) { - fputc ( (UChar)(bsBuff >> 24), bsStream ); - bsBuff <<= 8; - bsLive -= 8; - bytesOut++; - } - bsStream = NULL; -} - - -/*---------------------------------------------*/ -#define bsNEEDR(nz) \ -{ \ - while (bsLive < nz) { \ - Int32 zzi = fgetc ( bsStream ); \ - if (zzi == EOF) compressedStreamEOF(); \ - bsBuff = (bsBuff << 8) | (zzi & 0xffL); \ - bsLive += 8; \ - } \ -} - - -/*---------------------------------------------*/ -#define bsNEEDW(nz) \ -{ \ - while (bsLive >= 8) { \ - fputc ( (UChar)(bsBuff >> 24), \ - bsStream ); \ - bsBuff <<= 8; \ - bsLive -= 8; \ - bytesOut++; \ - } \ -} - - -/*---------------------------------------------*/ -#define bsR1(vz) \ -{ \ - bsNEEDR(1); \ - vz = (bsBuff >> (bsLive-1)) & 1; \ - bsLive--; \ -} - - -/*---------------------------------------------*/ -INLINE UInt32 bsR ( Int32 n ) -{ - UInt32 v; - bsNEEDR ( n ); - v = (bsBuff >> (bsLive-n)) & ((1 << n)-1); - bsLive -= n; - return v; -} - - -/*---------------------------------------------*/ -INLINE void bsW ( Int32 n, UInt32 v ) -{ - bsNEEDW ( n ); - bsBuff |= (v << (32 - bsLive - n)); - bsLive += n; -} - - -/*---------------------------------------------*/ -UChar bsGetUChar ( void ) -{ - return (UChar)bsR(8); -} - - -/*---------------------------------------------*/ -void bsPutUChar ( UChar c ) -{ - bsW(8, (UInt32)c ); -} - - -/*---------------------------------------------*/ -Int32 bsGetUInt32 ( void ) -{ - UInt32 u; - u = 0; - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - return u; -} - - -/*---------------------------------------------*/ -UInt32 bsGetIntVS ( UInt32 numBits ) -{ - return (UInt32)bsR(numBits); -} - - -/*---------------------------------------------*/ -UInt32 bsGetInt32 ( void ) -{ - return (Int32)bsGetUInt32(); -} - - -/*---------------------------------------------*/ -void bsPutUInt32 ( UInt32 u ) -{ - bsW ( 8, (u >> 24) & 0xffL ); - bsW ( 8, (u >> 16) & 0xffL ); - bsW ( 8, (u >> 8) & 0xffL ); - bsW ( 8, u & 0xffL ); -} - - -/*---------------------------------------------*/ -void bsPutInt32 ( Int32 c ) -{ - bsPutUInt32 ( (UInt32)c ); -} - - -/*---------------------------------------------*/ -void bsPutIntVS ( Int32 numBits, UInt32 c ) -{ - bsW ( numBits, c ); -} - - -/*---------------------------------------------------*/ -/*--- Huffman coding low-level stuff ---*/ -/*---------------------------------------------------*/ - -#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) -#define DEPTHOF(zz1) ((zz1) & 0x000000ff) -#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) - -#define ADDWEIGHTS(zw1,zw2) \ - (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ - (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) - -#define UPHEAP(z) \ -{ \ - Int32 zz, tmp; \ - zz = z; tmp = heap[zz]; \ - while (weight[tmp] < weight[heap[zz >> 1]]) { \ - heap[zz] = heap[zz >> 1]; \ - zz >>= 1; \ - } \ - heap[zz] = tmp; \ -} - -#define DOWNHEAP(z) \ -{ \ - Int32 zz, yy, tmp; \ - zz = z; tmp = heap[zz]; \ - while (True) { \ - yy = zz << 1; \ - if (yy > nHeap) break; \ - if (yy < nHeap && \ - weight[heap[yy+1]] < weight[heap[yy]]) \ - yy++; \ - if (weight[tmp] < weight[heap[yy]]) break; \ - heap[zz] = heap[yy]; \ - zz = yy; \ - } \ - heap[zz] = tmp; \ -} - - -/*---------------------------------------------*/ -void hbMakeCodeLengths ( UChar *len, - Int32 *freq, - Int32 alphaSize, - Int32 maxLen ) -{ - /*-- - Nodes and heap entries run from 1. Entry 0 - for both the heap and nodes is a sentinel. - --*/ - Int32 nNodes, nHeap, n1, n2, i, j, k; - Bool tooLong; - - Int32 heap [ MAX_ALPHA_SIZE + 2 ]; - Int32 weight [ MAX_ALPHA_SIZE * 2 ]; - Int32 parent [ MAX_ALPHA_SIZE * 2 ]; - - for (i = 0; i < alphaSize; i++) - weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; - - while (True) { - - nNodes = alphaSize; - nHeap = 0; - - heap[0] = 0; - weight[0] = 0; - parent[0] = -2; - - for (i = 1; i <= alphaSize; i++) { - parent[i] = -1; - nHeap++; - heap[nHeap] = i; - UPHEAP(nHeap); - } - if (!(nHeap < (MAX_ALPHA_SIZE+2))) - panic ( "hbMakeCodeLengths(1)" ); - - while (nHeap > 1) { - n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); - n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); - nNodes++; - parent[n1] = parent[n2] = nNodes; - weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); - parent[nNodes] = -1; - nHeap++; - heap[nHeap] = nNodes; - UPHEAP(nHeap); - } - if (!(nNodes < (MAX_ALPHA_SIZE * 2))) - panic ( "hbMakeCodeLengths(2)" ); - - tooLong = False; - for (i = 1; i <= alphaSize; i++) { - j = 0; - k = i; - while (parent[k] >= 0) { k = parent[k]; j++; } - len[i-1] = j; - if (j > maxLen) tooLong = True; - } - - if (! tooLong) break; - - for (i = 1; i < alphaSize; i++) { - j = weight[i] >> 8; - j = 1 + (j / 2); - weight[i] = j << 8; - } - } -} - - -/*---------------------------------------------*/ -void hbAssignCodes ( Int32 *code, - UChar *length, - Int32 minLen, - Int32 maxLen, - Int32 alphaSize ) -{ - Int32 n, vec, i; - - vec = 0; - for (n = minLen; n <= maxLen; n++) { - for (i = 0; i < alphaSize; i++) - if (length[i] == n) { code[i] = vec; vec++; }; - vec <<= 1; - } -} - - -/*---------------------------------------------*/ -void hbCreateDecodeTables ( Int32 *limit, - Int32 *base, - Int32 *perm, - UChar *length, - Int32 minLen, - Int32 maxLen, - Int32 alphaSize ) -{ - Int32 pp, i, j, vec; - - pp = 0; - for (i = minLen; i <= maxLen; i++) - for (j = 0; j < alphaSize; j++) - if (length[j] == i) { perm[pp] = j; pp++; }; - - for (i = 0; i < MAX_CODE_LEN; i++) base[i] = 0; - for (i = 0; i < alphaSize; i++) base[length[i]+1]++; - - for (i = 1; i < MAX_CODE_LEN; i++) base[i] += base[i-1]; - - for (i = 0; i < MAX_CODE_LEN; i++) limit[i] = 0; - vec = 0; - - for (i = minLen; i <= maxLen; i++) { - vec += (base[i+1] - base[i]); - limit[i] = vec-1; - vec <<= 1; - } - for (i = minLen + 1; i <= maxLen; i++) - base[i] = ((limit[i-1] + 1) << 1) - base[i]; -} - - - -/*---------------------------------------------------*/ -/*--- Undoing the reversible transformation ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------*/ -#define SET_LL4(i,n) \ - { if (((i) & 0x1) == 0) \ - ll4[(i) >> 1] = (ll4[(i) >> 1] & 0xf0) | (n); else \ - ll4[(i) >> 1] = (ll4[(i) >> 1] & 0x0f) | ((n) << 4); \ - } - -#define GET_LL4(i) \ - (((UInt32)(ll4[(i) >> 1])) >> (((i) << 2) & 0x4) & 0xF) - -#define SET_LL(i,n) \ - { ll16[i] = (UInt16)(n & 0x0000ffff); \ - SET_LL4(i, n >> 16); \ - } - -#define GET_LL(i) \ - (((UInt32)ll16[i]) | (GET_LL4(i) << 16)) - - -/*---------------------------------------------*/ -/*-- - Manage memory for compression/decompression. - When compressing, a single block size applies to - all files processed, and that's set when the - program starts. But when decompressing, each file - processed could have been compressed with a - different block size, so we may have to free - and reallocate on a per-file basis. - - A call with argument of zero means - `free up everything.' And a value of zero for - blockSize100k means no memory is currently allocated. ---*/ - - -/*---------------------------------------------*/ -void allocateCompressStructures ( void ) -{ - Int32 n = 100000 * blockSize100k; - block = malloc ( (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) ); - quadrant = malloc ( (n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) ); - zptr = malloc ( n * sizeof(Int32) ); - ftab = malloc ( 65537 * sizeof(Int32) ); - - if (block == NULL || quadrant == NULL || - zptr == NULL || ftab == NULL) { - Int32 totalDraw - = (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) + - (n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) + - n * sizeof(Int32) + - 65537 * sizeof(Int32); - - compressOutOfMemory ( totalDraw, n ); - } - - /*-- - Since we want valid indexes for block of - -1 to n + NUM_OVERSHOOT_BYTES - 1 - inclusive. - --*/ - block++; - - /*-- - The back end needs a place to store the MTF values - whilst it calculates the coding tables. We could - put them in the zptr array. However, these values - will fit in a short, so we overlay szptr at the - start of zptr, in the hope of reducing the number - of cache misses induced by the multiple traversals - of the MTF values when calculating coding tables. - Seems to improve compression speed by about 1%. - --*/ - szptr = (UInt16*)zptr; -} - - -/*---------------------------------------------*/ -void setDecompressStructureSizes ( Int32 newSize100k ) -{ - if (! (0 <= newSize100k && newSize100k <= 9 && - 0 <= blockSize100k && blockSize100k <= 9)) - panic ( "setDecompressStructureSizes" ); - - if (newSize100k == blockSize100k) return; - - blockSize100k = newSize100k; - - if (ll16 != NULL) free ( ll16 ); - if (ll4 != NULL) free ( ll4 ); - if (ll8 != NULL) free ( ll8 ); - if (tt != NULL) free ( tt ); - - if (newSize100k == 0) return; - - if (smallMode) { - - Int32 n = 100000 * newSize100k; - ll16 = malloc ( n * sizeof(UInt16) ); - ll4 = malloc ( ((n+1) >> 1) * sizeof(UChar) ); - - if (ll4 == NULL || ll16 == NULL) { - Int32 totalDraw - = n * sizeof(Int16) + ((n+1) >> 1) * sizeof(UChar); - uncompressOutOfMemory ( totalDraw, n ); - } - - } else { - - Int32 n = 100000 * newSize100k; - ll8 = malloc ( n * sizeof(UChar) ); - tt = malloc ( n * sizeof(Int32) ); - - if (ll8 == NULL || tt == NULL) { - Int32 totalDraw - = n * sizeof(UChar) + n * sizeof(UInt32); - uncompressOutOfMemory ( totalDraw, n ); - } - - } -} - - - -/*---------------------------------------------------*/ -/*--- The new back end ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------*/ -void makeMaps ( void ) -{ - Int32 i; - nInUse = 0; - for (i = 0; i < 256; i++) - if (inUse[i]) { - seqToUnseq[nInUse] = i; - unseqToSeq[i] = nInUse; - nInUse++; - } -} - - -/*---------------------------------------------*/ -void generateMTFValues ( void ) -{ - UChar yy[256]; - Int32 i, j; - UChar tmp; - UChar tmp2; - Int32 zPend; - Int32 wr; - Int32 EOB; - - makeMaps(); - EOB = nInUse+1; - - for (i = 0; i <= EOB; i++) mtfFreq[i] = 0; - - wr = 0; - zPend = 0; - for (i = 0; i < nInUse; i++) yy[i] = (UChar) i; - - - for (i = 0; i <= last; i++) { - UChar ll_i; - - #if DEBUG - assert (wr <= i); - #endif - - ll_i = unseqToSeq[block[zptr[i] - 1]]; - #if DEBUG - assert (ll_i < nInUse); - #endif - - j = 0; - tmp = yy[j]; - while ( ll_i != tmp ) { - j++; - tmp2 = tmp; - tmp = yy[j]; - yy[j] = tmp2; - }; - yy[0] = tmp; - - if (j == 0) { - zPend++; - } else { - if (zPend > 0) { - zPend--; - while (True) { - switch (zPend % 2) { - case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break; - case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break; - }; - if (zPend < 2) break; - zPend = (zPend - 2) / 2; - }; - zPend = 0; - } - szptr[wr] = j+1; wr++; mtfFreq[j+1]++; - } - } - - if (zPend > 0) { - zPend--; - while (True) { - switch (zPend % 2) { - case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break; - case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break; - }; - if (zPend < 2) break; - zPend = (zPend - 2) / 2; - }; - } - - szptr[wr] = EOB; wr++; mtfFreq[EOB]++; - - nMTF = wr; -} - - -/*---------------------------------------------*/ -#define LESSER_ICOST 0 -#define GREATER_ICOST 15 - -void sendMTFValues ( void ) -{ - Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; - Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; - Int32 nGroups, nBytes; - - /*-- - UChar len [N_GROUPS][MAX_ALPHA_SIZE]; - is a global since the decoder also needs it. - - Int32 code[N_GROUPS][MAX_ALPHA_SIZE]; - Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE]; - are also globals only used in this proc. - Made global to keep stack frame size small. - --*/ - - - UInt16 cost[N_GROUPS]; - Int32 fave[N_GROUPS]; - - if (verbosity >= 3) - fprintf ( stderr, - " %d in block, %d after MTF & 1-2 coding, %d+2 syms in use\n", - last+1, nMTF, nInUse ); - - alphaSize = nInUse+2; - for (t = 0; t < N_GROUPS; t++) - for (v = 0; v < alphaSize; v++) - len[t][v] = GREATER_ICOST; - - /*--- Decide how many coding tables to use ---*/ - if (nMTF <= 0) panic ( "sendMTFValues(0)" ); - if (nMTF < 200) nGroups = 2; else - if (nMTF < 800) nGroups = 4; else - nGroups = 6; - - /*--- Generate an initial set of coding tables ---*/ - { - Int32 nPart, remF, tFreq, aFreq; - - nPart = nGroups; - remF = nMTF; - gs = 0; - while (nPart > 0) { - tFreq = remF / nPart; - ge = gs-1; - aFreq = 0; - while (aFreq < tFreq && ge < alphaSize-1) { - ge++; - aFreq += mtfFreq[ge]; - } - - if (ge > gs - && nPart != nGroups && nPart != 1 - && ((nGroups-nPart) % 2 == 1)) { - aFreq -= mtfFreq[ge]; - ge--; - } - - if (verbosity >= 3) - fprintf ( stderr, - " initial group %d, [%d .. %d], has %d syms (%4.1f%%)\n", - nPart, gs, ge, aFreq, - (100.0 * (float)aFreq) / (float)nMTF ); - - for (v = 0; v < alphaSize; v++) - if (v >= gs && v <= ge) - len[nPart-1][v] = LESSER_ICOST; else - len[nPart-1][v] = GREATER_ICOST; - - nPart--; - gs = ge+1; - remF -= aFreq; - } - } - - /*--- - Iterate up to N_ITERS times to improve the tables. - ---*/ - for (iter = 0; iter < N_ITERS; iter++) { - - for (t = 0; t < nGroups; t++) fave[t] = 0; - - for (t = 0; t < nGroups; t++) - for (v = 0; v < alphaSize; v++) - rfreq[t][v] = 0; - - nSelectors = 0; - totc = 0; - gs = 0; - while (True) { - - /*--- Set group start & end marks. --*/ - if (gs >= nMTF) break; - ge = gs + G_SIZE - 1; - if (ge >= nMTF) ge = nMTF-1; - - /*-- - Calculate the cost of this group as coded - by each of the coding tables. - --*/ - for (t = 0; t < nGroups; t++) cost[t] = 0; - - if (nGroups == 6) { - register UInt16 cost0, cost1, cost2, cost3, cost4, cost5; - cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; - for (i = gs; i <= ge; i++) { - UInt16 icv = szptr[i]; - cost0 += len[0][icv]; - cost1 += len[1][icv]; - cost2 += len[2][icv]; - cost3 += len[3][icv]; - cost4 += len[4][icv]; - cost5 += len[5][icv]; - } - cost[0] = cost0; cost[1] = cost1; cost[2] = cost2; - cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; - } else { - for (i = gs; i <= ge; i++) { - UInt16 icv = szptr[i]; - for (t = 0; t < nGroups; t++) cost[t] += len[t][icv]; - } - } - - /*-- - Find the coding table which is best for this group, - and record its identity in the selector table. - --*/ - bc = 999999999; bt = -1; - for (t = 0; t < nGroups; t++) - if (cost[t] < bc) { bc = cost[t]; bt = t; }; - totc += bc; - fave[bt]++; - selector[nSelectors] = bt; - nSelectors++; - - /*-- - Increment the symbol frequencies for the selected table. - --*/ - for (i = gs; i <= ge; i++) - rfreq[bt][ szptr[i] ]++; - - gs = ge+1; - } - if (verbosity >= 3) { - fprintf ( stderr, - " pass %d: size is %d, grp uses are ", - iter+1, totc/8 ); - for (t = 0; t < nGroups; t++) - fprintf ( stderr, "%d ", fave[t] ); - fprintf ( stderr, "\n" ); - } - - /*-- - Recompute the tables based on the accumulated frequencies. - --*/ - for (t = 0; t < nGroups; t++) - hbMakeCodeLengths ( &len[t][0], &rfreq[t][0], alphaSize, 20 ); - } - - - if (!(nGroups < 8)) panic ( "sendMTFValues(1)" ); - if (!(nSelectors < 32768 && - nSelectors <= (2 + (900000 / G_SIZE)))) - panic ( "sendMTFValues(2)" ); - - - /*--- Compute MTF values for the selectors. ---*/ - { - UChar pos[N_GROUPS], ll_i, tmp2, tmp; - for (i = 0; i < nGroups; i++) pos[i] = i; - for (i = 0; i < nSelectors; i++) { - ll_i = selector[i]; - j = 0; - tmp = pos[j]; - while ( ll_i != tmp ) { - j++; - tmp2 = tmp; - tmp = pos[j]; - pos[j] = tmp2; - }; - pos[0] = tmp; - selectorMtf[i] = j; - } - }; - - /*--- Assign actual codes for the tables. --*/ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (len[t][i] > maxLen) maxLen = len[t][i]; - if (len[t][i] < minLen) minLen = len[t][i]; - } - if (maxLen > 20) panic ( "sendMTFValues(3)" ); - if (minLen < 1) panic ( "sendMTFValues(4)" ); - hbAssignCodes ( &code[t][0], &len[t][0], - minLen, maxLen, alphaSize ); - } - - /*--- Transmit the mapping table. ---*/ - { - Bool inUse16[16]; - for (i = 0; i < 16; i++) { - inUse16[i] = False; - for (j = 0; j < 16; j++) - if (inUse[i * 16 + j]) inUse16[i] = True; - } - - nBytes = bytesOut; - for (i = 0; i < 16; i++) - if (inUse16[i]) bsW(1,1); else bsW(1,0); - - for (i = 0; i < 16; i++) - if (inUse16[i]) - for (j = 0; j < 16; j++) - if (inUse[i * 16 + j]) bsW(1,1); else bsW(1,0); - - if (verbosity >= 3) - fprintf ( stderr, " bytes: mapping %d, ", bytesOut-nBytes ); - } - - /*--- Now the selectors. ---*/ - nBytes = bytesOut; - bsW ( 3, nGroups ); - bsW ( 15, nSelectors ); - for (i = 0; i < nSelectors; i++) { - for (j = 0; j < selectorMtf[i]; j++) bsW(1,1); - bsW(1,0); - } - if (verbosity >= 3) - fprintf ( stderr, "selectors %d, ", bytesOut-nBytes ); - - /*--- Now the coding tables. ---*/ - nBytes = bytesOut; - - for (t = 0; t < nGroups; t++) { - Int32 curr = len[t][0]; - bsW ( 5, curr ); - for (i = 0; i < alphaSize; i++) { - while (curr < len[t][i]) { bsW(2,2); curr++; /* 10 */ }; - while (curr > len[t][i]) { bsW(2,3); curr--; /* 11 */ }; - bsW ( 1, 0 ); - } - } - - if (verbosity >= 3) - fprintf ( stderr, "code lengths %d, ", bytesOut-nBytes ); - - /*--- And finally, the block data proper ---*/ - nBytes = bytesOut; - selCtr = 0; - gs = 0; - while (True) { - if (gs >= nMTF) break; - ge = gs + G_SIZE - 1; - if (ge >= nMTF) ge = nMTF-1; - for (i = gs; i <= ge; i++) { - #if DEBUG - assert (selector[selCtr] < nGroups); - #endif - bsW ( len [selector[selCtr]] [szptr[i]], - code [selector[selCtr]] [szptr[i]] ); - } - - gs = ge+1; - selCtr++; - } - if (!(selCtr == nSelectors)) panic ( "sendMTFValues(5)" ); - - if (verbosity >= 3) - fprintf ( stderr, "codes %d\n", bytesOut-nBytes ); -} - - -/*---------------------------------------------*/ -void moveToFrontCodeAndSend ( void ) -{ - bsPutIntVS ( 24, origPtr ); - generateMTFValues(); - sendMTFValues(); -} - - -/*---------------------------------------------*/ -void recvDecodingTables ( void ) -{ - Int32 i, j, t, nGroups, nSelectors, alphaSize; - Int32 minLen, maxLen; - Bool inUse16[16]; - - /*--- Receive the mapping table ---*/ - for (i = 0; i < 16; i++) - if (bsR(1) == 1) - inUse16[i] = True; else - inUse16[i] = False; - - for (i = 0; i < 256; i++) inUse[i] = False; - - for (i = 0; i < 16; i++) - if (inUse16[i]) - for (j = 0; j < 16; j++) - if (bsR(1) == 1) inUse[i * 16 + j] = True; - - makeMaps(); - alphaSize = nInUse+2; - - /*--- Now the selectors ---*/ - nGroups = bsR ( 3 ); - nSelectors = bsR ( 15 ); - for (i = 0; i < nSelectors; i++) { - j = 0; - while (bsR(1) == 1) j++; - selectorMtf[i] = j; - } - - /*--- Undo the MTF values for the selectors. ---*/ - { - UChar pos[N_GROUPS], tmp, v; - for (v = 0; v < nGroups; v++) pos[v] = v; - - for (i = 0; i < nSelectors; i++) { - v = selectorMtf[i]; - tmp = pos[v]; - while (v > 0) { pos[v] = pos[v-1]; v--; } - pos[0] = tmp; - selector[i] = tmp; - } - } - - /*--- Now the coding tables ---*/ - for (t = 0; t < nGroups; t++) { - Int32 curr = bsR ( 5 ); - for (i = 0; i < alphaSize; i++) { - while (bsR(1) == 1) { - if (bsR(1) == 0) curr++; else curr--; - } - len[t][i] = curr; - } - } - - /*--- Create the Huffman decoding tables ---*/ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (len[t][i] > maxLen) maxLen = len[t][i]; - if (len[t][i] < minLen) minLen = len[t][i]; - } - hbCreateDecodeTables ( - &limit[t][0], &base[t][0], &perm[t][0], &len[t][0], - minLen, maxLen, alphaSize - ); - minLens[t] = minLen; - } -} - - -/*---------------------------------------------*/ -#define GET_MTF_VAL(lval) \ -{ \ - Int32 zt, zn, zvec, zj; \ - if (groupPos == 0) { \ - groupNo++; \ - groupPos = G_SIZE; \ - } \ - groupPos--; \ - zt = selector[groupNo]; \ - zn = minLens[zt]; \ - zvec = bsR ( zn ); \ - while (zvec > limit[zt][zn]) { \ - zn++; bsR1(zj); \ - zvec = (zvec << 1) | zj; \ - }; \ - lval = perm[zt][zvec - base[zt][zn]]; \ -} - - -/*---------------------------------------------*/ -void getAndMoveToFrontDecode ( void ) -{ - UChar yy[256]; - Int32 i, j, nextSym, limitLast; - Int32 EOB, groupNo, groupPos; - - limitLast = 100000 * blockSize100k; - origPtr = bsGetIntVS ( 24 ); - - recvDecodingTables(); - EOB = nInUse+1; - groupNo = -1; - groupPos = 0; - - /*-- - Setting up the unzftab entries here is not strictly - necessary, but it does save having to do it later - in a separate pass, and so saves a block's worth of - cache misses. - --*/ - for (i = 0; i <= 255; i++) unzftab[i] = 0; - - for (i = 0; i <= 255; i++) yy[i] = (UChar) i; - - last = -1; - - GET_MTF_VAL(nextSym); - - while (True) { - - if (nextSym == EOB) break; - - if (nextSym == RUNA || nextSym == RUNB) { - UChar ch; - Int32 s = -1; - Int32 N = 1; - do { - if (nextSym == RUNA) s = s + (0+1) * N; else - if (nextSym == RUNB) s = s + (1+1) * N; - N = N * 2; - GET_MTF_VAL(nextSym); - } - while (nextSym == RUNA || nextSym == RUNB); - - s++; - ch = seqToUnseq[yy[0]]; - unzftab[ch] += s; - - if (smallMode) - while (s > 0) { - last++; - ll16[last] = ch; - s--; - } - else - while (s > 0) { - last++; - ll8[last] = ch; - s--; - }; - - if (last >= limitLast) blockOverrun(); - continue; - - } else { - - UChar tmp; - last++; if (last >= limitLast) blockOverrun(); - - tmp = yy[nextSym-1]; - unzftab[seqToUnseq[tmp]]++; - if (smallMode) - ll16[last] = seqToUnseq[tmp]; else - ll8[last] = seqToUnseq[tmp]; - - /*-- - This loop is hammered during decompression, - hence the unrolling. - - for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1]; - --*/ - - j = nextSym-1; - for (; j > 3; j -= 4) { - yy[j] = yy[j-1]; - yy[j-1] = yy[j-2]; - yy[j-2] = yy[j-3]; - yy[j-3] = yy[j-4]; - } - for (; j > 0; j--) yy[j] = yy[j-1]; - - yy[0] = tmp; - GET_MTF_VAL(nextSym); - continue; - } - } -} - - -/*---------------------------------------------------*/ -/*--- Block-sorting machinery ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------*/ -/*-- - Compare two strings in block. We assume (see - discussion above) that i1 and i2 have a max - offset of 10 on entry, and that the first - bytes of both block and quadrant have been - copied into the "overshoot area", ie - into the subscript range - [last+1 .. last+NUM_OVERSHOOT_BYTES]. ---*/ -INLINE Bool fullGtU ( Int32 i1, Int32 i2 ) -{ - Int32 k; - UChar c1, c2; - UInt16 s1, s2; - - #if DEBUG - /*-- - shellsort shouldn't ask to compare - something with itself. - --*/ - assert (i1 != i2); - #endif - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - - k = last + 1; - - do { - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - - c1 = block[i1]; - c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - - if (i1 > last) { i1 -= last; i1--; }; - if (i2 > last) { i2 -= last; i2--; }; - - k -= 4; - workDone++; - } - while (k >= 0); - - return False; -} - -/*---------------------------------------------*/ -/*-- - Knuth's increments seem to work better - than Incerpi-Sedgewick here. Possibly - because the number of elems to sort is - usually small, typically <= 20. ---*/ -Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 }; - -void simpleSort ( Int32 lo, Int32 hi, Int32 d ) -{ - Int32 i, j, h, bigN, hp; - Int32 v; - - bigN = hi - lo + 1; - if (bigN < 2) return; - - hp = 0; - while (incs[hp] < bigN) hp++; - hp--; - - for (; hp >= 0; hp--) { - h = incs[hp]; - if (verbosity >= 5) - fprintf ( stderr, " shell increment %d\n", h ); - - i = lo + h; - while (True) { - - /*-- copy 1 --*/ - if (i > hi) break; - v = zptr[i]; - j = i; - while ( fullGtU ( zptr[j-h]+d, v+d ) ) { - zptr[j] = zptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - zptr[j] = v; - i++; - - /*-- copy 2 --*/ - if (i > hi) break; - v = zptr[i]; - j = i; - while ( fullGtU ( zptr[j-h]+d, v+d ) ) { - zptr[j] = zptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - zptr[j] = v; - i++; - - /*-- copy 3 --*/ - if (i > hi) break; - v = zptr[i]; - j = i; - while ( fullGtU ( zptr[j-h]+d, v+d ) ) { - zptr[j] = zptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - zptr[j] = v; - i++; - - if (workDone > workLimit && firstAttempt) return; - } - } -} - - -/*---------------------------------------------*/ -/*-- - The following is an implementation of - an elegant 3-way quicksort for strings, - described in a paper "Fast Algorithms for - Sorting and Searching Strings", by Robert - Sedgewick and Jon L. Bentley. ---*/ - -#define swap(lv1, lv2) \ - { Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; } - -INLINE void vswap ( Int32 p1, Int32 p2, Int32 n ) -{ - while (n > 0) { - swap(zptr[p1], zptr[p2]); - p1++; p2++; n--; - } -} - -INLINE UChar med3 ( UChar a, UChar b, UChar c ) -{ - UChar t; - if (a > b) { t = a; a = b; b = t; }; - if (b > c) { t = b; b = c; c = t; }; - if (a > b) b = a; - return b; -} - - -#define min(a,b) ((a) < (b)) ? (a) : (b) - -typedef - struct { Int32 ll; Int32 hh; Int32 dd; } - StackElem; - -#define push(lz,hz,dz) { stack[sp].ll = lz; \ - stack[sp].hh = hz; \ - stack[sp].dd = dz; \ - sp++; } - -#define pop(lz,hz,dz) { sp--; \ - lz = stack[sp].ll; \ - hz = stack[sp].hh; \ - dz = stack[sp].dd; } - -#define SMALL_THRESH 20 -#define DEPTH_THRESH 10 - -/*-- - If you are ever unlucky/improbable enough - to get a stack overflow whilst sorting, - increase the following constant and try - again. In practice I have never seen the - stack go above 27 elems, so the following - limit seems very generous. ---*/ -#define QSORT_STACK_SIZE 1000 - - -void qSort3 ( Int32 loSt, Int32 hiSt, Int32 dSt ) -{ - Int32 unLo, unHi, ltLo, gtHi, med, n, m; - Int32 sp, lo, hi, d; - StackElem stack[QSORT_STACK_SIZE]; - - sp = 0; - push ( loSt, hiSt, dSt ); - - while (sp > 0) { - - if (sp >= QSORT_STACK_SIZE) panic ( "stack overflow in qSort3" ); - - pop ( lo, hi, d ); - - if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { - simpleSort ( lo, hi, d ); - if (workDone > workLimit && firstAttempt) return; - continue; - } - - med = med3 ( block[zptr[ lo ]+d], - block[zptr[ hi ]+d], - block[zptr[ (lo+hi)>>1 ]+d] ); - - unLo = ltLo = lo; - unHi = gtHi = hi; - - while (True) { - while (True) { - if (unLo > unHi) break; - n = ((Int32)block[zptr[unLo]+d]) - med; - if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; }; - if (n > 0) break; - unLo++; - } - while (True) { - if (unLo > unHi) break; - n = ((Int32)block[zptr[unHi]+d]) - med; - if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) break; - swap(zptr[unLo], zptr[unHi]); unLo++; unHi--; - } - #if DEBUG - assert (unHi == unLo-1); - #endif - - if (gtHi < ltLo) { - push(lo, hi, d+1 ); - continue; - } - - n = min(ltLo-lo, unLo-ltLo); vswap(lo, unLo-n, n); - m = min(hi-gtHi, gtHi-unHi); vswap(unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - push ( lo, n, d ); - push ( n+1, m-1, d+1 ); - push ( m, hi, d ); - } -} - - -/*---------------------------------------------*/ - -#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) - -#define SETMASK (1 << 21) -#define CLEARMASK (~(SETMASK)) - -void sortIt ( void ) -{ - Int32 i, j, ss, sb; - Int32 runningOrder[256]; - Int32 copy[256]; - Bool bigDone[256]; - UChar c1, c2; - Int32 numQSorted; - - /*-- - In the various block-sized structures, live data runs - from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, - set up the overshoot area for block. - --*/ - - if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); - for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) - block[last+i+1] = block[i % (last+1)]; - for (i = 0; i <= last+NUM_OVERSHOOT_BYTES; i++) - quadrant[i] = 0; - - block[-1] = block[last]; - - if (last < 4000) { - - /*-- - Use simpleSort(), since the full sorting mechanism - has quite a large constant overhead. - --*/ - if (verbosity >= 4) fprintf ( stderr, " simpleSort ...\n" ); - for (i = 0; i <= last; i++) zptr[i] = i; - firstAttempt = False; - workDone = workLimit = 0; - simpleSort ( 0, last, 0 ); - if (verbosity >= 4) fprintf ( stderr, " simpleSort done.\n" ); - - } else { - - numQSorted = 0; - for (i = 0; i <= 255; i++) bigDone[i] = False; - - if (verbosity >= 4) fprintf ( stderr, " bucket sorting ...\n" ); - - for (i = 0; i <= 65536; i++) ftab[i] = 0; - - c1 = block[-1]; - for (i = 0; i <= last; i++) { - c2 = block[i]; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } - - for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; - - c1 = block[0]; - for (i = 0; i < last; i++) { - c2 = block[i+1]; - j = (c1 << 8) + c2; - c1 = c2; - ftab[j]--; - zptr[ftab[j]] = i; - } - j = (block[last] << 8) + block[0]; - ftab[j]--; - zptr[ftab[j]] = last; - - /*-- - Now ftab contains the first loc of every small bucket. - Calculate the running order, from smallest to largest - big bucket. - --*/ - - for (i = 0; i <= 255; i++) runningOrder[i] = i; - - { - Int32 vv; - Int32 h = 1; - do h = 3 * h + 1; while (h <= 256); - do { - h = h / 3; - for (i = h; i <= 255; i++) { - vv = runningOrder[i]; - j = i; - while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { - runningOrder[j] = runningOrder[j-h]; - j = j - h; - if (j <= (h - 1)) goto zero; - } - zero: - runningOrder[j] = vv; - } - } while (h != 1); - } - - /*-- - The main sorting loop. - --*/ - - for (i = 0; i <= 255; i++) { - - /*-- - Process big buckets, starting with the least full. - --*/ - ss = runningOrder[i]; - - /*-- - Complete the big bucket [ss] by quicksorting - any unsorted small buckets [ss, j]. Hopefully - previous pointer-scanning phases have already - completed many of the small buckets [ss, j], so - we don't have to sort them at all. - --*/ - for (j = 0; j <= 255; j++) { - sb = (ss << 8) + j; - if ( ! (ftab[sb] & SETMASK) ) { - Int32 lo = ftab[sb] & CLEARMASK; - Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; - if (hi > lo) { - if (verbosity >= 4) - fprintf ( stderr, - " qsort [0x%x, 0x%x] done %d this %d\n", - ss, j, numQSorted, hi - lo + 1 ); - qSort3 ( lo, hi, 2 ); - numQSorted += ( hi - lo + 1 ); - if (workDone > workLimit && firstAttempt) return; - } - ftab[sb] |= SETMASK; - } - } - - /*-- - The ss big bucket is now done. Record this fact, - and update the quadrant descriptors. Remember to - update quadrants in the overshoot area too, if - necessary. The "if (i < 255)" test merely skips - this updating for the last bucket processed, since - updating for the last bucket is pointless. - --*/ - bigDone[ss] = True; - - if (i < 255) { - Int32 bbStart = ftab[ss << 8] & CLEARMASK; - Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; - Int32 shifts = 0; - - while ((bbSize >> shifts) > 65534) shifts++; - - for (j = 0; j < bbSize; j++) { - Int32 a2update = zptr[bbStart + j]; - UInt16 qVal = (UInt16)(j >> shifts); - quadrant[a2update] = qVal; - if (a2update < NUM_OVERSHOOT_BYTES) - quadrant[a2update + last + 1] = qVal; - } - - if (! ( ((bbSize-1) >> shifts) <= 65535 )) panic ( "sortIt" ); - } - - /*-- - Now scan this big bucket so as to synthesise the - sorted order for small buckets [t, ss] for all t != ss. - --*/ - for (j = 0; j <= 255; j++) - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; - - for (j = ftab[ss << 8] & CLEARMASK; - j < (ftab[(ss+1) << 8] & CLEARMASK); - j++) { - c1 = block[zptr[j]-1]; - if ( ! bigDone[c1] ) { - zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1; - copy[c1] ++; - } - } - - for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; - } - if (verbosity >= 4) - fprintf ( stderr, " %d pointers, %d sorted, %d scanned\n", - last+1, numQSorted, (last+1) - numQSorted ); - } -} - - -/*---------------------------------------------------*/ -/*--- Stuff for randomising repetitive blocks ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------*/ -Int32 rNums[512] = { - 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, - 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, - 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, - 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, - 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, - 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, - 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, - 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, - 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, - 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, - 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, - 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, - 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, - 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, - 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, - 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, - 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, - 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, - 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, - 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, - 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, - 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, - 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, - 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, - 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, - 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, - 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, - 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, - 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, - 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, - 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, - 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, - 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, - 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, - 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, - 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, - 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, - 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, - 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, - 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, - 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, - 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, - 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, - 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, - 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, - 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, - 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, - 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, - 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, - 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, - 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, - 936, 638 -}; - - -#define RAND_DECLS \ - Int32 rNToGo = 0; \ - Int32 rTPos = 0; \ - -#define RAND_MASK ((rNToGo == 1) ? 1 : 0) - -#define RAND_UPD_MASK \ - if (rNToGo == 0) { \ - rNToGo = rNums[rTPos]; \ - rTPos++; if (rTPos == 512) rTPos = 0; \ - } \ - rNToGo--; - - - -/*---------------------------------------------------*/ -/*--- The Reversible Transformation (tm) ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------*/ -void randomiseBlock ( void ) -{ - Int32 i; - RAND_DECLS; - for (i = 0; i < 256; i++) inUse[i] = False; - - for (i = 0; i <= last; i++) { - RAND_UPD_MASK; - block[i] ^= RAND_MASK; - inUse[block[i]] = True; - } -} - - -/*---------------------------------------------*/ -void doReversibleTransformation ( void ) -{ - Int32 i; - - if (verbosity >= 2) fprintf ( stderr, "\n" ); - - workLimit = workFactor * last; - workDone = 0; - blockRandomised = False; - firstAttempt = True; - - sortIt (); - - if (verbosity >= 3) - fprintf ( stderr, " %d work, %d block, ratio %5.2f\n", - workDone, last, (float)workDone / (float)(last) ); - - if (workDone > workLimit && firstAttempt) { - if (verbosity >= 2) - fprintf ( stderr, " sorting aborted; randomising block\n" ); - randomiseBlock (); - workLimit = workDone = 0; - blockRandomised = True; - firstAttempt = False; - sortIt(); - if (verbosity >= 3) - fprintf ( stderr, " %d work, %d block, ratio %f\n", - workDone, last, (float)workDone / (float)(last) ); - } - - origPtr = -1; - for (i = 0; i <= last; i++) - if (zptr[i] == 0) - { origPtr = i; break; }; - - if (origPtr == -1) panic ( "doReversibleTransformation" ); -} - - -/*---------------------------------------------*/ - -INLINE Int32 indexIntoF ( Int32 indx, Int32 *cftab ) -{ - Int32 nb, na, mid; - nb = 0; - na = 256; - do { - mid = (nb + na) >> 1; - if (indx >= cftab[mid]) nb = mid; else na = mid; - } - while (na - nb != 1); - return nb; -} - - -#define GET_SMALL(cccc) \ - \ - cccc = indexIntoF ( tPos, cftab ); \ - tPos = GET_LL(tPos); - - -void undoReversibleTransformation_small ( FILE* dst ) -{ - Int32 cftab[257], cftabAlso[257]; - Int32 i, j, tmp, tPos; - UChar ch; - - /*-- - We assume here that the global array unzftab will - already be holding the frequency counts for - ll8[0 .. last]. - --*/ - - /*-- Set up cftab to facilitate generation of indexIntoF --*/ - cftab[0] = 0; - for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1]; - for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1]; - - /*-- Make a copy of it, used in generation of T --*/ - for (i = 0; i <= 256; i++) cftabAlso[i] = cftab[i]; - - /*-- compute the T vector --*/ - for (i = 0; i <= last; i++) { - ch = (UChar)ll16[i]; - SET_LL(i, cftabAlso[ch]); - cftabAlso[ch]++; - } - - /*-- - Compute T^(-1) by pointer reversal on T. This is rather - subtle, in that, if the original block was two or more - (in general, N) concatenated copies of the same thing, - the T vector will consist of N cycles, each of length - blocksize / N, and decoding will involve traversing one - of these cycles N times. Which particular cycle doesn't - matter -- they are all equivalent. The tricky part is to - make sure that the pointer reversal creates a correct - reversed cycle for us to traverse. So, the code below - simply reverses whatever cycle origPtr happens to fall into, - without regard to the cycle length. That gives one reversed - cycle, which for normal blocks, is the entire block-size long. - For repeated blocks, it will be interspersed with the other - N-1 non-reversed cycles. Providing that the F-subscripting - phase which follows starts at origPtr, all then works ok. - --*/ - i = origPtr; - j = GET_LL(i); - do { - tmp = GET_LL(j); - SET_LL(j, i); - i = j; - j = tmp; - } - while (i != origPtr); - - /*-- - We recreate the original by subscripting F through T^(-1). - The run-length-decoder below requires characters incrementally, - so tPos is set to a starting value, and is updated by - the GET_SMALL macro. - --*/ - tPos = origPtr; - - /*-------------------------------------------------*/ - /*-- - This is pretty much a verbatim copy of the - run-length decoder present in the distribution - bzip-0.21; it has to be here to avoid creating - block[] as an intermediary structure. As in 0.21, - this code derives from some sent to me by - Christian von Roques. - - It allows dst==NULL, so as to support the test (-t) - option without slowing down the fast decompression - code. - --*/ - { - IntNative retVal; - Int32 i2, count, chPrev, ch2; - UInt32 localCrc; - - count = 0; - i2 = 0; - ch2 = 256; /*-- not a char and not EOF --*/ - localCrc = getGlobalCRC(); - - { - RAND_DECLS; - while ( i2 <= last ) { - chPrev = ch2; - GET_SMALL(ch2); - if (blockRandomised) { - RAND_UPD_MASK; - ch2 ^= (UInt32)RAND_MASK; - } - i2++; - - if (dst) - retVal = putc ( ch2, dst ); - - UPDATE_CRC ( localCrc, (UChar)ch2 ); - - if (ch2 != chPrev) { - count = 1; - } else { - count++; - if (count >= 4) { - Int32 j2; - UChar z; - GET_SMALL(z); - if (blockRandomised) { - RAND_UPD_MASK; - z ^= RAND_MASK; - } - for (j2 = 0; j2 < (Int32)z; j2++) { - if (dst) retVal = putc (ch2, dst); - UPDATE_CRC ( localCrc, (UChar)ch2 ); - } - i2++; - count = 0; - } - } - } - } +# define SET_BINARY_MODE(fd) /**/ - setGlobalCRC ( localCrc ); - } - /*-- end of the in-line run-length-decoder. --*/ -} -#undef GET_SMALL +# ifdef __GNUC__ +# define NORETURN __attribute__ ((noreturn)) +# else +# define NORETURN /**/ +# endif +#endif -/*---------------------------------------------*/ -#define GET_FAST(cccc) \ - \ - cccc = ll8[tPos]; \ - tPos = tt[tPos]; +#if BZ_LCCWIN32 +# include +# include +# include +# define NORETURN /**/ +# define PATH_SEP '\\' +# define MY_LSTAT _stat +# define MY_STAT _stat +# define MY_S_IFREG(x) ((x) & _S_IFREG) -void undoReversibleTransformation_fast ( FILE* dst ) -{ - Int32 cftab[257]; - Int32 i, tPos; - UChar ch; - - /*-- - We assume here that the global array unzftab will - already be holding the frequency counts for - ll8[0 .. last]. - --*/ - - /*-- Set up cftab to facilitate generation of T^(-1) --*/ - cftab[0] = 0; - for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1]; - for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1]; - - /*-- compute the T^(-1) vector --*/ - for (i = 0; i <= last; i++) { - ch = (UChar)ll8[i]; - tt[cftab[ch]] = i; - cftab[ch]++; - } +# if 0 + /*-- lcc-win32 seems to expand wildcards itself --*/ +# define APPEND_FILESPEC(root, spec) \ + do { \ + if ((spec)[0] == '-') { \ + root = snocString((root), (spec)); \ + } else { \ + struct _finddata_t c_file; \ + long hFile; \ + hFile = _findfirst((spec), &c_file); \ + if ( hFile == -1L ) { \ + root = snocString ((root), (spec)); \ + } else { \ + int anInt = 0; \ + while ( anInt == 0 ) { \ + root = snocString((root), \ + &c_file.name[0]); \ + anInt = _findnext(hFile, &c_file); \ + } \ + } \ + } \ + } while ( 0 ) +# else +# define APPEND_FILESPEC(root, name) \ + root = snocString ((root), (name)) +# endif - /*-- - We recreate the original by subscripting L through T^(-1). - The run-length-decoder below requires characters incrementally, - so tPos is set to a starting value, and is updated by - the GET_FAST macro. - --*/ - tPos = tt[origPtr]; - - /*-------------------------------------------------*/ - /*-- - This is pretty much a verbatim copy of the - run-length decoder present in the distribution - bzip-0.21; it has to be here to avoid creating - block[] as an intermediary structure. As in 0.21, - this code derives from some sent to me by - Christian von Roques. - --*/ - { - IntNative retVal; - Int32 i2, count, chPrev, ch2; - UInt32 localCrc; - - count = 0; - i2 = 0; - ch2 = 256; /*-- not a char and not EOF --*/ - localCrc = getGlobalCRC(); - - if (blockRandomised) { - RAND_DECLS; - while ( i2 <= last ) { - chPrev = ch2; - GET_FAST(ch2); - RAND_UPD_MASK; - ch2 ^= (UInt32)RAND_MASK; - i2++; - - retVal = putc ( ch2, dst ); - UPDATE_CRC ( localCrc, (UChar)ch2 ); - - if (ch2 != chPrev) { - count = 1; - } else { - count++; - if (count >= 4) { - Int32 j2; - UChar z; - GET_FAST(z); - RAND_UPD_MASK; - z ^= RAND_MASK; - for (j2 = 0; j2 < (Int32)z; j2++) { - retVal = putc (ch2, dst); - UPDATE_CRC ( localCrc, (UChar)ch2 ); - } - i2++; - count = 0; - } - } - } +# define SET_BINARY_MODE(fd) \ + do { \ + int retVal = setmode ( fileno ( fd ), \ + O_BINARY ); \ + ERROR_IF_MINUS_ONE ( retVal ); \ + } while ( 0 ) - } else { - - while ( i2 <= last ) { - chPrev = ch2; - GET_FAST(ch2); - i2++; - - retVal = putc ( ch2, dst ); - UPDATE_CRC ( localCrc, (UChar)ch2 ); - - if (ch2 != chPrev) { - count = 1; - } else { - count++; - if (count >= 4) { - Int32 j2; - UChar z; - GET_FAST(z); - for (j2 = 0; j2 < (Int32)z; j2++) { - retVal = putc (ch2, dst); - UPDATE_CRC ( localCrc, (UChar)ch2 ); - } - i2++; - count = 0; - } - } - } +#endif - } /*-- if (blockRandomised) --*/ - setGlobalCRC ( localCrc ); - } - /*-- end of the in-line run-length-decoder. --*/ -} -#undef GET_FAST +/*---------------------------------------------*/ +/*-- + Some more stuff for all platforms :-) +--*/ + +typedef char Char; +typedef unsigned char Bool; +typedef unsigned char UChar; +typedef int Int32; +typedef unsigned int UInt32; +typedef short Int16; +typedef unsigned short UInt16; + +#define True ((Bool)1) +#define False ((Bool)0) + +/*-- + IntNative is your platform's `native' int size. + Only here to avoid probs with 64-bit platforms. +--*/ +typedef int IntNative; /*---------------------------------------------------*/ -/*--- The block loader and RLEr ---*/ +/*--- Misc (file handling) data decls ---*/ /*---------------------------------------------------*/ -/*---------------------------------------------*/ -/* Top 16: run length, 1 to 255. -* Lower 16: the char, or MY_EOF for EOF. -*/ +Int32 verbosity; +Bool keepInputFiles, smallMode; +Bool forceOverwrite, testFailsExist; +Int32 numFileNames, numFilesProcessed, blockSize100k; -#define MY_EOF 257 -INLINE Int32 getRLEpair ( FILE* src ) -{ - Int32 runLength; - IntNative ch, chLatest; +/*-- source modes; F==file, I==stdin, O==stdout --*/ +#define SM_I2O 1 +#define SM_F2O 2 +#define SM_F2F 3 - ch = getc ( src ); +/*-- operation modes --*/ +#define OM_Z 1 +#define OM_UNZ 2 +#define OM_TEST 3 - /*--- Because I have no idea what kind of a value EOF is. ---*/ - if (ch == EOF) { - ERROR_IF_NOT_ZERO ( ferror(src)); - return (1 << 16) | MY_EOF; - } +Int32 opMode; +Int32 srcMode; - runLength = 0; - do { - chLatest = getc ( src ); - runLength++; - bytesIn++; - } - while (ch == chLatest && runLength < 255); +#define FILE_NAME_LEN 1034 + +Int32 longestFileName; +Char inName[FILE_NAME_LEN]; +Char outName[FILE_NAME_LEN]; +Char *progName; +Char progNameReally[FILE_NAME_LEN]; +FILE *outputHandleJustInCase; +Int32 workFactor; + +void panic ( Char* ) NORETURN; +void ioError ( void ) NORETURN; +void outOfMemory ( void ) NORETURN; +void blockOverrun ( void ) NORETURN; +void badBlockHeader ( void ) NORETURN; +void badBGLengths ( void ) NORETURN; +void crcError ( void ) NORETURN; +void bitStreamEOF ( void ) NORETURN; +void cleanUpAndFail ( Int32 ) NORETURN; +void compressedStreamEOF ( void ) NORETURN; + +void copyFileName ( Char*, Char* ); +void* myMalloc ( Int32 ); - if ( chLatest != EOF ) { - if ( ungetc ( chLatest, src ) == EOF ) - panic ( "getRLEpair: ungetc failed" ); - } else { - ERROR_IF_NOT_ZERO ( ferror(src) ); - } - /*--- Conditional is just a speedup hack. ---*/ - if (runLength == 1) { - UPDATE_CRC ( globalCrc, (UChar)ch ); - return (1 << 16) | ch; - } else { - Int32 i; - for (i = 1; i <= runLength; i++) - UPDATE_CRC ( globalCrc, (UChar)ch ); - return (runLength << 16) | ch; - } -} +/*---------------------------------------------------*/ +/*--- Processing of complete files and streams ---*/ +/*---------------------------------------------------*/ /*---------------------------------------------*/ -void loadAndRLEsource ( FILE* src ) +Bool myfeof ( FILE* f ) { - Int32 ch, allowableBlockSize, i; - - last = -1; - ch = 0; - - for (i = 0; i < 256; i++) inUse[i] = False; - - /*--- 20 is just a paranoia constant ---*/ - allowableBlockSize = 100000 * blockSize100k - 20; - - while (last < allowableBlockSize && ch != MY_EOF) { - Int32 rlePair, runLen; - rlePair = getRLEpair ( src ); - ch = rlePair & 0xFFFF; - runLen = (UInt32)rlePair >> 16; - - #if DEBUG - assert (runLen >= 1 && runLen <= 255); - #endif - - if (ch != MY_EOF) { - inUse[ch] = True; - switch (runLen) { - case 1: - last++; block[last] = (UChar)ch; break; - case 2: - last++; block[last] = (UChar)ch; - last++; block[last] = (UChar)ch; break; - case 3: - last++; block[last] = (UChar)ch; - last++; block[last] = (UChar)ch; - last++; block[last] = (UChar)ch; break; - default: - inUse[runLen-4] = True; - last++; block[last] = (UChar)ch; - last++; block[last] = (UChar)ch; - last++; block[last] = (UChar)ch; - last++; block[last] = (UChar)ch; - last++; block[last] = (UChar)(runLen-4); break; - } - } - } + Int32 c = fgetc ( f ); + if (c == EOF) return True; + ungetc ( c, f ); + return False; } -/*---------------------------------------------------*/ -/*--- Processing of complete files and streams ---*/ -/*---------------------------------------------------*/ - /*---------------------------------------------*/ void compressStream ( FILE *stream, FILE *zStream ) { - IntNative retVal; - UInt32 blockCRC, combinedCRC; - Int32 blockNo; - - blockNo = 0; - bytesIn = 0; - bytesOut = 0; - nBlocksRandomised = 0; + BZFILE* bzf = NULL; + UChar ibuf[5000]; + Int32 nIbuf; + UInt32 nbytes_in, nbytes_out; + Int32 bzerr, bzerr_dummy, ret; SET_BINARY_MODE(stream); SET_BINARY_MODE(zStream); - ERROR_IF_NOT_ZERO ( ferror(stream) ); - ERROR_IF_NOT_ZERO ( ferror(zStream) ); + if (ferror(stream)) goto errhandler_io; + if (ferror(zStream)) goto errhandler_io; - bsSetStream ( zStream, True ); - - /*--- Write `magic' bytes B and Z, - then h indicating file-format == huffmanised, - followed by a digit indicating blockSize100k. - ---*/ - bsPutUChar ( 'B' ); - bsPutUChar ( 'Z' ); - bsPutUChar ( 'h' ); - bsPutUChar ( '0' + blockSize100k ); - - combinedCRC = 0; + bzf = bzWriteOpen ( &bzerr, zStream, + blockSize100k, verbosity, workFactor ); + if (bzerr != BZ_OK) goto errhandler; if (verbosity >= 2) fprintf ( stderr, "\n" ); while (True) { - blockNo++; - initialiseCRC (); - loadAndRLEsource ( stream ); - ERROR_IF_NOT_ZERO ( ferror(stream) ); - if (last == -1) break; - - blockCRC = getFinalCRC (); - combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31); - combinedCRC ^= blockCRC; - - if (verbosity >= 2) - fprintf ( stderr, " block %d: crc = 0x%8x, combined CRC = 0x%8x, size = %d", - blockNo, blockCRC, combinedCRC, last+1 ); - - /*-- sort the block and establish posn of original string --*/ - doReversibleTransformation (); - - /*-- - A 6-byte block header, the value chosen arbitrarily - as 0x314159265359 :-). A 32 bit value does not really - give a strong enough guarantee that the value will not - appear by chance in the compressed datastream. Worst-case - probability of this event, for a 900k block, is about - 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. - For a compressed file of size 100Gb -- about 100000 blocks -- - only a 48-bit marker will do. NB: normal compression/ - decompression do *not* rely on these statistical properties. - They are only important when trying to recover blocks from - damaged files. - --*/ - bsPutUChar ( 0x31 ); bsPutUChar ( 0x41 ); - bsPutUChar ( 0x59 ); bsPutUChar ( 0x26 ); - bsPutUChar ( 0x53 ); bsPutUChar ( 0x59 ); - - /*-- Now the block's CRC, so it is in a known place. --*/ - bsPutUInt32 ( blockCRC ); - - /*-- Now a single bit indicating randomisation. --*/ - if (blockRandomised) { - bsW(1,1); nBlocksRandomised++; - } else - bsW(1,0); - - /*-- Finally, block's contents proper. --*/ - moveToFrontCodeAndSend (); - - ERROR_IF_NOT_ZERO ( ferror(zStream) ); - } - - if (verbosity >= 2 && nBlocksRandomised > 0) - fprintf ( stderr, " %d block%s needed randomisation\n", - nBlocksRandomised, - nBlocksRandomised == 1 ? "" : "s" ); + if (myfeof(stream)) break; + nIbuf = fread ( ibuf, sizeof(UChar), 5000, stream ); + if (ferror(stream)) goto errhandler_io; + if (nIbuf > 0) bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf ); + if (bzerr != BZ_OK) goto errhandler; - /*-- - Now another magic 48-bit number, 0x177245385090, to - indicate the end of the last block. (sqrt(pi), if - you want to know. I did want to use e, but it contains - too much repetition -- 27 18 28 18 28 46 -- for me - to feel statistically comfortable. Call me paranoid.) - --*/ - - bsPutUChar ( 0x17 ); bsPutUChar ( 0x72 ); - bsPutUChar ( 0x45 ); bsPutUChar ( 0x38 ); - bsPutUChar ( 0x50 ); bsPutUChar ( 0x90 ); - - bsPutUInt32 ( combinedCRC ); - if (verbosity >= 2) - fprintf ( stderr, " final combined CRC = 0x%x\n ", combinedCRC ); - - /*-- Close the files in an utterly paranoid way. --*/ - bsFinishedWithStream (); + } - ERROR_IF_NOT_ZERO ( ferror(zStream) ); - retVal = fflush ( zStream ); - ERROR_IF_EOF ( retVal ); - retVal = fclose ( zStream ); - ERROR_IF_EOF ( retVal ); + bzWriteClose ( &bzerr, bzf, 0, &nbytes_in, &nbytes_out ); + if (bzerr != BZ_OK) goto errhandler; - ERROR_IF_NOT_ZERO ( ferror(stream) ); - retVal = fclose ( stream ); - ERROR_IF_EOF ( retVal ); + if (ferror(zStream)) goto errhandler_io; + ret = fflush ( zStream ); + if (ret == EOF) goto errhandler_io; + if (zStream != stdout) { + ret = fclose ( zStream ); + if (ret == EOF) goto errhandler_io; + } + if (ferror(stream)) goto errhandler_io; + ret = fclose ( stream ); + if (ret == EOF) goto errhandler_io; - if (bytesIn == 0) bytesIn = 1; - if (bytesOut == 0) bytesOut = 1; + if (nbytes_in == 0) nbytes_in = 1; if (verbosity >= 1) fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, " "%5.2f%% saved, %d in, %d out.\n", - (float)bytesIn / (float)bytesOut, - (8.0 * (float)bytesOut) / (float)bytesIn, - 100.0 * (1.0 - (float)bytesOut / (float)bytesIn), - bytesIn, - bytesOut + (float)nbytes_in / (float)nbytes_out, + (8.0 * (float)nbytes_out) / (float)nbytes_in, + 100.0 * (1.0 - (float)nbytes_out / (float)nbytes_in), + nbytes_in, + nbytes_out ); + + return; + + errhandler: + bzWriteClose ( &bzerr_dummy, bzf, 1, &nbytes_in, &nbytes_out ); + switch (bzerr) { + case BZ_MEM_ERROR: + outOfMemory (); + case BZ_IO_ERROR: + errhandler_io: + ioError(); break; + default: + panic ( "compress:unexpected error" ); + } + + panic ( "compress:end" ); + /*notreached*/ } + /*---------------------------------------------*/ Bool uncompressStream ( FILE *zStream, FILE *stream ) { - UChar magic1, magic2, magic3, magic4; - UChar magic5, magic6; - UInt32 storedBlockCRC, storedCombinedCRC; - UInt32 computedBlockCRC, computedCombinedCRC; - Int32 currBlockNo; - IntNative retVal; + BZFILE* bzf = NULL; + Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i; + UChar obuf[5000]; + UChar unused[BZ_MAX_UNUSED]; + Int32 nUnused; + UChar* unusedTmp; + + nUnused = 0; + streamNo = 0; SET_BINARY_MODE(stream); SET_BINARY_MODE(zStream); - ERROR_IF_NOT_ZERO ( ferror(stream) ); - ERROR_IF_NOT_ZERO ( ferror(zStream) ); - - bsSetStream ( zStream, False ); - - /*-- - A bad magic number is `recoverable from'; - return with False so the caller skips the file. - --*/ - magic1 = bsGetUChar (); - magic2 = bsGetUChar (); - magic3 = bsGetUChar (); - magic4 = bsGetUChar (); - if (magic1 != 'B' || - magic2 != 'Z' || - magic3 != 'h' || - magic4 < '1' || - magic4 > '9') { - bsFinishedWithStream(); - retVal = fclose ( stream ); - ERROR_IF_EOF ( retVal ); - return False; - } - - setDecompressStructureSizes ( magic4 - '0' ); - computedCombinedCRC = 0; - - if (verbosity >= 2) fprintf ( stderr, "\n " ); - currBlockNo = 0; + if (ferror(stream)) goto errhandler_io; + if (ferror(zStream)) goto errhandler_io; while (True) { - magic1 = bsGetUChar (); - magic2 = bsGetUChar (); - magic3 = bsGetUChar (); - magic4 = bsGetUChar (); - magic5 = bsGetUChar (); - magic6 = bsGetUChar (); - if (magic1 == 0x17 && magic2 == 0x72 && - magic3 == 0x45 && magic4 == 0x38 && - magic5 == 0x50 && magic6 == 0x90) break; - - if (magic1 != 0x31 || magic2 != 0x41 || - magic3 != 0x59 || magic4 != 0x26 || - magic5 != 0x53 || magic6 != 0x59) badBlockHeader(); - - storedBlockCRC = bsGetUInt32 (); - - if (bsR(1) == 1) - blockRandomised = True; else - blockRandomised = False; - - currBlockNo++; - if (verbosity >= 2) - fprintf ( stderr, "[%d: huff+mtf ", currBlockNo ); - getAndMoveToFrontDecode (); - ERROR_IF_NOT_ZERO ( ferror(zStream) ); - - initialiseCRC(); - if (verbosity >= 2) fprintf ( stderr, "rt+rld" ); - if (smallMode) - undoReversibleTransformation_small ( stream ); - else - undoReversibleTransformation_fast ( stream ); - - ERROR_IF_NOT_ZERO ( ferror(stream) ); - computedBlockCRC = getFinalCRC(); - if (verbosity >= 3) - fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC ); - if (verbosity >= 2) fprintf ( stderr, "] " ); + bzf = bzReadOpen ( + &bzerr, zStream, verbosity, + (int)smallMode, unused, nUnused + ); + if (bzf == NULL || bzerr != BZ_OK) goto errhandler; + streamNo++; + + while (bzerr == BZ_OK) { + nread = bzRead ( &bzerr, bzf, obuf, 5000 ); + if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler; + if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0) + fwrite ( obuf, sizeof(UChar), nread, stream ); + if (ferror(stream)) goto errhandler_io; + } + if (bzerr != BZ_STREAM_END) goto errhandler; - /*-- A bad CRC is considered a fatal error. --*/ - if (storedBlockCRC != computedBlockCRC) - crcError ( storedBlockCRC, computedBlockCRC ); + bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); + if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); - computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31); - computedCombinedCRC ^= computedBlockCRC; - }; + for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; - if (verbosity >= 2) fprintf ( stderr, "\n " ); + bzReadClose ( &bzerr, bzf ); + if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); - storedCombinedCRC = bsGetUInt32 (); - if (verbosity >= 2) - fprintf ( stderr, - "combined CRCs: stored = 0x%x, computed = 0x%x\n ", - storedCombinedCRC, computedCombinedCRC ); - if (storedCombinedCRC != computedCombinedCRC) - crcError ( storedCombinedCRC, computedCombinedCRC ); + if (nUnused == 0 && myfeof(zStream)) break; + } - bsFinishedWithStream (); - ERROR_IF_NOT_ZERO ( ferror(zStream) ); - retVal = fclose ( zStream ); - ERROR_IF_EOF ( retVal ); + if (ferror(zStream)) goto errhandler_io; + ret = fclose ( zStream ); + if (ret == EOF) goto errhandler_io; - ERROR_IF_NOT_ZERO ( ferror(stream) ); - retVal = fflush ( stream ); - ERROR_IF_NOT_ZERO ( retVal ); + if (ferror(stream)) goto errhandler_io; + ret = fflush ( stream ); + if (ret != 0) goto errhandler_io; if (stream != stdout) { - retVal = fclose ( stream ); - ERROR_IF_EOF ( retVal ); + ret = fclose ( stream ); + if (ret == EOF) goto errhandler_io; } + if (verbosity >= 2) fprintf ( stderr, "\n " ); return True; + + errhandler: + bzReadClose ( &bzerr_dummy, bzf ); + switch (bzerr) { + case BZ_IO_ERROR: + errhandler_io: + ioError(); break; + case BZ_DATA_ERROR: + crcError(); + case BZ_MEM_ERROR: + outOfMemory(); + case BZ_UNEXPECTED_EOF: + compressedStreamEOF(); + case BZ_DATA_ERROR_MAGIC: + if (streamNo == 1) { + return False; + } else { + fprintf ( stderr, + "\n%s: %s: trailing garbage after EOF ignored\n", + progName, inName ); + return True; + } + default: + panic ( "decompress:unexpected error" ); + } + + panic ( "decompress:end" ); + return True; /*notreached*/ } /*---------------------------------------------*/ Bool testStream ( FILE *zStream ) { - UChar magic1, magic2, magic3, magic4; - UChar magic5, magic6; - UInt32 storedBlockCRC, storedCombinedCRC; - UInt32 computedBlockCRC, computedCombinedCRC; - Int32 currBlockNo; - IntNative retVal; + BZFILE* bzf = NULL; + Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i; + UChar obuf[5000]; + UChar unused[BZ_MAX_UNUSED]; + Int32 nUnused; + UChar* unusedTmp; + + nUnused = 0; + streamNo = 0; SET_BINARY_MODE(zStream); - ERROR_IF_NOT_ZERO ( ferror(zStream) ); - - bsSetStream ( zStream, False ); - - magic1 = bsGetUChar (); - magic2 = bsGetUChar (); - magic3 = bsGetUChar (); - magic4 = bsGetUChar (); - if (magic1 != 'B' || - magic2 != 'Z' || - magic3 != 'h' || - magic4 < '1' || - magic4 > '9') { - bsFinishedWithStream(); - fclose ( zStream ); - fprintf ( stderr, "\n%s: bad magic number (ie, not created by bzip2)\n", - inName ); - return False; - } + if (ferror(zStream)) goto errhandler_io; - smallMode = True; - setDecompressStructureSizes ( magic4 - '0' ); - computedCombinedCRC = 0; + while (True) { - if (verbosity >= 2) fprintf ( stderr, "\n" ); - currBlockNo = 0; + bzf = bzReadOpen ( + &bzerr, zStream, verbosity, + (int)smallMode, unused, nUnused + ); + if (bzf == NULL || bzerr != BZ_OK) goto errhandler; + streamNo++; - while (True) { - magic1 = bsGetUChar (); - magic2 = bsGetUChar (); - magic3 = bsGetUChar (); - magic4 = bsGetUChar (); - magic5 = bsGetUChar (); - magic6 = bsGetUChar (); - if (magic1 == 0x17 && magic2 == 0x72 && - magic3 == 0x45 && magic4 == 0x38 && - magic5 == 0x50 && magic6 == 0x90) break; - - currBlockNo++; - if (magic1 != 0x31 || magic2 != 0x41 || - magic3 != 0x59 || magic4 != 0x26 || - magic5 != 0x53 || magic6 != 0x59) { - bsFinishedWithStream(); - fclose ( zStream ); - fprintf ( stderr, - "\n%s, block %d: bad header (not == 0x314159265359)\n", - inName, currBlockNo ); - return False; - } - storedBlockCRC = bsGetUInt32 (); - - if (bsR(1) == 1) - blockRandomised = True; else - blockRandomised = False; - - if (verbosity >= 2) - fprintf ( stderr, " block [%d: huff+mtf ", currBlockNo ); - getAndMoveToFrontDecode (); - ERROR_IF_NOT_ZERO ( ferror(zStream) ); - - initialiseCRC(); - if (verbosity >= 2) fprintf ( stderr, "rt+rld" ); - undoReversibleTransformation_small ( NULL ); - - computedBlockCRC = getFinalCRC(); - if (verbosity >= 3) - fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC ); - if (verbosity >= 2) fprintf ( stderr, "] " ); - - if (storedBlockCRC != computedBlockCRC) { - bsFinishedWithStream(); - fclose ( zStream ); - fprintf ( stderr, "\n%s, block %d: computed CRC does not match stored one\n", - inName, currBlockNo ); - return False; + while (bzerr == BZ_OK) { + nread = bzRead ( &bzerr, bzf, obuf, 5000 ); + if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler; } + if (bzerr != BZ_STREAM_END) goto errhandler; - if (verbosity >= 2) fprintf ( stderr, "ok\n" ); - computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31); - computedCombinedCRC ^= computedBlockCRC; - }; + bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); + if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); + + for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; + + bzReadClose ( &bzerr, bzf ); + if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); + if (nUnused == 0 && myfeof(zStream)) break; - storedCombinedCRC = bsGetUInt32 (); - if (verbosity >= 2) - fprintf ( stderr, - " combined CRCs: stored = 0x%x, computed = 0x%x\n ", - storedCombinedCRC, computedCombinedCRC ); - if (storedCombinedCRC != computedCombinedCRC) { - bsFinishedWithStream(); - fclose ( zStream ); - fprintf ( stderr, "\n%s: computed CRC does not match stored one\n", - inName ); - return False; } - bsFinishedWithStream (); - ERROR_IF_NOT_ZERO ( ferror(zStream) ); - retVal = fclose ( zStream ); - ERROR_IF_EOF ( retVal ); + if (ferror(zStream)) goto errhandler_io; + ret = fclose ( zStream ); + if (ret == EOF) goto errhandler_io; + + if (verbosity >= 2) fprintf ( stderr, "\n " ); return True; -} + errhandler: + bzReadClose ( &bzerr_dummy, bzf ); + switch (bzerr) { + case BZ_IO_ERROR: + errhandler_io: + ioError(); break; + case BZ_DATA_ERROR: + fprintf ( stderr, + "\n%s: data integrity (CRC) error in data\n", + inName ); + return False; + case BZ_MEM_ERROR: + outOfMemory(); + case BZ_UNEXPECTED_EOF: + fprintf ( stderr, + "\n%s: file ends unexpectedly\n", + inName ); + return False; + case BZ_DATA_ERROR_MAGIC: + if (streamNo == 1) { + fprintf ( stderr, + "\n%s: bad magic number (ie, not created by bzip2)\n", + inName ); + return False; + } else { + fprintf ( stderr, + "\n%s: %s: trailing garbage after EOF ignored\n", + progName, inName ); + return True; + } + default: + panic ( "test:unexpected error" ); + } + + panic ( "test:end" ); + return True; /*notreached*/ +} /*---------------------------------------------------*/ @@ -3059,8 +592,7 @@ void showFileNames ( void ) fprintf ( stderr, "\tInput file = %s, output file = %s\n", - inName==NULL ? "(null)" : inName, - outName==NULL ? "(null)" : outName + inName, outName ); } @@ -3072,8 +604,7 @@ void cleanUpAndFail ( Int32 ec ) if ( srcMode == SM_F2F && opMode != OM_TEST ) { fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n", - progName, - outName==NULL ? "(null)" : outName ); + progName, outName ); if (outputHandleJustInCase != NULL) fclose ( outputHandleJustInCase ); retVal = remove ( outName ); @@ -3108,11 +639,10 @@ void panic ( Char* s ) /*---------------------------------------------*/ -void badBGLengths ( void ) +void crcError () { fprintf ( stderr, - "\n%s: error when reading background model code lengths,\n" - "\twhich probably means the compressed file is corrupted.\n", + "\n%s: Data integrity error when decompressing.\n", progName ); showFileNames(); cadvise(); @@ -3120,19 +650,6 @@ void badBGLengths ( void ) } -/*---------------------------------------------*/ -void crcError ( UInt32 crcStored, UInt32 crcComputed ) -{ - fprintf ( stderr, - "\n%s: Data integrity error when decompressing.\n" - "\tStored CRC = 0x%x, computed CRC = 0x%x\n", - progName, crcStored, crcComputed ); - showFileNames(); - cadvise(); - cleanUpAndFail( 2 ); -} - - /*---------------------------------------------*/ void compressedStreamEOF ( void ) { @@ -3159,46 +676,6 @@ void ioError ( ) } -/*---------------------------------------------*/ -void blockOverrun () -{ - fprintf ( stderr, - "\n%s: block overrun during decompression,\n" - "\twhich probably means the compressed file\n" - "\tis corrupted.\n", - progName ); - showFileNames(); - cadvise(); - cleanUpAndFail( 2 ); -} - - -/*---------------------------------------------*/ -void badBlockHeader () -{ - fprintf ( stderr, - "\n%s: bad block header in the compressed file,\n" - "\twhich probably means it is corrupted.\n", - progName ); - showFileNames(); - cadvise(); - cleanUpAndFail( 2 ); -} - - -/*---------------------------------------------*/ -void bitStreamEOF () -{ - fprintf ( stderr, - "\n%s: read past the end of compressed data,\n" - "\twhich probably means it is corrupted.\n", - progName ); - showFileNames(); - cadvise(); - cleanUpAndFail( 2 ); -} - - /*---------------------------------------------*/ void mySignalCatcher ( IntNative n ) { @@ -3233,27 +710,11 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n ) /*---------------------------------------------*/ -void uncompressOutOfMemory ( Int32 draw, Int32 blockSize ) -{ - fprintf ( stderr, - "\n%s: Can't allocate enough memory for decompression.\n" - "\tRequested %d bytes for a block size of %d.\n" - "\tTry selecting space-economic decompress (with flag -s)\n" - "\tand failing that, find a machine with more memory.\n", - progName, draw, blockSize ); - showFileNames(); - cleanUpAndFail(1); -} - - -/*---------------------------------------------*/ -void compressOutOfMemory ( Int32 draw, Int32 blockSize ) +void outOfMemory ( void ) { fprintf ( stderr, - "\n%s: Can't allocate enough memory for compression.\n" - "\tRequested %d bytes for a block size of %d.\n" - "\tTry selecting a small block size (with flag -s).\n", - progName, draw, blockSize ); + "\n%s: couldn't allocate enough memory\n", + progName ); showFileNames(); cleanUpAndFail(1); } @@ -3273,6 +734,24 @@ void pad ( Char *s ) } +/*---------------------------------------------*/ +void copyFileName ( Char* to, Char* from ) +{ + if ( strlen(from) > FILE_NAME_LEN-10 ) { + fprintf ( + stderr, + "bzip2: file name\n`%s'\nis suspiciously (> 1024 chars) long.\n" + "Try using a reasonable file name instead. Sorry! :)\n", + from + ); + exit(1); + } + + strncpy(to,from,FILE_NAME_LEN-10); + to[FILE_NAME_LEN-10]='\0'; +} + + /*---------------------------------------------*/ Bool fileExists ( Char* name ) { @@ -3287,7 +766,7 @@ Bool fileExists ( Char* name ) /*-- if in doubt, return True --*/ -Bool notABogStandardFile ( Char* name ) +Bool notAStandardFile ( Char* name ) { IntNative i; struct MY_STAT statBuf; @@ -3300,9 +779,9 @@ Bool notABogStandardFile ( Char* name ) /*---------------------------------------------*/ -void copyDateAndPermissions ( Char *srcName, Char *dstName ) +void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName ) { - #if BZ_UNIX +#if BZ_UNIX IntNative retVal; struct MY_STAT statBuf; struct utimbuf uTimBuf; @@ -3314,12 +793,33 @@ void copyDateAndPermissions ( Char *srcName, Char *dstName ) retVal = chmod ( dstName, statBuf.st_mode ); ERROR_IF_NOT_ZERO ( retVal ); + /* Not sure if this is really portable or not. Causes + problems on my x86-Linux Redhat 5.0 box. Decided + to omit it from 0.9.0. JRS, 27 June 98. If you + understand Unix file semantics and portability issues + well enough to fix this properly, drop me a line + at jseward@acm.org. + retVal = chown ( dstName, statBuf.st_uid, statBuf.st_gid ); + ERROR_IF_NOT_ZERO ( retVal ); + */ retVal = utime ( dstName, &uTimBuf ); ERROR_IF_NOT_ZERO ( retVal ); - #endif +#endif +} + + +/*---------------------------------------------*/ +void setInterimPermissions ( Char *dstName ) +{ +#if BZ_UNIX + IntNative retVal; + retVal = chmod ( dstName, S_IRUSR | S_IWUSR ); + ERROR_IF_NOT_ZERO ( retVal ); +#endif } + /*---------------------------------------------*/ Bool endsInBz2 ( Char* name ) { @@ -3353,13 +853,13 @@ void compress ( Char *name ) panic ( "compress: bad modes\n" ); switch (srcMode) { - case SM_I2O: strcpy ( inName, "(stdin)" ); - strcpy ( outName, "(stdout)" ); break; - case SM_F2F: strcpy ( inName, name ); - strcpy ( outName, name ); + case SM_I2O: copyFileName ( inName, "(stdin)" ); + copyFileName ( outName, "(stdout)" ); break; + case SM_F2F: copyFileName ( inName, name ); + copyFileName ( outName, name ); strcat ( outName, ".bz2" ); break; - case SM_F2O: strcpy ( inName, name ); - strcpy ( outName, "(stdout)" ); break; + case SM_F2O: copyFileName ( inName, name ); + copyFileName ( outName, "(stdout)" ); break; } if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { @@ -3377,12 +877,12 @@ void compress ( Char *name ) progName, inName ); return; } - if ( srcMode != SM_I2O && notABogStandardFile ( inName )) { + if ( srcMode != SM_I2O && notAStandardFile ( inName )) { fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n", progName, inName ); return; } - if ( srcMode == SM_F2F && fileExists ( outName ) ) { + if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) { fprintf ( stderr, "%s: Output file %s already exists, skipping.\n", progName, outName ); return; @@ -3434,6 +934,7 @@ void compress ( Char *name ) progName, inName ); return; }; + setInterimPermissions ( outName ); break; default: @@ -3454,7 +955,7 @@ void compress ( Char *name ) /*--- If there was an I/O error, we won't get here. ---*/ if ( srcMode == SM_F2F ) { - copyDateAndPermissions ( inName, outName ); + copyDatePermissionsAndOwner ( inName, outName ); if ( !keepInputFiles ) { IntNative retVal = remove ( inName ); ERROR_IF_NOT_ZERO ( retVal ); @@ -3474,15 +975,15 @@ void uncompress ( Char *name ) panic ( "uncompress: bad modes\n" ); switch (srcMode) { - case SM_I2O: strcpy ( inName, "(stdin)" ); - strcpy ( outName, "(stdout)" ); break; - case SM_F2F: strcpy ( inName, name ); - strcpy ( outName, name ); + case SM_I2O: copyFileName ( inName, "(stdin)" ); + copyFileName ( outName, "(stdout)" ); break; + case SM_F2F: copyFileName ( inName, name ); + copyFileName ( outName, name ); if (endsInBz2 ( outName )) outName [ strlen ( outName ) - 4 ] = '\0'; break; - case SM_F2O: strcpy ( inName, name ); - strcpy ( outName, "(stdout)" ); break; + case SM_F2O: copyFileName ( inName, name ); + copyFileName ( outName, "(stdout)" ); break; } if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { @@ -3501,12 +1002,12 @@ void uncompress ( Char *name ) progName, inName ); return; } - if ( srcMode != SM_I2O && notABogStandardFile ( inName )) { + if ( srcMode != SM_I2O && notAStandardFile ( inName )) { fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n", progName, inName ); return; } - if ( srcMode == SM_F2F && fileExists ( outName ) ) { + if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) { fprintf ( stderr, "%s: Output file %s already exists, skipping.\n", progName, outName ); return; @@ -3550,6 +1051,7 @@ void uncompress ( Char *name ) progName, inName ); return; }; + setInterimPermissions ( outName ); break; default: @@ -3571,7 +1073,7 @@ void uncompress ( Char *name ) /*--- If there was an I/O error, we won't get here. ---*/ if ( magicNumberOK ) { if ( srcMode == SM_F2F ) { - copyDateAndPermissions ( inName, outName ); + copyDatePermissionsAndOwner ( inName, outName ); if ( !keepInputFiles ) { IntNative retVal = remove ( inName ); ERROR_IF_NOT_ZERO ( retVal ); @@ -3607,11 +1109,11 @@ void testf ( Char *name ) if (name == NULL && srcMode != SM_I2O) panic ( "testf: bad modes\n" ); - strcpy ( outName, "(none)" ); + copyFileName ( outName, "(none)" ); switch (srcMode) { - case SM_I2O: strcpy ( inName, "(stdin)" ); break; - case SM_F2F: strcpy ( inName, name ); break; - case SM_F2O: strcpy ( inName, name ); break; + case SM_I2O: copyFileName ( inName, "(stdin)" ); break; + case SM_F2F: copyFileName ( inName, name ); break; + case SM_F2O: copyFileName ( inName, name ); break; } if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { @@ -3630,7 +1132,7 @@ void testf ( Char *name ) progName, inName ); return; } - if ( srcMode != SM_I2O && notABogStandardFile ( inName )) { + if ( srcMode != SM_I2O && notAStandardFile ( inName )) { fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n", progName, inName ); return; @@ -3684,25 +1186,18 @@ void license ( void ) fprintf ( stderr, "bzip2, a block-sorting file compressor. " - "Version 0.1pl2, 29-Aug-97.\n" + "Version 0.9.0c, 18-Oct-98.\n" " \n" - " Copyright (C) 1996, 1997 by Julian Seward.\n" + " Copyright (C) 1996, 1997, 1998 by Julian Seward.\n" " \n" " This program is free software; you can redistribute it and/or modify\n" - " it under the terms of the GNU General Public License as published by\n" - " the Free Software Foundation; either version 2 of the License, or\n" - " (at your option) any later version.\n" + " it under the terms set out in the LICENSE file, which is included\n" + " in the bzip2-0.9.0c source distribution.\n" " \n" " This program is distributed in the hope that it will be useful,\n" " but WITHOUT ANY WARRANTY; without even the implied warranty of\n" " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" - " GNU General Public License for more details.\n" - " \n" - " You should have received a copy of the GNU General Public License\n" - " along with this program; if not, write to the Free Software\n" - " Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n" - " \n" - " The GNU General Public License is contained in the file LICENSE.\n" + " LICENSE file for more details.\n" " \n" ); } @@ -3714,16 +1209,17 @@ void usage ( Char *fullProgName ) fprintf ( stderr, "bzip2, a block-sorting file compressor. " - "Version 0.1pl2, 29-Aug-97.\n" + "Version 0.9.0c, 18-Oct-98.\n" "\n usage: %s [flags and input files in any order]\n" "\n" " -h --help print this message\n" " -d --decompress force decompression\n" - " -f --compress force compression\n" + " -z --compress force compression\n" + " -k --keep keep (don't delete) input files\n" + " -f --force overwrite existing output filess\n" " -t --test test compressed file integrity\n" " -c --stdout output to standard out\n" " -v --verbose be verbose (a 2nd -v gives more)\n" - " -k --keep keep (don't delete) input files\n" " -L --license display software version & license\n" " -V --version display software version & license\n" " -s --small use less memory (at most 2500k)\n" @@ -3731,15 +1227,16 @@ void usage ( Char *fullProgName ) " --repetitive-fast compress repetitive blocks faster\n" " --repetitive-best compress repetitive blocks better\n" "\n" - " If invoked as `bzip2', the default action is to compress.\n" - " as `bunzip2', the default action is to decompress.\n" + " If invoked as `bzip2', default action is to compress.\n" + " as `bunzip2', default action is to decompress.\n" + " as `bz2cat', default action is to decompress to stdout.\n" "\n" " If no file names are given, bzip2 compresses or decompresses\n" " from standard input to standard output. You can combine\n" - " flags, so `-v -4' means the same as -v4 or -4v, &c.\n" - #if BZ_UNIX + " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n" +#if BZ_UNIX "\n" - #endif +#endif , fullProgName @@ -3776,14 +1273,7 @@ void *myMalloc ( Int32 n ) void* p; p = malloc ( (size_t)n ); - if (p == NULL) { - fprintf ( - stderr, - "%s: `malloc' failed on request for %d bytes.\n", - progName, n - ); - exit ( 1 ); - } + if (p == NULL) outOfMemory (); return p; } @@ -3817,7 +1307,6 @@ Cell *snocString ( Cell *root, Char *name ) } - /*---------------------------------------------*/ #define ISFLAG(s) (strcmp(aa->name, (s))==0) @@ -3829,11 +1318,6 @@ IntNative main ( IntNative argc, Char *argv[] ) Cell *argList; Cell *aa; - - #if DEBUG - fprintf ( stderr, "bzip2: *** compiled with debugging ON ***\n" ); - #endif - /*-- Be really really really paranoid :-) --*/ if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 || sizeof(Int16) != 2 || sizeof(UInt16) != 2 || @@ -3844,7 +1328,7 @@ IntNative main ( IntNative argc, Char *argv[] ) "\tof 4, 2 and 1 bytes to run properly, and they don't.\n" "\tProbably you can fix this by defining them correctly,\n" "\tand recompiling. Bye!\n" ); - exit(1); + exit(3); } @@ -3852,35 +1336,28 @@ IntNative main ( IntNative argc, Char *argv[] ) signal (SIGINT, mySignalCatcher); signal (SIGTERM, mySignalCatcher); signal (SIGSEGV, mySIGSEGVorSIGBUScatcher); - #if BZ_UNIX +#if BZ_UNIX signal (SIGHUP, mySignalCatcher); signal (SIGBUS, mySIGSEGVorSIGBUScatcher); - #endif +#endif /*-- Initialise --*/ outputHandleJustInCase = NULL; - ftab = NULL; - ll4 = NULL; - ll16 = NULL; - ll8 = NULL; - tt = NULL; - block = NULL; - zptr = NULL; smallMode = False; keepInputFiles = False; + forceOverwrite = False; verbosity = 0; blockSize100k = 9; testFailsExist = False; - bsStream = NULL; numFileNames = 0; numFilesProcessed = 0; workFactor = 30; - strcpy ( inName, "(none)" ); - strcpy ( outName, "(none)" ); + copyFileName ( inName, "(none)" ); + copyFileName ( outName, "(none)" ); - strcpy ( progNameReally, argv[0] ); + copyFileName ( progNameReally, argv[0] ); progName = &progNameReally[0]; for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++) if (*tmp == PATH_SEP) progName = tmp + 1; @@ -3903,20 +1380,26 @@ IntNative main ( IntNative argc, Char *argv[] ) } - /*-- Determine what to do (compress/uncompress/test). --*/ + /*-- Determine source modes; flag handling may change this too. --*/ + if (numFileNames == 0) + srcMode = SM_I2O; else srcMode = SM_F2F; + + + /*-- Determine what to do (compress/uncompress/test/cat). --*/ /*-- Note that subsequent flag handling may change this. --*/ opMode = OM_Z; - if ( (strcmp ( "bunzip2", progName ) == 0) || - (strcmp ( "BUNZIP2", progName ) == 0) || - (strcmp ( "bunzip2.exe", progName ) == 0) || - (strcmp ( "BUNZIP2.EXE", progName ) == 0) ) + if ( (strstr ( progName, "unzip" ) != 0) || + (strstr ( progName, "UNZIP" ) != 0) ) opMode = OM_UNZ; - - /*-- Determine source modes; flag handling may change this too. --*/ - if (numFileNames == 0) - srcMode = SM_I2O; else srcMode = SM_F2F; + if ( (strstr ( progName, "z2cat" ) != 0) || + (strstr ( progName, "Z2CAT" ) != 0) || + (strstr ( progName, "zcat" ) != 0) || + (strstr ( progName, "ZCAT" ) != 0) ) { + opMode = OM_UNZ; + srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O; + } /*-- Look at the flags. --*/ @@ -3926,7 +1409,8 @@ IntNative main ( IntNative argc, Char *argv[] ) switch (aa->name[j]) { case 'c': srcMode = SM_F2O; break; case 'd': opMode = OM_UNZ; break; - case 'f': opMode = OM_Z; break; + case 'z': opMode = OM_Z; break; + case 'f': forceOverwrite = True; break; case 't': opMode = OM_TEST; break; case 'k': keepInputFiles = True; break; case 's': smallMode = True; break; @@ -3957,6 +1441,7 @@ IntNative main ( IntNative argc, Char *argv[] ) if (ISFLAG("--stdout")) srcMode = SM_F2O; else if (ISFLAG("--decompress")) opMode = OM_UNZ; else if (ISFLAG("--compress")) opMode = OM_Z; else + if (ISFLAG("--force")) forceOverwrite = True; else if (ISFLAG("--test")) opMode = OM_TEST; else if (ISFLAG("--keep")) keepInputFiles = True; else if (ISFLAG("--small")) smallMode = True; else @@ -3974,14 +1459,9 @@ IntNative main ( IntNative argc, Char *argv[] ) } } + if (verbosity > 4) verbosity = 4; if (opMode == OM_Z && smallMode) blockSize100k = 2; - if (opMode == OM_Z && srcMode == SM_F2O && numFileNames > 1) { - fprintf ( stderr, "%s: I won't compress multiple files to stdout.\n", - progName ); - exit ( 1 ); - } - if (srcMode == SM_F2O && numFileNames == 0) { fprintf ( stderr, "%s: -c expects at least one filename.\n", progName ); @@ -3997,7 +1477,6 @@ IntNative main ( IntNative argc, Char *argv[] ) if (opMode != OM_Z) blockSize100k = 0; if (opMode == OM_Z) { - allocateCompressStructures(); if (srcMode == SM_I2O) compress ( NULL ); else -- cgit v1.2.3-55-g6feb