aboutsummaryrefslogtreecommitdiff
path: root/bzip2.c
diff options
context:
space:
mode:
authorJulian Seward <jseward@acm.org>2001-12-30 22:13:13 +0100
committerJulian Seward <jseward@acm.org>2001-12-30 22:13:13 +0100
commit099d844292f60f9d58914da29e5773204dc55e7a (patch)
tree04bdb38dbcd894d6fdbbc3253e216d029cade5c6 /bzip2.c
parent795b859eee96c700e8f3c3fe68e6a9a39d95797c (diff)
downloadbzip2-099d844292f60f9d58914da29e5773204dc55e7a.tar.gz
bzip2-099d844292f60f9d58914da29e5773204dc55e7a.tar.bz2
bzip2-099d844292f60f9d58914da29e5773204dc55e7a.zip
bzip2-1.0.2bzip2-1.0.2
Diffstat (limited to 'bzip2.c')
-rw-r--r--bzip2.c533
1 files changed, 296 insertions, 237 deletions
diff --git a/bzip2.c b/bzip2.c
index 56adfdc..807f420 100644
--- a/bzip2.c
+++ b/bzip2.c
@@ -7,7 +7,7 @@
7 This file is a part of bzip2 and/or libbzip2, a program and 7 This file is a part of bzip2 and/or libbzip2, a program and
8 library for lossless, block-sorting data compression. 8 library for lossless, block-sorting data compression.
9 9
10 Copyright (C) 1996-2000 Julian R Seward. All rights reserved. 10 Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
11 11
12 Redistribution and use in source and binary forms, with or without 12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions 13 modification, are permitted provided that the following conditions
@@ -113,13 +113,16 @@
113/*-- 113/*--
114 Generic 32-bit Unix. 114 Generic 32-bit Unix.
115 Also works on 64-bit Unix boxes. 115 Also works on 64-bit Unix boxes.
116 This is the default.
116--*/ 117--*/
117#define BZ_UNIX 1 118#define BZ_UNIX 1
118 119
119/*-- 120/*--
120 Win32, as seen by Jacob Navia's excellent 121 Win32, as seen by Jacob Navia's excellent
121 port of (Chris Fraser & David Hanson)'s excellent 122 port of (Chris Fraser & David Hanson)'s excellent
122 lcc compiler. 123 lcc compiler. Or with MS Visual C.
124 This is selected automatically if compiled by a compiler which
125 defines _WIN32, not including the Cygwin GCC.
123--*/ 126--*/
124#define BZ_LCCWIN32 0 127#define BZ_LCCWIN32 0
125 128
@@ -156,6 +159,7 @@
156--*/ 159--*/
157 160
158#if BZ_UNIX 161#if BZ_UNIX
162# include <fcntl.h>
159# include <sys/types.h> 163# include <sys/types.h>
160# include <utime.h> 164# include <utime.h>
161# include <unistd.h> 165# include <unistd.h>
@@ -164,8 +168,9 @@
164 168
165# define PATH_SEP '/' 169# define PATH_SEP '/'
166# define MY_LSTAT lstat 170# define MY_LSTAT lstat
167# define MY_S_IFREG S_ISREG
168# define MY_STAT stat 171# define MY_STAT stat
172# define MY_S_ISREG S_ISREG
173# define MY_S_ISDIR S_ISDIR
169 174
170# define APPEND_FILESPEC(root, name) \ 175# define APPEND_FILESPEC(root, name) \
171 root=snocString((root), (name)) 176 root=snocString((root), (name))
@@ -180,19 +185,23 @@
180# else 185# else
181# define NORETURN /**/ 186# define NORETURN /**/
182# endif 187# endif
188
183# ifdef __DJGPP__ 189# ifdef __DJGPP__
184# include <io.h> 190# include <io.h>
185# include <fcntl.h> 191# include <fcntl.h>
186# undef MY_LSTAT 192# undef MY_LSTAT
193# undef MY_STAT
187# define MY_LSTAT stat 194# define MY_LSTAT stat
195# define MY_STAT stat
188# undef SET_BINARY_MODE 196# undef SET_BINARY_MODE
189# define SET_BINARY_MODE(fd) \ 197# define SET_BINARY_MODE(fd) \
190 do { \ 198 do { \
191 int retVal = setmode ( fileno ( fd ), \ 199 int retVal = setmode ( fileno ( fd ), \
192 O_BINARY ); \ 200 O_BINARY ); \
193 ERROR_IF_MINUS_ONE ( retVal ); \ 201 ERROR_IF_MINUS_ONE ( retVal ); \
194 } while ( 0 ) 202 } while ( 0 )
195# endif 203# endif
204
196# ifdef __CYGWIN__ 205# ifdef __CYGWIN__
197# include <io.h> 206# include <io.h>
198# include <fcntl.h> 207# include <fcntl.h>
@@ -200,11 +209,11 @@
200# define SET_BINARY_MODE(fd) \ 209# define SET_BINARY_MODE(fd) \
201 do { \ 210 do { \
202 int retVal = setmode ( fileno ( fd ), \ 211 int retVal = setmode ( fileno ( fd ), \
203 O_BINARY ); \ 212 O_BINARY ); \
204 ERROR_IF_MINUS_ONE ( retVal ); \ 213 ERROR_IF_MINUS_ONE ( retVal ); \
205 } while ( 0 ) 214 } while ( 0 )
206# endif 215# endif
207#endif 216#endif /* BZ_UNIX */
208 217
209 218
210 219
@@ -217,46 +226,23 @@
217# define PATH_SEP '\\' 226# define PATH_SEP '\\'
218# define MY_LSTAT _stat 227# define MY_LSTAT _stat
219# define MY_STAT _stat 228# define MY_STAT _stat
220# define MY_S_IFREG(x) ((x) & _S_IFREG) 229# define MY_S_ISREG(x) ((x) & _S_IFREG)
230# define MY_S_ISDIR(x) ((x) & _S_IFDIR)
221 231
222# define APPEND_FLAG(root, name) \ 232# define APPEND_FLAG(root, name) \
223 root=snocString((root), (name)) 233 root=snocString((root), (name))
224 234
225# if 0
226 /*-- lcc-win32 seems to expand wildcards itself --*/
227# define APPEND_FILESPEC(root, spec) \
228 do { \
229 if ((spec)[0] == '-') { \
230 root = snocString((root), (spec)); \
231 } else { \
232 struct _finddata_t c_file; \
233 long hFile; \
234 hFile = _findfirst((spec), &c_file); \
235 if ( hFile == -1L ) { \
236 root = snocString ((root), (spec)); \
237 } else { \
238 int anInt = 0; \
239 while ( anInt == 0 ) { \
240 root = snocString((root), \
241 &c_file.name[0]); \
242 anInt = _findnext(hFile, &c_file); \
243 } \
244 } \
245 } \
246 } while ( 0 )
247# else
248# define APPEND_FILESPEC(root, name) \ 235# define APPEND_FILESPEC(root, name) \
249 root = snocString ((root), (name)) 236 root = snocString ((root), (name))
250# endif
251 237
252# define SET_BINARY_MODE(fd) \ 238# define SET_BINARY_MODE(fd) \
253 do { \ 239 do { \
254 int retVal = setmode ( fileno ( fd ), \ 240 int retVal = setmode ( fileno ( fd ), \
255 O_BINARY ); \ 241 O_BINARY ); \
256 ERROR_IF_MINUS_ONE ( retVal ); \ 242 ERROR_IF_MINUS_ONE ( retVal ); \
257 } while ( 0 ) 243 } while ( 0 )
258 244
259#endif 245#endif /* BZ_LCCWIN32 */
260 246
261 247
262/*---------------------------------------------*/ 248/*---------------------------------------------*/
@@ -338,6 +324,7 @@ typedef
338 struct { UChar b[8]; } 324 struct { UChar b[8]; }
339 UInt64; 325 UInt64;
340 326
327
341static 328static
342void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 ) 329void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 )
343{ 330{
@@ -351,6 +338,7 @@ void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 )
351 n->b[0] = (UChar) (lo32 & 0xFF); 338 n->b[0] = (UChar) (lo32 & 0xFF);
352} 339}
353 340
341
354static 342static
355double uInt64_to_double ( UInt64* n ) 343double uInt64_to_double ( UInt64* n )
356{ 344{
@@ -364,77 +352,6 @@ double uInt64_to_double ( UInt64* n )
364 return sum; 352 return sum;
365} 353}
366 354
367static
368void uInt64_add ( UInt64* src, UInt64* dst )
369{
370 Int32 i;
371 Int32 carry = 0;
372 for (i = 0; i < 8; i++) {
373 carry += ( ((Int32)src->b[i]) + ((Int32)dst->b[i]) );
374 dst->b[i] = (UChar)(carry & 0xFF);
375 carry >>= 8;
376 }
377}
378
379static
380void uInt64_sub ( UInt64* src, UInt64* dst )
381{
382 Int32 t, i;
383 Int32 borrow = 0;
384 for (i = 0; i < 8; i++) {
385 t = ((Int32)dst->b[i]) - ((Int32)src->b[i]) - borrow;
386 if (t < 0) {
387 dst->b[i] = (UChar)(t + 256);
388 borrow = 1;
389 } else {
390 dst->b[i] = (UChar)t;
391 borrow = 0;
392 }
393 }
394}
395
396static
397void uInt64_mul ( UInt64* a, UInt64* b, UInt64* r_hi, UInt64* r_lo )
398{
399 UChar sum[16];
400 Int32 ia, ib, carry;
401 for (ia = 0; ia < 16; ia++) sum[ia] = 0;
402 for (ia = 0; ia < 8; ia++) {
403 carry = 0;
404 for (ib = 0; ib < 8; ib++) {
405 carry += ( ((Int32)sum[ia+ib])
406 + ((Int32)a->b[ia]) * ((Int32)b->b[ib]) );
407 sum[ia+ib] = (UChar)(carry & 0xFF);
408 carry >>= 8;
409 }
410 sum[ia+8] = (UChar)(carry & 0xFF);
411 if ((carry >>= 8) != 0) panic ( "uInt64_mul" );
412 }
413
414 for (ia = 0; ia < 8; ia++) r_hi->b[ia] = sum[ia+8];
415 for (ia = 0; ia < 8; ia++) r_lo->b[ia] = sum[ia];
416}
417
418
419static
420void uInt64_shr1 ( UInt64* n )
421{
422 Int32 i;
423 for (i = 0; i < 8; i++) {
424 n->b[i] >>= 1;
425 if (i < 7 && (n->b[i+1] & 1)) n->b[i] |= 0x80;
426 }
427}
428
429static
430void uInt64_shl1 ( UInt64* n )
431{
432 Int32 i;
433 for (i = 7; i >= 0; i--) {
434 n->b[i] <<= 1;
435 if (i > 0 && (n->b[i-1] & 0x80)) n->b[i]++;
436 }
437}
438 355
439static 356static
440Bool uInt64_isZero ( UInt64* n ) 357Bool uInt64_isZero ( UInt64* n )
@@ -445,49 +362,23 @@ Bool uInt64_isZero ( UInt64* n )
445 return 1; 362 return 1;
446} 363}
447 364
448static 365
366/* Divide *n by 10, and return the remainder. */
367static
449Int32 uInt64_qrm10 ( UInt64* n ) 368Int32 uInt64_qrm10 ( UInt64* n )
450{ 369{
451 /* Divide *n by 10, and return the remainder. Long division 370 UInt32 rem, tmp;
452 is difficult, so we cheat and instead multiply by
453 0xCCCC CCCC CCCC CCCD, which is 0.8 (viz, 0.1 << 3).
454 */
455 Int32 i; 371 Int32 i;
456 UInt64 tmp1, tmp2, n_orig, zero_point_eight; 372 rem = 0;
457 373 for (i = 7; i >= 0; i--) {
458 zero_point_eight.b[1] = zero_point_eight.b[2] = 374 tmp = rem * 256 + n->b[i];
459 zero_point_eight.b[3] = zero_point_eight.b[4] = 375 n->b[i] = tmp / 10;
460 zero_point_eight.b[5] = zero_point_eight.b[6] = 376 rem = tmp % 10;
461 zero_point_eight.b[7] = 0xCC; 377 }
462 zero_point_eight.b[0] = 0xCD; 378 return rem;
463
464 n_orig = *n;
465
466 /* divide n by 10,
467 by multiplying by 0.8 and then shifting right 3 times */
468 uInt64_mul ( n, &zero_point_eight, &tmp1, &tmp2 );
469 uInt64_shr1(&tmp1); uInt64_shr1(&tmp1); uInt64_shr1(&tmp1);
470 *n = tmp1;
471
472 /* tmp1 = 8*n, tmp2 = 2*n */
473 uInt64_shl1(&tmp1); uInt64_shl1(&tmp1); uInt64_shl1(&tmp1);
474 tmp2 = *n; uInt64_shl1(&tmp2);
475
476 /* tmp1 = 10*n */
477 uInt64_add ( &tmp2, &tmp1 );
478
479 /* n_orig = n_orig - 10*n */
480 uInt64_sub ( &tmp1, &n_orig );
481
482 /* n_orig should now hold quotient, in range 0 .. 9 */
483 for (i = 7; i >= 1; i--)
484 if (n_orig.b[i] != 0) panic ( "uInt64_qrm10(1)" );
485 if (n_orig.b[0] > 9)
486 panic ( "uInt64_qrm10(2)" );
487
488 return (int)n_orig.b[0];
489} 379}
490 380
381
491/* ... and the Whole Entire Point of all this UInt64 stuff is 382/* ... and the Whole Entire Point of all this UInt64 stuff is
492 so that we can supply the following function. 383 so that we can supply the following function.
493*/ 384*/
@@ -504,7 +395,8 @@ void uInt64_toAscii ( char* outbuf, UInt64* n )
504 nBuf++; 395 nBuf++;
505 } while (!uInt64_isZero(&n_copy)); 396 } while (!uInt64_isZero(&n_copy));
506 outbuf[nBuf] = 0; 397 outbuf[nBuf] = 0;
507 for (i = 0; i < nBuf; i++) outbuf[i] = buf[nBuf-i-1]; 398 for (i = 0; i < nBuf; i++)
399 outbuf[i] = buf[nBuf-i-1];
508} 400}
509 401
510 402
@@ -566,35 +458,38 @@ void compressStream ( FILE *stream, FILE *zStream )
566 if (ret == EOF) goto errhandler_io; 458 if (ret == EOF) goto errhandler_io;
567 if (zStream != stdout) { 459 if (zStream != stdout) {
568 ret = fclose ( zStream ); 460 ret = fclose ( zStream );
461 outputHandleJustInCase = NULL;
569 if (ret == EOF) goto errhandler_io; 462 if (ret == EOF) goto errhandler_io;
570 } 463 }
464 outputHandleJustInCase = NULL;
571 if (ferror(stream)) goto errhandler_io; 465 if (ferror(stream)) goto errhandler_io;
572 ret = fclose ( stream ); 466 ret = fclose ( stream );
573 if (ret == EOF) goto errhandler_io; 467 if (ret == EOF) goto errhandler_io;
574 468
575 if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0)
576 nbytes_in_lo32 = 1;
577
578 if (verbosity >= 1) { 469 if (verbosity >= 1) {
579 Char buf_nin[32], buf_nout[32]; 470 if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0) {
580 UInt64 nbytes_in, nbytes_out; 471 fprintf ( stderr, " no data compressed.\n");
581 double nbytes_in_d, nbytes_out_d; 472 } else {
582 uInt64_from_UInt32s ( &nbytes_in, 473 Char buf_nin[32], buf_nout[32];
583 nbytes_in_lo32, nbytes_in_hi32 ); 474 UInt64 nbytes_in, nbytes_out;
584 uInt64_from_UInt32s ( &nbytes_out, 475 double nbytes_in_d, nbytes_out_d;
585 nbytes_out_lo32, nbytes_out_hi32 ); 476 uInt64_from_UInt32s ( &nbytes_in,
586 nbytes_in_d = uInt64_to_double ( &nbytes_in ); 477 nbytes_in_lo32, nbytes_in_hi32 );
587 nbytes_out_d = uInt64_to_double ( &nbytes_out ); 478 uInt64_from_UInt32s ( &nbytes_out,
588 uInt64_toAscii ( buf_nin, &nbytes_in ); 479 nbytes_out_lo32, nbytes_out_hi32 );
589 uInt64_toAscii ( buf_nout, &nbytes_out ); 480 nbytes_in_d = uInt64_to_double ( &nbytes_in );
590 fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, " 481 nbytes_out_d = uInt64_to_double ( &nbytes_out );
591 "%5.2f%% saved, %s in, %s out.\n", 482 uInt64_toAscii ( buf_nin, &nbytes_in );
592 nbytes_in_d / nbytes_out_d, 483 uInt64_toAscii ( buf_nout, &nbytes_out );
593 (8.0 * nbytes_out_d) / nbytes_in_d, 484 fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
594 100.0 * (1.0 - nbytes_out_d / nbytes_in_d), 485 "%5.2f%% saved, %s in, %s out.\n",
595 buf_nin, 486 nbytes_in_d / nbytes_out_d,
596 buf_nout 487 (8.0 * nbytes_out_d) / nbytes_in_d,
597 ); 488 100.0 * (1.0 - nbytes_out_d / nbytes_in_d),
489 buf_nin,
490 buf_nout
491 );
492 }
598 } 493 }
599 494
600 return; 495 return;
@@ -652,7 +547,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
652 547
653 while (bzerr == BZ_OK) { 548 while (bzerr == BZ_OK) {
654 nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); 549 nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 );
655 if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler; 550 if (bzerr == BZ_DATA_ERROR_MAGIC) goto trycat;
656 if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0) 551 if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0)
657 fwrite ( obuf, sizeof(UChar), nread, stream ); 552 fwrite ( obuf, sizeof(UChar), nread, stream );
658 if (ferror(stream)) goto errhandler_io; 553 if (ferror(stream)) goto errhandler_io;
@@ -668,9 +563,9 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
668 if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); 563 if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" );
669 564
670 if (nUnused == 0 && myfeof(zStream)) break; 565 if (nUnused == 0 && myfeof(zStream)) break;
671
672 } 566 }
673 567
568 closeok:
674 if (ferror(zStream)) goto errhandler_io; 569 if (ferror(zStream)) goto errhandler_io;
675 ret = fclose ( zStream ); 570 ret = fclose ( zStream );
676 if (ret == EOF) goto errhandler_io; 571 if (ret == EOF) goto errhandler_io;
@@ -680,11 +575,26 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
680 if (ret != 0) goto errhandler_io; 575 if (ret != 0) goto errhandler_io;
681 if (stream != stdout) { 576 if (stream != stdout) {
682 ret = fclose ( stream ); 577 ret = fclose ( stream );
578 outputHandleJustInCase = NULL;
683 if (ret == EOF) goto errhandler_io; 579 if (ret == EOF) goto errhandler_io;
684 } 580 }
581 outputHandleJustInCase = NULL;
685 if (verbosity >= 2) fprintf ( stderr, "\n " ); 582 if (verbosity >= 2) fprintf ( stderr, "\n " );
686 return True; 583 return True;
687 584
585 trycat:
586 if (forceOverwrite) {
587 rewind(zStream);
588 while (True) {
589 if (myfeof(zStream)) break;
590 nread = fread ( obuf, sizeof(UChar), 5000, zStream );
591 if (ferror(zStream)) goto errhandler_io;
592 if (nread > 0) fwrite ( obuf, sizeof(UChar), nread, stream );
593 if (ferror(stream)) goto errhandler_io;
594 }
595 goto closeok;
596 }
597
688 errhandler: 598 errhandler:
689 BZ2_bzReadClose ( &bzerr_dummy, bzf ); 599 BZ2_bzReadClose ( &bzerr_dummy, bzf );
690 switch (bzerr) { 600 switch (bzerr) {
@@ -832,7 +742,7 @@ void cadvise ( void )
832 stderr, 742 stderr,
833 "\nIt is possible that the compressed file(s) have become corrupted.\n" 743 "\nIt is possible that the compressed file(s) have become corrupted.\n"
834 "You can use the -tvv option to test integrity of such files.\n\n" 744 "You can use the -tvv option to test integrity of such files.\n\n"
835 "You can use the `bzip2recover' program to *attempt* to recover\n" 745 "You can use the `bzip2recover' program to attempt to recover\n"
836 "data from undamaged sections of corrupted files.\n\n" 746 "data from undamaged sections of corrupted files.\n\n"
837 ); 747 );
838} 748}
@@ -855,28 +765,55 @@ void showFileNames ( void )
855static 765static
856void cleanUpAndFail ( Int32 ec ) 766void cleanUpAndFail ( Int32 ec )
857{ 767{
858 IntNative retVal; 768 IntNative retVal;
769 struct MY_STAT statBuf;
859 770
860 if ( srcMode == SM_F2F 771 if ( srcMode == SM_F2F
861 && opMode != OM_TEST 772 && opMode != OM_TEST
862 && deleteOutputOnInterrupt ) { 773 && deleteOutputOnInterrupt ) {
863 if (noisy) 774
864 fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n", 775 /* Check whether input file still exists. Delete output file
865 progName, outName ); 776 only if input exists to avoid loss of data. Joerg Prante, 5
866 if (outputHandleJustInCase != NULL) 777 January 2002. (JRS 06-Jan-2002: other changes in 1.0.2 mean
867 fclose ( outputHandleJustInCase ); 778 this is less likely to happen. But to be ultra-paranoid, we
868 retVal = remove ( outName ); 779 do the check anyway.) */
869 if (retVal != 0) 780 retVal = MY_STAT ( inName, &statBuf );
781 if (retVal == 0) {
782 if (noisy)
783 fprintf ( stderr,
784 "%s: Deleting output file %s, if it exists.\n",
785 progName, outName );
786 if (outputHandleJustInCase != NULL)
787 fclose ( outputHandleJustInCase );
788 retVal = remove ( outName );
789 if (retVal != 0)
790 fprintf ( stderr,
791 "%s: WARNING: deletion of output file "
792 "(apparently) failed.\n",
793 progName );
794 } else {
870 fprintf ( stderr, 795 fprintf ( stderr,
871 "%s: WARNING: deletion of output file (apparently) failed.\n", 796 "%s: WARNING: deletion of output file suppressed\n",
797 progName );
798 fprintf ( stderr,
799 "%s: since input file no longer exists. Output file\n",
872 progName ); 800 progName );
801 fprintf ( stderr,
802 "%s: `%s' may be incomplete.\n",
803 progName, outName );
804 fprintf ( stderr,
805 "%s: I suggest doing an integrity test (bzip2 -tv)"
806 " of it.\n",
807 progName );
808 }
873 } 809 }
810
874 if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) { 811 if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) {
875 fprintf ( stderr, 812 fprintf ( stderr,
876 "%s: WARNING: some files have not been processed:\n" 813 "%s: WARNING: some files have not been processed:\n"
877 "\t%d specified on command line, %d not processed yet.\n\n", 814 "%s: %d specified on command line, %d not processed yet.\n\n",
878 progName, numFileNames, 815 progName, progName,
879 numFileNames - numFilesProcessed ); 816 numFileNames, numFileNames - numFilesProcessed );
880 } 817 }
881 setExit(ec); 818 setExit(ec);
882 exit(exitValue); 819 exit(exitValue);
@@ -915,14 +852,16 @@ void crcError ( void )
915static 852static
916void compressedStreamEOF ( void ) 853void compressedStreamEOF ( void )
917{ 854{
918 fprintf ( stderr, 855 if (noisy) {
919 "\n%s: Compressed file ends unexpectedly;\n\t" 856 fprintf ( stderr,
920 "perhaps it is corrupted? *Possible* reason follows.\n", 857 "\n%s: Compressed file ends unexpectedly;\n\t"
921 progName ); 858 "perhaps it is corrupted? *Possible* reason follows.\n",
922 perror ( progName ); 859 progName );
923 showFileNames(); 860 perror ( progName );
924 cadvise(); 861 showFileNames();
925 cleanUpAndFail( 2 ); 862 cadvise();
863 }
864 cleanUpAndFail( 2 );
926} 865}
927 866
928 867
@@ -1038,6 +977,11 @@ void configError ( void )
1038/*--- The main driver machinery ---*/ 977/*--- The main driver machinery ---*/
1039/*---------------------------------------------------*/ 978/*---------------------------------------------------*/
1040 979
980/* All rather crufty. The main problem is that input files
981 are stat()d multiple times before use. This should be
982 cleaned up.
983*/
984
1041/*---------------------------------------------*/ 985/*---------------------------------------------*/
1042static 986static
1043void pad ( Char *s ) 987void pad ( Char *s )
@@ -1082,6 +1026,32 @@ Bool fileExists ( Char* name )
1082 1026
1083 1027
1084/*---------------------------------------------*/ 1028/*---------------------------------------------*/
1029/* Open an output file safely with O_EXCL and good permissions.
1030 This avoids a race condition in versions < 1.0.2, in which
1031 the file was first opened and then had its interim permissions
1032 set safely. We instead use open() to create the file with
1033 the interim permissions required. (--- --- rw-).
1034
1035 For non-Unix platforms, if we are not worrying about
1036 security issues, simple this simply behaves like fopen.
1037*/
1038FILE* fopen_output_safely ( Char* name, const char* mode )
1039{
1040# if BZ_UNIX
1041 FILE* fp;
1042 IntNative fh;
1043 fh = open(name, O_WRONLY|O_CREAT|O_EXCL, S_IWUSR|S_IRUSR);
1044 if (fh == -1) return NULL;
1045 fp = fdopen(fh, mode);
1046 if (fp == NULL) close(fh);
1047 return fp;
1048# else
1049 return fopen(name, mode);
1050# endif
1051}
1052
1053
1054/*---------------------------------------------*/
1085/*-- 1055/*--
1086 if in doubt, return True 1056 if in doubt, return True
1087--*/ 1057--*/
@@ -1093,7 +1063,7 @@ Bool notAStandardFile ( Char* name )
1093 1063
1094 i = MY_LSTAT ( name, &statBuf ); 1064 i = MY_LSTAT ( name, &statBuf );
1095 if (i != 0) return True; 1065 if (i != 0) return True;
1096 if (MY_S_IFREG(statBuf.st_mode)) return False; 1066 if (MY_S_ISREG(statBuf.st_mode)) return False;
1097 return True; 1067 return True;
1098} 1068}
1099 1069
@@ -1115,42 +1085,66 @@ Int32 countHardLinks ( Char* name )
1115 1085
1116 1086
1117/*---------------------------------------------*/ 1087/*---------------------------------------------*/
1088/* Copy modification date, access date, permissions and owner from the
1089 source to destination file. We have to copy this meta-info off
1090 into fileMetaInfo before starting to compress / decompress it,
1091 because doing it afterwards means we get the wrong access time.
1092
1093 To complicate matters, in compress() and decompress() below, the
1094 sequence of tests preceding the call to saveInputFileMetaInfo()
1095 involves calling fileExists(), which in turn establishes its result
1096 by attempting to fopen() the file, and if successful, immediately
1097 fclose()ing it again. So we have to assume that the fopen() call
1098 does not cause the access time field to be updated.
1099
1100 Reading of the man page for stat() (man 2 stat) on RedHat 7.2 seems
1101 to imply that merely doing open() will not affect the access time.
1102 Therefore we merely need to hope that the C library only does
1103 open() as a result of fopen(), and not any kind of read()-ahead
1104 cleverness.
1105
1106 It sounds pretty fragile to me. Whether this carries across
1107 robustly to arbitrary Unix-like platforms (or even works robustly
1108 on this one, RedHat 7.2) is unknown to me. Nevertheless ...
1109*/
1110#if BZ_UNIX
1111static
1112struct MY_STAT fileMetaInfo;
1113#endif
1114
1118static 1115static
1119void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName ) 1116void saveInputFileMetaInfo ( Char *srcName )
1120{ 1117{
1121#if BZ_UNIX 1118# if BZ_UNIX
1119 IntNative retVal;
1120 /* Note use of stat here, not lstat. */
1121 retVal = MY_STAT( srcName, &fileMetaInfo );
1122 ERROR_IF_NOT_ZERO ( retVal );
1123# endif
1124}
1125
1126
1127static
1128void applySavedMetaInfoToOutputFile ( Char *dstName )
1129{
1130# if BZ_UNIX
1122 IntNative retVal; 1131 IntNative retVal;
1123 struct MY_STAT statBuf;
1124 struct utimbuf uTimBuf; 1132 struct utimbuf uTimBuf;
1125 1133
1126 retVal = MY_LSTAT ( srcName, &statBuf ); 1134 uTimBuf.actime = fileMetaInfo.st_atime;
1127 ERROR_IF_NOT_ZERO ( retVal ); 1135 uTimBuf.modtime = fileMetaInfo.st_mtime;
1128 uTimBuf.actime = statBuf.st_atime;
1129 uTimBuf.modtime = statBuf.st_mtime;
1130 1136
1131 retVal = chmod ( dstName, statBuf.st_mode ); 1137 retVal = chmod ( dstName, fileMetaInfo.st_mode );
1132 ERROR_IF_NOT_ZERO ( retVal ); 1138 ERROR_IF_NOT_ZERO ( retVal );
1133 1139
1134 retVal = utime ( dstName, &uTimBuf ); 1140 retVal = utime ( dstName, &uTimBuf );
1135 ERROR_IF_NOT_ZERO ( retVal ); 1141 ERROR_IF_NOT_ZERO ( retVal );
1136 1142
1137 retVal = chown ( dstName, statBuf.st_uid, statBuf.st_gid ); 1143 retVal = chown ( dstName, fileMetaInfo.st_uid, fileMetaInfo.st_gid );
1138 /* chown() will in many cases return with EPERM, which can 1144 /* chown() will in many cases return with EPERM, which can
1139 be safely ignored. 1145 be safely ignored.
1140 */ 1146 */
1141#endif 1147# endif
1142}
1143
1144
1145/*---------------------------------------------*/
1146static
1147void setInterimPermissions ( Char *dstName )
1148{
1149#if BZ_UNIX
1150 IntNative retVal;
1151 retVal = chmod ( dstName, S_IRUSR | S_IWUSR );
1152 ERROR_IF_NOT_ZERO ( retVal );
1153#endif
1154} 1148}
1155 1149
1156 1150
@@ -1158,10 +1152,19 @@ void setInterimPermissions ( Char *dstName )
1158static 1152static
1159Bool containsDubiousChars ( Char* name ) 1153Bool containsDubiousChars ( Char* name )
1160{ 1154{
1161 Bool cdc = False; 1155# if BZ_UNIX
1156 /* On unix, files can contain any characters and the file expansion
1157 * is performed by the shell.
1158 */
1159 return False;
1160# else /* ! BZ_UNIX */
1161 /* On non-unix (Win* platforms), wildcard characters are not allowed in
1162 * filenames.
1163 */
1162 for (; *name != '\0'; name++) 1164 for (; *name != '\0'; name++)
1163 if (*name == '?' || *name == '*') cdc = True; 1165 if (*name == '?' || *name == '*') return True;
1164 return cdc; 1166 return False;
1167# endif /* BZ_UNIX */
1165} 1168}
1166 1169
1167 1170
@@ -1201,6 +1204,7 @@ void compress ( Char *name )
1201 FILE *inStr; 1204 FILE *inStr;
1202 FILE *outStr; 1205 FILE *outStr;
1203 Int32 n, i; 1206 Int32 n, i;
1207 struct MY_STAT statBuf;
1204 1208
1205 deleteOutputOnInterrupt = False; 1209 deleteOutputOnInterrupt = False;
1206 1210
@@ -1246,6 +1250,16 @@ void compress ( Char *name )
1246 return; 1250 return;
1247 } 1251 }
1248 } 1252 }
1253 if ( srcMode == SM_F2F || srcMode == SM_F2O ) {
1254 MY_STAT(inName, &statBuf);
1255 if ( MY_S_ISDIR(statBuf.st_mode) ) {
1256 fprintf( stderr,
1257 "%s: Input file %s is a directory.\n",
1258 progName,inName);
1259 setExit(1);
1260 return;
1261 }
1262 }
1249 if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { 1263 if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) {
1250 if (noisy) 1264 if (noisy)
1251 fprintf ( stderr, "%s: Input file %s is not a normal file.\n", 1265 fprintf ( stderr, "%s: Input file %s is not a normal file.\n",
@@ -1253,11 +1267,15 @@ void compress ( Char *name )
1253 setExit(1); 1267 setExit(1);
1254 return; 1268 return;
1255 } 1269 }
1256 if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) { 1270 if ( srcMode == SM_F2F && fileExists ( outName ) ) {
1257 fprintf ( stderr, "%s: Output file %s already exists.\n", 1271 if (forceOverwrite) {
1258 progName, outName ); 1272 remove(outName);
1259 setExit(1); 1273 } else {
1260 return; 1274 fprintf ( stderr, "%s: Output file %s already exists.\n",
1275 progName, outName );
1276 setExit(1);
1277 return;
1278 }
1261 } 1279 }
1262 if ( srcMode == SM_F2F && !forceOverwrite && 1280 if ( srcMode == SM_F2F && !forceOverwrite &&
1263 (n=countHardLinks ( inName )) > 0) { 1281 (n=countHardLinks ( inName )) > 0) {
@@ -1267,6 +1285,12 @@ void compress ( Char *name )
1267 return; 1285 return;
1268 } 1286 }
1269 1287
1288 if ( srcMode == SM_F2F ) {
1289 /* Save the file's meta-info before we open it. Doing it later
1290 means we mess up the access times. */
1291 saveInputFileMetaInfo ( inName );
1292 }
1293
1270 switch ( srcMode ) { 1294 switch ( srcMode ) {
1271 1295
1272 case SM_I2O: 1296 case SM_I2O:
@@ -1306,7 +1330,7 @@ void compress ( Char *name )
1306 1330
1307 case SM_F2F: 1331 case SM_F2F:
1308 inStr = fopen ( inName, "rb" ); 1332 inStr = fopen ( inName, "rb" );
1309 outStr = fopen ( outName, "wb" ); 1333 outStr = fopen_output_safely ( outName, "wb" );
1310 if ( outStr == NULL) { 1334 if ( outStr == NULL) {
1311 fprintf ( stderr, "%s: Can't create output file %s: %s.\n", 1335 fprintf ( stderr, "%s: Can't create output file %s: %s.\n",
1312 progName, outName, strerror(errno) ); 1336 progName, outName, strerror(errno) );
@@ -1321,7 +1345,6 @@ void compress ( Char *name )
1321 setExit(1); 1345 setExit(1);
1322 return; 1346 return;
1323 }; 1347 };
1324 setInterimPermissions ( outName );
1325 break; 1348 break;
1326 1349
1327 default: 1350 default:
@@ -1343,7 +1366,7 @@ void compress ( Char *name )
1343 1366
1344 /*--- If there was an I/O error, we won't get here. ---*/ 1367 /*--- If there was an I/O error, we won't get here. ---*/
1345 if ( srcMode == SM_F2F ) { 1368 if ( srcMode == SM_F2F ) {
1346 copyDatePermissionsAndOwner ( inName, outName ); 1369 applySavedMetaInfoToOutputFile ( outName );
1347 deleteOutputOnInterrupt = False; 1370 deleteOutputOnInterrupt = False;
1348 if ( !keepInputFiles ) { 1371 if ( !keepInputFiles ) {
1349 IntNative retVal = remove ( inName ); 1372 IntNative retVal = remove ( inName );
@@ -1364,6 +1387,7 @@ void uncompress ( Char *name )
1364 Int32 n, i; 1387 Int32 n, i;
1365 Bool magicNumberOK; 1388 Bool magicNumberOK;
1366 Bool cantGuess; 1389 Bool cantGuess;
1390 struct MY_STAT statBuf;
1367 1391
1368 deleteOutputOnInterrupt = False; 1392 deleteOutputOnInterrupt = False;
1369 1393
@@ -1405,6 +1429,16 @@ void uncompress ( Char *name )
1405 setExit(1); 1429 setExit(1);
1406 return; 1430 return;
1407 } 1431 }
1432 if ( srcMode == SM_F2F || srcMode == SM_F2O ) {
1433 MY_STAT(inName, &statBuf);
1434 if ( MY_S_ISDIR(statBuf.st_mode) ) {
1435 fprintf( stderr,
1436 "%s: Input file %s is a directory.\n",
1437 progName,inName);
1438 setExit(1);
1439 return;
1440 }
1441 }
1408 if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { 1442 if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) {
1409 if (noisy) 1443 if (noisy)
1410 fprintf ( stderr, "%s: Input file %s is not a normal file.\n", 1444 fprintf ( stderr, "%s: Input file %s is not a normal file.\n",
@@ -1419,11 +1453,15 @@ void uncompress ( Char *name )
1419 progName, inName, outName ); 1453 progName, inName, outName );
1420 /* just a warning, no return */ 1454 /* just a warning, no return */
1421 } 1455 }
1422 if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) { 1456 if ( srcMode == SM_F2F && fileExists ( outName ) ) {
1423 fprintf ( stderr, "%s: Output file %s already exists.\n", 1457 if (forceOverwrite) {
1424 progName, outName ); 1458 remove(outName);
1425 setExit(1); 1459 } else {
1426 return; 1460 fprintf ( stderr, "%s: Output file %s already exists.\n",
1461 progName, outName );
1462 setExit(1);
1463 return;
1464 }
1427 } 1465 }
1428 if ( srcMode == SM_F2F && !forceOverwrite && 1466 if ( srcMode == SM_F2F && !forceOverwrite &&
1429 (n=countHardLinks ( inName ) ) > 0) { 1467 (n=countHardLinks ( inName ) ) > 0) {
@@ -1433,6 +1471,12 @@ void uncompress ( Char *name )
1433 return; 1471 return;
1434 } 1472 }
1435 1473
1474 if ( srcMode == SM_F2F ) {
1475 /* Save the file's meta-info before we open it. Doing it later
1476 means we mess up the access times. */
1477 saveInputFileMetaInfo ( inName );
1478 }
1479
1436 switch ( srcMode ) { 1480 switch ( srcMode ) {
1437 1481
1438 case SM_I2O: 1482 case SM_I2O:
@@ -1463,7 +1507,7 @@ void uncompress ( Char *name )
1463 1507
1464 case SM_F2F: 1508 case SM_F2F:
1465 inStr = fopen ( inName, "rb" ); 1509 inStr = fopen ( inName, "rb" );
1466 outStr = fopen ( outName, "wb" ); 1510 outStr = fopen_output_safely ( outName, "wb" );
1467 if ( outStr == NULL) { 1511 if ( outStr == NULL) {
1468 fprintf ( stderr, "%s: Can't create output file %s: %s.\n", 1512 fprintf ( stderr, "%s: Can't create output file %s: %s.\n",
1469 progName, outName, strerror(errno) ); 1513 progName, outName, strerror(errno) );
@@ -1478,7 +1522,6 @@ void uncompress ( Char *name )
1478 setExit(1); 1522 setExit(1);
1479 return; 1523 return;
1480 }; 1524 };
1481 setInterimPermissions ( outName );
1482 break; 1525 break;
1483 1526
1484 default: 1527 default:
@@ -1501,7 +1544,7 @@ void uncompress ( Char *name )
1501 /*--- If there was an I/O error, we won't get here. ---*/ 1544 /*--- If there was an I/O error, we won't get here. ---*/
1502 if ( magicNumberOK ) { 1545 if ( magicNumberOK ) {
1503 if ( srcMode == SM_F2F ) { 1546 if ( srcMode == SM_F2F ) {
1504 copyDatePermissionsAndOwner ( inName, outName ); 1547 applySavedMetaInfoToOutputFile ( outName );
1505 deleteOutputOnInterrupt = False; 1548 deleteOutputOnInterrupt = False;
1506 if ( !keepInputFiles ) { 1549 if ( !keepInputFiles ) {
1507 IntNative retVal = remove ( inName ); 1550 IntNative retVal = remove ( inName );
@@ -1539,6 +1582,7 @@ void testf ( Char *name )
1539{ 1582{
1540 FILE *inStr; 1583 FILE *inStr;
1541 Bool allOK; 1584 Bool allOK;
1585 struct MY_STAT statBuf;
1542 1586
1543 deleteOutputOnInterrupt = False; 1587 deleteOutputOnInterrupt = False;
1544 1588
@@ -1565,6 +1609,16 @@ void testf ( Char *name )
1565 setExit(1); 1609 setExit(1);
1566 return; 1610 return;
1567 } 1611 }
1612 if ( srcMode != SM_I2O ) {
1613 MY_STAT(inName, &statBuf);
1614 if ( MY_S_ISDIR(statBuf.st_mode) ) {
1615 fprintf( stderr,
1616 "%s: Input file %s is a directory.\n",
1617 progName,inName);
1618 setExit(1);
1619 return;
1620 }
1621 }
1568 1622
1569 switch ( srcMode ) { 1623 switch ( srcMode ) {
1570 1624
@@ -1603,6 +1657,7 @@ void testf ( Char *name )
1603 } 1657 }
1604 1658
1605 /*--- Now the input handle is sane. Do the Biz. ---*/ 1659 /*--- Now the input handle is sane. Do the Biz. ---*/
1660 outputHandleJustInCase = NULL;
1606 allOK = testStream ( inStr ); 1661 allOK = testStream ( inStr );
1607 1662
1608 if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" ); 1663 if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" );
@@ -1619,7 +1674,7 @@ void license ( void )
1619 "bzip2, a block-sorting file compressor. " 1674 "bzip2, a block-sorting file compressor. "
1620 "Version %s.\n" 1675 "Version %s.\n"
1621 " \n" 1676 " \n"
1622 " Copyright (C) 1996-2000 by Julian Seward.\n" 1677 " Copyright (C) 1996-2002 by Julian Seward.\n"
1623 " \n" 1678 " \n"
1624 " This program is free software; you can redistribute it and/or modify\n" 1679 " This program is free software; you can redistribute it and/or modify\n"
1625 " it under the terms set out in the LICENSE file, which is included\n" 1680 " it under the terms set out in the LICENSE file, which is included\n"
@@ -1658,6 +1713,8 @@ void usage ( Char *fullProgName )
1658 " -V --version display software version & license\n" 1713 " -V --version display software version & license\n"
1659 " -s --small use less memory (at most 2500k)\n" 1714 " -s --small use less memory (at most 2500k)\n"
1660 " -1 .. -9 set block size to 100k .. 900k\n" 1715 " -1 .. -9 set block size to 100k .. 900k\n"
1716 " --fast alias for -1\n"
1717 " --best alias for -9\n"
1661 "\n" 1718 "\n"
1662 " If invoked as `bzip2', default action is to compress.\n" 1719 " If invoked as `bzip2', default action is to compress.\n"
1663 " as `bunzip2', default action is to decompress.\n" 1720 " as `bunzip2', default action is to decompress.\n"
@@ -1666,9 +1723,9 @@ void usage ( Char *fullProgName )
1666 " If no file names are given, bzip2 compresses or decompresses\n" 1723 " If no file names are given, bzip2 compresses or decompresses\n"
1667 " from standard input to standard output. You can combine\n" 1724 " from standard input to standard output. You can combine\n"
1668 " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n" 1725 " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
1669#if BZ_UNIX 1726# if BZ_UNIX
1670 "\n" 1727 "\n"
1671#endif 1728# endif
1672 , 1729 ,
1673 1730
1674 BZ2_bzlibVersion(), 1731 BZ2_bzlibVersion(),
@@ -1818,11 +1875,11 @@ IntNative main ( IntNative argc, Char *argv[] )
1818 1875
1819 /*-- Set up signal handlers for mem access errors --*/ 1876 /*-- Set up signal handlers for mem access errors --*/
1820 signal (SIGSEGV, mySIGSEGVorSIGBUScatcher); 1877 signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
1821#if BZ_UNIX 1878# if BZ_UNIX
1822#ifndef __DJGPP__ 1879# ifndef __DJGPP__
1823 signal (SIGBUS, mySIGSEGVorSIGBUScatcher); 1880 signal (SIGBUS, mySIGSEGVorSIGBUScatcher);
1824#endif 1881# endif
1825#endif 1882# endif
1826 1883
1827 copyFileName ( inName, "(none)" ); 1884 copyFileName ( inName, "(none)" );
1828 copyFileName ( outName, "(none)" ); 1885 copyFileName ( outName, "(none)" );
@@ -1933,6 +1990,8 @@ IntNative main ( IntNative argc, Char *argv[] )
1933 if (ISFLAG("--exponential")) workFactor = 1; else 1990 if (ISFLAG("--exponential")) workFactor = 1; else
1934 if (ISFLAG("--repetitive-best")) redundant(aa->name); else 1991 if (ISFLAG("--repetitive-best")) redundant(aa->name); else
1935 if (ISFLAG("--repetitive-fast")) redundant(aa->name); else 1992 if (ISFLAG("--repetitive-fast")) redundant(aa->name); else
1993 if (ISFLAG("--fast")) blockSize100k = 1; else
1994 if (ISFLAG("--best")) blockSize100k = 9; else
1936 if (ISFLAG("--verbose")) verbosity++; else 1995 if (ISFLAG("--verbose")) verbosity++; else
1937 if (ISFLAG("--help")) { usage ( progName ); exit ( 0 ); } 1996 if (ISFLAG("--help")) { usage ( progName ); exit ( 0 ); }
1938 else 1997 else