diff options
author | Julian Seward <jseward@acm.org> | 2001-12-30 22:13:13 +0100 |
---|---|---|
committer | Julian Seward <jseward@acm.org> | 2001-12-30 22:13:13 +0100 |
commit | 099d844292f60f9d58914da29e5773204dc55e7a (patch) | |
tree | 04bdb38dbcd894d6fdbbc3253e216d029cade5c6 | |
parent | 795b859eee96c700e8f3c3fe68e6a9a39d95797c (diff) | |
download | bzip2-099d844292f60f9d58914da29e5773204dc55e7a.tar.gz bzip2-099d844292f60f9d58914da29e5773204dc55e7a.tar.bz2 bzip2-099d844292f60f9d58914da29e5773204dc55e7a.zip |
bzip2-1.0.2bzip2-1.0.2
-rw-r--r-- | CHANGES | 88 | ||||
-rw-r--r-- | LICENSE | 4 | ||||
-rw-r--r-- | Makefile | 81 | ||||
-rw-r--r-- | Makefile-libbz2_so | 15 | ||||
-rw-r--r-- | README | 89 | ||||
-rw-r--r-- | README.COMPILATION.PROBLEMS | 4 | ||||
-rw-r--r-- | blocksort.c | 11 | ||||
-rw-r--r-- | bzdiff | 76 | ||||
-rw-r--r-- | bzdiff.1 | 47 | ||||
-rw-r--r-- | bzgrep | 71 | ||||
-rw-r--r-- | bzgrep.1 | 56 | ||||
-rw-r--r-- | bzip2.1 | 56 | ||||
-rw-r--r-- | bzip2.1.preformatted | 226 | ||||
-rw-r--r-- | bzip2.c | 533 | ||||
-rw-r--r-- | bzip2.txt | 134 | ||||
-rw-r--r-- | bzip2recover.c | 161 | ||||
-rw-r--r-- | bzlib.c | 35 | ||||
-rw-r--r-- | bzlib.h | 6 | ||||
-rw-r--r-- | bzlib_private.h | 11 | ||||
-rw-r--r-- | bzmore | 61 | ||||
-rw-r--r-- | bzmore.1 | 152 | ||||
-rw-r--r-- | compress.c | 10 | ||||
-rw-r--r-- | crctable.c | 2 | ||||
-rw-r--r-- | decompress.c | 14 | ||||
-rw-r--r-- | dlltest.c | 6 | ||||
-rw-r--r-- | huffman.c | 2 | ||||
-rw-r--r-- | makefile.msc | 2 | ||||
-rw-r--r-- | manual.texi | 114 | ||||
-rw-r--r-- | mk251.c | 16 | ||||
-rw-r--r-- | randtable.c | 2 | ||||
-rw-r--r-- | words3 | 4 |
31 files changed, 1464 insertions, 625 deletions
@@ -134,7 +134,7 @@ Several minor bugfixes and enhancements: | |||
134 | 134 | ||
135 | * Advance the version number to 1.0, so as to counteract the | 135 | * Advance the version number to 1.0, so as to counteract the |
136 | (false-in-this-case) impression some people have that programs | 136 | (false-in-this-case) impression some people have that programs |
137 | with version numbers less than 1.0 are in someway, experimental, | 137 | with version numbers less than 1.0 are in some way, experimental, |
138 | pre-release versions. | 138 | pre-release versions. |
139 | 139 | ||
140 | * Create an initial Makefile-libbz2_so to build a shared library. | 140 | * Create an initial Makefile-libbz2_so to build a shared library. |
@@ -165,3 +165,89 @@ There are no functionality changes or bug fixes relative to version | |||
165 | 1.0.0. This is just a documentation update + a fix for minor Win32 | 165 | 1.0.0. This is just a documentation update + a fix for minor Win32 |
166 | build problems. For almost everyone, upgrading from 1.0.0 to 1.0.1 is | 166 | build problems. For almost everyone, upgrading from 1.0.0 to 1.0.1 is |
167 | utterly pointless. Don't bother. | 167 | utterly pointless. Don't bother. |
168 | |||
169 | |||
170 | 1.0.2 | ||
171 | ~~~~~ | ||
172 | A bug fix release, addressing various minor issues which have appeared | ||
173 | in the 18 or so months since 1.0.1 was released. Most of the fixes | ||
174 | are to do with file-handling or documentation bugs. To the best of my | ||
175 | knowledge, there have been no data-loss-causing bugs reported in the | ||
176 | compression/decompression engine of 1.0.0 or 1.0.1. | ||
177 | |||
178 | Note that this release does not improve the rather crude build system | ||
179 | for Unix platforms. The general plan here is to autoconfiscate/ | ||
180 | libtoolise 1.0.2 soon after release, and release the result as 1.1.0 | ||
181 | or perhaps 1.2.0. That, however, is still just a plan at this point. | ||
182 | |||
183 | Here are the changes in 1.0.2. Bug-reporters and/or patch-senders in | ||
184 | parentheses. | ||
185 | |||
186 | * Fix an infinite segfault loop in 1.0.1 when a directory is | ||
187 | encountered in -f (force) mode. | ||
188 | (Trond Eivind Glomsrod, Nicholas Nethercote, Volker Schmidt) | ||
189 | |||
190 | * Avoid double fclose() of output file on certain I/O error paths. | ||
191 | (Solar Designer) | ||
192 | |||
193 | * Don't fail with internal error 1007 when fed a long stream (> 48MB) | ||
194 | of byte 251. Also print useful message suggesting that 1007s may be | ||
195 | caused by bad memory. | ||
196 | (noticed by Juan Pedro Vallejo, fixed by me) | ||
197 | |||
198 | * Fix uninitialised variable silly bug in demo prog dlltest.c. | ||
199 | (Jorj Bauer) | ||
200 | |||
201 | * Remove 512-MB limitation on recovered file size for bzip2recover | ||
202 | on selected platforms which support 64-bit ints. At the moment | ||
203 | that means all GCC-supported platforms, and Win32. | ||
204 | (me, Alson van der Meulen) | ||
205 | |||
206 | * Hard-code header byte values, to give correct operation on platforms | ||
207 | using EBCDIC as their native character set (IBM's OS/390). | ||
208 | (Leland Lucius) | ||
209 | |||
210 | * Copy file access times correctly. | ||
211 | (Marty Leisner) | ||
212 | |||
213 | * Add distclean and check targets to Makefile. | ||
214 | (Michael Carmack) | ||
215 | |||
216 | * Parameterise use of ar and ranlib in Makefile. Also add $(LDFLAGS). | ||
217 | (Rich Ireland, Bo Thorsen) | ||
218 | |||
219 | * Pass -p (create parent dirs as needed) to mkdir during make install. | ||
220 | (Jeremy Fusco) | ||
221 | |||
222 | * Dereference symlinks when copying file permissions in -f mode. | ||
223 | (Volker Schmidt) | ||
224 | |||
225 | * Majorly simplify implementation of uInt64_qrm10. | ||
226 | (Bo Lindbergh) | ||
227 | |||
228 | * Check the input file still exists before deleting the output one, | ||
229 | when aborting in cleanUpAndFail(). | ||
230 | (Joerg Prante, Robert Linden, Matthias Krings) | ||
231 | |||
232 | Also a bunch of patches courtesy of Philippe Troin, the Debian maintainer | ||
233 | of bzip2: | ||
234 | |||
235 | * Wrapper scripts (with manpages): bzdiff, bzgrep, bzmore. | ||
236 | |||
237 | * Spelling changes and minor enhancements in bzip2.1. | ||
238 | |||
239 | * Avoid race condition between creating the output file and setting its | ||
240 | interim permissions safely, by using fopen_output_safely(). | ||
241 | No changes to bzip2recover since there is no issue with file | ||
242 | permissions there. | ||
243 | |||
244 | * Do not print a senseless report with -v when compressing an empty | ||
245 | file. | ||
246 | |||
247 | * bzcat -f works on non-bzip2 files. | ||
248 | |||
249 | * Do not try to escape shell meta-characters on Unix (the shell takes | ||
250 | care of these). | ||
251 | |||
252 | * Add --fast and --best as aliases for -1 and -9, for gzip compatibility. | ||
253 | |||
@@ -1,6 +1,6 @@ | |||
1 | 1 | ||
2 | This program, "bzip2" and associated library "libbzip2", are | 2 | This program, "bzip2" and associated library "libbzip2", are |
3 | copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 3 | copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
4 | 4 | ||
5 | Redistribution and use in source and binary forms, with or without | 5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the following conditions | 6 | modification, are permitted provided that the following conditions |
@@ -35,5 +35,5 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
35 | 35 | ||
36 | Julian Seward, Cambridge, UK. | 36 | Julian Seward, Cambridge, UK. |
37 | jseward@acm.org | 37 | jseward@acm.org |
38 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 38 | bzip2/libbzip2 version 1.0.2 of 30 December 2001 |
39 | 39 | ||
@@ -1,9 +1,20 @@ | |||
1 | 1 | ||
2 | SHELL=/bin/sh | 2 | SHELL=/bin/sh |
3 | |||
4 | # To assist in cross-compiling | ||
3 | CC=gcc | 5 | CC=gcc |
6 | AR=ar | ||
7 | RANLIB=ranlib | ||
8 | LDFLAGS= | ||
9 | |||
10 | # Suitably paranoid flags to avoid bugs in gcc-2.7 | ||
4 | BIGFILES=-D_FILE_OFFSET_BITS=64 | 11 | BIGFILES=-D_FILE_OFFSET_BITS=64 |
5 | CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES) | 12 | CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES) |
6 | 13 | ||
14 | # Where you want it installed when you do 'make install' | ||
15 | PREFIX=/usr | ||
16 | |||
17 | |||
7 | OBJS= blocksort.o \ | 18 | OBJS= blocksort.o \ |
8 | huffman.o \ | 19 | huffman.o \ |
9 | crctable.o \ | 20 | crctable.o \ |
@@ -15,20 +26,21 @@ OBJS= blocksort.o \ | |||
15 | all: libbz2.a bzip2 bzip2recover test | 26 | all: libbz2.a bzip2 bzip2recover test |
16 | 27 | ||
17 | bzip2: libbz2.a bzip2.o | 28 | bzip2: libbz2.a bzip2.o |
18 | $(CC) $(CFLAGS) -o bzip2 bzip2.o -L. -lbz2 | 29 | $(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 bzip2.o -L. -lbz2 |
19 | 30 | ||
20 | bzip2recover: bzip2recover.o | 31 | bzip2recover: bzip2recover.o |
21 | $(CC) $(CFLAGS) -o bzip2recover bzip2recover.o | 32 | $(CC) $(CFLAGS) $(LDFLAGS) -o bzip2recover bzip2recover.o |
22 | 33 | ||
23 | libbz2.a: $(OBJS) | 34 | libbz2.a: $(OBJS) |
24 | rm -f libbz2.a | 35 | rm -f libbz2.a |
25 | ar cq libbz2.a $(OBJS) | 36 | $(AR) cq libbz2.a $(OBJS) |
26 | @if ( test -f /usr/bin/ranlib -o -f /bin/ranlib -o \ | 37 | @if ( test -f $(RANLIB) -o -f /usr/bin/ranlib -o \ |
27 | -f /usr/ccs/bin/ranlib ) ; then \ | 38 | -f /bin/ranlib -o -f /usr/ccs/bin/ranlib ) ; then \ |
28 | echo ranlib libbz2.a ; \ | 39 | echo $(RANLIB) libbz2.a ; \ |
29 | ranlib libbz2.a ; \ | 40 | $(RANLIB) libbz2.a ; \ |
30 | fi | 41 | fi |
31 | 42 | ||
43 | check: test | ||
32 | test: bzip2 | 44 | test: bzip2 |
33 | @cat words1 | 45 | @cat words1 |
34 | ./bzip2 -1 < sample1.ref > sample1.rb2 | 46 | ./bzip2 -1 < sample1.ref > sample1.rb2 |
@@ -45,14 +57,12 @@ test: bzip2 | |||
45 | cmp sample3.tst sample3.ref | 57 | cmp sample3.tst sample3.ref |
46 | @cat words3 | 58 | @cat words3 |
47 | 59 | ||
48 | PREFIX=/usr | ||
49 | |||
50 | install: bzip2 bzip2recover | 60 | install: bzip2 bzip2recover |
51 | if ( test ! -d $(PREFIX)/bin ) ; then mkdir $(PREFIX)/bin ; fi | 61 | if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi |
52 | if ( test ! -d $(PREFIX)/lib ) ; then mkdir $(PREFIX)/lib ; fi | 62 | if ( test ! -d $(PREFIX)/lib ) ; then mkdir -p $(PREFIX)/lib ; fi |
53 | if ( test ! -d $(PREFIX)/man ) ; then mkdir $(PREFIX)/man ; fi | 63 | if ( test ! -d $(PREFIX)/man ) ; then mkdir -p $(PREFIX)/man ; fi |
54 | if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir $(PREFIX)/man/man1 ; fi | 64 | if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir -p $(PREFIX)/man/man1 ; fi |
55 | if ( test ! -d $(PREFIX)/include ) ; then mkdir $(PREFIX)/include ; fi | 65 | if ( test ! -d $(PREFIX)/include ) ; then mkdir -p $(PREFIX)/include ; fi |
56 | cp -f bzip2 $(PREFIX)/bin/bzip2 | 66 | cp -f bzip2 $(PREFIX)/bin/bzip2 |
57 | cp -f bzip2 $(PREFIX)/bin/bunzip2 | 67 | cp -f bzip2 $(PREFIX)/bin/bunzip2 |
58 | cp -f bzip2 $(PREFIX)/bin/bzcat | 68 | cp -f bzip2 $(PREFIX)/bin/bzcat |
@@ -67,7 +77,26 @@ install: bzip2 bzip2recover | |||
67 | chmod a+r $(PREFIX)/include/bzlib.h | 77 | chmod a+r $(PREFIX)/include/bzlib.h |
68 | cp -f libbz2.a $(PREFIX)/lib | 78 | cp -f libbz2.a $(PREFIX)/lib |
69 | chmod a+r $(PREFIX)/lib/libbz2.a | 79 | chmod a+r $(PREFIX)/lib/libbz2.a |
80 | cp -f bzgrep $(PREFIX)/bin/bzgrep | ||
81 | ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzegrep | ||
82 | ln $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzfgrep | ||
83 | chmod a+x $(PREFIX)/bin/bzgrep | ||
84 | cp -f bzmore $(PREFIX)/bin/bzmore | ||
85 | ln $(PREFIX)/bin/bzmore $(PREFIX)/bin/bzless | ||
86 | chmod a+x $(PREFIX)/bin/bzmore | ||
87 | cp -f bzdiff $(PREFIX)/bin/bzdiff | ||
88 | ln $(PREFIX)/bin/bzdiff $(PREFIX)/bin/bzcmp | ||
89 | chmod a+x $(PREFIX)/bin/bzdiff | ||
90 | cp -f bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1 | ||
91 | chmod a+r $(PREFIX)/man/man1/bzgrep.1 | ||
92 | chmod a+r $(PREFIX)/man/man1/bzmore.1 | ||
93 | chmod a+r $(PREFIX)/man/man1/bzdiff.1 | ||
94 | echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzegrep.1 | ||
95 | echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzfgrep.1 | ||
96 | echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1 | ||
97 | echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1 | ||
70 | 98 | ||
99 | distclean: clean | ||
71 | clean: | 100 | clean: |
72 | rm -f *.o libbz2.a bzip2 bzip2recover \ | 101 | rm -f *.o libbz2.a bzip2 bzip2recover \ |
73 | sample1.rb2 sample2.rb2 sample3.rb2 \ | 102 | sample1.rb2 sample2.rb2 sample3.rb2 \ |
@@ -93,7 +122,7 @@ bzip2.o: bzip2.c | |||
93 | bzip2recover.o: bzip2recover.c | 122 | bzip2recover.o: bzip2recover.c |
94 | $(CC) $(CFLAGS) -c bzip2recover.c | 123 | $(CC) $(CFLAGS) -c bzip2recover.c |
95 | 124 | ||
96 | DISTNAME=bzip2-1.0.1 | 125 | DISTNAME=bzip2-1.0.2 |
97 | tarfile: | 126 | tarfile: |
98 | rm -f $(DISTNAME) | 127 | rm -f $(DISTNAME) |
99 | ln -sf . $(DISTNAME) | 128 | ln -sf . $(DISTNAME) |
@@ -112,6 +141,7 @@ tarfile: | |||
112 | $(DISTNAME)/Makefile \ | 141 | $(DISTNAME)/Makefile \ |
113 | $(DISTNAME)/manual.texi \ | 142 | $(DISTNAME)/manual.texi \ |
114 | $(DISTNAME)/manual.ps \ | 143 | $(DISTNAME)/manual.ps \ |
144 | $(DISTNAME)/manual.pdf \ | ||
115 | $(DISTNAME)/LICENSE \ | 145 | $(DISTNAME)/LICENSE \ |
116 | $(DISTNAME)/bzip2.1 \ | 146 | $(DISTNAME)/bzip2.1 \ |
117 | $(DISTNAME)/bzip2.1.preformatted \ | 147 | $(DISTNAME)/bzip2.1.preformatted \ |
@@ -138,4 +168,25 @@ tarfile: | |||
138 | $(DISTNAME)/Y2K_INFO \ | 168 | $(DISTNAME)/Y2K_INFO \ |
139 | $(DISTNAME)/unzcrash.c \ | 169 | $(DISTNAME)/unzcrash.c \ |
140 | $(DISTNAME)/spewG.c \ | 170 | $(DISTNAME)/spewG.c \ |
171 | $(DISTNAME)/mk251.c \ | ||
172 | $(DISTNAME)/bzdiff \ | ||
173 | $(DISTNAME)/bzdiff.1 \ | ||
174 | $(DISTNAME)/bzmore \ | ||
175 | $(DISTNAME)/bzmore.1 \ | ||
176 | $(DISTNAME)/bzgrep \ | ||
177 | $(DISTNAME)/bzgrep.1 \ | ||
141 | $(DISTNAME)/Makefile-libbz2_so | 178 | $(DISTNAME)/Makefile-libbz2_so |
179 | gzip -v $(DISTNAME).tar | ||
180 | |||
181 | # For rebuilding the manual from sources on my RedHat 7.2 box | ||
182 | manual: manual.ps manual.pdf manual.html | ||
183 | |||
184 | manual.ps: manual.texi | ||
185 | tex manual.texi | ||
186 | dvips -o manual.ps manual.dvi | ||
187 | |||
188 | manual.pdf: manual.ps | ||
189 | ps2pdf manual.ps | ||
190 | |||
191 | manual.html: manual.texi | ||
192 | texi2html -split_chapter manual.texi | ||
diff --git a/Makefile-libbz2_so b/Makefile-libbz2_so index a347c50..4986fe2 100644 --- a/Makefile-libbz2_so +++ b/Makefile-libbz2_so | |||
@@ -1,8 +1,9 @@ | |||
1 | 1 | ||
2 | # This Makefile builds a shared version of the library, | 2 | # This Makefile builds a shared version of the library, |
3 | # libbz2.so.1.0.1, with soname libbz2.so.1.0, | 3 | # libbz2.so.1.0.2, with soname libbz2.so.1.0, |
4 | # at least on x86-Linux (RedHat 5.2), | 4 | # at least on x86-Linux (RedHat 7.2), |
5 | # with gcc-2.7.2.3. Please see the README file for some | 5 | # with gcc-2.96 20000731 (Red Hat Linux 7.1 2.96-98). |
6 | # Please see the README file for some | ||
6 | # important info about building the library like this. | 7 | # important info about building the library like this. |
7 | 8 | ||
8 | SHELL=/bin/sh | 9 | SHELL=/bin/sh |
@@ -19,13 +20,13 @@ OBJS= blocksort.o \ | |||
19 | bzlib.o | 20 | bzlib.o |
20 | 21 | ||
21 | all: $(OBJS) | 22 | all: $(OBJS) |
22 | $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS) | 23 | $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS) |
23 | $(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.1 | 24 | $(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.2 |
24 | rm -f libbz2.so.1.0 | 25 | rm -f libbz2.so.1.0 |
25 | ln -s libbz2.so.1.0.1 libbz2.so.1.0 | 26 | ln -s libbz2.so.1.0.2 libbz2.so.1.0 |
26 | 27 | ||
27 | clean: | 28 | clean: |
28 | rm -f $(OBJS) bzip2.o libbz2.so.1.0.1 libbz2.so.1.0 bzip2-shared | 29 | rm -f $(OBJS) bzip2.o libbz2.so.1.0.2 libbz2.so.1.0 bzip2-shared |
29 | 30 | ||
30 | blocksort.o: blocksort.c | 31 | blocksort.o: blocksort.c |
31 | $(CC) $(CFLAGS) -c blocksort.c | 32 | $(CC) $(CFLAGS) -c blocksort.c |
@@ -1,15 +1,15 @@ | |||
1 | 1 | ||
2 | This is the README for bzip2, a block-sorting file compressor, version | 2 | This is the README for bzip2, a block-sorting file compressor, version |
3 | 1.0. This version is fully compatible with the previous public | 3 | 1.0.2. This version is fully compatible with the previous public |
4 | releases, bzip2-0.1pl2, bzip2-0.9.0 and bzip2-0.9.5. | 4 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1. |
5 | 5 | ||
6 | bzip2-1.0 is distributed under a BSD-style license. For details, | 6 | bzip2-1.0.2 is distributed under a BSD-style license. For details, |
7 | see the file LICENSE. | 7 | see the file LICENSE. |
8 | 8 | ||
9 | Complete documentation is available in Postscript form (manual.ps) or | 9 | Complete documentation is available in Postscript form (manual.ps), |
10 | html (manual_toc.html). A plain-text version of the manual page is | 10 | PDF (manual.pdf, amazingly enough) or html (manual_toc.html). A |
11 | available as bzip2.txt. A statement about Y2K issues is now included | 11 | plain-text version of the manual page is available as bzip2.txt. |
12 | in the file Y2K_INFO. | 12 | A statement about Y2K issues is now included in the file Y2K_INFO. |
13 | 13 | ||
14 | 14 | ||
15 | HOW TO BUILD -- UNIX | 15 | HOW TO BUILD -- UNIX |
@@ -33,34 +33,41 @@ not actually execute them. | |||
33 | HOW TO BUILD -- UNIX, shared library libbz2.so. | 33 | HOW TO BUILD -- UNIX, shared library libbz2.so. |
34 | 34 | ||
35 | Do 'make -f Makefile-libbz2_so'. This Makefile seems to work for | 35 | Do 'make -f Makefile-libbz2_so'. This Makefile seems to work for |
36 | Linux-ELF (RedHat 5.2 on an x86 box), with gcc. I make no claims | 36 | Linux-ELF (RedHat 7.2 on an x86 box), with gcc. I make no claims |
37 | that it works for any other platform, though I suspect it probably | 37 | that it works for any other platform, though I suspect it probably |
38 | will work for most platforms employing both ELF and gcc. | 38 | will work for most platforms employing both ELF and gcc. |
39 | 39 | ||
40 | bzip2-shared, a client of the shared library, is also build, but | 40 | bzip2-shared, a client of the shared library, is also built, but not |
41 | not self-tested. So I suggest you also build using the normal | 41 | self-tested. So I suggest you also build using the normal Makefile, |
42 | Makefile, since that conducts a self-test. | 42 | since that conducts a self-test. A second reason to prefer the |
43 | version statically linked to the library is that, on x86 platforms, | ||
44 | building shared objects makes a valuable register (%ebx) unavailable | ||
45 | to gcc, resulting in a slowdown of 10%-20%, at least for bzip2. | ||
43 | 46 | ||
44 | Important note for people upgrading .so's from 0.9.0/0.9.5 to | 47 | Important note for people upgrading .so's from 0.9.0/0.9.5 to version |
45 | version 1.0. All the functions in the library have been renamed, | 48 | 1.0.X. All the functions in the library have been renamed, from (eg) |
46 | from (eg) bzCompress to BZ2_bzCompress, to avoid namespace pollution. | 49 | bzCompress to BZ2_bzCompress, to avoid namespace pollution. |
47 | Unfortunately this means that the libbz2.so created by | 50 | Unfortunately this means that the libbz2.so created by |
48 | Makefile-libbz2_so will not work with any program which used an | 51 | Makefile-libbz2_so will not work with any program which used an older |
49 | older version of the library. Sorry. I do encourage library | 52 | version of the library. Sorry. I do encourage library clients to |
50 | clients to make the effort to upgrade to use version 1.0, since | 53 | make the effort to upgrade to use version 1.0, since it is both faster |
51 | it is both faster and more robust than previous versions. | 54 | and more robust than previous versions. |
52 | 55 | ||
53 | 56 | ||
54 | HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc. | 57 | HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc. |
55 | 58 | ||
56 | It's difficult for me to support compilation on all these platforms. | 59 | It's difficult for me to support compilation on all these platforms. |
57 | My approach is to collect binaries for these platforms, and put them | 60 | My approach is to collect binaries for these platforms, and put them |
58 | on the master web page (http://sourceware.cygnus.com/bzip2). Look | 61 | on the master web page (http://sources.redhat.com/bzip2). Look there. |
59 | there. However (FWIW), bzip2-1.0 is very standard ANSI C and should | 62 | However (FWIW), bzip2-1.0.X is very standard ANSI C and should compile |
60 | compile unmodified with MS Visual C. For Win32, there is one | 63 | unmodified with MS Visual C. If you have difficulties building, you |
61 | important caveat: in bzip2.c, you must set BZ_UNIX to 0 and | 64 | might want to read README.COMPILATION.PROBLEMS. |
62 | BZ_LCCWIN32 to 1 before building. If you have difficulties building, | 65 | |
63 | you might want to read README.COMPILATION.PROBLEMS. | 66 | At least using MS Visual C++ 6, you can build from the unmodified |
67 | sources by issuing, in a command shell: | ||
68 | nmake -f makefile.msc | ||
69 | (you may need to first run the MSVC-provided script VCVARS32.BAT | ||
70 | so as to set up paths to the MSVC tools correctly). | ||
64 | 71 | ||
65 | 72 | ||
66 | VALIDATION | 73 | VALIDATION |
@@ -138,29 +145,37 @@ WHAT'S NEW IN 0.9.5 ? | |||
138 | * Many small improvements in file and flag handling. | 145 | * Many small improvements in file and flag handling. |
139 | * A Y2K statement. | 146 | * A Y2K statement. |
140 | 147 | ||
141 | WHAT'S NEW IN 1.0 | 148 | WHAT'S NEW IN 1.0.0 ? |
142 | 149 | ||
143 | See the CHANGES file. | 150 | See the CHANGES file. |
144 | 151 | ||
152 | WHAT'S NEW IN 1.0.2 ? | ||
153 | |||
154 | See the CHANGES file. | ||
155 | |||
156 | |||
145 | I hope you find bzip2 useful. Feel free to contact me at | 157 | I hope you find bzip2 useful. Feel free to contact me at |
146 | jseward@acm.org | 158 | jseward@acm.org |
147 | if you have any suggestions or queries. Many people mailed me with | 159 | if you have any suggestions or queries. Many people mailed me with |
148 | comments, suggestions and patches after the releases of bzip-0.15, | 160 | comments, suggestions and patches after the releases of bzip-0.15, |
149 | bzip-0.21, bzip2-0.1pl2 and bzip2-0.9.0, and the changes in bzip2 are | 161 | bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, |
150 | largely a result of this feedback. I thank you for your comments. | 162 | and the changes in bzip2 are largely a result of this feedback. |
163 | I thank you for your comments. | ||
151 | 164 | ||
152 | At least for the time being, bzip2's "home" is (or can be reached via) | 165 | At least for the time being, bzip2's "home" is (or can be reached via) |
153 | http://www.muraroa.demon.co.uk. | 166 | http://sources.redhat.com/bzip2. |
154 | 167 | ||
155 | Julian Seward | 168 | Julian Seward |
156 | jseward@acm.org | 169 | jseward@acm.org |
157 | 170 | ||
158 | Cambridge, UK | 171 | Cambridge, UK (and what a great town this is!) |
159 | 18 July 1996 (version 0.15) | 172 | |
160 | 25 August 1996 (version 0.21) | 173 | 18 July 1996 (version 0.15) |
161 | 7 August 1997 (bzip2, version 0.1) | 174 | 25 August 1996 (version 0.21) |
162 | 29 August 1997 (bzip2, version 0.1pl2) | 175 | 7 August 1997 (bzip2, version 0.1) |
163 | 23 August 1998 (bzip2, version 0.9.0) | 176 | 29 August 1997 (bzip2, version 0.1pl2) |
164 | 8 June 1999 (bzip2, version 0.9.5) | 177 | 23 August 1998 (bzip2, version 0.9.0) |
165 | 4 Sept 1999 (bzip2, version 0.9.5d) | 178 | 8 June 1999 (bzip2, version 0.9.5) |
166 | 5 May 2000 (bzip2, version 1.0pre8) | 179 | 4 Sept 1999 (bzip2, version 0.9.5d) |
180 | 5 May 2000 (bzip2, version 1.0pre8) | ||
181 | 30 December 2001 (bzip2, version 1.0.2pre1) \ No newline at end of file | ||
diff --git a/README.COMPILATION.PROBLEMS b/README.COMPILATION.PROBLEMS index d621ad5..bd1822d 100644 --- a/README.COMPILATION.PROBLEMS +++ b/README.COMPILATION.PROBLEMS | |||
@@ -117,11 +117,11 @@ Known problems as of 1.0pre8: | |||
117 | All that said: you might be able to get somewhere | 117 | All that said: you might be able to get somewhere |
118 | by finding the line in Makefile-libbz2_so which says | 118 | by finding the line in Makefile-libbz2_so which says |
119 | 119 | ||
120 | $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS) | 120 | $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS) |
121 | 121 | ||
122 | and replacing with | 122 | and replacing with |
123 | 123 | ||
124 | ($CC) -G -shared -o libbz2.so.1.0.1 -h libbz2.so.1.0 $(OBJS) | 124 | $(CC) -G -shared -o libbz2.so.1.0.2 -h libbz2.so.1.0 $(OBJS) |
125 | 125 | ||
126 | If gcc objects to the combination -fpic -fPIC, get rid of | 126 | If gcc objects to the combination -fpic -fPIC, get rid of |
127 | the second one, leaving just "-fpic". | 127 | the second one, leaving just "-fpic". |
diff --git a/blocksort.c b/blocksort.c index ec42672..aba3efc 100644 --- a/blocksort.c +++ b/blocksort.c | |||
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -981,7 +981,14 @@ void mainSort ( UInt32* ptr, | |||
981 | } | 981 | } |
982 | } | 982 | } |
983 | 983 | ||
984 | AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 ); | 984 | AssertH ( (copyStart[ss]-1 == copyEnd[ss]) |
985 | || | ||
986 | /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. | ||
987 | Necessity for this case is demonstrated by compressing | ||
988 | a sequence of approximately 48.5 million copies of character | ||
989 | 251; 1.0.0/1.0.1 will then die here. */ | ||
990 | (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), | ||
991 | 1007 ) | ||
985 | 992 | ||
986 | for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; | 993 | for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; |
987 | 994 | ||
@@ -0,0 +1,76 @@ | |||
1 | #!/bin/sh | ||
2 | # sh is buggy on RS/6000 AIX 3.2. Replace above line with #!/bin/ksh | ||
3 | |||
4 | # Bzcmp/diff wrapped for bzip2, | ||
5 | # adapted from zdiff by Philippe Troin <phil@fifi.org> for Debian GNU/Linux. | ||
6 | |||
7 | # Bzcmp and bzdiff are used to invoke the cmp or the diff pro- | ||
8 | # gram on compressed files. All options specified are passed | ||
9 | # directly to cmp or diff. If only 1 file is specified, then | ||
10 | # the files compared are file1 and an uncompressed file1.bz2. | ||
11 | # If two files are specified, then they are uncompressed (if | ||
12 | # necessary) and fed to cmp or diff. The exit status from cmp | ||
13 | # or diff is preserved. | ||
14 | |||
15 | PATH="/usr/bin:$PATH"; export PATH | ||
16 | prog=`echo $0 | sed 's|.*/||'` | ||
17 | case "$prog" in | ||
18 | *cmp) comp=${CMP-cmp} ;; | ||
19 | *) comp=${DIFF-diff} ;; | ||
20 | esac | ||
21 | |||
22 | OPTIONS= | ||
23 | FILES= | ||
24 | for ARG | ||
25 | do | ||
26 | case "$ARG" in | ||
27 | -*) OPTIONS="$OPTIONS $ARG";; | ||
28 | *) if test -f "$ARG"; then | ||
29 | FILES="$FILES $ARG" | ||
30 | else | ||
31 | echo "${prog}: $ARG not found or not a regular file" | ||
32 | exit 1 | ||
33 | fi ;; | ||
34 | esac | ||
35 | done | ||
36 | if test -z "$FILES"; then | ||
37 | echo "Usage: $prog [${comp}_options] file [file]" | ||
38 | exit 1 | ||
39 | fi | ||
40 | tmp=`tempfile -d /tmp -p bz` || { | ||
41 | echo 'cannot create a temporary file' >&2 | ||
42 | exit 1 | ||
43 | } | ||
44 | set $FILES | ||
45 | if test $# -eq 1; then | ||
46 | FILE=`echo "$1" | sed 's/.bz2$//'` | ||
47 | bzip2 -cd "$FILE.bz2" | $comp $OPTIONS - "$FILE" | ||
48 | STAT="$?" | ||
49 | |||
50 | elif test $# -eq 2; then | ||
51 | case "$1" in | ||
52 | *.bz2) | ||
53 | case "$2" in | ||
54 | *.bz2) | ||
55 | F=`echo "$2" | sed 's|.*/||;s|.bz2$||'` | ||
56 | bzip2 -cdfq "$2" > $tmp | ||
57 | bzip2 -cdfq "$1" | $comp $OPTIONS - $tmp | ||
58 | STAT="$?" | ||
59 | /bin/rm -f $tmp;; | ||
60 | |||
61 | *) bzip2 -cdfq "$1" | $comp $OPTIONS - "$2" | ||
62 | STAT="$?";; | ||
63 | esac;; | ||
64 | *) case "$2" in | ||
65 | *.bz2) | ||
66 | bzip2 -cdfq "$2" | $comp $OPTIONS "$1" - | ||
67 | STAT="$?";; | ||
68 | *) $comp $OPTIONS "$1" "$2" | ||
69 | STAT="$?";; | ||
70 | esac;; | ||
71 | esac | ||
72 | exit "$STAT" | ||
73 | else | ||
74 | echo "Usage: $prog [${comp}_options] file [file]" | ||
75 | exit 1 | ||
76 | fi | ||
diff --git a/bzdiff.1 b/bzdiff.1 new file mode 100644 index 0000000..adb7a8e --- /dev/null +++ b/bzdiff.1 | |||
@@ -0,0 +1,47 @@ | |||
1 | \"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | ||
2 | \"for Debian GNU/Linux | ||
3 | .TH BZDIFF 1 | ||
4 | .SH NAME | ||
5 | bzcmp, bzdiff \- compare bzip2 compressed files | ||
6 | .SH SYNOPSIS | ||
7 | .B bzcmp | ||
8 | [ cmp_options ] file1 | ||
9 | [ file2 ] | ||
10 | .br | ||
11 | .B bzdiff | ||
12 | [ diff_options ] file1 | ||
13 | [ file2 ] | ||
14 | .SH DESCRIPTION | ||
15 | .I Bzcmp | ||
16 | and | ||
17 | .I bzdiff | ||
18 | are used to invoke the | ||
19 | .I cmp | ||
20 | or the | ||
21 | .I diff | ||
22 | program on bzip2 compressed files. All options specified are passed | ||
23 | directly to | ||
24 | .I cmp | ||
25 | or | ||
26 | .IR diff "." | ||
27 | If only 1 file is specified, then the files compared are | ||
28 | .I file1 | ||
29 | and an uncompressed | ||
30 | .IR file1 ".bz2." | ||
31 | If two files are specified, then they are uncompressed if necessary and fed to | ||
32 | .I cmp | ||
33 | or | ||
34 | .IR diff "." | ||
35 | The exit status from | ||
36 | .I cmp | ||
37 | or | ||
38 | .I diff | ||
39 | is preserved. | ||
40 | .SH "SEE ALSO" | ||
41 | cmp(1), diff(1), bzmore(1), bzless(1), bzgrep(1), bzip2(1) | ||
42 | .SH BUGS | ||
43 | Messages from the | ||
44 | .I cmp | ||
45 | or | ||
46 | .I diff | ||
47 | programs refer to temporary filenames instead of those specified. | ||
@@ -0,0 +1,71 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | # Bzgrep wrapped for bzip2, | ||
4 | # adapted from zgrep by Philippe Troin <phil@fifi.org> for Debian GNU/Linux. | ||
5 | ## zgrep notice: | ||
6 | ## zgrep -- a wrapper around a grep program that decompresses files as needed | ||
7 | ## Adapted from a version sent by Charles Levert <charles@comm.polymtl.ca> | ||
8 | |||
9 | PATH="/usr/bin:$PATH"; export PATH | ||
10 | |||
11 | prog=`echo $0 | sed 's|.*/||'` | ||
12 | case "$prog" in | ||
13 | *egrep) grep=${EGREP-egrep} ;; | ||
14 | *fgrep) grep=${FGREP-fgrep} ;; | ||
15 | *) grep=${GREP-grep} ;; | ||
16 | esac | ||
17 | pat="" | ||
18 | while test $# -ne 0; do | ||
19 | case "$1" in | ||
20 | -e | -f) opt="$opt $1"; shift; pat="$1" | ||
21 | if test "$grep" = grep; then # grep is buggy with -e on SVR4 | ||
22 | grep=egrep | ||
23 | fi;; | ||
24 | -A | -B) opt="$opt $1 $2"; shift;; | ||
25 | -*) opt="$opt $1";; | ||
26 | *) if test -z "$pat"; then | ||
27 | pat="$1" | ||
28 | else | ||
29 | break; | ||
30 | fi;; | ||
31 | esac | ||
32 | shift | ||
33 | done | ||
34 | |||
35 | if test -z "$pat"; then | ||
36 | echo "grep through bzip2 files" | ||
37 | echo "usage: $prog [grep_options] pattern [files]" | ||
38 | exit 1 | ||
39 | fi | ||
40 | |||
41 | list=0 | ||
42 | silent=0 | ||
43 | op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'` | ||
44 | case "$op" in | ||
45 | *l*) list=1 | ||
46 | esac | ||
47 | case "$op" in | ||
48 | *h*) silent=1 | ||
49 | esac | ||
50 | |||
51 | if test $# -eq 0; then | ||
52 | bzip2 -cdfq | $grep $opt "$pat" | ||
53 | exit $? | ||
54 | fi | ||
55 | |||
56 | res=0 | ||
57 | for i do | ||
58 | if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi | ||
59 | if test $list -eq 1; then | ||
60 | bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo $i | ||
61 | r=$? | ||
62 | elif test $# -eq 1 -o $silent -eq 1; then | ||
63 | bzip2 -cdfq "$i" | $grep $opt "$pat" | ||
64 | r=$? | ||
65 | else | ||
66 | bzip2 -cdfq "$i" | $grep $opt "$pat" | sed "s|^|${i}:|" | ||
67 | r=$? | ||
68 | fi | ||
69 | test "$r" -ne 0 && res="$r" | ||
70 | done | ||
71 | exit $res | ||
diff --git a/bzgrep.1 b/bzgrep.1 new file mode 100644 index 0000000..930af8c --- /dev/null +++ b/bzgrep.1 | |||
@@ -0,0 +1,56 @@ | |||
1 | .\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | ||
2 | .\"for Debian GNU/Linux | ||
3 | .TH BZGREP 1 | ||
4 | .SH NAME | ||
5 | bzgrep, bzfgrep, bzegrep \- search possibly bzip2 compressed files for a regular expression | ||
6 | .SH SYNOPSIS | ||
7 | .B bzgrep | ||
8 | [ grep_options ] | ||
9 | .BI [\ -e\ ] " pattern" | ||
10 | .IR filename ".\|.\|." | ||
11 | .br | ||
12 | .B bzegrep | ||
13 | [ egrep_options ] | ||
14 | .BI [\ -e\ ] " pattern" | ||
15 | .IR filename ".\|.\|." | ||
16 | .br | ||
17 | .B bzfgrep | ||
18 | [ fgrep_options ] | ||
19 | .BI [\ -e\ ] " pattern" | ||
20 | .IR filename ".\|.\|." | ||
21 | .SH DESCRIPTION | ||
22 | .IR Bzgrep | ||
23 | is used to invoke | ||
24 | .I grep | ||
25 | on bzip2-compressed files. All options specified are passed directly to | ||
26 | .I grep. | ||
27 | If no file is specified, then the standard input is decompressed | ||
28 | if necessary and fed to grep. | ||
29 | Otherwise the given files are uncompressed if necessary and fed to | ||
30 | .I grep. | ||
31 | .PP | ||
32 | If | ||
33 | .I bzgrep | ||
34 | is invoked as | ||
35 | .I bzegrep | ||
36 | or | ||
37 | .I bzfgrep | ||
38 | then | ||
39 | .I egrep | ||
40 | or | ||
41 | .I fgrep | ||
42 | is used instead of | ||
43 | .I grep. | ||
44 | If the GREP environment variable is set, | ||
45 | .I bzgrep | ||
46 | uses it as the | ||
47 | .I grep | ||
48 | program to be invoked. For example: | ||
49 | |||
50 | for sh: GREP=fgrep bzgrep string files | ||
51 | for csh: (setenv GREP fgrep; bzgrep string files) | ||
52 | .SH AUTHOR | ||
53 | Charles Levert (charles@comm.polymtl.ca). Adapted to bzip2 by Philippe | ||
54 | Troin <phil@fifi.org> for Debian GNU/Linux. | ||
55 | .SH "SEE ALSO" | ||
56 | grep(1), egrep(1), fgrep(1), bzdiff(1), bzmore(1), bzless(1), bzip2(1) | ||
@@ -1,7 +1,7 @@ | |||
1 | .PU | 1 | .PU |
2 | .TH bzip2 1 | 2 | .TH bzip2 1 |
3 | .SH NAME | 3 | .SH NAME |
4 | bzip2, bunzip2 \- a block-sorting file compressor, v1.0 | 4 | bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2 |
5 | .br | 5 | .br |
6 | bzcat \- decompresses files to stdout | 6 | bzcat \- decompresses files to stdout |
7 | .br | 7 | .br |
@@ -197,7 +197,7 @@ to decompress. | |||
197 | .TP | 197 | .TP |
198 | .B \-z --compress | 198 | .B \-z --compress |
199 | The complement to \-d: forces compression, regardless of the | 199 | The complement to \-d: forces compression, regardless of the |
200 | invokation name. | 200 | invocation name. |
201 | .TP | 201 | .TP |
202 | .B \-t --test | 202 | .B \-t --test |
203 | Check integrity of the specified file(s), but don't decompress them. | 203 | Check integrity of the specified file(s), but don't decompress them. |
@@ -211,6 +211,10 @@ existing output files. Also forces | |||
211 | .I bzip2 | 211 | .I bzip2 |
212 | to break hard links | 212 | to break hard links |
213 | to files, which it otherwise wouldn't do. | 213 | to files, which it otherwise wouldn't do. |
214 | |||
215 | bzip2 normally declines to decompress files which don't have the | ||
216 | correct magic header bytes. If forced (-f), however, it will pass | ||
217 | such files through unmodified. This is how GNU gzip behaves. | ||
214 | .TP | 218 | .TP |
215 | .B \-k --keep | 219 | .B \-k --keep |
216 | Keep (don't delete) input files during compression | 220 | Keep (don't delete) input files during compression |
@@ -239,9 +243,13 @@ information which is primarily of interest for diagnostic purposes. | |||
239 | .B \-L --license -V --version | 243 | .B \-L --license -V --version |
240 | Display the software version, license terms and conditions. | 244 | Display the software version, license terms and conditions. |
241 | .TP | 245 | .TP |
242 | .B \-1 to \-9 | 246 | .B \-1 (or \-\-fast) to \-9 (or \-\-best) |
243 | Set the block size to 100 k, 200 k .. 900 k when compressing. Has no | 247 | Set the block size to 100 k, 200 k .. 900 k when compressing. Has no |
244 | effect when decompressing. See MEMORY MANAGEMENT below. | 248 | effect when decompressing. See MEMORY MANAGEMENT below. |
249 | The \-\-fast and \-\-best aliases are primarily for GNU gzip | ||
250 | compatibility. In particular, \-\-fast doesn't make things | ||
251 | significantly faster. | ||
252 | And \-\-best merely selects the default behaviour. | ||
245 | .TP | 253 | .TP |
246 | .B \-- | 254 | .B \-- |
247 | Treats all subsequent arguments as file names, even if they start | 255 | Treats all subsequent arguments as file names, even if they start |
@@ -352,11 +360,11 @@ undamaged. | |||
352 | 360 | ||
353 | .I bzip2recover | 361 | .I bzip2recover |
354 | takes a single argument, the name of the damaged file, | 362 | takes a single argument, the name of the damaged file, |
355 | and writes a number of files "rec0001file.bz2", | 363 | and writes a number of files "rec00001file.bz2", |
356 | "rec0002file.bz2", etc, containing the extracted blocks. | 364 | "rec00002file.bz2", etc, containing the extracted blocks. |
357 | The output filenames are designed so that the use of | 365 | The output filenames are designed so that the use of |
358 | wildcards in subsequent processing -- for example, | 366 | wildcards in subsequent processing -- for example, |
359 | "bzip2 -dc rec*file.bz2 > recovered_data" -- lists the files in | 367 | "bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in |
360 | the correct order. | 368 | the correct order. |
361 | 369 | ||
362 | .I bzip2recover | 370 | .I bzip2recover |
@@ -397,27 +405,31 @@ I/O error messages are not as helpful as they could be. | |||
397 | tries hard to detect I/O errors and exit cleanly, but the details of | 405 | tries hard to detect I/O errors and exit cleanly, but the details of |
398 | what the problem is sometimes seem rather misleading. | 406 | what the problem is sometimes seem rather misleading. |
399 | 407 | ||
400 | This manual page pertains to version 1.0 of | 408 | This manual page pertains to version 1.0.2 of |
401 | .I bzip2. | 409 | .I bzip2. |
402 | Compressed | 410 | Compressed data created by this version is entirely forwards and |
403 | data created by this version is entirely forwards and backwards | 411 | backwards compatible with the previous public releases, versions |
404 | compatible with the previous public releases, versions 0.1pl2, 0.9.0 | 412 | 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following |
405 | and 0.9.5, | 413 | exception: 0.9.0 and above can correctly decompress multiple |
406 | but with the following exception: 0.9.0 and above can correctly | 414 | concatenated compressed files. 0.1pl2 cannot do this; it will stop |
407 | decompress multiple concatenated compressed files. 0.1pl2 cannot do | 415 | after decompressing just the first file in the stream. |
408 | this; it will stop after decompressing just the first file in the | ||
409 | stream. | ||
410 | 416 | ||
411 | .I bzip2recover | 417 | .I bzip2recover |
412 | uses 32-bit integers to represent bit positions in | 418 | versions prior to this one, 1.0.2, used 32-bit integers to represent |
413 | compressed files, so it cannot handle compressed files more than 512 | 419 | bit positions in compressed files, so it could not handle compressed |
414 | megabytes long. This could easily be fixed. | 420 | files more than 512 megabytes long. Version 1.0.2 and above uses |
421 | 64-bit ints on some platforms which support them (GNU supported | ||
422 | targets, and Windows). To establish whether or not bzip2recover was | ||
423 | built with such a limitation, run it without arguments. In any event | ||
424 | you can build yourself an unlimited version if you can recompile it | ||
425 | with MaybeUInt64 set to be an unsigned 64-bit integer. | ||
426 | |||
427 | |||
415 | 428 | ||
416 | .SH AUTHOR | 429 | .SH AUTHOR |
417 | Julian Seward, jseward@acm.org. | 430 | Julian Seward, jseward@acm.org. |
418 | 431 | ||
419 | http://sourceware.cygnus.com/bzip2 | 432 | http://sources.redhat.com/bzip2 |
420 | http://www.muraroa.demon.co.uk | ||
421 | 433 | ||
422 | The ideas embodied in | 434 | The ideas embodied in |
423 | .I bzip2 | 435 | .I bzip2 |
@@ -434,6 +446,8 @@ indebted for their help, support and advice. See the manual in the | |||
434 | source distribution for pointers to sources of documentation. Christian | 446 | source distribution for pointers to sources of documentation. Christian |
435 | von Roques encouraged me to look for faster sorting algorithms, so as to | 447 | von Roques encouraged me to look for faster sorting algorithms, so as to |
436 | speed up compression. Bela Lubkin encouraged me to improve the | 448 | speed up compression. Bela Lubkin encouraged me to improve the |
437 | worst-case compression performance. Many people sent patches, helped | 449 | worst-case compression performance. |
450 | The bz* scripts are derived from those of GNU gzip. | ||
451 | Many people sent patches, helped | ||
438 | with portability problems, lent machines, gave advice and were generally | 452 | with portability problems, lent machines, gave advice and were generally |
439 | helpful. | 453 | helpful. |
diff --git a/bzip2.1.preformatted b/bzip2.1.preformatted index 9f18339..0f20cb5 100644 --- a/bzip2.1.preformatted +++ b/bzip2.1.preformatted | |||
@@ -1,11 +1,9 @@ | |||
1 | |||
2 | |||
3 | |||
4 | bzip2(1) bzip2(1) | 1 | bzip2(1) bzip2(1) |
5 | 2 | ||
6 | 3 | ||
4 | |||
7 | NNAAMMEE | 5 | NNAAMMEE |
8 | bzip2, bunzip2 - a block-sorting file compressor, v1.0 | 6 | bzip2, bunzip2 - a block-sorting file compressor, v1.0.2 |
9 | bzcat - decompresses files to stdout | 7 | bzcat - decompresses files to stdout |
10 | bzip2recover - recovers data from damaged bzip2 files | 8 | bzip2recover - recovers data from damaged bzip2 files |
11 | 9 | ||
@@ -22,20 +20,20 @@ DDEESSCCRRIIPPTTIIOONN | |||
22 | sorting text compression algorithm, and Huffman coding. | 20 | sorting text compression algorithm, and Huffman coding. |
23 | Compression is generally considerably better than that | 21 | Compression is generally considerably better than that |
24 | achieved by more conventional LZ77/LZ78-based compressors, | 22 | achieved by more conventional LZ77/LZ78-based compressors, |
25 | and approaches the performance of the PPM family of sta- | 23 | and approaches the performance of the PPM family of sta |
26 | tistical compressors. | 24 | tistical compressors. |
27 | 25 | ||
28 | The command-line options are deliberately very similar to | 26 | The command-line options are deliberately very similar to |
29 | those of _G_N_U _g_z_i_p_, but they are not identical. | 27 | those of _G_N_U _g_z_i_p_, but they are not identical. |
30 | 28 | ||
31 | _b_z_i_p_2 expects a list of file names to accompany the com- | 29 | _b_z_i_p_2 expects a list of file names to accompany the com |
32 | mand-line flags. Each file is replaced by a compressed | 30 | mand-line flags. Each file is replaced by a compressed |
33 | version of itself, with the name "original_name.bz2". | 31 | version of itself, with the name "original_name.bz2". |
34 | Each compressed file has the same modification date, per- | 32 | Each compressed file has the same modification date, per |
35 | missions, and, when possible, ownership as the correspond- | 33 | missions, and, when possible, ownership as the correspond |
36 | ing original, so that these properties can be correctly | 34 | ing original, so that these properties can be correctly |
37 | restored at decompression time. File name handling is | 35 | restored at decompression time. File name handling is |
38 | naive in the sense that there is no mechanism for preserv- | 36 | naive in the sense that there is no mechanism for preserv |
39 | ing original file names, permissions, ownerships or dates | 37 | ing original file names, permissions, ownerships or dates |
40 | in filesystems which lack these concepts, or have serious | 38 | in filesystems which lack these concepts, or have serious |
41 | file name length restrictions, such as MS-DOS. | 39 | file name length restrictions, such as MS-DOS. |
@@ -58,18 +56,6 @@ DDEESSCCRRIIPPTTIIOONN | |||
58 | filename.bz2 becomes filename | 56 | filename.bz2 becomes filename |
59 | filename.bz becomes filename | 57 | filename.bz becomes filename |
60 | filename.tbz2 becomes filename.tar | 58 | filename.tbz2 becomes filename.tar |
61 | |||
62 | |||
63 | |||
64 | 1 | ||
65 | |||
66 | |||
67 | |||
68 | |||
69 | |||
70 | bzip2(1) bzip2(1) | ||
71 | |||
72 | |||
73 | filename.tbz becomes filename.tar | 59 | filename.tbz becomes filename.tar |
74 | anyothername becomes anyothername.out | 60 | anyothername becomes anyothername.out |
75 | 61 | ||
@@ -78,23 +64,23 @@ bzip2(1) bzip2(1) | |||
78 | guess the name of the original file, and uses the original | 64 | guess the name of the original file, and uses the original |
79 | name with _._o_u_t appended. | 65 | name with _._o_u_t appended. |
80 | 66 | ||
81 | As with compression, supplying no filenames causes decom- | 67 | As with compression, supplying no filenames causes decom |
82 | pression from standard input to standard output. | 68 | pression from standard input to standard output. |
83 | 69 | ||
84 | _b_u_n_z_i_p_2 will correctly decompress a file which is the con- | 70 | _b_u_n_z_i_p_2 will correctly decompress a file which is the con |
85 | catenation of two or more compressed files. The result is | 71 | catenation of two or more compressed files. The result is |
86 | the concatenation of the corresponding uncompressed files. | 72 | the concatenation of the corresponding uncompressed files. |
87 | Integrity testing (-t) of concatenated compressed files is | 73 | Integrity testing (-t) of concatenated compressed files is |
88 | also supported. | 74 | also supported. |
89 | 75 | ||
90 | You can also compress or decompress files to the standard | 76 | You can also compress or decompress files to the standard |
91 | output by giving the -c flag. Multiple files may be com- | 77 | output by giving the -c flag. Multiple files may be com |
92 | pressed and decompressed like this. The resulting outputs | 78 | pressed and decompressed like this. The resulting outputs |
93 | are fed sequentially to stdout. Compression of multiple | 79 | are fed sequentially to stdout. Compression of multiple |
94 | files in this manner generates a stream containing multi- | 80 | files in this manner generates a stream containing multi |
95 | ple compressed file representations. Such a stream can be | 81 | ple compressed file representations. Such a stream can be |
96 | decompressed correctly only by _b_z_i_p_2 version 0.9.0 or | 82 | decompressed correctly only by _b_z_i_p_2 version 0.9.0 or |
97 | later. Earlier versions of _b_z_i_p_2 will stop after decom- | 83 | later. Earlier versions of _b_z_i_p_2 will stop after decom |
98 | pressing the first file in the stream. | 84 | pressing the first file in the stream. |
99 | 85 | ||
100 | _b_z_c_a_t (or _b_z_i_p_2 _-_d_c_) decompresses all specified files to | 86 | _b_z_c_a_t (or _b_z_i_p_2 _-_d_c_) decompresses all specified files to |
@@ -115,7 +101,7 @@ bzip2(1) bzip2(1) | |||
115 | 101 | ||
116 | As a self-check for your protection, _b_z_i_p_2 uses 32-bit | 102 | As a self-check for your protection, _b_z_i_p_2 uses 32-bit |
117 | CRCs to make sure that the decompressed version of a file | 103 | CRCs to make sure that the decompressed version of a file |
118 | is identical to the original. This guards against corrup- | 104 | is identical to the original. This guards against corrup |
119 | tion of the compressed data, and against undetected bugs | 105 | tion of the compressed data, and against undetected bugs |
120 | in _b_z_i_p_2 (hopefully very unlikely). The chances of data | 106 | in _b_z_i_p_2 (hopefully very unlikely). The chances of data |
121 | corruption going undetected is microscopic, about one | 107 | corruption going undetected is microscopic, about one |
@@ -125,17 +111,6 @@ bzip2(1) bzip2(1) | |||
125 | you recover the original uncompressed data. You can use | 111 | you recover the original uncompressed data. You can use |
126 | _b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files. | 112 | _b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files. |
127 | 113 | ||
128 | |||
129 | |||
130 | 2 | ||
131 | |||
132 | |||
133 | |||
134 | |||
135 | |||
136 | bzip2(1) bzip2(1) | ||
137 | |||
138 | |||
139 | Return values: 0 for a normal exit, 1 for environmental | 114 | Return values: 0 for a normal exit, 1 for environmental |
140 | problems (file not found, invalid flags, I/O errors, &c), | 115 | problems (file not found, invalid flags, I/O errors, &c), |
141 | 2 to indicate a corrupt compressed file, 3 for an internal | 116 | 2 to indicate a corrupt compressed file, 3 for an internal |
@@ -154,8 +129,8 @@ OOPPTTIIOONNSS | |||
154 | and forces _b_z_i_p_2 to decompress. | 129 | and forces _b_z_i_p_2 to decompress. |
155 | 130 | ||
156 | --zz ----ccoommpprreessss | 131 | --zz ----ccoommpprreessss |
157 | The complement to -d: forces compression, regard- | 132 | The complement to -d: forces compression, |
158 | less of the invokation name. | 133 | regardless of the invocation name. |
159 | 134 | ||
160 | --tt ----tteesstt | 135 | --tt ----tteesstt |
161 | Check integrity of the specified file(s), but don't | 136 | Check integrity of the specified file(s), but don't |
@@ -168,6 +143,11 @@ OOPPTTIIOONNSS | |||
168 | forces _b_z_i_p_2 to break hard links to files, which it | 143 | forces _b_z_i_p_2 to break hard links to files, which it |
169 | otherwise wouldn't do. | 144 | otherwise wouldn't do. |
170 | 145 | ||
146 | bzip2 normally declines to decompress files which | ||
147 | don't have the correct magic header bytes. If | ||
148 | forced (-f), however, it will pass such files | ||
149 | through unmodified. This is how GNU gzip behaves. | ||
150 | |||
171 | --kk ----kkeeeepp | 151 | --kk ----kkeeeepp |
172 | Keep (don't delete) input files during compression | 152 | Keep (don't delete) input files during compression |
173 | or decompression. | 153 | or decompression. |
@@ -190,23 +170,11 @@ OOPPTTIIOONNSS | |||
190 | --qq ----qquuiieett | 170 | --qq ----qquuiieett |
191 | Suppress non-essential warning messages. Messages | 171 | Suppress non-essential warning messages. Messages |
192 | pertaining to I/O errors and other critical events | 172 | pertaining to I/O errors and other critical events |
193 | |||
194 | |||
195 | |||
196 | 3 | ||
197 | |||
198 | |||
199 | |||
200 | |||
201 | |||
202 | bzip2(1) bzip2(1) | ||
203 | |||
204 | |||
205 | will not be suppressed. | 173 | will not be suppressed. |
206 | 174 | ||
207 | --vv ----vveerrbboossee | 175 | --vv ----vveerrbboossee |
208 | Verbose mode -- show the compression ratio for each | 176 | Verbose mode -- show the compression ratio for each |
209 | file processed. Further -v's increase the ver- | 177 | file processed. Further -v's increase the ver |
210 | bosity level, spewing out lots of information which | 178 | bosity level, spewing out lots of information which |
211 | is primarily of interest for diagnostic purposes. | 179 | is primarily of interest for diagnostic purposes. |
212 | 180 | ||
@@ -214,20 +182,24 @@ bzip2(1) bzip2(1) | |||
214 | Display the software version, license terms and | 182 | Display the software version, license terms and |
215 | conditions. | 183 | conditions. |
216 | 184 | ||
217 | --11 ttoo --99 | 185 | --11 ((oorr ----ffaasstt)) ttoo --99 ((oorr ----bbeesstt)) |
218 | Set the block size to 100 k, 200 k .. 900 k when | 186 | Set the block size to 100 k, 200 k .. 900 k when |
219 | compressing. Has no effect when decompressing. | 187 | compressing. Has no effect when decompressing. |
220 | See MEMORY MANAGEMENT below. | 188 | See MEMORY MANAGEMENT below. The --fast and --best |
189 | aliases are primarily for GNU gzip compatibility. | ||
190 | In particular, --fast doesn't make things signifi | ||
191 | cantly faster. And --best merely selects the | ||
192 | default behaviour. | ||
221 | 193 | ||
222 | ---- Treats all subsequent arguments as file names, even | 194 | ---- Treats all subsequent arguments as file names, even |
223 | if they start with a dash. This is so you can han- | 195 | if they start with a dash. This is so you can han |
224 | dle files with names beginning with a dash, for | 196 | dle files with names beginning with a dash, for |
225 | example: bzip2 -- -myfilename. | 197 | example: bzip2 -- -myfilename. |
226 | 198 | ||
227 | ----rreeppeettiittiivvee--ffaasstt ----rreeppeettiittiivvee--bbeesstt | 199 | ----rreeppeettiittiivvee--ffaasstt ----rreeppeettiittiivvee--bbeesstt |
228 | These flags are redundant in versions 0.9.5 and | 200 | These flags are redundant in versions 0.9.5 and |
229 | above. They provided some coarse control over the | 201 | above. They provided some coarse control over the |
230 | behaviour of the sorting algorithm in earlier ver- | 202 | behaviour of the sorting algorithm in earlier ver |
231 | sions, which was sometimes useful. 0.9.5 and above | 203 | sions, which was sometimes useful. 0.9.5 and above |
232 | have an improved algorithm which renders these | 204 | have an improved algorithm which renders these |
233 | flags irrelevant. | 205 | flags irrelevant. |
@@ -238,7 +210,7 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT | |||
238 | affects both the compression ratio achieved, and the | 210 | affects both the compression ratio achieved, and the |
239 | amount of memory needed for compression and decompression. | 211 | amount of memory needed for compression and decompression. |
240 | The flags -1 through -9 specify the block size to be | 212 | The flags -1 through -9 specify the block size to be |
241 | 100,000 bytes through 900,000 bytes (the default) respec- | 213 | 100,000 bytes through 900,000 bytes (the default) respec |
242 | tively. At decompression time, the block size used for | 214 | tively. At decompression time, the block size used for |
243 | compression is read from the header of the compressed | 215 | compression is read from the header of the compressed |
244 | file, and _b_u_n_z_i_p_2 then allocates itself just enough memory | 216 | file, and _b_u_n_z_i_p_2 then allocates itself just enough memory |
@@ -256,18 +228,6 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT | |||
256 | 228 | ||
257 | Larger block sizes give rapidly diminishing marginal | 229 | Larger block sizes give rapidly diminishing marginal |
258 | returns. Most of the compression comes from the first two | 230 | returns. Most of the compression comes from the first two |
259 | |||
260 | |||
261 | |||
262 | 4 | ||
263 | |||
264 | |||
265 | |||
266 | |||
267 | |||
268 | bzip2(1) bzip2(1) | ||
269 | |||
270 | |||
271 | or three hundred k of block size, a fact worth bearing in | 231 | or three hundred k of block size, a fact worth bearing in |
272 | mind when using _b_z_i_p_2 on small machines. It is also | 232 | mind when using _b_z_i_p_2 on small machines. It is also |
273 | important to appreciate that the decompression memory | 233 | important to appreciate that the decompression memory |
@@ -278,13 +238,13 @@ bzip2(1) bzip2(1) | |||
278 | _b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To | 238 | _b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To |
279 | support decompression of any file on a 4 megabyte machine, | 239 | support decompression of any file on a 4 megabyte machine, |
280 | _b_u_n_z_i_p_2 has an option to decompress using approximately | 240 | _b_u_n_z_i_p_2 has an option to decompress using approximately |
281 | half this amount of memory, about 2300 kbytes. Decompres- | 241 | half this amount of memory, about 2300 kbytes. Decompres |
282 | sion speed is also halved, so you should use this option | 242 | sion speed is also halved, so you should use this option |
283 | only where necessary. The relevant flag is -s. | 243 | only where necessary. The relevant flag is -s. |
284 | 244 | ||
285 | In general, try and use the largest block size memory con- | 245 | In general, try and use the largest block size memory con |
286 | straints allow, since that maximises the compression | 246 | straints allow, since that maximises the compression |
287 | achieved. Compression and decompression speed are virtu- | 247 | achieved. Compression and decompression speed are virtu |
288 | ally unaffected by block size. | 248 | ally unaffected by block size. |
289 | 249 | ||
290 | Another significant point applies to files which fit in a | 250 | Another significant point applies to files which fit in a |
@@ -300,11 +260,11 @@ bzip2(1) bzip2(1) | |||
300 | 260 | ||
301 | Here is a table which summarises the maximum memory usage | 261 | Here is a table which summarises the maximum memory usage |
302 | for different block sizes. Also recorded is the total | 262 | for different block sizes. Also recorded is the total |
303 | compressed size for 14 files of the Calgary Text Compres- | 263 | compressed size for 14 files of the Calgary Text Compres |
304 | sion Corpus totalling 3,141,622 bytes. This column gives | 264 | sion Corpus totalling 3,141,622 bytes. This column gives |
305 | some feel for how compression varies with block size. | 265 | some feel for how compression varies with block size. |
306 | These figures tend to understate the advantage of larger | 266 | These figures tend to understate the advantage of larger |
307 | block sizes for larger files, since the Corpus is domi- | 267 | block sizes for larger files, since the Corpus is domi |
308 | nated by smaller files. | 268 | nated by smaller files. |
309 | 269 | ||
310 | Compress Decompress Decompress Corpus | 270 | Compress Decompress Decompress Corpus |
@@ -321,22 +281,9 @@ bzip2(1) bzip2(1) | |||
321 | -9 7600k 3700k 2350k 828642 | 281 | -9 7600k 3700k 2350k 828642 |
322 | 282 | ||
323 | 283 | ||
324 | |||
325 | |||
326 | |||
327 | |||
328 | 5 | ||
329 | |||
330 | |||
331 | |||
332 | |||
333 | |||
334 | bzip2(1) bzip2(1) | ||
335 | |||
336 | |||
337 | RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS | 284 | RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS |
338 | _b_z_i_p_2 compresses files in blocks, usually 900kbytes long. | 285 | _b_z_i_p_2 compresses files in blocks, usually 900kbytes long. |
339 | Each block is handled independently. If a media or trans- | 286 | Each block is handled independently. If a media or trans |
340 | mission error causes a multi-block .bz2 file to become | 287 | mission error causes a multi-block .bz2 file to become |
341 | damaged, it may be possible to recover data from the | 288 | damaged, it may be possible to recover data from the |
342 | undamaged blocks in the file. | 289 | undamaged blocks in the file. |
@@ -353,19 +300,19 @@ RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD F | |||
353 | the integrity of the resulting files, and decompress those | 300 | the integrity of the resulting files, and decompress those |
354 | which are undamaged. | 301 | which are undamaged. |
355 | 302 | ||
356 | _b_z_i_p_2_r_e_c_o_v_e_r takes a single argument, the name of the dam- | 303 | _b_z_i_p_2_r_e_c_o_v_e_r takes a single argument, the name of the dam |
357 | aged file, and writes a number of files "rec0001file.bz2", | 304 | aged file, and writes a number of files |
358 | "rec0002file.bz2", etc, containing the extracted blocks. | 305 | "rec00001file.bz2", "rec00002file.bz2", etc, containing |
359 | The output filenames are designed so that the use of | 306 | the extracted blocks. The output filenames are |
360 | wildcards in subsequent processing -- for example, "bzip2 | 307 | designed so that the use of wildcards in subsequent pro |
361 | -dc rec*file.bz2 > recovered_data" -- lists the files in | 308 | cessing -- for example, "bzip2 -dc rec*file.bz2 > recov |
362 | the correct order. | 309 | ered_data" -- processes the files in the correct order. |
363 | 310 | ||
364 | _b_z_i_p_2_r_e_c_o_v_e_r should be of most use dealing with large .bz2 | 311 | _b_z_i_p_2_r_e_c_o_v_e_r should be of most use dealing with large .bz2 |
365 | files, as these will contain many blocks. It is clearly | 312 | files, as these will contain many blocks. It is clearly |
366 | futile to use it on damaged single-block files, since a | 313 | futile to use it on damaged single-block files, since a |
367 | damaged block cannot be recovered. If you wish to min- | 314 | damaged block cannot be recovered. If you wish to min |
368 | imise any potential data loss through media or transmis- | 315 | imise any potential data loss through media or transmis |
369 | sion errors, you might consider compressing with a smaller | 316 | sion errors, you might consider compressing with a smaller |
370 | block size. | 317 | block size. |
371 | 318 | ||
@@ -379,31 +326,19 @@ PPEERRFFOORRMMAANNCCEE NNOOTTEESS | |||
379 | better than previous versions in this respect. The ratio | 326 | better than previous versions in this respect. The ratio |
380 | between worst-case and average-case compression time is in | 327 | between worst-case and average-case compression time is in |
381 | the region of 10:1. For previous versions, this figure | 328 | the region of 10:1. For previous versions, this figure |
382 | was more like 100:1. You can use the -vvvv option to mon- | 329 | was more like 100:1. You can use the -vvvv option to mon |
383 | itor progress in great detail, if you want. | 330 | itor progress in great detail, if you want. |
384 | 331 | ||
385 | Decompression speed is unaffected by these phenomena. | 332 | Decompression speed is unaffected by these phenomena. |
386 | 333 | ||
387 | _b_z_i_p_2 usually allocates several megabytes of memory to | 334 | _b_z_i_p_2 usually allocates several megabytes of memory to |
388 | operate in, and then charges all over it in a fairly ran- | 335 | operate in, and then charges all over it in a fairly ran |
389 | dom fashion. This means that performance, both for com- | 336 | dom fashion. This means that performance, both for com |
390 | pressing and decompressing, is largely determined by the | 337 | pressing and decompressing, is largely determined by the |
391 | |||
392 | |||
393 | |||
394 | 6 | ||
395 | |||
396 | |||
397 | |||
398 | |||
399 | |||
400 | bzip2(1) bzip2(1) | ||
401 | |||
402 | |||
403 | speed at which your machine can service cache misses. | 338 | speed at which your machine can service cache misses. |
404 | Because of this, small changes to the code to reduce the | 339 | Because of this, small changes to the code to reduce the |
405 | miss rate have been observed to give disproportionately | 340 | miss rate have been observed to give disproportionately |
406 | large performance improvements. I imagine _b_z_i_p_2 will per- | 341 | large performance improvements. I imagine _b_z_i_p_2 will per |
407 | form best on machines with very large caches. | 342 | form best on machines with very large caches. |
408 | 343 | ||
409 | 344 | ||
@@ -413,50 +348,51 @@ CCAAVVEEAATTSS | |||
413 | but the details of what the problem is sometimes seem | 348 | but the details of what the problem is sometimes seem |
414 | rather misleading. | 349 | rather misleading. |
415 | 350 | ||
416 | This manual page pertains to version 1.0 of _b_z_i_p_2_. Com- | 351 | This manual page pertains to version 1.0.2 of _b_z_i_p_2_. Com- |
417 | pressed data created by this version is entirely forwards | 352 | pressed data created by this version is entirely forwards |
418 | and backwards compatible with the previous public | 353 | and backwards compatible with the previous public |
419 | releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the | 354 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, |
420 | following exception: 0.9.0 and above can correctly decom- | 355 | but with the following exception: 0.9.0 and above can cor- |
421 | press multiple concatenated compressed files. 0.1pl2 can- | 356 | rectly decompress multiple concatenated compressed files. |
422 | not do this; it will stop after decompressing just the | 357 | 0.1pl2 cannot do this; it will stop after decompressing |
423 | first file in the stream. | 358 | just the first file in the stream. |
359 | |||
360 | _b_z_i_p_2_r_e_c_o_v_e_r versions prior to this one, 1.0.2, used | ||
361 | 32-bit integers to represent bit positions in compressed | ||
362 | files, so they could not handle compressed files more than | ||
363 | 512 megabytes long. Versions 1.0.2 and above use 64-bit | ||
364 | ints on some platforms which support them (GNU supported | ||
365 | targets, and Windows). To establish whether or not | ||
366 | bzip2recover was built with such a limitation, run it | ||
367 | without arguments. In any event you can build yourself an | ||
368 | unlimited version if you can recompile it with MaybeUInt64 | ||
369 | set to be an unsigned 64-bit integer. | ||
370 | |||
424 | 371 | ||
425 | _b_z_i_p_2_r_e_c_o_v_e_r uses 32-bit integers to represent bit posi- | ||
426 | tions in compressed files, so it cannot handle compressed | ||
427 | files more than 512 megabytes long. This could easily be | ||
428 | fixed. | ||
429 | 372 | ||
430 | 373 | ||
431 | AAUUTTHHOORR | 374 | AAUUTTHHOORR |
432 | Julian Seward, jseward@acm.org. | 375 | Julian Seward, jseward@acm.org. |
433 | 376 | ||
434 | http://sourceware.cygnus.com/bzip2 | 377 | http://sources.redhat.com/bzip2 |
435 | http://www.muraroa.demon.co.uk | ||
436 | 378 | ||
437 | The ideas embodied in _b_z_i_p_2 are due to (at least) the fol- | 379 | The ideas embodied in _b_z_i_p_2 are due to (at least) the fol- |
438 | lowing people: Michael Burrows and David Wheeler (for the | 380 | lowing people: Michael Burrows and David Wheeler (for the |
439 | block sorting transformation), David Wheeler (again, for | 381 | block sorting transformation), David Wheeler (again, for |
440 | the Huffman coder), Peter Fenwick (for the structured cod- | 382 | the Huffman coder), Peter Fenwick (for the structured cod- |
441 | ing model in the original _b_z_i_p_, and many refinements), and | 383 | ing model in the original _b_z_i_p_, and many refinements), and |
442 | Alistair Moffat, Radford Neal and Ian Witten (for the | 384 | Alistair Moffat, Radford Neal and Ian Witten (for the |
443 | arithmetic coder in the original _b_z_i_p_)_. I am much | 385 | arithmetic coder in the original _b_z_i_p_)_. I am much |
444 | indebted for their help, support and advice. See the man- | 386 | indebted for their help, support and advice. See the man- |
445 | ual in the source distribution for pointers to sources of | 387 | ual in the source distribution for pointers to sources of |
446 | documentation. Christian von Roques encouraged me to look | 388 | documentation. Christian von Roques encouraged me to look |
447 | for faster sorting algorithms, so as to speed up compres- | 389 | for faster sorting algorithms, so as to speed up compres- |
448 | sion. Bela Lubkin encouraged me to improve the worst-case | 390 | sion. Bela Lubkin encouraged me to improve the worst-case |
449 | compression performance. Many people sent patches, helped | 391 | compression performance. The bz* scripts are derived from |
450 | with portability problems, lent machines, gave advice and | 392 | those of GNU gzip. Many people sent patches, helped with |
451 | were generally helpful. | 393 | portability problems, lent machines, gave advice and were |
452 | 394 | generally helpful. | |
453 | |||
454 | |||
455 | |||
456 | |||
457 | |||
458 | |||
459 | 395 | ||
460 | 7 | ||
461 | 396 | ||
462 | 397 | ||
398 | bzip2(1) | ||
@@ -7,7 +7,7 @@ | |||
7 | This file is a part of bzip2 and/or libbzip2, a program and | 7 | This file is a part of bzip2 and/or libbzip2, a program and |
8 | library for lossless, block-sorting data compression. | 8 | library for lossless, block-sorting data compression. |
9 | 9 | ||
10 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 10 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
11 | 11 | ||
12 | Redistribution and use in source and binary forms, with or without | 12 | Redistribution and use in source and binary forms, with or without |
13 | modification, are permitted provided that the following conditions | 13 | modification, are permitted provided that the following conditions |
@@ -113,13 +113,16 @@ | |||
113 | /*-- | 113 | /*-- |
114 | Generic 32-bit Unix. | 114 | Generic 32-bit Unix. |
115 | Also works on 64-bit Unix boxes. | 115 | Also works on 64-bit Unix boxes. |
116 | This is the default. | ||
116 | --*/ | 117 | --*/ |
117 | #define BZ_UNIX 1 | 118 | #define BZ_UNIX 1 |
118 | 119 | ||
119 | /*-- | 120 | /*-- |
120 | Win32, as seen by Jacob Navia's excellent | 121 | Win32, as seen by Jacob Navia's excellent |
121 | port of (Chris Fraser & David Hanson)'s excellent | 122 | port of (Chris Fraser & David Hanson)'s excellent |
122 | lcc compiler. | 123 | lcc compiler. Or with MS Visual C. |
124 | This is selected automatically if compiled by a compiler which | ||
125 | defines _WIN32, not including the Cygwin GCC. | ||
123 | --*/ | 126 | --*/ |
124 | #define BZ_LCCWIN32 0 | 127 | #define BZ_LCCWIN32 0 |
125 | 128 | ||
@@ -156,6 +159,7 @@ | |||
156 | --*/ | 159 | --*/ |
157 | 160 | ||
158 | #if BZ_UNIX | 161 | #if BZ_UNIX |
162 | # include <fcntl.h> | ||
159 | # include <sys/types.h> | 163 | # include <sys/types.h> |
160 | # include <utime.h> | 164 | # include <utime.h> |
161 | # include <unistd.h> | 165 | # include <unistd.h> |
@@ -164,8 +168,9 @@ | |||
164 | 168 | ||
165 | # define PATH_SEP '/' | 169 | # define PATH_SEP '/' |
166 | # define MY_LSTAT lstat | 170 | # define MY_LSTAT lstat |
167 | # define MY_S_IFREG S_ISREG | ||
168 | # define MY_STAT stat | 171 | # define MY_STAT stat |
172 | # define MY_S_ISREG S_ISREG | ||
173 | # define MY_S_ISDIR S_ISDIR | ||
169 | 174 | ||
170 | # define APPEND_FILESPEC(root, name) \ | 175 | # define APPEND_FILESPEC(root, name) \ |
171 | root=snocString((root), (name)) | 176 | root=snocString((root), (name)) |
@@ -180,19 +185,23 @@ | |||
180 | # else | 185 | # else |
181 | # define NORETURN /**/ | 186 | # define NORETURN /**/ |
182 | # endif | 187 | # endif |
188 | |||
183 | # ifdef __DJGPP__ | 189 | # ifdef __DJGPP__ |
184 | # include <io.h> | 190 | # include <io.h> |
185 | # include <fcntl.h> | 191 | # include <fcntl.h> |
186 | # undef MY_LSTAT | 192 | # undef MY_LSTAT |
193 | # undef MY_STAT | ||
187 | # define MY_LSTAT stat | 194 | # define MY_LSTAT stat |
195 | # define MY_STAT stat | ||
188 | # undef SET_BINARY_MODE | 196 | # undef SET_BINARY_MODE |
189 | # define SET_BINARY_MODE(fd) \ | 197 | # define SET_BINARY_MODE(fd) \ |
190 | do { \ | 198 | do { \ |
191 | int retVal = setmode ( fileno ( fd ), \ | 199 | int retVal = setmode ( fileno ( fd ), \ |
192 | O_BINARY ); \ | 200 | O_BINARY ); \ |
193 | ERROR_IF_MINUS_ONE ( retVal ); \ | 201 | ERROR_IF_MINUS_ONE ( retVal ); \ |
194 | } while ( 0 ) | 202 | } while ( 0 ) |
195 | # endif | 203 | # endif |
204 | |||
196 | # ifdef __CYGWIN__ | 205 | # ifdef __CYGWIN__ |
197 | # include <io.h> | 206 | # include <io.h> |
198 | # include <fcntl.h> | 207 | # include <fcntl.h> |
@@ -200,11 +209,11 @@ | |||
200 | # define SET_BINARY_MODE(fd) \ | 209 | # define SET_BINARY_MODE(fd) \ |
201 | do { \ | 210 | do { \ |
202 | int retVal = setmode ( fileno ( fd ), \ | 211 | int retVal = setmode ( fileno ( fd ), \ |
203 | O_BINARY ); \ | 212 | O_BINARY ); \ |
204 | ERROR_IF_MINUS_ONE ( retVal ); \ | 213 | ERROR_IF_MINUS_ONE ( retVal ); \ |
205 | } while ( 0 ) | 214 | } while ( 0 ) |
206 | # endif | 215 | # endif |
207 | #endif | 216 | #endif /* BZ_UNIX */ |
208 | 217 | ||
209 | 218 | ||
210 | 219 | ||
@@ -217,46 +226,23 @@ | |||
217 | # define PATH_SEP '\\' | 226 | # define PATH_SEP '\\' |
218 | # define MY_LSTAT _stat | 227 | # define MY_LSTAT _stat |
219 | # define MY_STAT _stat | 228 | # define MY_STAT _stat |
220 | # define MY_S_IFREG(x) ((x) & _S_IFREG) | 229 | # define MY_S_ISREG(x) ((x) & _S_IFREG) |
230 | # define MY_S_ISDIR(x) ((x) & _S_IFDIR) | ||
221 | 231 | ||
222 | # define APPEND_FLAG(root, name) \ | 232 | # define APPEND_FLAG(root, name) \ |
223 | root=snocString((root), (name)) | 233 | root=snocString((root), (name)) |
224 | 234 | ||
225 | # if 0 | ||
226 | /*-- lcc-win32 seems to expand wildcards itself --*/ | ||
227 | # define APPEND_FILESPEC(root, spec) \ | ||
228 | do { \ | ||
229 | if ((spec)[0] == '-') { \ | ||
230 | root = snocString((root), (spec)); \ | ||
231 | } else { \ | ||
232 | struct _finddata_t c_file; \ | ||
233 | long hFile; \ | ||
234 | hFile = _findfirst((spec), &c_file); \ | ||
235 | if ( hFile == -1L ) { \ | ||
236 | root = snocString ((root), (spec)); \ | ||
237 | } else { \ | ||
238 | int anInt = 0; \ | ||
239 | while ( anInt == 0 ) { \ | ||
240 | root = snocString((root), \ | ||
241 | &c_file.name[0]); \ | ||
242 | anInt = _findnext(hFile, &c_file); \ | ||
243 | } \ | ||
244 | } \ | ||
245 | } \ | ||
246 | } while ( 0 ) | ||
247 | # else | ||
248 | # define APPEND_FILESPEC(root, name) \ | 235 | # define APPEND_FILESPEC(root, name) \ |
249 | root = snocString ((root), (name)) | 236 | root = snocString ((root), (name)) |
250 | # endif | ||
251 | 237 | ||
252 | # define SET_BINARY_MODE(fd) \ | 238 | # define SET_BINARY_MODE(fd) \ |
253 | do { \ | 239 | do { \ |
254 | int retVal = setmode ( fileno ( fd ), \ | 240 | int retVal = setmode ( fileno ( fd ), \ |
255 | O_BINARY ); \ | 241 | O_BINARY ); \ |
256 | ERROR_IF_MINUS_ONE ( retVal ); \ | 242 | ERROR_IF_MINUS_ONE ( retVal ); \ |
257 | } while ( 0 ) | 243 | } while ( 0 ) |
258 | 244 | ||
259 | #endif | 245 | #endif /* BZ_LCCWIN32 */ |
260 | 246 | ||
261 | 247 | ||
262 | /*---------------------------------------------*/ | 248 | /*---------------------------------------------*/ |
@@ -338,6 +324,7 @@ typedef | |||
338 | struct { UChar b[8]; } | 324 | struct { UChar b[8]; } |
339 | UInt64; | 325 | UInt64; |
340 | 326 | ||
327 | |||
341 | static | 328 | static |
342 | void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 ) | 329 | void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 ) |
343 | { | 330 | { |
@@ -351,6 +338,7 @@ void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 ) | |||
351 | n->b[0] = (UChar) (lo32 & 0xFF); | 338 | n->b[0] = (UChar) (lo32 & 0xFF); |
352 | } | 339 | } |
353 | 340 | ||
341 | |||
354 | static | 342 | static |
355 | double uInt64_to_double ( UInt64* n ) | 343 | double uInt64_to_double ( UInt64* n ) |
356 | { | 344 | { |
@@ -364,77 +352,6 @@ double uInt64_to_double ( UInt64* n ) | |||
364 | return sum; | 352 | return sum; |
365 | } | 353 | } |
366 | 354 | ||
367 | static | ||
368 | void uInt64_add ( UInt64* src, UInt64* dst ) | ||
369 | { | ||
370 | Int32 i; | ||
371 | Int32 carry = 0; | ||
372 | for (i = 0; i < 8; i++) { | ||
373 | carry += ( ((Int32)src->b[i]) + ((Int32)dst->b[i]) ); | ||
374 | dst->b[i] = (UChar)(carry & 0xFF); | ||
375 | carry >>= 8; | ||
376 | } | ||
377 | } | ||
378 | |||
379 | static | ||
380 | void uInt64_sub ( UInt64* src, UInt64* dst ) | ||
381 | { | ||
382 | Int32 t, i; | ||
383 | Int32 borrow = 0; | ||
384 | for (i = 0; i < 8; i++) { | ||
385 | t = ((Int32)dst->b[i]) - ((Int32)src->b[i]) - borrow; | ||
386 | if (t < 0) { | ||
387 | dst->b[i] = (UChar)(t + 256); | ||
388 | borrow = 1; | ||
389 | } else { | ||
390 | dst->b[i] = (UChar)t; | ||
391 | borrow = 0; | ||
392 | } | ||
393 | } | ||
394 | } | ||
395 | |||
396 | static | ||
397 | void uInt64_mul ( UInt64* a, UInt64* b, UInt64* r_hi, UInt64* r_lo ) | ||
398 | { | ||
399 | UChar sum[16]; | ||
400 | Int32 ia, ib, carry; | ||
401 | for (ia = 0; ia < 16; ia++) sum[ia] = 0; | ||
402 | for (ia = 0; ia < 8; ia++) { | ||
403 | carry = 0; | ||
404 | for (ib = 0; ib < 8; ib++) { | ||
405 | carry += ( ((Int32)sum[ia+ib]) | ||
406 | + ((Int32)a->b[ia]) * ((Int32)b->b[ib]) ); | ||
407 | sum[ia+ib] = (UChar)(carry & 0xFF); | ||
408 | carry >>= 8; | ||
409 | } | ||
410 | sum[ia+8] = (UChar)(carry & 0xFF); | ||
411 | if ((carry >>= 8) != 0) panic ( "uInt64_mul" ); | ||
412 | } | ||
413 | |||
414 | for (ia = 0; ia < 8; ia++) r_hi->b[ia] = sum[ia+8]; | ||
415 | for (ia = 0; ia < 8; ia++) r_lo->b[ia] = sum[ia]; | ||
416 | } | ||
417 | |||
418 | |||
419 | static | ||
420 | void uInt64_shr1 ( UInt64* n ) | ||
421 | { | ||
422 | Int32 i; | ||
423 | for (i = 0; i < 8; i++) { | ||
424 | n->b[i] >>= 1; | ||
425 | if (i < 7 && (n->b[i+1] & 1)) n->b[i] |= 0x80; | ||
426 | } | ||
427 | } | ||
428 | |||
429 | static | ||
430 | void uInt64_shl1 ( UInt64* n ) | ||
431 | { | ||
432 | Int32 i; | ||
433 | for (i = 7; i >= 0; i--) { | ||
434 | n->b[i] <<= 1; | ||
435 | if (i > 0 && (n->b[i-1] & 0x80)) n->b[i]++; | ||
436 | } | ||
437 | } | ||
438 | 355 | ||
439 | static | 356 | static |
440 | Bool uInt64_isZero ( UInt64* n ) | 357 | Bool uInt64_isZero ( UInt64* n ) |
@@ -445,49 +362,23 @@ Bool uInt64_isZero ( UInt64* n ) | |||
445 | return 1; | 362 | return 1; |
446 | } | 363 | } |
447 | 364 | ||
448 | static | 365 | |
366 | /* Divide *n by 10, and return the remainder. */ | ||
367 | static | ||
449 | Int32 uInt64_qrm10 ( UInt64* n ) | 368 | Int32 uInt64_qrm10 ( UInt64* n ) |
450 | { | 369 | { |
451 | /* Divide *n by 10, and return the remainder. Long division | 370 | UInt32 rem, tmp; |
452 | is difficult, so we cheat and instead multiply by | ||
453 | 0xCCCC CCCC CCCC CCCD, which is 0.8 (viz, 0.1 << 3). | ||
454 | */ | ||
455 | Int32 i; | 371 | Int32 i; |
456 | UInt64 tmp1, tmp2, n_orig, zero_point_eight; | 372 | rem = 0; |
457 | 373 | for (i = 7; i >= 0; i--) { | |
458 | zero_point_eight.b[1] = zero_point_eight.b[2] = | 374 | tmp = rem * 256 + n->b[i]; |
459 | zero_point_eight.b[3] = zero_point_eight.b[4] = | 375 | n->b[i] = tmp / 10; |
460 | zero_point_eight.b[5] = zero_point_eight.b[6] = | 376 | rem = tmp % 10; |
461 | zero_point_eight.b[7] = 0xCC; | 377 | } |
462 | zero_point_eight.b[0] = 0xCD; | 378 | return rem; |
463 | |||
464 | n_orig = *n; | ||
465 | |||
466 | /* divide n by 10, | ||
467 | by multiplying by 0.8 and then shifting right 3 times */ | ||
468 | uInt64_mul ( n, &zero_point_eight, &tmp1, &tmp2 ); | ||
469 | uInt64_shr1(&tmp1); uInt64_shr1(&tmp1); uInt64_shr1(&tmp1); | ||
470 | *n = tmp1; | ||
471 | |||
472 | /* tmp1 = 8*n, tmp2 = 2*n */ | ||
473 | uInt64_shl1(&tmp1); uInt64_shl1(&tmp1); uInt64_shl1(&tmp1); | ||
474 | tmp2 = *n; uInt64_shl1(&tmp2); | ||
475 | |||
476 | /* tmp1 = 10*n */ | ||
477 | uInt64_add ( &tmp2, &tmp1 ); | ||
478 | |||
479 | /* n_orig = n_orig - 10*n */ | ||
480 | uInt64_sub ( &tmp1, &n_orig ); | ||
481 | |||
482 | /* n_orig should now hold quotient, in range 0 .. 9 */ | ||
483 | for (i = 7; i >= 1; i--) | ||
484 | if (n_orig.b[i] != 0) panic ( "uInt64_qrm10(1)" ); | ||
485 | if (n_orig.b[0] > 9) | ||
486 | panic ( "uInt64_qrm10(2)" ); | ||
487 | |||
488 | return (int)n_orig.b[0]; | ||
489 | } | 379 | } |
490 | 380 | ||
381 | |||
491 | /* ... and the Whole Entire Point of all this UInt64 stuff is | 382 | /* ... and the Whole Entire Point of all this UInt64 stuff is |
492 | so that we can supply the following function. | 383 | so that we can supply the following function. |
493 | */ | 384 | */ |
@@ -504,7 +395,8 @@ void uInt64_toAscii ( char* outbuf, UInt64* n ) | |||
504 | nBuf++; | 395 | nBuf++; |
505 | } while (!uInt64_isZero(&n_copy)); | 396 | } while (!uInt64_isZero(&n_copy)); |
506 | outbuf[nBuf] = 0; | 397 | outbuf[nBuf] = 0; |
507 | for (i = 0; i < nBuf; i++) outbuf[i] = buf[nBuf-i-1]; | 398 | for (i = 0; i < nBuf; i++) |
399 | outbuf[i] = buf[nBuf-i-1]; | ||
508 | } | 400 | } |
509 | 401 | ||
510 | 402 | ||
@@ -566,35 +458,38 @@ void compressStream ( FILE *stream, FILE *zStream ) | |||
566 | if (ret == EOF) goto errhandler_io; | 458 | if (ret == EOF) goto errhandler_io; |
567 | if (zStream != stdout) { | 459 | if (zStream != stdout) { |
568 | ret = fclose ( zStream ); | 460 | ret = fclose ( zStream ); |
461 | outputHandleJustInCase = NULL; | ||
569 | if (ret == EOF) goto errhandler_io; | 462 | if (ret == EOF) goto errhandler_io; |
570 | } | 463 | } |
464 | outputHandleJustInCase = NULL; | ||
571 | if (ferror(stream)) goto errhandler_io; | 465 | if (ferror(stream)) goto errhandler_io; |
572 | ret = fclose ( stream ); | 466 | ret = fclose ( stream ); |
573 | if (ret == EOF) goto errhandler_io; | 467 | if (ret == EOF) goto errhandler_io; |
574 | 468 | ||
575 | if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0) | ||
576 | nbytes_in_lo32 = 1; | ||
577 | |||
578 | if (verbosity >= 1) { | 469 | if (verbosity >= 1) { |
579 | Char buf_nin[32], buf_nout[32]; | 470 | if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0) { |
580 | UInt64 nbytes_in, nbytes_out; | 471 | fprintf ( stderr, " no data compressed.\n"); |
581 | double nbytes_in_d, nbytes_out_d; | 472 | } else { |
582 | uInt64_from_UInt32s ( &nbytes_in, | 473 | Char buf_nin[32], buf_nout[32]; |
583 | nbytes_in_lo32, nbytes_in_hi32 ); | 474 | UInt64 nbytes_in, nbytes_out; |
584 | uInt64_from_UInt32s ( &nbytes_out, | 475 | double nbytes_in_d, nbytes_out_d; |
585 | nbytes_out_lo32, nbytes_out_hi32 ); | 476 | uInt64_from_UInt32s ( &nbytes_in, |
586 | nbytes_in_d = uInt64_to_double ( &nbytes_in ); | 477 | nbytes_in_lo32, nbytes_in_hi32 ); |
587 | nbytes_out_d = uInt64_to_double ( &nbytes_out ); | 478 | uInt64_from_UInt32s ( &nbytes_out, |
588 | uInt64_toAscii ( buf_nin, &nbytes_in ); | 479 | nbytes_out_lo32, nbytes_out_hi32 ); |
589 | uInt64_toAscii ( buf_nout, &nbytes_out ); | 480 | nbytes_in_d = uInt64_to_double ( &nbytes_in ); |
590 | fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, " | 481 | nbytes_out_d = uInt64_to_double ( &nbytes_out ); |
591 | "%5.2f%% saved, %s in, %s out.\n", | 482 | uInt64_toAscii ( buf_nin, &nbytes_in ); |
592 | nbytes_in_d / nbytes_out_d, | 483 | uInt64_toAscii ( buf_nout, &nbytes_out ); |
593 | (8.0 * nbytes_out_d) / nbytes_in_d, | 484 | fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, " |
594 | 100.0 * (1.0 - nbytes_out_d / nbytes_in_d), | 485 | "%5.2f%% saved, %s in, %s out.\n", |
595 | buf_nin, | 486 | nbytes_in_d / nbytes_out_d, |
596 | buf_nout | 487 | (8.0 * nbytes_out_d) / nbytes_in_d, |
597 | ); | 488 | 100.0 * (1.0 - nbytes_out_d / nbytes_in_d), |
489 | buf_nin, | ||
490 | buf_nout | ||
491 | ); | ||
492 | } | ||
598 | } | 493 | } |
599 | 494 | ||
600 | return; | 495 | return; |
@@ -652,7 +547,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) | |||
652 | 547 | ||
653 | while (bzerr == BZ_OK) { | 548 | while (bzerr == BZ_OK) { |
654 | nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); | 549 | nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); |
655 | if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler; | 550 | if (bzerr == BZ_DATA_ERROR_MAGIC) goto trycat; |
656 | if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0) | 551 | if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0) |
657 | fwrite ( obuf, sizeof(UChar), nread, stream ); | 552 | fwrite ( obuf, sizeof(UChar), nread, stream ); |
658 | if (ferror(stream)) goto errhandler_io; | 553 | if (ferror(stream)) goto errhandler_io; |
@@ -668,9 +563,9 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) | |||
668 | if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); | 563 | if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); |
669 | 564 | ||
670 | if (nUnused == 0 && myfeof(zStream)) break; | 565 | if (nUnused == 0 && myfeof(zStream)) break; |
671 | |||
672 | } | 566 | } |
673 | 567 | ||
568 | closeok: | ||
674 | if (ferror(zStream)) goto errhandler_io; | 569 | if (ferror(zStream)) goto errhandler_io; |
675 | ret = fclose ( zStream ); | 570 | ret = fclose ( zStream ); |
676 | if (ret == EOF) goto errhandler_io; | 571 | if (ret == EOF) goto errhandler_io; |
@@ -680,11 +575,26 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) | |||
680 | if (ret != 0) goto errhandler_io; | 575 | if (ret != 0) goto errhandler_io; |
681 | if (stream != stdout) { | 576 | if (stream != stdout) { |
682 | ret = fclose ( stream ); | 577 | ret = fclose ( stream ); |
578 | outputHandleJustInCase = NULL; | ||
683 | if (ret == EOF) goto errhandler_io; | 579 | if (ret == EOF) goto errhandler_io; |
684 | } | 580 | } |
581 | outputHandleJustInCase = NULL; | ||
685 | if (verbosity >= 2) fprintf ( stderr, "\n " ); | 582 | if (verbosity >= 2) fprintf ( stderr, "\n " ); |
686 | return True; | 583 | return True; |
687 | 584 | ||
585 | trycat: | ||
586 | if (forceOverwrite) { | ||
587 | rewind(zStream); | ||
588 | while (True) { | ||
589 | if (myfeof(zStream)) break; | ||
590 | nread = fread ( obuf, sizeof(UChar), 5000, zStream ); | ||
591 | if (ferror(zStream)) goto errhandler_io; | ||
592 | if (nread > 0) fwrite ( obuf, sizeof(UChar), nread, stream ); | ||
593 | if (ferror(stream)) goto errhandler_io; | ||
594 | } | ||
595 | goto closeok; | ||
596 | } | ||
597 | |||
688 | errhandler: | 598 | errhandler: |
689 | BZ2_bzReadClose ( &bzerr_dummy, bzf ); | 599 | BZ2_bzReadClose ( &bzerr_dummy, bzf ); |
690 | switch (bzerr) { | 600 | switch (bzerr) { |
@@ -832,7 +742,7 @@ void cadvise ( void ) | |||
832 | stderr, | 742 | stderr, |
833 | "\nIt is possible that the compressed file(s) have become corrupted.\n" | 743 | "\nIt is possible that the compressed file(s) have become corrupted.\n" |
834 | "You can use the -tvv option to test integrity of such files.\n\n" | 744 | "You can use the -tvv option to test integrity of such files.\n\n" |
835 | "You can use the `bzip2recover' program to *attempt* to recover\n" | 745 | "You can use the `bzip2recover' program to attempt to recover\n" |
836 | "data from undamaged sections of corrupted files.\n\n" | 746 | "data from undamaged sections of corrupted files.\n\n" |
837 | ); | 747 | ); |
838 | } | 748 | } |
@@ -855,28 +765,55 @@ void showFileNames ( void ) | |||
855 | static | 765 | static |
856 | void cleanUpAndFail ( Int32 ec ) | 766 | void cleanUpAndFail ( Int32 ec ) |
857 | { | 767 | { |
858 | IntNative retVal; | 768 | IntNative retVal; |
769 | struct MY_STAT statBuf; | ||
859 | 770 | ||
860 | if ( srcMode == SM_F2F | 771 | if ( srcMode == SM_F2F |
861 | && opMode != OM_TEST | 772 | && opMode != OM_TEST |
862 | && deleteOutputOnInterrupt ) { | 773 | && deleteOutputOnInterrupt ) { |
863 | if (noisy) | 774 | |
864 | fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n", | 775 | /* Check whether input file still exists. Delete output file |
865 | progName, outName ); | 776 | only if input exists to avoid loss of data. Joerg Prante, 5 |
866 | if (outputHandleJustInCase != NULL) | 777 | January 2002. (JRS 06-Jan-2002: other changes in 1.0.2 mean |
867 | fclose ( outputHandleJustInCase ); | 778 | this is less likely to happen. But to be ultra-paranoid, we |
868 | retVal = remove ( outName ); | 779 | do the check anyway.) */ |
869 | if (retVal != 0) | 780 | retVal = MY_STAT ( inName, &statBuf ); |
781 | if (retVal == 0) { | ||
782 | if (noisy) | ||
783 | fprintf ( stderr, | ||
784 | "%s: Deleting output file %s, if it exists.\n", | ||
785 | progName, outName ); | ||
786 | if (outputHandleJustInCase != NULL) | ||
787 | fclose ( outputHandleJustInCase ); | ||
788 | retVal = remove ( outName ); | ||
789 | if (retVal != 0) | ||
790 | fprintf ( stderr, | ||
791 | "%s: WARNING: deletion of output file " | ||
792 | "(apparently) failed.\n", | ||
793 | progName ); | ||
794 | } else { | ||
870 | fprintf ( stderr, | 795 | fprintf ( stderr, |
871 | "%s: WARNING: deletion of output file (apparently) failed.\n", | 796 | "%s: WARNING: deletion of output file suppressed\n", |
797 | progName ); | ||
798 | fprintf ( stderr, | ||
799 | "%s: since input file no longer exists. Output file\n", | ||
872 | progName ); | 800 | progName ); |
801 | fprintf ( stderr, | ||
802 | "%s: `%s' may be incomplete.\n", | ||
803 | progName, outName ); | ||
804 | fprintf ( stderr, | ||
805 | "%s: I suggest doing an integrity test (bzip2 -tv)" | ||
806 | " of it.\n", | ||
807 | progName ); | ||
808 | } | ||
873 | } | 809 | } |
810 | |||
874 | if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) { | 811 | if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) { |
875 | fprintf ( stderr, | 812 | fprintf ( stderr, |
876 | "%s: WARNING: some files have not been processed:\n" | 813 | "%s: WARNING: some files have not been processed:\n" |
877 | "\t%d specified on command line, %d not processed yet.\n\n", | 814 | "%s: %d specified on command line, %d not processed yet.\n\n", |
878 | progName, numFileNames, | 815 | progName, progName, |
879 | numFileNames - numFilesProcessed ); | 816 | numFileNames, numFileNames - numFilesProcessed ); |
880 | } | 817 | } |
881 | setExit(ec); | 818 | setExit(ec); |
882 | exit(exitValue); | 819 | exit(exitValue); |
@@ -915,14 +852,16 @@ void crcError ( void ) | |||
915 | static | 852 | static |
916 | void compressedStreamEOF ( void ) | 853 | void compressedStreamEOF ( void ) |
917 | { | 854 | { |
918 | fprintf ( stderr, | 855 | if (noisy) { |
919 | "\n%s: Compressed file ends unexpectedly;\n\t" | 856 | fprintf ( stderr, |
920 | "perhaps it is corrupted? *Possible* reason follows.\n", | 857 | "\n%s: Compressed file ends unexpectedly;\n\t" |
921 | progName ); | 858 | "perhaps it is corrupted? *Possible* reason follows.\n", |
922 | perror ( progName ); | 859 | progName ); |
923 | showFileNames(); | 860 | perror ( progName ); |
924 | cadvise(); | 861 | showFileNames(); |
925 | cleanUpAndFail( 2 ); | 862 | cadvise(); |
863 | } | ||
864 | cleanUpAndFail( 2 ); | ||
926 | } | 865 | } |
927 | 866 | ||
928 | 867 | ||
@@ -1038,6 +977,11 @@ void configError ( void ) | |||
1038 | /*--- The main driver machinery ---*/ | 977 | /*--- The main driver machinery ---*/ |
1039 | /*---------------------------------------------------*/ | 978 | /*---------------------------------------------------*/ |
1040 | 979 | ||
980 | /* All rather crufty. The main problem is that input files | ||
981 | are stat()d multiple times before use. This should be | ||
982 | cleaned up. | ||
983 | */ | ||
984 | |||
1041 | /*---------------------------------------------*/ | 985 | /*---------------------------------------------*/ |
1042 | static | 986 | static |
1043 | void pad ( Char *s ) | 987 | void pad ( Char *s ) |
@@ -1082,6 +1026,32 @@ Bool fileExists ( Char* name ) | |||
1082 | 1026 | ||
1083 | 1027 | ||
1084 | /*---------------------------------------------*/ | 1028 | /*---------------------------------------------*/ |
1029 | /* Open an output file safely with O_EXCL and good permissions. | ||
1030 | This avoids a race condition in versions < 1.0.2, in which | ||
1031 | the file was first opened and then had its interim permissions | ||
1032 | set safely. We instead use open() to create the file with | ||
1033 | the interim permissions required. (rw- --- ---). | ||
1034 | |||
1035 | For non-Unix platforms, if we are not worrying about | ||
1036 | security issues, this simply behaves like fopen. | ||
1037 | */ | ||
1038 | FILE* fopen_output_safely ( Char* name, const char* mode ) | ||
1039 | { | ||
1040 | # if BZ_UNIX | ||
1041 | FILE* fp; | ||
1042 | IntNative fh; | ||
1043 | fh = open(name, O_WRONLY|O_CREAT|O_EXCL, S_IWUSR|S_IRUSR); | ||
1044 | if (fh == -1) return NULL; | ||
1045 | fp = fdopen(fh, mode); | ||
1046 | if (fp == NULL) close(fh); | ||
1047 | return fp; | ||
1048 | # else | ||
1049 | return fopen(name, mode); | ||
1050 | # endif | ||
1051 | } | ||
1052 | |||
1053 | |||
1054 | /*---------------------------------------------*/ | ||
1085 | /*-- | 1055 | /*-- |
1086 | if in doubt, return True | 1056 | if in doubt, return True |
1087 | --*/ | 1057 | --*/ |
@@ -1093,7 +1063,7 @@ Bool notAStandardFile ( Char* name ) | |||
1093 | 1063 | ||
1094 | i = MY_LSTAT ( name, &statBuf ); | 1064 | i = MY_LSTAT ( name, &statBuf ); |
1095 | if (i != 0) return True; | 1065 | if (i != 0) return True; |
1096 | if (MY_S_IFREG(statBuf.st_mode)) return False; | 1066 | if (MY_S_ISREG(statBuf.st_mode)) return False; |
1097 | return True; | 1067 | return True; |
1098 | } | 1068 | } |
1099 | 1069 | ||
@@ -1115,42 +1085,66 @@ Int32 countHardLinks ( Char* name ) | |||
1115 | 1085 | ||
1116 | 1086 | ||
1117 | /*---------------------------------------------*/ | 1087 | /*---------------------------------------------*/ |
1088 | /* Copy modification date, access date, permissions and owner from the | ||
1089 | source to destination file. We have to copy this meta-info off | ||
1090 | into fileMetaInfo before starting to compress / decompress it, | ||
1091 | because doing it afterwards means we get the wrong access time. | ||
1092 | |||
1093 | To complicate matters, in compress() and decompress() below, the | ||
1094 | sequence of tests preceding the call to saveInputFileMetaInfo() | ||
1095 | involves calling fileExists(), which in turn establishes its result | ||
1096 | by attempting to fopen() the file, and if successful, immediately | ||
1097 | fclose()ing it again. So we have to assume that the fopen() call | ||
1098 | does not cause the access time field to be updated. | ||
1099 | |||
1100 | Reading of the man page for stat() (man 2 stat) on RedHat 7.2 seems | ||
1101 | to imply that merely doing open() will not affect the access time. | ||
1102 | Therefore we merely need to hope that the C library only does | ||
1103 | open() as a result of fopen(), and not any kind of read()-ahead | ||
1104 | cleverness. | ||
1105 | |||
1106 | It sounds pretty fragile to me. Whether this carries across | ||
1107 | robustly to arbitrary Unix-like platforms (or even works robustly | ||
1108 | on this one, RedHat 7.2) is unknown to me. Nevertheless ... | ||
1109 | */ | ||
1110 | #if BZ_UNIX | ||
1111 | static | ||
1112 | struct MY_STAT fileMetaInfo; | ||
1113 | #endif | ||
1114 | |||
1118 | static | 1115 | static |
1119 | void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName ) | 1116 | void saveInputFileMetaInfo ( Char *srcName ) |
1120 | { | 1117 | { |
1121 | #if BZ_UNIX | 1118 | # if BZ_UNIX |
1119 | IntNative retVal; | ||
1120 | /* Note use of stat here, not lstat. */ | ||
1121 | retVal = MY_STAT( srcName, &fileMetaInfo ); | ||
1122 | ERROR_IF_NOT_ZERO ( retVal ); | ||
1123 | # endif | ||
1124 | } | ||
1125 | |||
1126 | |||
1127 | static | ||
1128 | void applySavedMetaInfoToOutputFile ( Char *dstName ) | ||
1129 | { | ||
1130 | # if BZ_UNIX | ||
1122 | IntNative retVal; | 1131 | IntNative retVal; |
1123 | struct MY_STAT statBuf; | ||
1124 | struct utimbuf uTimBuf; | 1132 | struct utimbuf uTimBuf; |
1125 | 1133 | ||
1126 | retVal = MY_LSTAT ( srcName, &statBuf ); | 1134 | uTimBuf.actime = fileMetaInfo.st_atime; |
1127 | ERROR_IF_NOT_ZERO ( retVal ); | 1135 | uTimBuf.modtime = fileMetaInfo.st_mtime; |
1128 | uTimBuf.actime = statBuf.st_atime; | ||
1129 | uTimBuf.modtime = statBuf.st_mtime; | ||
1130 | 1136 | ||
1131 | retVal = chmod ( dstName, statBuf.st_mode ); | 1137 | retVal = chmod ( dstName, fileMetaInfo.st_mode ); |
1132 | ERROR_IF_NOT_ZERO ( retVal ); | 1138 | ERROR_IF_NOT_ZERO ( retVal ); |
1133 | 1139 | ||
1134 | retVal = utime ( dstName, &uTimBuf ); | 1140 | retVal = utime ( dstName, &uTimBuf ); |
1135 | ERROR_IF_NOT_ZERO ( retVal ); | 1141 | ERROR_IF_NOT_ZERO ( retVal ); |
1136 | 1142 | ||
1137 | retVal = chown ( dstName, statBuf.st_uid, statBuf.st_gid ); | 1143 | retVal = chown ( dstName, fileMetaInfo.st_uid, fileMetaInfo.st_gid ); |
1138 | /* chown() will in many cases return with EPERM, which can | 1144 | /* chown() will in many cases return with EPERM, which can |
1139 | be safely ignored. | 1145 | be safely ignored. |
1140 | */ | 1146 | */ |
1141 | #endif | 1147 | # endif |
1142 | } | ||
1143 | |||
1144 | |||
1145 | /*---------------------------------------------*/ | ||
1146 | static | ||
1147 | void setInterimPermissions ( Char *dstName ) | ||
1148 | { | ||
1149 | #if BZ_UNIX | ||
1150 | IntNative retVal; | ||
1151 | retVal = chmod ( dstName, S_IRUSR | S_IWUSR ); | ||
1152 | ERROR_IF_NOT_ZERO ( retVal ); | ||
1153 | #endif | ||
1154 | } | 1148 | } |
1155 | 1149 | ||
1156 | 1150 | ||
@@ -1158,10 +1152,19 @@ void setInterimPermissions ( Char *dstName ) | |||
1158 | static | 1152 | static |
1159 | Bool containsDubiousChars ( Char* name ) | 1153 | Bool containsDubiousChars ( Char* name ) |
1160 | { | 1154 | { |
1161 | Bool cdc = False; | 1155 | # if BZ_UNIX |
1156 | /* On unix, files can contain any characters and the file expansion | ||
1157 | * is performed by the shell. | ||
1158 | */ | ||
1159 | return False; | ||
1160 | # else /* ! BZ_UNIX */ | ||
1161 | /* On non-unix (Win* platforms), wildcard characters are not allowed in | ||
1162 | * filenames. | ||
1163 | */ | ||
1162 | for (; *name != '\0'; name++) | 1164 | for (; *name != '\0'; name++) |
1163 | if (*name == '?' || *name == '*') cdc = True; | 1165 | if (*name == '?' || *name == '*') return True; |
1164 | return cdc; | 1166 | return False; |
1167 | # endif /* BZ_UNIX */ | ||
1165 | } | 1168 | } |
1166 | 1169 | ||
1167 | 1170 | ||
@@ -1201,6 +1204,7 @@ void compress ( Char *name ) | |||
1201 | FILE *inStr; | 1204 | FILE *inStr; |
1202 | FILE *outStr; | 1205 | FILE *outStr; |
1203 | Int32 n, i; | 1206 | Int32 n, i; |
1207 | struct MY_STAT statBuf; | ||
1204 | 1208 | ||
1205 | deleteOutputOnInterrupt = False; | 1209 | deleteOutputOnInterrupt = False; |
1206 | 1210 | ||
@@ -1246,6 +1250,16 @@ void compress ( Char *name ) | |||
1246 | return; | 1250 | return; |
1247 | } | 1251 | } |
1248 | } | 1252 | } |
1253 | if ( srcMode == SM_F2F || srcMode == SM_F2O ) { | ||
1254 | MY_STAT(inName, &statBuf); | ||
1255 | if ( MY_S_ISDIR(statBuf.st_mode) ) { | ||
1256 | fprintf( stderr, | ||
1257 | "%s: Input file %s is a directory.\n", | ||
1258 | progName,inName); | ||
1259 | setExit(1); | ||
1260 | return; | ||
1261 | } | ||
1262 | } | ||
1249 | if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { | 1263 | if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { |
1250 | if (noisy) | 1264 | if (noisy) |
1251 | fprintf ( stderr, "%s: Input file %s is not a normal file.\n", | 1265 | fprintf ( stderr, "%s: Input file %s is not a normal file.\n", |
@@ -1253,11 +1267,15 @@ void compress ( Char *name ) | |||
1253 | setExit(1); | 1267 | setExit(1); |
1254 | return; | 1268 | return; |
1255 | } | 1269 | } |
1256 | if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) { | 1270 | if ( srcMode == SM_F2F && fileExists ( outName ) ) { |
1257 | fprintf ( stderr, "%s: Output file %s already exists.\n", | 1271 | if (forceOverwrite) { |
1258 | progName, outName ); | 1272 | remove(outName); |
1259 | setExit(1); | 1273 | } else { |
1260 | return; | 1274 | fprintf ( stderr, "%s: Output file %s already exists.\n", |
1275 | progName, outName ); | ||
1276 | setExit(1); | ||
1277 | return; | ||
1278 | } | ||
1261 | } | 1279 | } |
1262 | if ( srcMode == SM_F2F && !forceOverwrite && | 1280 | if ( srcMode == SM_F2F && !forceOverwrite && |
1263 | (n=countHardLinks ( inName )) > 0) { | 1281 | (n=countHardLinks ( inName )) > 0) { |
@@ -1267,6 +1285,12 @@ void compress ( Char *name ) | |||
1267 | return; | 1285 | return; |
1268 | } | 1286 | } |
1269 | 1287 | ||
1288 | if ( srcMode == SM_F2F ) { | ||
1289 | /* Save the file's meta-info before we open it. Doing it later | ||
1290 | means we mess up the access times. */ | ||
1291 | saveInputFileMetaInfo ( inName ); | ||
1292 | } | ||
1293 | |||
1270 | switch ( srcMode ) { | 1294 | switch ( srcMode ) { |
1271 | 1295 | ||
1272 | case SM_I2O: | 1296 | case SM_I2O: |
@@ -1306,7 +1330,7 @@ void compress ( Char *name ) | |||
1306 | 1330 | ||
1307 | case SM_F2F: | 1331 | case SM_F2F: |
1308 | inStr = fopen ( inName, "rb" ); | 1332 | inStr = fopen ( inName, "rb" ); |
1309 | outStr = fopen ( outName, "wb" ); | 1333 | outStr = fopen_output_safely ( outName, "wb" ); |
1310 | if ( outStr == NULL) { | 1334 | if ( outStr == NULL) { |
1311 | fprintf ( stderr, "%s: Can't create output file %s: %s.\n", | 1335 | fprintf ( stderr, "%s: Can't create output file %s: %s.\n", |
1312 | progName, outName, strerror(errno) ); | 1336 | progName, outName, strerror(errno) ); |
@@ -1321,7 +1345,6 @@ void compress ( Char *name ) | |||
1321 | setExit(1); | 1345 | setExit(1); |
1322 | return; | 1346 | return; |
1323 | }; | 1347 | }; |
1324 | setInterimPermissions ( outName ); | ||
1325 | break; | 1348 | break; |
1326 | 1349 | ||
1327 | default: | 1350 | default: |
@@ -1343,7 +1366,7 @@ void compress ( Char *name ) | |||
1343 | 1366 | ||
1344 | /*--- If there was an I/O error, we won't get here. ---*/ | 1367 | /*--- If there was an I/O error, we won't get here. ---*/ |
1345 | if ( srcMode == SM_F2F ) { | 1368 | if ( srcMode == SM_F2F ) { |
1346 | copyDatePermissionsAndOwner ( inName, outName ); | 1369 | applySavedMetaInfoToOutputFile ( outName ); |
1347 | deleteOutputOnInterrupt = False; | 1370 | deleteOutputOnInterrupt = False; |
1348 | if ( !keepInputFiles ) { | 1371 | if ( !keepInputFiles ) { |
1349 | IntNative retVal = remove ( inName ); | 1372 | IntNative retVal = remove ( inName ); |
@@ -1364,6 +1387,7 @@ void uncompress ( Char *name ) | |||
1364 | Int32 n, i; | 1387 | Int32 n, i; |
1365 | Bool magicNumberOK; | 1388 | Bool magicNumberOK; |
1366 | Bool cantGuess; | 1389 | Bool cantGuess; |
1390 | struct MY_STAT statBuf; | ||
1367 | 1391 | ||
1368 | deleteOutputOnInterrupt = False; | 1392 | deleteOutputOnInterrupt = False; |
1369 | 1393 | ||
@@ -1405,6 +1429,16 @@ void uncompress ( Char *name ) | |||
1405 | setExit(1); | 1429 | setExit(1); |
1406 | return; | 1430 | return; |
1407 | } | 1431 | } |
1432 | if ( srcMode == SM_F2F || srcMode == SM_F2O ) { | ||
1433 | MY_STAT(inName, &statBuf); | ||
1434 | if ( MY_S_ISDIR(statBuf.st_mode) ) { | ||
1435 | fprintf( stderr, | ||
1436 | "%s: Input file %s is a directory.\n", | ||
1437 | progName,inName); | ||
1438 | setExit(1); | ||
1439 | return; | ||
1440 | } | ||
1441 | } | ||
1408 | if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { | 1442 | if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { |
1409 | if (noisy) | 1443 | if (noisy) |
1410 | fprintf ( stderr, "%s: Input file %s is not a normal file.\n", | 1444 | fprintf ( stderr, "%s: Input file %s is not a normal file.\n", |
@@ -1419,11 +1453,15 @@ void uncompress ( Char *name ) | |||
1419 | progName, inName, outName ); | 1453 | progName, inName, outName ); |
1420 | /* just a warning, no return */ | 1454 | /* just a warning, no return */ |
1421 | } | 1455 | } |
1422 | if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) { | 1456 | if ( srcMode == SM_F2F && fileExists ( outName ) ) { |
1423 | fprintf ( stderr, "%s: Output file %s already exists.\n", | 1457 | if (forceOverwrite) { |
1424 | progName, outName ); | 1458 | remove(outName); |
1425 | setExit(1); | 1459 | } else { |
1426 | return; | 1460 | fprintf ( stderr, "%s: Output file %s already exists.\n", |
1461 | progName, outName ); | ||
1462 | setExit(1); | ||
1463 | return; | ||
1464 | } | ||
1427 | } | 1465 | } |
1428 | if ( srcMode == SM_F2F && !forceOverwrite && | 1466 | if ( srcMode == SM_F2F && !forceOverwrite && |
1429 | (n=countHardLinks ( inName ) ) > 0) { | 1467 | (n=countHardLinks ( inName ) ) > 0) { |
@@ -1433,6 +1471,12 @@ void uncompress ( Char *name ) | |||
1433 | return; | 1471 | return; |
1434 | } | 1472 | } |
1435 | 1473 | ||
1474 | if ( srcMode == SM_F2F ) { | ||
1475 | /* Save the file's meta-info before we open it. Doing it later | ||
1476 | means we mess up the access times. */ | ||
1477 | saveInputFileMetaInfo ( inName ); | ||
1478 | } | ||
1479 | |||
1436 | switch ( srcMode ) { | 1480 | switch ( srcMode ) { |
1437 | 1481 | ||
1438 | case SM_I2O: | 1482 | case SM_I2O: |
@@ -1463,7 +1507,7 @@ void uncompress ( Char *name ) | |||
1463 | 1507 | ||
1464 | case SM_F2F: | 1508 | case SM_F2F: |
1465 | inStr = fopen ( inName, "rb" ); | 1509 | inStr = fopen ( inName, "rb" ); |
1466 | outStr = fopen ( outName, "wb" ); | 1510 | outStr = fopen_output_safely ( outName, "wb" ); |
1467 | if ( outStr == NULL) { | 1511 | if ( outStr == NULL) { |
1468 | fprintf ( stderr, "%s: Can't create output file %s: %s.\n", | 1512 | fprintf ( stderr, "%s: Can't create output file %s: %s.\n", |
1469 | progName, outName, strerror(errno) ); | 1513 | progName, outName, strerror(errno) ); |
@@ -1478,7 +1522,6 @@ void uncompress ( Char *name ) | |||
1478 | setExit(1); | 1522 | setExit(1); |
1479 | return; | 1523 | return; |
1480 | }; | 1524 | }; |
1481 | setInterimPermissions ( outName ); | ||
1482 | break; | 1525 | break; |
1483 | 1526 | ||
1484 | default: | 1527 | default: |
@@ -1501,7 +1544,7 @@ void uncompress ( Char *name ) | |||
1501 | /*--- If there was an I/O error, we won't get here. ---*/ | 1544 | /*--- If there was an I/O error, we won't get here. ---*/ |
1502 | if ( magicNumberOK ) { | 1545 | if ( magicNumberOK ) { |
1503 | if ( srcMode == SM_F2F ) { | 1546 | if ( srcMode == SM_F2F ) { |
1504 | copyDatePermissionsAndOwner ( inName, outName ); | 1547 | applySavedMetaInfoToOutputFile ( outName ); |
1505 | deleteOutputOnInterrupt = False; | 1548 | deleteOutputOnInterrupt = False; |
1506 | if ( !keepInputFiles ) { | 1549 | if ( !keepInputFiles ) { |
1507 | IntNative retVal = remove ( inName ); | 1550 | IntNative retVal = remove ( inName ); |
@@ -1539,6 +1582,7 @@ void testf ( Char *name ) | |||
1539 | { | 1582 | { |
1540 | FILE *inStr; | 1583 | FILE *inStr; |
1541 | Bool allOK; | 1584 | Bool allOK; |
1585 | struct MY_STAT statBuf; | ||
1542 | 1586 | ||
1543 | deleteOutputOnInterrupt = False; | 1587 | deleteOutputOnInterrupt = False; |
1544 | 1588 | ||
@@ -1565,6 +1609,16 @@ void testf ( Char *name ) | |||
1565 | setExit(1); | 1609 | setExit(1); |
1566 | return; | 1610 | return; |
1567 | } | 1611 | } |
1612 | if ( srcMode != SM_I2O ) { | ||
1613 | MY_STAT(inName, &statBuf); | ||
1614 | if ( MY_S_ISDIR(statBuf.st_mode) ) { | ||
1615 | fprintf( stderr, | ||
1616 | "%s: Input file %s is a directory.\n", | ||
1617 | progName,inName); | ||
1618 | setExit(1); | ||
1619 | return; | ||
1620 | } | ||
1621 | } | ||
1568 | 1622 | ||
1569 | switch ( srcMode ) { | 1623 | switch ( srcMode ) { |
1570 | 1624 | ||
@@ -1603,6 +1657,7 @@ void testf ( Char *name ) | |||
1603 | } | 1657 | } |
1604 | 1658 | ||
1605 | /*--- Now the input handle is sane. Do the Biz. ---*/ | 1659 | /*--- Now the input handle is sane. Do the Biz. ---*/ |
1660 | outputHandleJustInCase = NULL; | ||
1606 | allOK = testStream ( inStr ); | 1661 | allOK = testStream ( inStr ); |
1607 | 1662 | ||
1608 | if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" ); | 1663 | if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" ); |
@@ -1619,7 +1674,7 @@ void license ( void ) | |||
1619 | "bzip2, a block-sorting file compressor. " | 1674 | "bzip2, a block-sorting file compressor. " |
1620 | "Version %s.\n" | 1675 | "Version %s.\n" |
1621 | " \n" | 1676 | " \n" |
1622 | " Copyright (C) 1996-2000 by Julian Seward.\n" | 1677 | " Copyright (C) 1996-2002 by Julian Seward.\n" |
1623 | " \n" | 1678 | " \n" |
1624 | " This program is free software; you can redistribute it and/or modify\n" | 1679 | " This program is free software; you can redistribute it and/or modify\n" |
1625 | " it under the terms set out in the LICENSE file, which is included\n" | 1680 | " it under the terms set out in the LICENSE file, which is included\n" |
@@ -1658,6 +1713,8 @@ void usage ( Char *fullProgName ) | |||
1658 | " -V --version display software version & license\n" | 1713 | " -V --version display software version & license\n" |
1659 | " -s --small use less memory (at most 2500k)\n" | 1714 | " -s --small use less memory (at most 2500k)\n" |
1660 | " -1 .. -9 set block size to 100k .. 900k\n" | 1715 | " -1 .. -9 set block size to 100k .. 900k\n" |
1716 | " --fast alias for -1\n" | ||
1717 | " --best alias for -9\n" | ||
1661 | "\n" | 1718 | "\n" |
1662 | " If invoked as `bzip2', default action is to compress.\n" | 1719 | " If invoked as `bzip2', default action is to compress.\n" |
1663 | " as `bunzip2', default action is to decompress.\n" | 1720 | " as `bunzip2', default action is to decompress.\n" |
@@ -1666,9 +1723,9 @@ void usage ( Char *fullProgName ) | |||
1666 | " If no file names are given, bzip2 compresses or decompresses\n" | 1723 | " If no file names are given, bzip2 compresses or decompresses\n" |
1667 | " from standard input to standard output. You can combine\n" | 1724 | " from standard input to standard output. You can combine\n" |
1668 | " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n" | 1725 | " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n" |
1669 | #if BZ_UNIX | 1726 | # if BZ_UNIX |
1670 | "\n" | 1727 | "\n" |
1671 | #endif | 1728 | # endif |
1672 | , | 1729 | , |
1673 | 1730 | ||
1674 | BZ2_bzlibVersion(), | 1731 | BZ2_bzlibVersion(), |
@@ -1818,11 +1875,11 @@ IntNative main ( IntNative argc, Char *argv[] ) | |||
1818 | 1875 | ||
1819 | /*-- Set up signal handlers for mem access errors --*/ | 1876 | /*-- Set up signal handlers for mem access errors --*/ |
1820 | signal (SIGSEGV, mySIGSEGVorSIGBUScatcher); | 1877 | signal (SIGSEGV, mySIGSEGVorSIGBUScatcher); |
1821 | #if BZ_UNIX | 1878 | # if BZ_UNIX |
1822 | #ifndef __DJGPP__ | 1879 | # ifndef __DJGPP__ |
1823 | signal (SIGBUS, mySIGSEGVorSIGBUScatcher); | 1880 | signal (SIGBUS, mySIGSEGVorSIGBUScatcher); |
1824 | #endif | 1881 | # endif |
1825 | #endif | 1882 | # endif |
1826 | 1883 | ||
1827 | copyFileName ( inName, "(none)" ); | 1884 | copyFileName ( inName, "(none)" ); |
1828 | copyFileName ( outName, "(none)" ); | 1885 | copyFileName ( outName, "(none)" ); |
@@ -1933,6 +1990,8 @@ IntNative main ( IntNative argc, Char *argv[] ) | |||
1933 | if (ISFLAG("--exponential")) workFactor = 1; else | 1990 | if (ISFLAG("--exponential")) workFactor = 1; else |
1934 | if (ISFLAG("--repetitive-best")) redundant(aa->name); else | 1991 | if (ISFLAG("--repetitive-best")) redundant(aa->name); else |
1935 | if (ISFLAG("--repetitive-fast")) redundant(aa->name); else | 1992 | if (ISFLAG("--repetitive-fast")) redundant(aa->name); else |
1993 | if (ISFLAG("--fast")) blockSize100k = 1; else | ||
1994 | if (ISFLAG("--best")) blockSize100k = 9; else | ||
1936 | if (ISFLAG("--verbose")) verbosity++; else | 1995 | if (ISFLAG("--verbose")) verbosity++; else |
1937 | if (ISFLAG("--help")) { usage ( progName ); exit ( 0 ); } | 1996 | if (ISFLAG("--help")) { usage ( progName ); exit ( 0 ); } |
1938 | else | 1997 | else |
@@ -1,7 +1,6 @@ | |||
1 | 1 | ||
2 | |||
3 | NAME | 2 | NAME |
4 | bzip2, bunzip2 - a block-sorting file compressor, v1.0 | 3 | bzip2, bunzip2 - a block-sorting file compressor, v1.0.2 |
5 | bzcat - decompresses files to stdout | 4 | bzcat - decompresses files to stdout |
6 | bzip2recover - recovers data from damaged bzip2 files | 5 | bzip2recover - recovers data from damaged bzip2 files |
7 | 6 | ||
@@ -18,20 +17,20 @@ DESCRIPTION | |||
18 | sorting text compression algorithm, and Huffman coding. | 17 | sorting text compression algorithm, and Huffman coding. |
19 | Compression is generally considerably better than that | 18 | Compression is generally considerably better than that |
20 | achieved by more conventional LZ77/LZ78-based compressors, | 19 | achieved by more conventional LZ77/LZ78-based compressors, |
21 | and approaches the performance of the PPM family of sta- | 20 | and approaches the performance of the PPM family of sta- |
22 | tistical compressors. | 21 | tistical compressors. |
23 | 22 | ||
24 | The command-line options are deliberately very similar to | 23 | The command-line options are deliberately very similar to |
25 | those of GNU gzip, but they are not identical. | 24 | those of GNU gzip, but they are not identical. |
26 | 25 | ||
27 | bzip2 expects a list of file names to accompany the com- | 26 | bzip2 expects a list of file names to accompany the com- |
28 | mand-line flags. Each file is replaced by a compressed | 27 | mand-line flags. Each file is replaced by a compressed |
29 | version of itself, with the name "original_name.bz2". | 28 | version of itself, with the name "original_name.bz2". |
30 | Each compressed file has the same modification date, per- | 29 | Each compressed file has the same modification date, per- |
31 | missions, and, when possible, ownership as the correspond- | 30 | missions, and, when possible, ownership as the correspond- |
32 | ing original, so that these properties can be correctly | 31 | ing original, so that these properties can be correctly |
33 | restored at decompression time. File name handling is | 32 | restored at decompression time. File name handling is |
34 | naive in the sense that there is no mechanism for preserv- | 33 | naive in the sense that there is no mechanism for preserv- |
35 | ing original file names, permissions, ownerships or dates | 34 | ing original file names, permissions, ownerships or dates |
36 | in filesystems which lack these concepts, or have serious | 35 | in filesystems which lack these concepts, or have serious |
37 | file name length restrictions, such as MS-DOS. | 36 | file name length restrictions, such as MS-DOS. |
@@ -62,23 +61,23 @@ DESCRIPTION | |||
62 | guess the name of the original file, and uses the original | 61 | guess the name of the original file, and uses the original |
63 | name with .out appended. | 62 | name with .out appended. |
64 | 63 | ||
65 | As with compression, supplying no filenames causes decom- | 64 | As with compression, supplying no filenames causes decom- |
66 | pression from standard input to standard output. | 65 | pression from standard input to standard output. |
67 | 66 | ||
68 | bunzip2 will correctly decompress a file which is the con- | 67 | bunzip2 will correctly decompress a file which is the con- |
69 | catenation of two or more compressed files. The result is | 68 | catenation of two or more compressed files. The result is |
70 | the concatenation of the corresponding uncompressed files. | 69 | the concatenation of the corresponding uncompressed files. |
71 | Integrity testing (-t) of concatenated compressed files is | 70 | Integrity testing (-t) of concatenated compressed files is |
72 | also supported. | 71 | also supported. |
73 | 72 | ||
74 | You can also compress or decompress files to the standard | 73 | You can also compress or decompress files to the standard |
75 | output by giving the -c flag. Multiple files may be com- | 74 | output by giving the -c flag. Multiple files may be com- |
76 | pressed and decompressed like this. The resulting outputs | 75 | pressed and decompressed like this. The resulting outputs |
77 | are fed sequentially to stdout. Compression of multiple | 76 | are fed sequentially to stdout. Compression of multiple |
78 | files in this manner generates a stream containing multi- | 77 | files in this manner generates a stream containing multi- |
79 | ple compressed file representations. Such a stream can be | 78 | ple compressed file representations. Such a stream can be |
80 | decompressed correctly only by bzip2 version 0.9.0 or | 79 | decompressed correctly only by bzip2 version 0.9.0 or |
81 | later. Earlier versions of bzip2 will stop after decom- | 80 | later. Earlier versions of bzip2 will stop after decom- |
82 | pressing the first file in the stream. | 81 | pressing the first file in the stream. |
83 | 82 | ||
84 | bzcat (or bzip2 -dc) decompresses all specified files to | 83 | bzcat (or bzip2 -dc) decompresses all specified files to |
@@ -99,7 +98,7 @@ DESCRIPTION | |||
99 | 98 | ||
100 | As a self-check for your protection, bzip2 uses 32-bit | 99 | As a self-check for your protection, bzip2 uses 32-bit |
101 | CRCs to make sure that the decompressed version of a file | 100 | CRCs to make sure that the decompressed version of a file |
102 | is identical to the original. This guards against corrup- | 101 | is identical to the original. This guards against corrup- |
103 | tion of the compressed data, and against undetected bugs | 102 | tion of the compressed data, and against undetected bugs |
104 | in bzip2 (hopefully very unlikely). The chances of data | 103 | in bzip2 (hopefully very unlikely). The chances of data |
105 | corruption going undetected is microscopic, about one | 104 | corruption going undetected is microscopic, about one |
@@ -127,8 +126,8 @@ OPTIONS | |||
127 | and forces bzip2 to decompress. | 126 | and forces bzip2 to decompress. |
128 | 127 | ||
129 | -z --compress | 128 | -z --compress |
130 | The complement to -d: forces compression, regard- | 129 | The complement to -d: forces compression, |
131 | less of the invokation name. | 130 | regardless of the invocation name. |
132 | 131 | ||
133 | -t --test | 132 | -t --test |
134 | Check integrity of the specified file(s), but don't | 133 | Check integrity of the specified file(s), but don't |
@@ -141,6 +140,11 @@ OPTIONS | |||
141 | forces bzip2 to break hard links to files, which it | 140 | forces bzip2 to break hard links to files, which it |
142 | otherwise wouldn't do. | 141 | otherwise wouldn't do. |
143 | 142 | ||
143 | bzip2 normally declines to decompress files which | ||
144 | don't have the correct magic header bytes. If | ||
145 | forced (-f), however, it will pass such files | ||
146 | through unmodified. This is how GNU gzip behaves. | ||
147 | |||
144 | -k --keep | 148 | -k --keep |
145 | Keep (don't delete) input files during compression | 149 | Keep (don't delete) input files during compression |
146 | or decompression. | 150 | or decompression. |
@@ -167,7 +171,7 @@ OPTIONS | |||
167 | 171 | ||
168 | -v --verbose | 172 | -v --verbose |
169 | Verbose mode -- show the compression ratio for each | 173 | Verbose mode -- show the compression ratio for each |
170 | file processed. Further -v's increase the ver- | 174 | file processed. Further -v's increase the ver- |
171 | bosity level, spewing out lots of information which | 175 | bosity level, spewing out lots of information which |
172 | is primarily of interest for diagnostic purposes. | 176 | is primarily of interest for diagnostic purposes. |
173 | 177 | ||
@@ -175,20 +179,24 @@ OPTIONS | |||
175 | Display the software version, license terms and | 179 | Display the software version, license terms and |
176 | conditions. | 180 | conditions. |
177 | 181 | ||
178 | -1 to -9 | 182 | -1 (or --fast) to -9 (or --best) |
179 | Set the block size to 100 k, 200 k .. 900 k when | 183 | Set the block size to 100 k, 200 k .. 900 k when |
180 | compressing. Has no effect when decompressing. | 184 | compressing. Has no effect when decompressing. |
181 | See MEMORY MANAGEMENT below. | 185 | See MEMORY MANAGEMENT below. The --fast and --best |
186 | aliases are primarily for GNU gzip compatibility. | ||
187 | In particular, --fast doesn't make things signifi- | ||
188 | cantly faster. And --best merely selects the | ||
189 | default behaviour. | ||
182 | 190 | ||
183 | -- Treats all subsequent arguments as file names, even | 191 | -- Treats all subsequent arguments as file names, even |
184 | if they start with a dash. This is so you can han- | 192 | if they start with a dash. This is so you can han- |
185 | dle files with names beginning with a dash, for | 193 | dle files with names beginning with a dash, for |
186 | example: bzip2 -- -myfilename. | 194 | example: bzip2 -- -myfilename. |
187 | 195 | ||
188 | --repetitive-fast --repetitive-best | 196 | --repetitive-fast --repetitive-best |
189 | These flags are redundant in versions 0.9.5 and | 197 | These flags are redundant in versions 0.9.5 and |
190 | above. They provided some coarse control over the | 198 | above. They provided some coarse control over the |
191 | behaviour of the sorting algorithm in earlier ver- | 199 | behaviour of the sorting algorithm in earlier ver- |
192 | sions, which was sometimes useful. 0.9.5 and above | 200 | sions, which was sometimes useful. 0.9.5 and above |
193 | have an improved algorithm which renders these | 201 | have an improved algorithm which renders these |
194 | flags irrelevant. | 202 | flags irrelevant. |
@@ -199,7 +207,7 @@ MEMORY MANAGEMENT | |||
199 | affects both the compression ratio achieved, and the | 207 | affects both the compression ratio achieved, and the |
200 | amount of memory needed for compression and decompression. | 208 | amount of memory needed for compression and decompression. |
201 | The flags -1 through -9 specify the block size to be | 209 | The flags -1 through -9 specify the block size to be |
202 | 100,000 bytes through 900,000 bytes (the default) respec- | 210 | 100,000 bytes through 900,000 bytes (the default) respec- |
203 | tively. At decompression time, the block size used for | 211 | tively. At decompression time, the block size used for |
204 | compression is read from the header of the compressed | 212 | compression is read from the header of the compressed |
205 | file, and bunzip2 then allocates itself just enough memory | 213 | file, and bunzip2 then allocates itself just enough memory |
@@ -227,13 +235,13 @@ MEMORY MANAGEMENT | |||
227 | bunzip2 will require about 3700 kbytes to decompress. To | 235 | bunzip2 will require about 3700 kbytes to decompress. To |
228 | support decompression of any file on a 4 megabyte machine, | 236 | support decompression of any file on a 4 megabyte machine, |
229 | bunzip2 has an option to decompress using approximately | 237 | bunzip2 has an option to decompress using approximately |
230 | half this amount of memory, about 2300 kbytes. Decompres- | 238 | half this amount of memory, about 2300 kbytes. Decompres- |
231 | sion speed is also halved, so you should use this option | 239 | sion speed is also halved, so you should use this option |
232 | only where necessary. The relevant flag is -s. | 240 | only where necessary. The relevant flag is -s. |
233 | 241 | ||
234 | In general, try and use the largest block size memory con- | 242 | In general, try and use the largest block size memory con- |
235 | straints allow, since that maximises the compression | 243 | straints allow, since that maximises the compression |
236 | achieved. Compression and decompression speed are virtu- | 244 | achieved. Compression and decompression speed are virtu- |
237 | ally unaffected by block size. | 245 | ally unaffected by block size. |
238 | 246 | ||
239 | Another significant point applies to files which fit in a | 247 | Another significant point applies to files which fit in a |
@@ -249,11 +257,11 @@ MEMORY MANAGEMENT | |||
249 | 257 | ||
250 | Here is a table which summarises the maximum memory usage | 258 | Here is a table which summarises the maximum memory usage |
251 | for different block sizes. Also recorded is the total | 259 | for different block sizes. Also recorded is the total |
252 | compressed size for 14 files of the Calgary Text Compres- | 260 | compressed size for 14 files of the Calgary Text Compres- |
253 | sion Corpus totalling 3,141,622 bytes. This column gives | 261 | sion Corpus totalling 3,141,622 bytes. This column gives |
254 | some feel for how compression varies with block size. | 262 | some feel for how compression varies with block size. |
255 | These figures tend to understate the advantage of larger | 263 | These figures tend to understate the advantage of larger |
256 | block sizes for larger files, since the Corpus is domi- | 264 | block sizes for larger files, since the Corpus is domi- |
257 | nated by smaller files. | 265 | nated by smaller files. |
258 | 266 | ||
259 | Compress Decompress Decompress Corpus | 267 | Compress Decompress Decompress Corpus |
@@ -272,7 +280,7 @@ MEMORY MANAGEMENT | |||
272 | 280 | ||
273 | RECOVERING DATA FROM DAMAGED FILES | 281 | RECOVERING DATA FROM DAMAGED FILES |
274 | bzip2 compresses files in blocks, usually 900kbytes long. | 282 | bzip2 compresses files in blocks, usually 900kbytes long. |
275 | Each block is handled independently. If a media or trans- | 283 | Each block is handled independently. If a media or trans- |
276 | mission error causes a multi-block .bz2 file to become | 284 | mission error causes a multi-block .bz2 file to become |
277 | damaged, it may be possible to recover data from the | 285 | damaged, it may be possible to recover data from the |
278 | undamaged blocks in the file. | 286 | undamaged blocks in the file. |
@@ -289,19 +297,19 @@ RECOVERING DATA FROM DAMAGED FILES | |||
289 | the integrity of the resulting files, and decompress those | 297 | the integrity of the resulting files, and decompress those |
290 | which are undamaged. | 298 | which are undamaged. |
291 | 299 | ||
292 | bzip2recover takes a single argument, the name of the dam- | 300 | bzip2recover takes a single argument, the name of the dam- |
293 | aged file, and writes a number of files "rec0001file.bz2", | 301 | aged file, and writes a number of files |
294 | "rec0002file.bz2", etc, containing the extracted blocks. | 302 | "rec00001file.bz2", "rec00002file.bz2", etc, containing |
295 | The output filenames are designed so that the use of | 303 | the extracted blocks. The output filenames are |
296 | wildcards in subsequent processing -- for example, "bzip2 | 304 | designed so that the use of wildcards in subsequent pro- |
297 | -dc rec*file.bz2 > recovered_data" -- lists the files in | 305 | cessing -- for example, "bzip2 -dc rec*file.bz2 > recov- |
298 | the correct order. | 306 | ered_data" -- processes the files in the correct order. |
299 | 307 | ||
300 | bzip2recover should be of most use dealing with large .bz2 | 308 | bzip2recover should be of most use dealing with large .bz2 |
301 | files, as these will contain many blocks. It is clearly | 309 | files, as these will contain many blocks. It is clearly |
302 | futile to use it on damaged single-block files, since a | 310 | futile to use it on damaged single-block files, since a |
303 | damaged block cannot be recovered. If you wish to min- | 311 | damaged block cannot be recovered. If you wish to min- |
304 | imise any potential data loss through media or transmis- | 312 | imise any potential data loss through media or transmis- |
305 | sion errors, you might consider compressing with a smaller | 313 | sion errors, you might consider compressing with a smaller |
306 | block size. | 314 | block size. |
307 | 315 | ||
@@ -315,19 +323,19 @@ PERFORMANCE NOTES | |||
315 | better than previous versions in this respect. The ratio | 323 | better than previous versions in this respect. The ratio |
316 | between worst-case and average-case compression time is in | 324 | between worst-case and average-case compression time is in |
317 | the region of 10:1. For previous versions, this figure | 325 | the region of 10:1. For previous versions, this figure |
318 | was more like 100:1. You can use the -vvvv option to mon- | 326 | was more like 100:1. You can use the -vvvv option to mon- |
319 | itor progress in great detail, if you want. | 327 | itor progress in great detail, if you want. |
320 | 328 | ||
321 | Decompression speed is unaffected by these phenomena. | 329 | Decompression speed is unaffected by these phenomena. |
322 | 330 | ||
323 | bzip2 usually allocates several megabytes of memory to | 331 | bzip2 usually allocates several megabytes of memory to |
324 | operate in, and then charges all over it in a fairly ran- | 332 | operate in, and then charges all over it in a fairly ran- |
325 | dom fashion. This means that performance, both for com- | 333 | dom fashion. This means that performance, both for com- |
326 | pressing and decompressing, is largely determined by the | 334 | pressing and decompressing, is largely determined by the |
327 | speed at which your machine can service cache misses. | 335 | speed at which your machine can service cache misses. |
328 | Because of this, small changes to the code to reduce the | 336 | Because of this, small changes to the code to reduce the |
329 | miss rate have been observed to give disproportionately | 337 | miss rate have been observed to give disproportionately |
330 | large performance improvements. I imagine bzip2 will per- | 338 | large performance improvements. I imagine bzip2 will per- |
331 | form best on machines with very large caches. | 339 | form best on machines with very large caches. |
332 | 340 | ||
333 | 341 | ||
@@ -337,40 +345,46 @@ CAVEATS | |||
337 | but the details of what the problem is sometimes seem | 345 | but the details of what the problem is sometimes seem |
338 | rather misleading. | 346 | rather misleading. |
339 | 347 | ||
340 | This manual page pertains to version 1.0 of bzip2. Com- | 348 | This manual page pertains to version 1.0.2 of bzip2. Com- |
341 | pressed data created by this version is entirely forwards | 349 | pressed data created by this version is entirely forwards |
342 | and backwards compatible with the previous public | 350 | and backwards compatible with the previous public |
343 | releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the | 351 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, |
344 | following exception: 0.9.0 and above can correctly decom- | 352 | but with the following exception: 0.9.0 and above can cor- |
345 | press multiple concatenated compressed files. 0.1pl2 can- | 353 | rectly decompress multiple concatenated compressed files. |
346 | not do this; it will stop after decompressing just the | 354 | 0.1pl2 cannot do this; it will stop after decompressing |
347 | first file in the stream. | 355 | just the first file in the stream. |
348 | 356 | ||
349 | bzip2recover uses 32-bit integers to represent bit posi- | 357 | bzip2recover versions prior to this one, 1.0.2, used |
350 | tions in compressed files, so it cannot handle compressed | 358 | 32-bit integers to represent bit positions in compressed |
351 | files more than 512 megabytes long. This could easily be | 359 | files, so it could not handle compressed files more than |
352 | fixed. | 360 | 512 megabytes long. Version 1.0.2 and above uses 64-bit |
361 | ints on some platforms which support them (GNU supported | ||
362 | targets, and Windows). To establish whether or not | ||
363 | bzip2recover was built with such a limitation, run it | ||
364 | without arguments. In any event you can build yourself an | ||
365 | unlimited version if you can recompile it with MaybeUInt64 | ||
366 | set to be an unsigned 64-bit integer. | ||
353 | 367 | ||
354 | 368 | ||
355 | AUTHOR | 369 | AUTHOR |
356 | Julian Seward, jseward@acm.org. | 370 | Julian Seward, jseward@acm.org. |
357 | 371 | ||
358 | http://sourceware.cygnus.com/bzip2 | 372 | http://sources.redhat.com/bzip2 |
359 | http://www.muraroa.demon.co.uk | ||
360 | 373 | ||
361 | The ideas embodied in bzip2 are due to (at least) the fol- | 374 | The ideas embodied in bzip2 are due to (at least) the fol- |
362 | lowing people: Michael Burrows and David Wheeler (for the | 375 | lowing people: Michael Burrows and David Wheeler (for the |
363 | block sorting transformation), David Wheeler (again, for | 376 | block sorting transformation), David Wheeler (again, for |
364 | the Huffman coder), Peter Fenwick (for the structured cod- | 377 | the Huffman coder), Peter Fenwick (for the structured cod- |
365 | ing model in the original bzip, and many refinements), and | 378 | ing model in the original bzip, and many refinements), and |
366 | Alistair Moffat, Radford Neal and Ian Witten (for the | 379 | Alistair Moffat, Radford Neal and Ian Witten (for the |
367 | arithmetic coder in the original bzip). I am much | 380 | arithmetic coder in the original bzip). I am much |
368 | indebted for their help, support and advice. See the man- | 381 | indebted for their help, support and advice. See the man- |
369 | ual in the source distribution for pointers to sources of | 382 | ual in the source distribution for pointers to sources of |
370 | documentation. Christian von Roques encouraged me to look | 383 | documentation. Christian von Roques encouraged me to look |
371 | for faster sorting algorithms, so as to speed up compres- | 384 | for faster sorting algorithms, so as to speed up compres- |
372 | sion. Bela Lubkin encouraged me to improve the worst-case | 385 | sion. Bela Lubkin encouraged me to improve the worst-case |
373 | compression performance. Many people sent patches, helped | 386 | compression performance. The bz* scripts are derived from |
374 | with portability problems, lent machines, gave advice and | 387 | those of GNU gzip. Many people sent patches, helped with |
375 | were generally helpful. | 388 | portability problems, lent machines, gave advice and were |
389 | generally helpful. | ||
376 | 390 | ||
diff --git a/bzip2recover.c b/bzip2recover.c index ba3d175..286873b 100644 --- a/bzip2recover.c +++ b/bzip2recover.c | |||
@@ -9,7 +9,7 @@ | |||
9 | salvage from damaged files created by the accompanying | 9 | salvage from damaged files created by the accompanying |
10 | bzip2-1.0 program. | 10 | bzip2-1.0 program. |
11 | 11 | ||
12 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 12 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
13 | 13 | ||
14 | Redistribution and use in source and binary forms, with or without | 14 | Redistribution and use in source and binary forms, with or without |
15 | modification, are permitted provided that the following conditions | 15 | modification, are permitted provided that the following conditions |
@@ -57,6 +57,29 @@ | |||
57 | #include <stdlib.h> | 57 | #include <stdlib.h> |
58 | #include <string.h> | 58 | #include <string.h> |
59 | 59 | ||
60 | |||
61 | /* This program records bit locations in the file to be recovered. | ||
62 | That means that if 64-bit ints are not supported, we will not | ||
63 | be able to recover .bz2 files over 512MB (2^32 bits) long. | ||
64 | On GNU supported platforms, we take advantage of the 64-bit | ||
65 | int support to circumvent this problem. Ditto MSVC. | ||
66 | |||
67 | This change occurred in version 1.0.2; all prior versions have | ||
68 | the 512MB limitation. | ||
69 | */ | ||
70 | #ifdef __GNUC__ | ||
71 | typedef unsigned long long int MaybeUInt64; | ||
72 | # define MaybeUInt64_FMT "%Lu" | ||
73 | #else | ||
74 | #ifdef _MSC_VER | ||
75 | typedef unsigned __int64 MaybeUInt64; | ||
76 | # define MaybeUInt64_FMT "%I64u" | ||
77 | #else | ||
78 | typedef unsigned int MaybeUInt64; | ||
79 | # define MaybeUInt64_FMT "%u" | ||
80 | #endif | ||
81 | #endif | ||
82 | |||
60 | typedef unsigned int UInt32; | 83 | typedef unsigned int UInt32; |
61 | typedef int Int32; | 84 | typedef int Int32; |
62 | typedef unsigned char UChar; | 85 | typedef unsigned char UChar; |
@@ -66,13 +89,25 @@ typedef unsigned char Bool; | |||
66 | #define False ((Bool)0) | 89 | #define False ((Bool)0) |
67 | 90 | ||
68 | 91 | ||
69 | Char inFileName[2000]; | 92 | #define BZ_MAX_FILENAME 2000 |
70 | Char outFileName[2000]; | 93 | |
71 | Char progName[2000]; | 94 | Char inFileName[BZ_MAX_FILENAME]; |
95 | Char outFileName[BZ_MAX_FILENAME]; | ||
96 | Char progName[BZ_MAX_FILENAME]; | ||
97 | |||
98 | MaybeUInt64 bytesOut = 0; | ||
99 | MaybeUInt64 bytesIn = 0; | ||
72 | 100 | ||
73 | UInt32 bytesOut = 0; | ||
74 | UInt32 bytesIn = 0; | ||
75 | 101 | ||
102 | /*---------------------------------------------------*/ | ||
103 | /*--- Header bytes ---*/ | ||
104 | /*---------------------------------------------------*/ | ||
105 | |||
106 | #define BZ_HDR_B 0x42 /* 'B' */ | ||
107 | #define BZ_HDR_Z 0x5a /* 'Z' */ | ||
108 | #define BZ_HDR_h 0x68 /* 'h' */ | ||
109 | #define BZ_HDR_0 0x30 /* '0' */ | ||
110 | |||
76 | 111 | ||
77 | /*---------------------------------------------------*/ | 112 | /*---------------------------------------------------*/ |
78 | /*--- I/O errors ---*/ | 113 | /*--- I/O errors ---*/ |
@@ -116,6 +151,23 @@ void mallocFail ( Int32 n ) | |||
116 | } | 151 | } |
117 | 152 | ||
118 | 153 | ||
154 | /*---------------------------------------------*/ | ||
155 | void tooManyBlocks ( Int32 max_handled_blocks ) | ||
156 | { | ||
157 | fprintf ( stderr, | ||
158 | "%s: `%s' appears to contain more than %d blocks\n", | ||
159 | progName, inFileName, max_handled_blocks ); | ||
160 | fprintf ( stderr, | ||
161 | "%s: and cannot be handled. To fix, increase\n", | ||
162 | progName ); | ||
163 | fprintf ( stderr, | ||
164 | "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n", | ||
165 | progName ); | ||
166 | exit ( 1 ); | ||
167 | } | ||
168 | |||
169 | |||
170 | |||
119 | /*---------------------------------------------------*/ | 171 | /*---------------------------------------------------*/ |
120 | /*--- Bit stream I/O ---*/ | 172 | /*--- Bit stream I/O ---*/ |
121 | /*---------------------------------------------------*/ | 173 | /*---------------------------------------------------*/ |
@@ -254,27 +306,37 @@ Bool endsInBz2 ( Char* name ) | |||
254 | /*--- ---*/ | 306 | /*--- ---*/ |
255 | /*---------------------------------------------------*/ | 307 | /*---------------------------------------------------*/ |
256 | 308 | ||
309 | /* This logic isn't really right when it comes to Cygwin. */ | ||
310 | #ifdef _WIN32 | ||
311 | # define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */ | ||
312 | #else | ||
313 | # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ | ||
314 | #endif | ||
315 | |||
257 | #define BLOCK_HEADER_HI 0x00003141UL | 316 | #define BLOCK_HEADER_HI 0x00003141UL |
258 | #define BLOCK_HEADER_LO 0x59265359UL | 317 | #define BLOCK_HEADER_LO 0x59265359UL |
259 | 318 | ||
260 | #define BLOCK_ENDMARK_HI 0x00001772UL | 319 | #define BLOCK_ENDMARK_HI 0x00001772UL |
261 | #define BLOCK_ENDMARK_LO 0x45385090UL | 320 | #define BLOCK_ENDMARK_LO 0x45385090UL |
262 | 321 | ||
322 | /* Increase if necessary. However, a .bz2 file with > 50000 blocks | ||
323 | would have an uncompressed size of at least 40GB, so the chances | ||
324 | are low you'll need to up this. | ||
325 | */ | ||
326 | #define BZ_MAX_HANDLED_BLOCKS 50000 | ||
263 | 327 | ||
264 | UInt32 bStart[20000]; | 328 | MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; |
265 | UInt32 bEnd[20000]; | 329 | MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; |
266 | UInt32 rbStart[20000]; | 330 | MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; |
267 | UInt32 rbEnd[20000]; | 331 | MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; |
268 | 332 | ||
269 | Int32 main ( Int32 argc, Char** argv ) | 333 | Int32 main ( Int32 argc, Char** argv ) |
270 | { | 334 | { |
271 | FILE* inFile; | 335 | FILE* inFile; |
272 | FILE* outFile; | 336 | FILE* outFile; |
273 | BitStream* bsIn, *bsWr; | 337 | BitStream* bsIn, *bsWr; |
274 | Int32 currBlock, b, wrBlock; | 338 | Int32 b, wrBlock, currBlock, rbCtr; |
275 | UInt32 bitsRead; | 339 | MaybeUInt64 bitsRead; |
276 | Int32 rbCtr; | ||
277 | |||
278 | 340 | ||
279 | UInt32 buffHi, buffLo, blockCRC; | 341 | UInt32 buffHi, buffLo, blockCRC; |
280 | Char* p; | 342 | Char* p; |
@@ -282,11 +344,37 @@ Int32 main ( Int32 argc, Char** argv ) | |||
282 | strcpy ( progName, argv[0] ); | 344 | strcpy ( progName, argv[0] ); |
283 | inFileName[0] = outFileName[0] = 0; | 345 | inFileName[0] = outFileName[0] = 0; |
284 | 346 | ||
285 | fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" ); | 347 | fprintf ( stderr, |
348 | "bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" ); | ||
286 | 349 | ||
287 | if (argc != 2) { | 350 | if (argc != 2) { |
288 | fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", | 351 | fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", |
289 | progName, progName ); | 352 | progName, progName ); |
353 | switch (sizeof(MaybeUInt64)) { | ||
354 | case 8: | ||
355 | fprintf(stderr, | ||
356 | "\trestrictions on size of recovered file: None\n"); | ||
357 | break; | ||
358 | case 4: | ||
359 | fprintf(stderr, | ||
360 | "\trestrictions on size of recovered file: 512 MB\n"); | ||
361 | fprintf(stderr, | ||
362 | "\tto circumvent, recompile with MaybeUInt64 as an\n" | ||
363 | "\tunsigned 64-bit int.\n"); | ||
364 | break; | ||
365 | default: | ||
366 | fprintf(stderr, | ||
367 | "\tsizeof(MaybeUInt64) is not 4 or 8 -- " | ||
368 | "configuration error.\n"); | ||
369 | break; | ||
370 | } | ||
371 | exit(1); | ||
372 | } | ||
373 | |||
374 | if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { | ||
375 | fprintf ( stderr, | ||
376 | "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n", | ||
377 | progName, strlen(argv[1]) ); | ||
290 | exit(1); | 378 | exit(1); |
291 | } | 379 | } |
292 | 380 | ||
@@ -316,7 +404,8 @@ Int32 main ( Int32 argc, Char** argv ) | |||
316 | (bitsRead - bStart[currBlock]) >= 40) { | 404 | (bitsRead - bStart[currBlock]) >= 40) { |
317 | bEnd[currBlock] = bitsRead-1; | 405 | bEnd[currBlock] = bitsRead-1; |
318 | if (currBlock > 0) | 406 | if (currBlock > 0) |
319 | fprintf ( stderr, " block %d runs from %d to %d (incomplete)\n", | 407 | fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT |
408 | " to " MaybeUInt64_FMT " (incomplete)\n", | ||
320 | currBlock, bStart[currBlock], bEnd[currBlock] ); | 409 | currBlock, bStart[currBlock], bEnd[currBlock] ); |
321 | } else | 410 | } else |
322 | currBlock--; | 411 | currBlock--; |
@@ -330,17 +419,22 @@ Int32 main ( Int32 argc, Char** argv ) | |||
330 | ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI | 419 | ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI |
331 | && buffLo == BLOCK_ENDMARK_LO) | 420 | && buffLo == BLOCK_ENDMARK_LO) |
332 | ) { | 421 | ) { |
333 | if (bitsRead > 49) | 422 | if (bitsRead > 49) { |
334 | bEnd[currBlock] = bitsRead-49; else | 423 | bEnd[currBlock] = bitsRead-49; |
424 | } else { | ||
335 | bEnd[currBlock] = 0; | 425 | bEnd[currBlock] = 0; |
426 | } | ||
336 | if (currBlock > 0 && | 427 | if (currBlock > 0 && |
337 | (bEnd[currBlock] - bStart[currBlock]) >= 130) { | 428 | (bEnd[currBlock] - bStart[currBlock]) >= 130) { |
338 | fprintf ( stderr, " block %d runs from %d to %d\n", | 429 | fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT |
430 | " to " MaybeUInt64_FMT "\n", | ||
339 | rbCtr+1, bStart[currBlock], bEnd[currBlock] ); | 431 | rbCtr+1, bStart[currBlock], bEnd[currBlock] ); |
340 | rbStart[rbCtr] = bStart[currBlock]; | 432 | rbStart[rbCtr] = bStart[currBlock]; |
341 | rbEnd[rbCtr] = bEnd[currBlock]; | 433 | rbEnd[rbCtr] = bEnd[currBlock]; |
342 | rbCtr++; | 434 | rbCtr++; |
343 | } | 435 | } |
436 | if (currBlock >= BZ_MAX_HANDLED_BLOCKS) | ||
437 | tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); | ||
344 | currBlock++; | 438 | currBlock++; |
345 | 439 | ||
346 | bStart[currBlock] = bitsRead; | 440 | bStart[currBlock] = bitsRead; |
@@ -400,10 +494,25 @@ Int32 main ( Int32 argc, Char** argv ) | |||
400 | wrBlock++; | 494 | wrBlock++; |
401 | } else | 495 | } else |
402 | if (bitsRead == rbStart[wrBlock]) { | 496 | if (bitsRead == rbStart[wrBlock]) { |
403 | outFileName[0] = 0; | 497 | /* Create the output file name, correctly handling leading paths. |
404 | sprintf ( outFileName, "rec%4d", wrBlock+1 ); | 498 | (31.10.2001 by Sergey E. Kusikov) */ |
405 | for (p = outFileName; *p != 0; p++) if (*p == ' ') *p = '0'; | 499 | Char* split; |
406 | strcat ( outFileName, inFileName ); | 500 | Int32 ofs, k; |
501 | for (k = 0; k < BZ_MAX_FILENAME; k++) | ||
502 | outFileName[k] = 0; | ||
503 | strcpy (outFileName, inFileName); | ||
504 | split = strrchr (outFileName, BZ_SPLIT_SYM); | ||
505 | if (split == NULL) { | ||
506 | split = outFileName; | ||
507 | } else { | ||
508 | ++split; | ||
509 | } | ||
510 | /* Now split points to the start of the basename. */ | ||
511 | ofs = split - outFileName; | ||
512 | sprintf (split, "rec%5d", wrBlock+1); | ||
513 | for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; | ||
514 | strcat (outFileName, inFileName + ofs); | ||
515 | |||
407 | if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); | 516 | if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); |
408 | 517 | ||
409 | fprintf ( stderr, " writing block %d to `%s' ...\n", | 518 | fprintf ( stderr, " writing block %d to `%s' ...\n", |
@@ -416,8 +525,10 @@ Int32 main ( Int32 argc, Char** argv ) | |||
416 | exit(1); | 525 | exit(1); |
417 | } | 526 | } |
418 | bsWr = bsOpenWriteStream ( outFile ); | 527 | bsWr = bsOpenWriteStream ( outFile ); |
419 | bsPutUChar ( bsWr, 'B' ); bsPutUChar ( bsWr, 'Z' ); | 528 | bsPutUChar ( bsWr, BZ_HDR_B ); |
420 | bsPutUChar ( bsWr, 'h' ); bsPutUChar ( bsWr, '9' ); | 529 | bsPutUChar ( bsWr, BZ_HDR_Z ); |
530 | bsPutUChar ( bsWr, BZ_HDR_h ); | ||
531 | bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); | ||
421 | bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); | 532 | bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); |
422 | bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); | 533 | bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); |
423 | bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); | 534 | bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); |
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -93,10 +93,39 @@ void BZ2_bz__AssertH__fail ( int errcode ) | |||
93 | "component, you should also report this bug to the author(s)\n" | 93 | "component, you should also report this bug to the author(s)\n" |
94 | "of that program. Please make an effort to report this bug;\n" | 94 | "of that program. Please make an effort to report this bug;\n" |
95 | "timely and accurate bug reports eventually lead to higher\n" | 95 | "timely and accurate bug reports eventually lead to higher\n" |
96 | "quality software. Thanks. Julian Seward, 21 March 2000.\n\n", | 96 | "quality software. Thanks. Julian Seward, 30 December 2001.\n\n", |
97 | errcode, | 97 | errcode, |
98 | BZ2_bzlibVersion() | 98 | BZ2_bzlibVersion() |
99 | ); | 99 | ); |
100 | |||
101 | if (errcode == 1007) { | ||
102 | fprintf(stderr, | ||
103 | "\n*** A special note about internal error number 1007 ***\n" | ||
104 | "\n" | ||
105 | "Experience suggests that a common cause of i.e. 1007\n" | ||
106 | "is unreliable memory or other hardware. The 1007 assertion\n" | ||
107 | "just happens to cross-check the results of huge numbers of\n" | ||
108 | "memory reads/writes, and so acts (unintendedly) as a stress\n" | ||
109 | "test of your memory system.\n" | ||
110 | "\n" | ||
111 | "I suggest the following: try compressing the file again,\n" | ||
112 | "possibly monitoring progress in detail with the -vv flag.\n" | ||
113 | "\n" | ||
114 | "* If the error cannot be reproduced, and/or happens at different\n" | ||
115 | " points in compression, you may have a flaky memory system.\n" | ||
116 | " Try a memory-test program. I have used Memtest86\n" | ||
117 | " (www.memtest86.com). At the time of writing it is free (GPLd).\n" | ||
118 | " Memtest86 tests memory much more thorougly than your BIOSs\n" | ||
119 | " power-on test, and may find failures that the BIOS doesn't.\n" | ||
120 | "\n" | ||
121 | "* If the error can be repeatably reproduced, this is a bug in\n" | ||
122 | " bzip2, and I would very much like to hear about it. Please\n" | ||
123 | " let me know, and, ideally, save a copy of the file causing the\n" | ||
124 | " problem -- without which I will be unable to investigate it.\n" | ||
125 | "\n" | ||
126 | ); | ||
127 | } | ||
128 | |||
100 | exit(3); | 129 | exit(3); |
101 | } | 130 | } |
102 | #endif | 131 | #endif |
@@ -1402,7 +1431,7 @@ BZFILE * bzopen_or_bzdopen | |||
1402 | smallMode = 1; break; | 1431 | smallMode = 1; break; |
1403 | default: | 1432 | default: |
1404 | if (isdigit((int)(*mode))) { | 1433 | if (isdigit((int)(*mode))) { |
1405 | blockSize100k = *mode-'0'; | 1434 | blockSize100k = *mode-BZ_HDR_0; |
1406 | } | 1435 | } |
1407 | } | 1436 | } |
1408 | mode++; | 1437 | mode++; |
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -110,8 +110,10 @@ typedef | |||
110 | #define BZ_EXPORT | 110 | #define BZ_EXPORT |
111 | #endif | 111 | #endif |
112 | 112 | ||
113 | /* Need a definition for FILE */ | ||
114 | #include <stdio.h> | ||
115 | |||
113 | #ifdef _WIN32 | 116 | #ifdef _WIN32 |
114 | # include <stdio.h> | ||
115 | # include <windows.h> | 117 | # include <windows.h> |
116 | # ifdef small | 118 | # ifdef small |
117 | /* windows.h define small to char */ | 119 | /* windows.h define small to char */ |
diff --git a/bzlib_private.h b/bzlib_private.h index fb51c7a..ff973c3 100644 --- a/bzlib_private.h +++ b/bzlib_private.h | |||
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -76,7 +76,7 @@ | |||
76 | 76 | ||
77 | /*-- General stuff. --*/ | 77 | /*-- General stuff. --*/ |
78 | 78 | ||
79 | #define BZ_VERSION "1.0.1, 23-June-2000" | 79 | #define BZ_VERSION "1.0.2, 30-Dec-2001" |
80 | 80 | ||
81 | typedef char Char; | 81 | typedef char Char; |
82 | typedef unsigned char Bool; | 82 | typedef unsigned char Bool; |
@@ -137,6 +137,13 @@ extern void bz_internal_error ( int errcode ); | |||
137 | #define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp)) | 137 | #define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp)) |
138 | 138 | ||
139 | 139 | ||
140 | /*-- Header bytes. --*/ | ||
141 | |||
142 | #define BZ_HDR_B 0x42 /* 'B' */ | ||
143 | #define BZ_HDR_Z 0x5a /* 'Z' */ | ||
144 | #define BZ_HDR_h 0x68 /* 'h' */ | ||
145 | #define BZ_HDR_0 0x30 /* '0' */ | ||
146 | |||
140 | /*-- Constants for the back end. --*/ | 147 | /*-- Constants for the back end. --*/ |
141 | 148 | ||
142 | #define BZ_MAX_ALPHA_SIZE 258 | 149 | #define BZ_MAX_ALPHA_SIZE 258 |
@@ -0,0 +1,61 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | # Bzmore wrapped for bzip2, | ||
4 | # adapted from zmore by Philippe Troin <phil@fifi.org> for Debian GNU/Linux. | ||
5 | |||
6 | PATH="/usr/bin:$PATH"; export PATH | ||
7 | |||
8 | prog=`echo $0 | sed 's|.*/||'` | ||
9 | case "$prog" in | ||
10 | *less) more=less ;; | ||
11 | *) more=more ;; | ||
12 | esac | ||
13 | |||
14 | if test "`echo -n a`" = "-n a"; then | ||
15 | # looks like a SysV system: | ||
16 | n1=''; n2='\c' | ||
17 | else | ||
18 | n1='-n'; n2='' | ||
19 | fi | ||
20 | oldtty=`stty -g 2>/dev/null` | ||
21 | if stty -cbreak 2>/dev/null; then | ||
22 | cb='cbreak'; ncb='-cbreak' | ||
23 | else | ||
24 | # 'stty min 1' resets eof to ^a on both SunOS and SysV! | ||
25 | cb='min 1 -icanon'; ncb='icanon eof ^d' | ||
26 | fi | ||
27 | if test $? -eq 0 -a -n "$oldtty"; then | ||
28 | trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15 | ||
29 | else | ||
30 | trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15 | ||
31 | fi | ||
32 | |||
33 | if test $# = 0; then | ||
34 | if test -t 0; then | ||
35 | echo usage: $prog files... | ||
36 | else | ||
37 | bzip2 -cdfq | eval $more | ||
38 | fi | ||
39 | else | ||
40 | FIRST=1 | ||
41 | for FILE | ||
42 | do | ||
43 | if test $FIRST -eq 0; then | ||
44 | echo $n1 "--More--(Next file: $FILE)$n2" | ||
45 | stty $cb -echo 2>/dev/null | ||
46 | ANS=`dd bs=1 count=1 2>/dev/null` | ||
47 | stty $ncb echo 2>/dev/null | ||
48 | echo " " | ||
49 | if test "$ANS" = 'e' -o "$ANS" = 'q'; then | ||
50 | exit | ||
51 | fi | ||
52 | fi | ||
53 | if test "$ANS" != 's'; then | ||
54 | echo "------> $FILE <------" | ||
55 | bzip2 -cdfq "$FILE" | eval $more | ||
56 | fi | ||
57 | if test -t; then | ||
58 | FIRST=0 | ||
59 | fi | ||
60 | done | ||
61 | fi | ||
diff --git a/bzmore.1 b/bzmore.1 new file mode 100644 index 0000000..b437d3b --- /dev/null +++ b/bzmore.1 | |||
@@ -0,0 +1,152 @@ | |||
1 | .\"Shamelessly copied from zmore.1 by Philippe Troin <phil@fifi.org> | ||
2 | .\"for Debian GNU/Linux | ||
3 | .TH BZMORE 1 | ||
4 | .SH NAME | ||
5 | bzmore, bzless \- file perusal filter for crt viewing of bzip2 compressed text | ||
6 | .SH SYNOPSIS | ||
7 | .B bzmore | ||
8 | [ name ... ] | ||
9 | .br | ||
10 | .B bzless | ||
11 | [ name ... ] | ||
12 | .SH NOTE | ||
13 | In the following description, | ||
14 | .I bzless | ||
15 | and | ||
16 | .I less | ||
17 | can be used interchangeably with | ||
18 | .I bzmore | ||
19 | and | ||
20 | .I more. | ||
21 | .SH DESCRIPTION | ||
22 | .I Bzmore | ||
23 | is a filter which allows examination of compressed or plain text files | ||
24 | one screenful at a time on a soft-copy terminal. | ||
25 | .I bzmore | ||
26 | works on files compressed with | ||
27 | .I bzip2 | ||
28 | and also on uncompressed files. | ||
29 | If a file does not exist, | ||
30 | .I bzmore | ||
31 | looks for a file of the same name with the addition of a .bz2 suffix. | ||
32 | .PP | ||
33 | .I Bzmore | ||
34 | normally pauses after each screenful, printing --More-- | ||
35 | at the bottom of the screen. | ||
36 | If the user then types a carriage return, one more line is displayed. | ||
37 | If the user hits a space, | ||
38 | another screenful is displayed. Other possibilities are enumerated later. | ||
39 | .PP | ||
40 | .I Bzmore | ||
41 | looks in the file | ||
42 | .I /etc/termcap | ||
43 | to determine terminal characteristics, | ||
44 | and to determine the default window size. | ||
45 | On a terminal capable of displaying 24 lines, | ||
46 | the default window size is 22 lines. | ||
47 | Other sequences which may be typed when | ||
48 | .I bzmore | ||
49 | pauses, and their effects, are as follows (\fIi\fP is an optional integer | ||
50 | argument, defaulting to 1) : | ||
51 | .PP | ||
52 | .IP \fIi\|\fP<space> | ||
53 | display | ||
54 | .I i | ||
55 | more lines, (or another screenful if no argument is given) | ||
56 | .PP | ||
57 | .IP ^D | ||
58 | display 11 more lines (a ``scroll''). | ||
59 | If | ||
60 | .I i | ||
61 | is given, then the scroll size is set to \fIi\|\fP. | ||
62 | .PP | ||
63 | .IP d | ||
64 | same as ^D (control-D) | ||
65 | .PP | ||
66 | .IP \fIi\|\fPz | ||
67 | same as typing a space except that \fIi\|\fP, if present, becomes the new | ||
68 | window size. Note that the window size reverts back to the default at the | ||
69 | end of the current file. | ||
70 | .PP | ||
71 | .IP \fIi\|\fPs | ||
72 | skip \fIi\|\fP lines and print a screenful of lines | ||
73 | .PP | ||
74 | .IP \fIi\|\fPf | ||
75 | skip \fIi\fP screenfuls and print a screenful of lines | ||
76 | .PP | ||
77 | .IP "q or Q" | ||
78 | quit reading the current file; go on to the next (if any) | ||
79 | .PP | ||
80 | .IP "e or q" | ||
81 | When the prompt --More--(Next file: | ||
82 | .IR file ) | ||
83 | is printed, this command causes bzmore to exit. | ||
84 | .PP | ||
85 | .IP s | ||
86 | When the prompt --More--(Next file: | ||
87 | .IR file ) | ||
88 | is printed, this command causes bzmore to skip the next file and continue. | ||
89 | .PP | ||
90 | .IP = | ||
91 | Display the current line number. | ||
92 | .PP | ||
93 | .IP \fIi\|\fP/expr | ||
94 | search for the \fIi\|\fP-th occurrence of the regular expression \fIexpr.\fP | ||
95 | If the pattern is not found, | ||
96 | .I bzmore | ||
97 | goes on to the next file (if any). | ||
98 | Otherwise, a screenful is displayed, starting two lines before the place | ||
99 | where the expression was found. | ||
100 | The user's erase and kill characters may be used to edit the regular | ||
101 | expression. | ||
102 | Erasing back past the first column cancels the search command. | ||
103 | .PP | ||
104 | .IP \fIi\|\fPn | ||
105 | search for the \fIi\|\fP-th occurrence of the last regular expression entered. | ||
106 | .PP | ||
107 | .IP !command | ||
108 | invoke a shell with \fIcommand\|\fP. | ||
109 | Any `!' characters in "command" are replaced with the | ||
110 | previous shell command. The sequence "\\!" is replaced by "!". | ||
111 | .PP | ||
112 | .IP ":q or :Q" | ||
113 | quit reading the current file; go on to the next (if any) | ||
114 | (same as q or Q). | ||
115 | .PP | ||
116 | .IP . | ||
117 | (dot) repeat the previous command. | ||
118 | .PP | ||
119 | The commands take effect immediately, i.e., it is not necessary to | ||
120 | type a carriage return. | ||
121 | Up to the time when the command character itself is given, | ||
122 | the user may hit the line kill character to cancel the numerical | ||
123 | argument being formed. | ||
124 | In addition, the user may hit the erase character to redisplay the | ||
125 | --More-- message. | ||
126 | .PP | ||
127 | At any time when output is being sent to the terminal, the user can | ||
128 | hit the quit key (normally control\-\\). | ||
129 | .I Bzmore | ||
130 | will stop sending output, and will display the usual --More-- | ||
131 | prompt. | ||
132 | The user may then enter one of the above commands in the normal manner. | ||
133 | Unfortunately, some output is lost when this is done, due to the | ||
134 | fact that any characters waiting in the terminal's output queue | ||
135 | are flushed when the quit signal occurs. | ||
136 | .PP | ||
137 | The terminal is set to | ||
138 | .I noecho | ||
139 | mode by this program so that the output can be continuous. | ||
140 | What you type will thus not show on your terminal, except for the / and ! | ||
141 | commands. | ||
142 | .PP | ||
143 | If the standard output is not a teletype, then | ||
144 | .I bzmore | ||
145 | acts just like | ||
146 | .I bzcat, | ||
147 | except that a header is printed before each file. | ||
148 | .SH FILES | ||
149 | .DT | ||
150 | /etc/termcap Terminal data base | ||
151 | .SH "SEE ALSO" | ||
152 | more(1), less(1), bzip2(1), bzdiff(1), bzgrep(1) | ||
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -663,10 +663,10 @@ void BZ2_compressBlock ( EState* s, Bool is_last_block ) | |||
663 | /*-- If this is the first block, create the stream header. --*/ | 663 | /*-- If this is the first block, create the stream header. --*/ |
664 | if (s->blockNo == 1) { | 664 | if (s->blockNo == 1) { |
665 | BZ2_bsInitWrite ( s ); | 665 | BZ2_bsInitWrite ( s ); |
666 | bsPutUChar ( s, 'B' ); | 666 | bsPutUChar ( s, BZ_HDR_B ); |
667 | bsPutUChar ( s, 'Z' ); | 667 | bsPutUChar ( s, BZ_HDR_Z ); |
668 | bsPutUChar ( s, 'h' ); | 668 | bsPutUChar ( s, BZ_HDR_h ); |
669 | bsPutUChar ( s, (UChar)('0' + s->blockSize100k) ); | 669 | bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); |
670 | } | 670 | } |
671 | 671 | ||
672 | if (s->nblock > 0) { | 672 | if (s->nblock > 0) { |
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
diff --git a/decompress.c b/decompress.c index cdced18..e921347 100644 --- a/decompress.c +++ b/decompress.c | |||
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -235,18 +235,18 @@ Int32 BZ2_decompress ( DState* s ) | |||
235 | switch (s->state) { | 235 | switch (s->state) { |
236 | 236 | ||
237 | GET_UCHAR(BZ_X_MAGIC_1, uc); | 237 | GET_UCHAR(BZ_X_MAGIC_1, uc); |
238 | if (uc != 'B') RETURN(BZ_DATA_ERROR_MAGIC); | 238 | if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC); |
239 | 239 | ||
240 | GET_UCHAR(BZ_X_MAGIC_2, uc); | 240 | GET_UCHAR(BZ_X_MAGIC_2, uc); |
241 | if (uc != 'Z') RETURN(BZ_DATA_ERROR_MAGIC); | 241 | if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC); |
242 | 242 | ||
243 | GET_UCHAR(BZ_X_MAGIC_3, uc) | 243 | GET_UCHAR(BZ_X_MAGIC_3, uc) |
244 | if (uc != 'h') RETURN(BZ_DATA_ERROR_MAGIC); | 244 | if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC); |
245 | 245 | ||
246 | GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8) | 246 | GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8) |
247 | if (s->blockSize100k < '1' || | 247 | if (s->blockSize100k < (BZ_HDR_0 + 1) || |
248 | s->blockSize100k > '9') RETURN(BZ_DATA_ERROR_MAGIC); | 248 | s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC); |
249 | s->blockSize100k -= '0'; | 249 | s->blockSize100k -= BZ_HDR_0; |
250 | 250 | ||
251 | if (s->smallDecompress) { | 251 | if (s->smallDecompress) { |
252 | s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) ); | 252 | s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) ); |
@@ -19,7 +19,7 @@ | |||
19 | 19 | ||
20 | #ifdef _WIN32 | 20 | #ifdef _WIN32 |
21 | 21 | ||
22 | #define BZ2_LIBNAME "libbz2-1.0.0.DLL" | 22 | #define BZ2_LIBNAME "libbz2-1.0.2.DLL" |
23 | 23 | ||
24 | #include <windows.h> | 24 | #include <windows.h> |
25 | static int BZ2DLLLoaded = 0; | 25 | static int BZ2DLLLoaded = 0; |
@@ -130,8 +130,8 @@ int main(int argc,char *argv[]) | |||
130 | }else{ | 130 | }else{ |
131 | fp_w = stdout; | 131 | fp_w = stdout; |
132 | } | 132 | } |
133 | if((BZ2fp_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL) | 133 | if((fn_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL) |
134 | || (BZ2fp_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){ | 134 | || (fn_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){ |
135 | printf("can't bz2openstream\n"); | 135 | printf("can't bz2openstream\n"); |
136 | exit(1); | 136 | exit(1); |
137 | } | 137 | } |
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
diff --git a/makefile.msc b/makefile.msc index 3fe4232..799a18a 100644 --- a/makefile.msc +++ b/makefile.msc | |||
@@ -4,7 +4,7 @@ | |||
4 | # Fixed up by JRS for bzip2-0.9.5d release. | 4 | # Fixed up by JRS for bzip2-0.9.5d release. |
5 | 5 | ||
6 | CC=cl | 6 | CC=cl |
7 | CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64 | 7 | CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64 -nologo |
8 | 8 | ||
9 | OBJS= blocksort.obj \ | 9 | OBJS= blocksort.obj \ |
10 | huffman.obj \ | 10 | huffman.obj \ |
diff --git a/manual.texi b/manual.texi index 336776a..5bc27d5 100644 --- a/manual.texi +++ b/manual.texi | |||
@@ -2,10 +2,10 @@ | |||
2 | @setfilename bzip2.info | 2 | @setfilename bzip2.info |
3 | 3 | ||
4 | @ignore | 4 | @ignore |
5 | This file documents bzip2 version 1.0, and associated library | 5 | This file documents bzip2 version 1.0.2, and associated library |
6 | libbzip2, written by Julian Seward (jseward@acm.org). | 6 | libbzip2, written by Julian Seward (jseward@acm.org). |
7 | 7 | ||
8 | Copyright (C) 1996-2000 Julian R Seward | 8 | Copyright (C) 1996-2002 Julian R Seward |
9 | 9 | ||
10 | Permission is granted to make and distribute verbatim copies of | 10 | Permission is granted to make and distribute verbatim copies of |
11 | this manual provided the copyright notice and this permission notice | 11 | this manual provided the copyright notice and this permission notice |
@@ -30,8 +30,8 @@ END-INFO-DIR-ENTRY | |||
30 | @titlepage | 30 | @titlepage |
31 | @title bzip2 and libbzip2 | 31 | @title bzip2 and libbzip2 |
32 | @subtitle a program and library for data compression | 32 | @subtitle a program and library for data compression |
33 | @subtitle copyright (C) 1996-2000 Julian Seward | 33 | @subtitle copyright (C) 1996-2002 Julian Seward |
34 | @subtitle version 1.0 of 21 March 2000 | 34 | @subtitle version 1.0.2 of 30 December 2001 |
35 | @author Julian Seward | 35 | @author Julian Seward |
36 | 36 | ||
37 | @end titlepage | 37 | @end titlepage |
@@ -40,11 +40,17 @@ END-INFO-DIR-ENTRY | |||
40 | @parskip 2mm | 40 | @parskip 2mm |
41 | 41 | ||
42 | @end iftex | 42 | @end iftex |
43 | @node Top, Overview, (dir), (dir) | 43 | @node Top,,, (dir) |
44 | |||
45 | The following text is the License for this software. You should | ||
46 | find it identical to that contained in the file LICENSE in the | ||
47 | source distribution. | ||
48 | |||
49 | @bf{------------------ START OF THE LICENSE ------------------} | ||
44 | 50 | ||
45 | This program, @code{bzip2}, | 51 | This program, @code{bzip2}, |
46 | and associated library @code{libbzip2}, are | 52 | and associated library @code{libbzip2}, are |
47 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 53 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
48 | 54 | ||
49 | Redistribution and use in source and binary forms, with or without | 55 | Redistribution and use in source and binary forms, with or without |
50 | modification, are permitted provided that the following conditions | 56 | modification, are permitted provided that the following conditions |
@@ -82,13 +88,15 @@ Julian Seward, Cambridge, UK. | |||
82 | 88 | ||
83 | @code{jseward@@acm.org} | 89 | @code{jseward@@acm.org} |
84 | 90 | ||
85 | @code{http://sourceware.cygnus.com/bzip2} | 91 | @code{bzip2}/@code{libbzip2} version 1.0.2 of 30 December 2001. |
86 | 92 | ||
87 | @code{http://www.cacheprof.org} | 93 | @bf{------------------ END OF THE LICENSE ------------------} |
88 | 94 | ||
89 | @code{http://www.muraroa.demon.co.uk} | 95 | Web sites: |
90 | 96 | ||
91 | @code{bzip2}/@code{libbzip2} version 1.0 of 21 March 2000. | 97 | @code{http://sources.redhat.com/bzip2} |
98 | |||
99 | @code{http://www.cacheprof.org} | ||
92 | 100 | ||
93 | PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented | 101 | PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented |
94 | algorithms. However, I do not have the resources available to carry out | 102 | algorithms. However, I do not have the resources available to carry out |
@@ -101,7 +109,6 @@ above statement. | |||
101 | 109 | ||
102 | 110 | ||
103 | 111 | ||
104 | @node Overview, Implementation, Top, Top | ||
105 | @chapter Introduction | 112 | @chapter Introduction |
106 | 113 | ||
107 | @code{bzip2} compresses files using the Burrows-Wheeler | 114 | @code{bzip2} compresses files using the Burrows-Wheeler |
@@ -134,7 +141,7 @@ and nothing else. | |||
134 | @unnumberedsubsubsec NAME | 141 | @unnumberedsubsubsec NAME |
135 | @itemize | 142 | @itemize |
136 | @item @code{bzip2}, @code{bunzip2} | 143 | @item @code{bzip2}, @code{bunzip2} |
137 | - a block-sorting file compressor, v1.0 | 144 | - a block-sorting file compressor, v1.0.2 |
138 | @item @code{bzcat} | 145 | @item @code{bzcat} |
139 | - decompresses files to stdout | 146 | - decompresses files to stdout |
140 | @item @code{bzip2recover} | 147 | @item @code{bzip2recover} |
@@ -264,6 +271,11 @@ This really performs a trial decompression and throws away the result. | |||
264 | Force overwrite of output files. Normally, @code{bzip2} will not overwrite | 271 | Force overwrite of output files. Normally, @code{bzip2} will not overwrite |
265 | existing output files. Also forces @code{bzip2} to break hard links | 272 | existing output files. Also forces @code{bzip2} to break hard links |
266 | to files, which it otherwise wouldn't do. | 273 | to files, which it otherwise wouldn't do. |
274 | |||
275 | @code{bzip2} normally declines to decompress files which don't have the | ||
276 | correct magic header bytes. If forced (@code{-f}), however, it will | ||
277 | pass such files through unmodified. This is how GNU @code{gzip} | ||
278 | behaves. | ||
267 | @item -k --keep | 279 | @item -k --keep |
268 | Keep (don't delete) input files during compression | 280 | Keep (don't delete) input files during compression |
269 | or decompression. | 281 | or decompression. |
@@ -286,9 +298,13 @@ Further @code{-v}'s increase the verbosity level, spewing out lots of | |||
286 | information which is primarily of interest for diagnostic purposes. | 298 | information which is primarily of interest for diagnostic purposes. |
287 | @item -L --license -V --version | 299 | @item -L --license -V --version |
288 | Display the software version, license terms and conditions. | 300 | Display the software version, license terms and conditions. |
289 | @item -1 to -9 | 301 | @item -1 (or --fast) to -9 (or --best) |
290 | Set the block size to 100 k, 200 k .. 900 k when compressing. Has no | 302 | Set the block size to 100 k, 200 k .. 900 k when compressing. Has no |
291 | effect when decompressing. See MEMORY MANAGEMENT below. | 303 | effect when decompressing. See MEMORY MANAGEMENT below. |
304 | The @code{--fast} and @code{--best} aliases are primarily for GNU | ||
305 | @code{gzip} compatibility. In particular, @code{--fast} doesn't make | ||
306 | things significantly faster. And @code{--best} merely selects the | ||
307 | default behaviour. | ||
292 | @item -- | 308 | @item -- |
293 | Treats all subsequent arguments as file names, even if they start | 309 | Treats all subsequent arguments as file names, even if they start |
294 | with a dash. This is so you can handle files with names beginning | 310 | with a dash. This is so you can handle files with names beginning |
@@ -389,21 +405,19 @@ integrity of the resulting files, and decompress those which are | |||
389 | undamaged. | 405 | undamaged. |
390 | 406 | ||
391 | @code{bzip2recover} | 407 | @code{bzip2recover} |
392 | takes a single argument, the name of the damaged file, | 408 | takes a single argument, the name of the damaged file, and writes a |
393 | and writes a number of files @code{rec0001file.bz2}, | 409 | number of files @code{rec00001file.bz2}, @code{rec00002file.bz2}, etc, |
394 | @code{rec0002file.bz2}, etc, containing the extracted blocks. | 410 | containing the extracted blocks. The output filenames are designed so |
395 | The output filenames are designed so that the use of | 411 | that the use of wildcards in subsequent processing -- for example, |
396 | wildcards in subsequent processing -- for example, | 412 | @code{bzip2 -dc rec*file.bz2 > recovered_data} -- processes the files in |
397 | @code{bzip2 -dc rec*file.bz2 > recovered_data} -- lists the files in | 413 | the correct order. |
398 | the correct order. | ||
399 | 414 | ||
400 | @code{bzip2recover} should be of most use dealing with large @code{.bz2} | 415 | @code{bzip2recover} should be of most use dealing with large @code{.bz2} |
401 | files, as these will contain many blocks. It is clearly | 416 | files, as these will contain many blocks. It is clearly futile to use |
402 | futile to use it on damaged single-block files, since a | 417 | it on damaged single-block files, since a damaged block cannot be |
403 | damaged block cannot be recovered. If you wish to minimise | 418 | recovered. If you wish to minimise any potential data loss through |
404 | any potential data loss through media or transmission errors, | 419 | media or transmission errors, you might consider compressing with a |
405 | you might consider compressing with a smaller | 420 | smaller block size. |
406 | block size. | ||
407 | 421 | ||
408 | 422 | ||
409 | @unnumberedsubsubsec PERFORMANCE NOTES | 423 | @unnumberedsubsubsec PERFORMANCE NOTES |
@@ -435,22 +449,31 @@ I/O error messages are not as helpful as they could be. @code{bzip2} | |||
435 | tries hard to detect I/O errors and exit cleanly, but the details of | 449 | tries hard to detect I/O errors and exit cleanly, but the details of |
436 | what the problem is sometimes seem rather misleading. | 450 | what the problem is sometimes seem rather misleading. |
437 | 451 | ||
438 | This manual page pertains to version 1.0 of @code{bzip2}. Compressed | 452 | This manual page pertains to version 1.0.2 of @code{bzip2}. Compressed |
439 | data created by this version is entirely forwards and backwards | 453 | data created by this version is entirely forwards and backwards |
440 | compatible with the previous public releases, versions 0.1pl2, 0.9.0 and | 454 | compatible with the previous public releases, versions 0.1pl2, 0.9.0, |
441 | 0.9.5, but with the following exception: 0.9.0 and above can correctly | 455 | 0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and |
442 | decompress multiple concatenated compressed files. 0.1pl2 cannot do | 456 | above can correctly decompress multiple concatenated compressed files. |
443 | this; it will stop after decompressing just the first file in the | 457 | 0.1pl2 cannot do this; it will stop after decompressing just the first |
444 | stream. | 458 | file in the stream. |
459 | |||
460 | @code{bzip2recover} versions prior to this one, 1.0.2, used 32-bit | ||
461 | integers to represent bit positions in compressed files, so it could not | ||
462 | handle compressed files more than 512 megabytes long. Versions 1.0.2 and | ||
463 | above use 64-bit ints on some platforms which support them (GNU | ||
464 | supported targets, and Windows). To establish whether or not | ||
465 | @code{bzip2recover} was built with such a limitation, run it without | ||
466 | arguments. In any event you can build yourself an unlimited version if | ||
467 | you can recompile it with @code{MaybeUInt64} set to be an unsigned | ||
468 | 64-bit integer. | ||
445 | 469 | ||
446 | @code{bzip2recover} uses 32-bit integers to represent bit positions in | ||
447 | compressed files, so it cannot handle compressed files more than 512 | ||
448 | megabytes long. This could easily be fixed. | ||
449 | 470 | ||
450 | 471 | ||
451 | @unnumberedsubsubsec AUTHOR | 472 | @unnumberedsubsubsec AUTHOR |
452 | Julian Seward, @code{jseward@@acm.org}. | 473 | Julian Seward, @code{jseward@@acm.org}. |
453 | 474 | ||
475 | @code{http://sources.redhat.com/bzip2} | ||
476 | |||
454 | The ideas embodied in @code{bzip2} are due to (at least) the following | 477 | The ideas embodied in @code{bzip2} are due to (at least) the following |
455 | people: Michael Burrows and David Wheeler (for the block sorting | 478 | people: Michael Burrows and David Wheeler (for the block sorting |
456 | transformation), David Wheeler (again, for the Huffman coder), Peter | 479 | transformation), David Wheeler (again, for the Huffman coder), Peter |
@@ -461,8 +484,9 @@ indebted for their help, support and advice. See the manual in the | |||
461 | source distribution for pointers to sources of documentation. Christian | 484 | source distribution for pointers to sources of documentation. Christian |
462 | von Roques encouraged me to look for faster sorting algorithms, so as to | 485 | von Roques encouraged me to look for faster sorting algorithms, so as to |
463 | speed up compression. Bela Lubkin encouraged me to improve the | 486 | speed up compression. Bela Lubkin encouraged me to improve the |
464 | worst-case compression performance. Many people sent patches, helped | 487 | worst-case compression performance. The @code{bz*} scripts are derived |
465 | with portability problems, lent machines, gave advice and were generally | 488 | from those of GNU @code{gzip}. Many people sent patches, helped with |
489 | portability problems, lent machines, gave advice and were generally | ||
466 | helpful. | 490 | helpful. |
467 | 491 | ||
468 | @end quotation | 492 | @end quotation |
@@ -1769,16 +1793,20 @@ was compiled with @code{BZ_NO_STDIO} set. | |||
1769 | For a normal compile, an assertion failure yields the message | 1793 | For a normal compile, an assertion failure yields the message |
1770 | @example | 1794 | @example |
1771 | bzip2/libbzip2: internal error number N. | 1795 | bzip2/libbzip2: internal error number N. |
1772 | This is a bug in bzip2/libbzip2, 1.0 of 21-Mar-2000. | 1796 | This is a bug in bzip2/libbzip2, 1.0.2, 30-Dec-2001. |
1773 | Please report it to me at: jseward@@acm.org. If this happened | 1797 | Please report it to me at: jseward@@acm.org. If this happened |
1774 | when you were using some program which uses libbzip2 as a | 1798 | when you were using some program which uses libbzip2 as a |
1775 | component, you should also report this bug to the author(s) | 1799 | component, you should also report this bug to the author(s) |
1776 | of that program. Please make an effort to report this bug; | 1800 | of that program. Please make an effort to report this bug; |
1777 | timely and accurate bug reports eventually lead to higher | 1801 | timely and accurate bug reports eventually lead to higher |
1778 | quality software. Thanks. Julian Seward, 21 March 2000. | 1802 | quality software. Thanks. Julian Seward, 30 December 2001. |
1779 | @end example | 1803 | @end example |
1780 | where @code{N} is some error code number. @code{exit(3)} | 1804 | where @code{N} is some error code number. If @code{N == 1007}, it also |
1781 | is then called. | 1805 | prints some extra text advising the reader that unreliable memory is |
1806 | often associated with internal error 1007. (This is a | ||
1807 | frequently observed phenomenon with versions 1.0.0/1.0.1). | ||
1808 | |||
1809 | @code{exit(3)} is then called. | ||
1782 | 1810 | ||
1783 | For a @code{stdio}-free library, assertion failures result | 1811 | For a @code{stdio}-free library, assertion failures result |
1784 | in a call to a function declared as: | 1812 | in a call to a function declared as: |
@@ -2056,10 +2084,10 @@ Maybe this isn't what you want. | |||
2056 | If you want a compressor and/or library which is faster, uses less | 2084 | If you want a compressor and/or library which is faster, uses less |
2057 | memory but gets pretty good compression, and has minimal latency, | 2085 | memory but gets pretty good compression, and has minimal latency, |
2058 | consider Jean-loup | 2086 | consider Jean-loup |
2059 | Gailly's and Mark Adler's work, @code{zlib-1.1.2} and | 2087 | Gailly's and Mark Adler's work, @code{zlib-1.1.3} and |
2060 | @code{gzip-1.2.4}. Look for them at | 2088 | @code{gzip-1.2.4}. Look for them at |
2061 | 2089 | ||
2062 | @code{http://www.cdrom.com/pub/infozip/zlib} and | 2090 | @code{http://www.zlib.org} and |
2063 | @code{http://www.gzip.org} respectively. | 2091 | @code{http://www.gzip.org} respectively. |
2064 | 2092 | ||
2065 | For something faster and lighter still, you might try Markus F X J | 2093 | For something faster and lighter still, you might try Markus F X J |
@@ -0,0 +1,16 @@ | |||
1 | |||
2 | /* Spew out a long sequence of the byte 251. When fed to bzip2 | ||
3 | versions 1.0.0 or 1.0.1, causes it to die with internal error | ||
4 | 1007 in blocksort.c. This assertion misses an extremely rare | ||
5 | case, which is fixed in this version (1.0.2) and above. | ||
6 | */ | ||
7 | |||
8 | #include <stdio.h> | ||
9 | |||
10 | int main () | ||
11 | { | ||
12 | int i; | ||
13 | for (i = 0; i < 48500000 ; i++) | ||
14 | putchar(251); | ||
15 | return 0; | ||
16 | } | ||
diff --git a/randtable.c b/randtable.c index 983089d..5c922e9 100644 --- a/randtable.c +++ b/randtable.c | |||
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -15,8 +15,8 @@ not actually execute them. | |||
15 | 15 | ||
16 | Instructions for use are in the preformatted manual page, in the file | 16 | Instructions for use are in the preformatted manual page, in the file |
17 | bzip2.txt. For more detailed documentation, read the full manual. | 17 | bzip2.txt. For more detailed documentation, read the full manual. |
18 | It is available in Postscript form (manual.ps) and HTML form | 18 | It is available in Postscript form (manual.ps), PDF form (manual.pdf), |
19 | (manual_toc.html). | 19 | and HTML form (manual_toc.html). |
20 | 20 | ||
21 | You can also do "bzip2 --help" to see some helpful information. | 21 | You can also do "bzip2 --help" to see some helpful information. |
22 | "bzip2 -L" displays the software license. | 22 | "bzip2 -L" displays the software license. |