diff options
Diffstat (limited to '')
| -rw-r--r-- | CHANGES | 22 | ||||
| -rw-r--r-- | LICENSE | 7 | ||||
| -rw-r--r-- | Makefile | 51 | ||||
| -rw-r--r-- | Makefile-libbz2_so | 12 | ||||
| -rw-r--r-- | README | 40 | ||||
| -rw-r--r-- | README.COMPILATION.PROBLEMS | 103 | ||||
| -rw-r--r-- | README.XML.STUFF | 31 | ||||
| -rw-r--r-- | blocksort.c | 4 | ||||
| -rw-r--r-- | bz-common.xsl | 39 | ||||
| -rw-r--r-- | bz-fo.xsl | 257 | ||||
| -rw-r--r-- | bz-html.xsl | 20 | ||||
| -rw-r--r-- | bzip.css | 74 | ||||
| -rw-r--r-- | bzip2.1 | 17 | ||||
| -rw-r--r-- | bzip2.1.preformatted | 247 | ||||
| -rw-r--r-- | bzip2.c | 20 | ||||
| -rw-r--r-- | bzip2.txt | 119 | ||||
| -rw-r--r-- | bzip2recover.c | 12 | ||||
| -rw-r--r-- | bzlib.c | 59 | ||||
| -rw-r--r-- | bzlib.h | 6 | ||||
| -rw-r--r-- | bzlib_private.h | 6 | ||||
| -rw-r--r-- | compress.c | 16 | ||||
| -rw-r--r-- | crctable.c | 4 | ||||
| -rw-r--r-- | decompress.c | 20 | ||||
| -rw-r--r-- | entities.xml | 9 | ||||
| -rwxr-xr-x | format.pl | 53 | ||||
| -rw-r--r-- | huffman.c | 23 | ||||
| -rw-r--r-- | manual.texi | 2243 | ||||
| -rw-r--r-- | manual.xml | 2966 | ||||
| -rw-r--r-- | randtable.c | 4 | ||||
| -rwxr-xr-x | xmlproc.sh | 99 |
30 files changed, 3947 insertions, 2636 deletions
| @@ -251,3 +251,25 @@ of bzip2: | |||
| 251 | 251 | ||
| 252 | * added --fast and --best aliases for -1 -9 for gzip compatibility. | 252 | * added --fast and --best aliases for -1 -9 for gzip compatibility. |
| 253 | 253 | ||
| 254 | |||
| 255 | 1.0.3 (15 Feb 05) | ||
| 256 | ~~~~~~~~~~~~~~~~~ | ||
| 257 | Fixes some minor bugs since the last version, 1.0.2. | ||
| 258 | |||
| 259 | * Further robustification against corrupted compressed data. | ||
| 260 | There are currently no known bitstreams which can cause the | ||
| 261 | decompressor to crash, loop or access memory which does not | ||
| 262 | belong to it. If you are using bzip2 or the library to | ||
| 263 | decompress bitstreams from untrusted sources, an upgrade | ||
| 264 | to 1.0.3 is recommended. | ||
| 265 | |||
| 266 | * The documentation has been converted to XML, from which html | ||
| 267 | and pdf can be derived. | ||
| 268 | |||
| 269 | * Various minor bugs in the documentation have been fixed. | ||
| 270 | |||
| 271 | * Fixes for various compilation warnings with newer versions of | ||
| 272 | gcc, and on 64-bit platforms. | ||
| 273 | |||
| 274 | * The BZ_NO_STDIO cpp symbol was not properly observed in 1.0.2. | ||
| 275 | This has been fixed. | ||
| @@ -1,6 +1,7 @@ | |||
| 1 | 1 | ||
| 2 | This program, "bzip2" and associated library "libbzip2", are | 2 | This program, "bzip2", the associated library "libbzip2", and all |
| 3 | copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 3 | documentation, are copyright (C) 1996-2005 Julian R Seward. All |
| 4 | rights reserved. | ||
| 4 | 5 | ||
| 5 | Redistribution and use in source and binary forms, with or without | 6 | Redistribution and use in source and binary forms, with or without |
| 6 | modification, are permitted provided that the following conditions | 7 | modification, are permitted provided that the following conditions |
| @@ -35,5 +36,5 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| 35 | 36 | ||
| 36 | Julian Seward, Cambridge, UK. | 37 | Julian Seward, Cambridge, UK. |
| 37 | jseward@acm.org | 38 | jseward@acm.org |
| 38 | bzip2/libbzip2 version 1.0.2 of 30 December 2001 | 39 | bzip2/libbzip2 version 1.0.3 of 15 February 2005 |
| 39 | 40 | ||
| @@ -7,9 +7,8 @@ AR=ar | |||
| 7 | RANLIB=ranlib | 7 | RANLIB=ranlib |
| 8 | LDFLAGS= | 8 | LDFLAGS= |
| 9 | 9 | ||
| 10 | # Suitably paranoid flags to avoid bugs in gcc-2.7 | ||
| 11 | BIGFILES=-D_FILE_OFFSET_BITS=64 | 10 | BIGFILES=-D_FILE_OFFSET_BITS=64 |
| 12 | CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES) | 11 | CFLAGS=-Wall -Winline -O -g $(BIGFILES) |
| 13 | 12 | ||
| 14 | # Where you want it installed when you do 'make install' | 13 | # Where you want it installed when you do 'make install' |
| 15 | PREFIX=/usr | 14 | PREFIX=/usr |
| @@ -96,7 +95,6 @@ install: bzip2 bzip2recover | |||
| 96 | echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1 | 95 | echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1 |
| 97 | echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1 | 96 | echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1 |
| 98 | 97 | ||
| 99 | distclean: clean | ||
| 100 | clean: | 98 | clean: |
| 101 | rm -f *.o libbz2.a bzip2 bzip2recover \ | 99 | rm -f *.o libbz2.a bzip2 bzip2recover \ |
| 102 | sample1.rb2 sample2.rb2 sample3.rb2 \ | 100 | sample1.rb2 sample2.rb2 sample3.rb2 \ |
| @@ -122,8 +120,12 @@ bzip2.o: bzip2.c | |||
| 122 | bzip2recover.o: bzip2recover.c | 120 | bzip2recover.o: bzip2recover.c |
| 123 | $(CC) $(CFLAGS) -c bzip2recover.c | 121 | $(CC) $(CFLAGS) -c bzip2recover.c |
| 124 | 122 | ||
| 125 | DISTNAME=bzip2-1.0.2 | 123 | |
| 126 | tarfile: | 124 | distclean: clean |
| 125 | rm -f manual.ps manual.html manual.pdf | ||
| 126 | |||
| 127 | DISTNAME=bzip2-1.0.3 | ||
| 128 | dist: check manual | ||
| 127 | rm -f $(DISTNAME) | 129 | rm -f $(DISTNAME) |
| 128 | ln -sf . $(DISTNAME) | 130 | ln -sf . $(DISTNAME) |
| 129 | tar cvf $(DISTNAME).tar \ | 131 | tar cvf $(DISTNAME).tar \ |
| @@ -139,9 +141,6 @@ tarfile: | |||
| 139 | $(DISTNAME)/bzlib.h \ | 141 | $(DISTNAME)/bzlib.h \ |
| 140 | $(DISTNAME)/bzlib_private.h \ | 142 | $(DISTNAME)/bzlib_private.h \ |
| 141 | $(DISTNAME)/Makefile \ | 143 | $(DISTNAME)/Makefile \ |
| 142 | $(DISTNAME)/manual.texi \ | ||
| 143 | $(DISTNAME)/manual.ps \ | ||
| 144 | $(DISTNAME)/manual.pdf \ | ||
| 145 | $(DISTNAME)/LICENSE \ | 144 | $(DISTNAME)/LICENSE \ |
| 146 | $(DISTNAME)/bzip2.1 \ | 145 | $(DISTNAME)/bzip2.1 \ |
| 147 | $(DISTNAME)/bzip2.1.preformatted \ | 146 | $(DISTNAME)/bzip2.1.preformatted \ |
| @@ -157,9 +156,12 @@ tarfile: | |||
| 157 | $(DISTNAME)/sample2.bz2 \ | 156 | $(DISTNAME)/sample2.bz2 \ |
| 158 | $(DISTNAME)/sample3.bz2 \ | 157 | $(DISTNAME)/sample3.bz2 \ |
| 159 | $(DISTNAME)/dlltest.c \ | 158 | $(DISTNAME)/dlltest.c \ |
| 160 | $(DISTNAME)/*.html \ | 159 | $(DISTNAME)/manual.html \ |
| 160 | $(DISTNAME)/manual.pdf \ | ||
| 161 | $(DISTNAME)/manual.ps \ | ||
| 161 | $(DISTNAME)/README \ | 162 | $(DISTNAME)/README \ |
| 162 | $(DISTNAME)/README.COMPILATION.PROBLEMS \ | 163 | $(DISTNAME)/README.COMPILATION.PROBLEMS \ |
| 164 | $(DISTNAME)/README.XML.STUFF \ | ||
| 163 | $(DISTNAME)/CHANGES \ | 165 | $(DISTNAME)/CHANGES \ |
| 164 | $(DISTNAME)/libbz2.def \ | 166 | $(DISTNAME)/libbz2.def \ |
| 165 | $(DISTNAME)/libbz2.dsp \ | 167 | $(DISTNAME)/libbz2.dsp \ |
| @@ -175,18 +177,29 @@ tarfile: | |||
| 175 | $(DISTNAME)/bzmore.1 \ | 177 | $(DISTNAME)/bzmore.1 \ |
| 176 | $(DISTNAME)/bzgrep \ | 178 | $(DISTNAME)/bzgrep \ |
| 177 | $(DISTNAME)/bzgrep.1 \ | 179 | $(DISTNAME)/bzgrep.1 \ |
| 178 | $(DISTNAME)/Makefile-libbz2_so | 180 | $(DISTNAME)/Makefile-libbz2_so \ |
| 181 | $(DISTNAME)/bz-common.xsl \ | ||
| 182 | $(DISTNAME)/bz-fo.xsl \ | ||
| 183 | $(DISTNAME)/bz-html.xsl \ | ||
| 184 | $(DISTNAME)/bzip.css \ | ||
| 185 | $(DISTNAME)/entities.xml \ | ||
| 186 | $(DISTNAME)/manual.xml \ | ||
| 187 | $(DISTNAME)/format.pl \ | ||
| 188 | $(DISTNAME)/xmlproc.sh | ||
| 179 | gzip -v $(DISTNAME).tar | 189 | gzip -v $(DISTNAME).tar |
| 180 | 190 | ||
| 181 | # For rebuilding the manual from sources on my RedHat 7.2 box | 191 | # For rebuilding the manual from sources on my SuSE 9.1 box |
| 182 | manual: manual.ps manual.pdf manual.html | 192 | |
| 193 | MANUAL_SRCS= bz-common.xsl bz-fo.xsl bz-html.xsl bzip.css \ | ||
| 194 | entities.xml manual.xml | ||
| 195 | |||
| 196 | manual: manual.html manual.ps manual.pdf | ||
| 183 | 197 | ||
| 184 | manual.ps: manual.texi | 198 | manual.ps: $(MANUAL_SRCS) |
| 185 | tex manual.texi | 199 | ./xmlproc.sh -ps manual.xml |
| 186 | dvips -o manual.ps manual.dvi | ||
| 187 | 200 | ||
| 188 | manual.pdf: manual.ps | 201 | manual.pdf: $(MANUAL_SRCS) |
| 189 | ps2pdf manual.ps | 202 | ./xmlproc.sh -pdf manual.xml |
| 190 | 203 | ||
| 191 | manual.html: manual.texi | 204 | manual.html: $(MANUAL_SRCS) |
| 192 | texi2html -split_chapter manual.texi | 205 | ./xmlproc.sh -html manual.xml |
diff --git a/Makefile-libbz2_so b/Makefile-libbz2_so index 4986fe2..458c5a1 100644 --- a/Makefile-libbz2_so +++ b/Makefile-libbz2_so | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | 1 | ||
| 2 | # This Makefile builds a shared version of the library, | 2 | # This Makefile builds a shared version of the library, |
| 3 | # libbz2.so.1.0.2, with soname libbz2.so.1.0, | 3 | # libbz2.so.1.0.3, with soname libbz2.so.1.0, |
| 4 | # at least on x86-Linux (RedHat 7.2), | 4 | # at least on x86-Linux (RedHat 7.2), |
| 5 | # with gcc-2.96 20000731 (Red Hat Linux 7.1 2.96-98). | 5 | # with gcc-2.96 20000731 (Red Hat Linux 7.1 2.96-98). |
| 6 | # Please see the README file for some | 6 | # Please see the README file for some |
| @@ -9,7 +9,7 @@ | |||
| 9 | SHELL=/bin/sh | 9 | SHELL=/bin/sh |
| 10 | CC=gcc | 10 | CC=gcc |
| 11 | BIGFILES=-D_FILE_OFFSET_BITS=64 | 11 | BIGFILES=-D_FILE_OFFSET_BITS=64 |
| 12 | CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES) | 12 | CFLAGS=-fpic -fPIC -Wall -Winline -O -g |
| 13 | 13 | ||
| 14 | OBJS= blocksort.o \ | 14 | OBJS= blocksort.o \ |
| 15 | huffman.o \ | 15 | huffman.o \ |
| @@ -20,13 +20,13 @@ OBJS= blocksort.o \ | |||
| 20 | bzlib.o | 20 | bzlib.o |
| 21 | 21 | ||
| 22 | all: $(OBJS) | 22 | all: $(OBJS) |
| 23 | $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS) | 23 | $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.3 $(OBJS) |
| 24 | $(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.2 | 24 | $(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.3 |
| 25 | rm -f libbz2.so.1.0 | 25 | rm -f libbz2.so.1.0 |
| 26 | ln -s libbz2.so.1.0.2 libbz2.so.1.0 | 26 | ln -s libbz2.so.1.0.3 libbz2.so.1.0 |
| 27 | 27 | ||
| 28 | clean: | 28 | clean: |
| 29 | rm -f $(OBJS) bzip2.o libbz2.so.1.0.2 libbz2.so.1.0 bzip2-shared | 29 | rm -f $(OBJS) bzip2.o libbz2.so.1.0.3 libbz2.so.1.0 bzip2-shared |
| 30 | 30 | ||
| 31 | blocksort.o: blocksort.c | 31 | blocksort.o: blocksort.c |
| 32 | $(CC) $(CFLAGS) -c blocksort.c | 32 | $(CC) $(CFLAGS) -c blocksort.c |
| @@ -1,15 +1,15 @@ | |||
| 1 | 1 | ||
| 2 | This is the README for bzip2, a block-sorting file compressor, version | 2 | This is the README for bzip2, a block-sorting file compressor, version |
| 3 | 1.0.2. This version is fully compatible with the previous public | 3 | 1.0.3. This version is fully compatible with the previous public |
| 4 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1. | 4 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1 and 1.0.2. |
| 5 | 5 | ||
| 6 | bzip2-1.0.2 is distributed under a BSD-style license. For details, | 6 | bzip2-1.0.3 is distributed under a BSD-style license. For details, |
| 7 | see the file LICENSE. | 7 | see the file LICENSE. |
| 8 | 8 | ||
| 9 | Complete documentation is available in Postscript form (manual.ps), | 9 | Complete documentation is available in Postscript form (manual.ps), |
| 10 | PDF (manual.pdf, amazingly enough) or html (manual_toc.html). A | 10 | PDF (manual.pdf) or html (manual.html). A plain-text version of the |
| 11 | plain-text version of the manual page is available as bzip2.txt. | 11 | manual page is available as bzip2.txt. A statement about Y2K issues |
| 12 | A statement about Y2K issues is now included in the file Y2K_INFO. | 12 | is now included in the file Y2K_INFO. |
| 13 | 13 | ||
| 14 | 14 | ||
| 15 | HOW TO BUILD -- UNIX | 15 | HOW TO BUILD -- UNIX |
| @@ -78,8 +78,7 @@ importance. To validate bzip2, I used a modified version of Mark | |||
| 78 | Nelson's churn program. Churn is an automated test driver which | 78 | Nelson's churn program. Churn is an automated test driver which |
| 79 | recursively traverses a directory structure, using bzip2 to compress | 79 | recursively traverses a directory structure, using bzip2 to compress |
| 80 | and then decompress each file it encounters, and checking that the | 80 | and then decompress each file it encounters, and checking that the |
| 81 | decompressed data is the same as the original. There are more details | 81 | decompressed data is the same as the original. |
| 82 | in Section 4 of the user guide. | ||
| 83 | 82 | ||
| 84 | 83 | ||
| 85 | 84 | ||
| @@ -119,9 +118,9 @@ DISCLAIMER: | |||
| 119 | PATENTS: | 118 | PATENTS: |
| 120 | 119 | ||
| 121 | To the best of my knowledge, bzip2 does not use any patented | 120 | To the best of my knowledge, bzip2 does not use any patented |
| 122 | algorithms. However, I do not have the resources available to | 121 | algorithms. However, I do not have the resources to carry out |
| 123 | carry out a full patent search. Therefore I cannot give any | 122 | a patent search. Therefore I cannot give any guarantee of the |
| 124 | guarantee of the above statement. | 123 | above statement. |
| 125 | 124 | ||
| 126 | End of legalities. | 125 | End of legalities. |
| 127 | 126 | ||
| @@ -153,22 +152,26 @@ WHAT'S NEW IN 1.0.2 ? | |||
| 153 | 152 | ||
| 154 | See the CHANGES file. | 153 | See the CHANGES file. |
| 155 | 154 | ||
| 155 | WHAT'S NEW IN 1.0.3 ? | ||
| 156 | |||
| 157 | See the CHANGES file. | ||
| 158 | |||
| 156 | 159 | ||
| 157 | I hope you find bzip2 useful. Feel free to contact me at | 160 | I hope you find bzip2 useful. Feel free to contact me at |
| 158 | jseward@acm.org | 161 | jseward@bzip.org |
| 159 | if you have any suggestions or queries. Many people mailed me with | 162 | if you have any suggestions or queries. Many people mailed me with |
| 160 | comments, suggestions and patches after the releases of bzip-0.15, | 163 | comments, suggestions and patches after the releases of bzip-0.15, |
| 161 | bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, | 164 | bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1 and |
| 162 | and the changes in bzip2 are largely a result of this feedback. | 165 | 1.0.2, and the changes in bzip2 are largely a result of this feedback. |
| 163 | I thank you for your comments. | 166 | I thank you for your comments. |
| 164 | 167 | ||
| 165 | At least for the time being, bzip2's "home" is (or can be reached via) | 168 | At least for the time being, bzip2's "home" is (or can be reached via) |
| 166 | http://sources.redhat.com/bzip2. | 169 | http://www.bzip.org |
| 167 | 170 | ||
| 168 | Julian Seward | 171 | Julian Seward |
| 169 | jseward@acm.org | 172 | jseward@bzip.org |
| 170 | 173 | ||
| 171 | Cambridge, UK (and what a great town this is!) | 174 | Cambridge, UK. |
| 172 | 175 | ||
| 173 | 18 July 1996 (version 0.15) | 176 | 18 July 1996 (version 0.15) |
| 174 | 25 August 1996 (version 0.21) | 177 | 25 August 1996 (version 0.21) |
| @@ -178,4 +181,5 @@ Cambridge, UK (and what a great town this is!) | |||
| 178 | 8 June 1999 (bzip2, version 0.9.5) | 181 | 8 June 1999 (bzip2, version 0.9.5) |
| 179 | 4 Sept 1999 (bzip2, version 0.9.5d) | 182 | 4 Sept 1999 (bzip2, version 0.9.5d) |
| 180 | 5 May 2000 (bzip2, version 1.0pre8) | 183 | 5 May 2000 (bzip2, version 1.0pre8) |
| 181 | 30 December 2001 (bzip2, version 1.0.2pre1) \ No newline at end of file | 184 | 30 December 2001 (bzip2, version 1.0.2pre1) |
| 185 | 15 February 2005 (bzip2, version 1.0.3) | ||
diff --git a/README.COMPILATION.PROBLEMS b/README.COMPILATION.PROBLEMS index bd1822d..f1bc396 100644 --- a/README.COMPILATION.PROBLEMS +++ b/README.COMPILATION.PROBLEMS | |||
| @@ -1,11 +1,10 @@ | |||
| 1 | 1 | ||
| 2 | bzip2-1.0 should compile without problems on the vast majority of | 2 | bzip2-1.0.3 should compile without problems on the vast majority of |
| 3 | platforms. Using the supplied Makefile, I've built and tested it | 3 | platforms. Using the supplied Makefile, I've built and tested it |
| 4 | myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and | 4 | myself for x86-linux and x86_64-linux. With makefile.msc, Visual C++ |
| 5 | alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can | 5 | 6.0 and nmake, you can build a native Win32 version too. Large file |
| 6 | build a native Win32 version too. Large file support seems to work | 6 | support seems to work correctly on at least alpha-tru64unix and |
| 7 | correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows | 7 | x86-cygwin32 (on Windows 2000). |
| 8 | 2000). | ||
| 9 | 8 | ||
| 10 | When I say "large file" I mean a file of size 2,147,483,648 (2^31) | 9 | When I say "large file" I mean a file of size 2,147,483,648 (2^31) |
| 11 | bytes or above. Many older OSs can't handle files above this size, | 10 | bytes or above. Many older OSs can't handle files above this size, |
| @@ -22,7 +21,7 @@ The technique of adding -D_FILE_OFFSET_BITS=64 to get large file | |||
| 22 | support is, as far as I know, the Recommended Way to get correct large | 21 | support is, as far as I know, the Recommended Way to get correct large |
| 23 | file support. For more details, see the Large File Support | 22 | file support. For more details, see the Large File Support |
| 24 | Specification, published by the Large File Summit, at | 23 | Specification, published by the Large File Summit, at |
| 25 | http://www.sas.com/standard/large.file/ | 24 | http://ftp.sas.com/standards/large.file |
| 26 | 25 | ||
| 27 | As a general comment, if you get compilation errors which you think | 26 | As a general comment, if you get compilation errors which you think |
| 28 | are related to large file support, try removing the above define from | 27 | are related to large file support, try removing the above define from |
| @@ -38,93 +37,3 @@ You can use the spewG.c program to generate huge files to test bzip2's | |||
| 38 | large file support, if you are feeling paranoid. Be aware though that | 37 | large file support, if you are feeling paranoid. Be aware though that |
| 39 | any compilation problems which affect bzip2 will also affect spewG.c, | 38 | any compilation problems which affect bzip2 will also affect spewG.c, |
| 40 | alas. | 39 | alas. |
| 41 | |||
| 42 | |||
| 43 | Known problems as of 1.0pre8: | ||
| 44 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
| 45 | |||
| 46 | * HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large | ||
| 47 | number of warnings appear, including the following: | ||
| 48 | |||
| 49 | /usr/include/sys/resource.h: In function `getrlimit': | ||
| 50 | /usr/include/sys/resource.h:168: | ||
| 51 | warning: implicit declaration of function `__getrlimit64' | ||
| 52 | /usr/include/sys/resource.h: In function `setrlimit': | ||
| 53 | /usr/include/sys/resource.h:170: | ||
| 54 | warning: implicit declaration of function `__setrlimit64' | ||
| 55 | |||
| 56 | This would appear to be a problem with large file support, header | ||
| 57 | files and gcc. gcc may or may not give up at this point. If it | ||
| 58 | fails, you might be able to improve matters by adding | ||
| 59 | -D__STDC_EXT__=1 | ||
| 60 | to the BIGFILES variable in the Makefile (ie, change its definition | ||
| 61 | to | ||
| 62 | BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1 | ||
| 63 | |||
| 64 | Even if gcc does produce a binary which appears to work (ie passes | ||
| 65 | its self-tests), you might want to test it to see if it works properly | ||
| 66 | on large files. | ||
| 67 | |||
| 68 | |||
| 69 | * HP/UX 10.20 and 11.00, using HP's cc compiler. | ||
| 70 | |||
| 71 | No specific problems for this combination, except that you'll need to | ||
| 72 | specify the -Ae flag, and zap the gcc-specific stuff | ||
| 73 | -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce. | ||
| 74 | You should retain -D_FILE_OFFSET_BITS=64 in order to get large | ||
| 75 | file support -- which is reported to work ok for this HP/UX + cc | ||
| 76 | combination. | ||
| 77 | |||
| 78 | |||
| 79 | * SunOS 4.1.X. | ||
| 80 | |||
| 81 | Amazingly, there are still people out there using this venerable old | ||
| 82 | banger. I shouldn't be too rude -- I started life on SunOS, and | ||
| 83 | it was a pretty darn good OS, way back then. Anyway: | ||
| 84 | |||
| 85 | SunOS doesn't seem to have strerror(), so you'll have to use | ||
| 86 | perror(), perhaps by doing adding this (warning: UNTESTED CODE): | ||
| 87 | |||
| 88 | char* strerror ( int errnum ) | ||
| 89 | { | ||
| 90 | if (errnum < 0 || errnum >= sys_nerr) | ||
| 91 | return "Unknown error"; | ||
| 92 | else | ||
| 93 | return sys_errlist[errnum]; | ||
| 94 | } | ||
| 95 | |||
| 96 | Or you could comment out the relevant calls to strerror; they're | ||
| 97 | not mission-critical. Or you could upgrade to Solaris. Ha ha ha! | ||
| 98 | (what?? you think I've got Bad Attitude?) | ||
| 99 | |||
| 100 | |||
| 101 | * Making a shared library on Solaris. (Not really a compilation | ||
| 102 | problem, but many people ask ...) | ||
| 103 | |||
| 104 | Firstly, if you have Solaris 8, either you have libbz2.so already | ||
| 105 | on your system, or you can install it from the Solaris CD. | ||
| 106 | |||
| 107 | Secondly, be aware that there are potential naming conflicts | ||
| 108 | between the .so file supplied with Solaris 8, and the .so file | ||
| 109 | which Makefile-libbz2_so will make. Makefile-libbz2_so creates | ||
| 110 | a .so which has the names which I intend to be "official" as | ||
| 111 | of version 1.0.0 and onwards. Unfortunately, the .so in | ||
| 112 | Solaris 8 appeared before I decided on the final names, so | ||
| 113 | the two libraries are incompatible. We have since communicated | ||
| 114 | and I hope that the problems will have been solved in the next | ||
| 115 | version of Solaris, whenever that might appear. | ||
| 116 | |||
| 117 | All that said: you might be able to get somewhere | ||
| 118 | by finding the line in Makefile-libbz2_so which says | ||
| 119 | |||
| 120 | $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.2 $(OBJS) | ||
| 121 | |||
| 122 | and replacing with | ||
| 123 | |||
| 124 | $(CC) -G -shared -o libbz2.so.1.0.2 -h libbz2.so.1.0 $(OBJS) | ||
| 125 | |||
| 126 | If gcc objects to the combination -fpic -fPIC, get rid of | ||
| 127 | the second one, leaving just "-fpic". | ||
| 128 | |||
| 129 | |||
| 130 | That's the end of the currently known compilation problems. | ||
diff --git a/README.XML.STUFF b/README.XML.STUFF new file mode 100644 index 0000000..0ff209f --- /dev/null +++ b/README.XML.STUFF | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | The script xmlproc.sh takes an xml file as input, | ||
| 2 | and processes it to create .pdf, .html or .ps output. | ||
| 3 | It uses format.pl, a perl script to format <pre> blocks nicely, | ||
| 4 | and add CDATA tags so writers do not have to use eg. &lt; | ||
| 5 | |||
| 6 | The file "entities.xml" must be edited to reflect current | ||
| 7 | version, year, etc. | ||
| 8 | |||
| 9 | |||
| 10 | Usage: | ||
| 11 | |||
| 12 | xmlproc.sh -v manual.xml | ||
| 13 | Validates an xml file to ensure no dtd-compliance errors | ||
| 14 | |||
| 15 | xmlproc.sh -html manual.xml | ||
| 16 | Output: manual.html | ||
| 17 | |||
| 18 | xmlproc.sh -pdf manual.xml | ||
| 19 | Output: manual.pdf | ||
| 20 | |||
| 21 | xmlproc.sh -ps manual.xml | ||
| 22 | Output: manual.ps | ||
| 23 | |||
| 24 | |||
| 25 | Nota bene: | ||
| 26 | - pdfxmltex barfs if given a filename with an underscore in it | ||
| 27 | |||
| 28 | - xmltex won't work yet - there's a bug in passivetex | ||
| 29 | which we are all waiting for Sebastian to fix. | ||
| 30 | So we are going the xml -> pdf -> ps route for the time being, | ||
| 31 | using pdfxmltex. | ||
diff --git a/blocksort.c b/blocksort.c index aba3efc..33ec9f5 100644 --- a/blocksort.c +++ b/blocksort.c | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
diff --git a/bz-common.xsl b/bz-common.xsl new file mode 100644 index 0000000..66fcd6f --- /dev/null +++ b/bz-common.xsl | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | <?xml version="1.0"?> <!-- -*- sgml -*- --> | ||
| 2 | <xsl:stylesheet | ||
| 3 | xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> | ||
| 4 | |||
| 5 | <!-- we like '1.2 Title' --> | ||
| 6 | <xsl:param name="section.autolabel" select="'1'"/> | ||
| 7 | <xsl:param name="section.label.includes.component.label" select="'1'"/> | ||
| 8 | |||
| 9 | <!-- Do not put 'Chapter' at the start of eg 'Chapter 1. Doing This' --> | ||
| 10 | <xsl:param name="local.l10n.xml" select="document('')"/> | ||
| 11 | <l:i18n xmlns:l="http://docbook.sourceforge.net/xmlns/l10n/1.0"> | ||
| 12 | <l:l10n language="en"> | ||
| 13 | <l:context name="title-numbered"> | ||
| 14 | <l:template name="chapter" text="%n. %t"/> | ||
| 15 | </l:context> | ||
| 16 | </l:l10n> | ||
| 17 | </l:i18n> | ||
| 18 | |||
| 19 | <!-- don't generate sub-tocs for qanda sets --> | ||
| 20 | <xsl:param name="generate.toc"> | ||
| 21 | set toc,title | ||
| 22 | book toc,title,figure,table,example,equation | ||
| 23 | chapter toc,title | ||
| 24 | section toc | ||
| 25 | sect1 toc | ||
| 26 | sect2 toc | ||
| 27 | sect3 toc | ||
| 28 | sect4 nop | ||
| 29 | sect5 nop | ||
| 30 | qandaset toc | ||
| 31 | qandadiv nop | ||
| 32 | appendix toc,title | ||
| 33 | article/appendix nop | ||
| 34 | article toc,title | ||
| 35 | preface toc,title | ||
| 36 | reference toc,title | ||
| 37 | </xsl:param> | ||
| 38 | |||
| 39 | </xsl:stylesheet> | ||
diff --git a/bz-fo.xsl b/bz-fo.xsl new file mode 100644 index 0000000..7f2a767 --- /dev/null +++ b/bz-fo.xsl | |||
| @@ -0,0 +1,257 @@ | |||
| 1 | <?xml version="1.0" encoding="UTF-8"?> <!-- -*- sgml -*- --> | ||
| 2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||
| 3 | xmlns:fo="http://www.w3.org/1999/XSL/Format" version="1.0"> | ||
| 4 | |||
| 5 | <xsl:import href="http://docbook.sourceforge.net/release/xsl/current/fo/docbook.xsl"/> | ||
| 6 | <xsl:import href="bz-common.xsl"/> | ||
| 7 | |||
| 8 | <!-- set indent = yes while debugging, then change to NO --> | ||
| 9 | <xsl:output method="xml" indent="yes"/> | ||
| 10 | |||
| 11 | <!-- ensure only passivetex extensions are on --> | ||
| 12 | <xsl:param name="stylesheet.result.type" select="'fo'"/> | ||
| 13 | <!-- fo extensions: PDF bookmarks and index terms --> | ||
| 14 | <xsl:param name="use.extensions" select="'1'"/> | ||
| 15 | <xsl:param name="xep.extensions" select="0"/> | ||
| 16 | <xsl:param name="fop.extensions" select="0"/> | ||
| 17 | <xsl:param name="saxon.extensions" select="0"/> | ||
| 18 | <xsl:param name="passivetex.extensions" select="1"/> | ||
| 19 | <xsl:param name="tablecolumns.extension" select="'1'"/> | ||
| 20 | |||
| 21 | <!-- ensure we are using single sided --> | ||
| 22 | <xsl:param name="double.sided" select="'0'"/> | ||
| 23 | |||
| 24 | <!-- insert cross references to page numbers --> | ||
| 25 | <xsl:param name="insert.xref.page.number" select="1"/> | ||
| 26 | |||
| 27 | <!-- <?custom-pagebreak?> inserts a page break at this point --> | ||
| 28 | <xsl:template match="processing-instruction('custom-pagebreak')"> | ||
| 29 | <fo:block break-before='page'/> | ||
| 30 | </xsl:template> | ||
| 31 | |||
| 32 | <!-- show links in color --> | ||
| 33 | <xsl:attribute-set name="xref.properties"> | ||
| 34 | <xsl:attribute name="color">blue</xsl:attribute> | ||
| 35 | </xsl:attribute-set> | ||
| 36 | |||
| 37 | <!-- make pre listings indented a bit + a bg colour --> | ||
| 38 | <xsl:template match="programlisting | screen"> | ||
| 39 | <fo:block start-indent="0.25in" wrap-option="no-wrap" | ||
| 40 | white-space-collapse="false" text-align="start" | ||
| 41 | font-family="monospace" background-color="#f2f2f9" | ||
| 42 | linefeed-treatment="preserve" | ||
| 43 | xsl:use-attribute-sets="normal.para.spacing"> | ||
| 44 | <xsl:apply-templates/> | ||
| 45 | </fo:block> | ||
| 46 | </xsl:template> | ||
| 47 | <!-- make verbatim output prettier --> | ||
| 48 | <xsl:template match="literallayout"> | ||
| 49 | <fo:block start-indent="0.25in" wrap-option="no-wrap" | ||
| 50 | white-space-collapse="false" text-align="start" | ||
| 51 | font-family="monospace" background-color="#edf7f4" | ||
| 52 | linefeed-treatment="preserve" | ||
| 53 | space-before="0em" space-after="0em"> | ||
| 54 | <xsl:apply-templates/> | ||
| 55 | </fo:block> | ||
| 56 | </xsl:template> | ||
| 57 | |||
| 58 | <!-- workaround bug in passivetex fo output for itemizedlist --> | ||
| 59 | <xsl:template match="itemizedlist/listitem"> | ||
| 60 | <xsl:variable name="id"> | ||
| 61 | <xsl:call-template name="object.id"/></xsl:variable> | ||
| 62 | <xsl:variable name="itemsymbol"> | ||
| 63 | <xsl:call-template name="list.itemsymbol"> | ||
| 64 | <xsl:with-param name="node" select="parent::itemizedlist"/> | ||
| 65 | </xsl:call-template> | ||
| 66 | </xsl:variable> | ||
| 67 | <xsl:variable name="item.contents"> | ||
| 68 | <fo:list-item-label end-indent="label-end()"> | ||
| 69 | <fo:block> | ||
| 70 | <xsl:choose> | ||
| 71 | <xsl:when test="$itemsymbol='disc'">•</xsl:when> | ||
| 72 | <xsl:when test="$itemsymbol='bullet'">•</xsl:when> | ||
| 73 | <xsl:otherwise>•</xsl:otherwise> | ||
| 74 | </xsl:choose> | ||
| 75 | </fo:block> | ||
| 76 | </fo:list-item-label> | ||
| 77 | <fo:list-item-body start-indent="body-start()"> | ||
| 78 | <xsl:apply-templates/> <!-- removed extra block wrapper --> | ||
| 79 | </fo:list-item-body> | ||
| 80 | </xsl:variable> | ||
| 81 | <xsl:choose> | ||
| 82 | <xsl:when test="parent::*/@spacing = 'compact'"> | ||
| 83 | <fo:list-item id="{$id}" | ||
| 84 | xsl:use-attribute-sets="compact.list.item.spacing"> | ||
| 85 | <xsl:copy-of select="$item.contents"/> | ||
| 86 | </fo:list-item> | ||
| 87 | </xsl:when> | ||
| 88 | <xsl:otherwise> | ||
| 89 | <fo:list-item id="{$id}" xsl:use-attribute-sets="list.item.spacing"> | ||
| 90 | <xsl:copy-of select="$item.contents"/> | ||
| 91 | </fo:list-item> | ||
| 92 | </xsl:otherwise> | ||
| 93 | </xsl:choose> | ||
| 94 | </xsl:template> | ||
| 95 | |||
| 96 | <!-- workaround bug in passivetex fo output for orderedlist --> | ||
| 97 | <xsl:template match="orderedlist/listitem"> | ||
| 98 | <xsl:variable name="id"> | ||
| 99 | <xsl:call-template name="object.id"/></xsl:variable> | ||
| 100 | <xsl:variable name="item.contents"> | ||
| 101 | <fo:list-item-label end-indent="label-end()"> | ||
| 102 | <fo:block> | ||
| 103 | <xsl:apply-templates select="." mode="item-number"/> | ||
| 104 | </fo:block> | ||
| 105 | </fo:list-item-label> | ||
| 106 | <fo:list-item-body start-indent="body-start()"> | ||
| 107 | <xsl:apply-templates/> <!-- removed extra block wrapper --> | ||
| 108 | </fo:list-item-body> | ||
| 109 | </xsl:variable> | ||
| 110 | <xsl:choose> | ||
| 111 | <xsl:when test="parent::*/@spacing = 'compact'"> | ||
| 112 | <fo:list-item id="{$id}" | ||
| 113 | xsl:use-attribute-sets="compact.list.item.spacing"> | ||
| 114 | <xsl:copy-of select="$item.contents"/> | ||
| 115 | </fo:list-item> | ||
| 116 | </xsl:when> | ||
| 117 | <xsl:otherwise> | ||
| 118 | <fo:list-item id="{$id}" xsl:use-attribute-sets="list.item.spacing"> | ||
| 119 | <xsl:copy-of select="$item.contents"/> | ||
| 120 | </fo:list-item> | ||
| 121 | </xsl:otherwise> | ||
| 122 | </xsl:choose> | ||
| 123 | </xsl:template> | ||
| 124 | |||
| 125 | <!-- workaround bug in passivetex fo output for variablelist --> | ||
| 126 | <xsl:param name="variablelist.as.blocks" select="1"/> | ||
| 127 | <xsl:template match="varlistentry" mode="vl.as.blocks"> | ||
| 128 | <xsl:variable name="id"> | ||
| 129 | <xsl:call-template name="object.id"/></xsl:variable> | ||
| 130 | <fo:block id="{$id}" xsl:use-attribute-sets="list.item.spacing" | ||
| 131 | keep-together.within-column="always" | ||
| 132 | keep-with-next.within-column="always"> | ||
| 133 | <xsl:apply-templates select="term"/> | ||
| 134 | </fo:block> | ||
| 135 | <fo:block start-indent="0.5in" end-indent="0in" | ||
| 136 | space-after.minimum="0.2em" | ||
| 137 | space-after.optimum="0.4em" | ||
| 138 | space-after.maximum="0.6em"> | ||
| 139 | <fo:block> | ||
| 140 | <xsl:apply-templates select="listitem"/> | ||
| 141 | </fo:block> | ||
| 142 | </fo:block> | ||
| 143 | </xsl:template> | ||
| 144 | |||
| 145 | |||
| 146 | <!-- workaround bug in footers: force right-align w/two 80|20 cols --> | ||
| 147 | <xsl:template name="footer.table"> | ||
| 148 | <xsl:param name="pageclass" select="''"/> | ||
| 149 | <xsl:param name="sequence" select="''"/> | ||
| 150 | <xsl:param name="gentext-key" select="''"/> | ||
| 151 | <xsl:choose> | ||
| 152 | <xsl:when test="$pageclass = 'index'"> | ||
| 153 | <xsl:attribute name="margin-left">0pt</xsl:attribute> | ||
| 154 | </xsl:when> | ||
| 155 | </xsl:choose> | ||
| 156 | <xsl:variable name="candidate"> | ||
| 157 | <fo:table table-layout="fixed" width="100%"> | ||
| 158 | <fo:table-column column-number="1" column-width="80%"/> | ||
| 159 | <fo:table-column column-number="2" column-width="20%"/> | ||
| 160 | <fo:table-body> | ||
| 161 | <fo:table-row height="14pt"> | ||
| 162 | <fo:table-cell text-align="left" display-align="after"> | ||
| 163 | <xsl:attribute name="relative-align">baseline</xsl:attribute> | ||
| 164 | <fo:block> | ||
| 165 | <fo:block> </fo:block><!-- empty cell --> | ||
| 166 | </fo:block> | ||
| 167 | </fo:table-cell> | ||
| 168 | <fo:table-cell text-align="center" display-align="after"> | ||
| 169 | <xsl:attribute name="relative-align">baseline</xsl:attribute> | ||
| 170 | <fo:block> | ||
| 171 | <xsl:call-template name="footer.content"> | ||
| 172 | <xsl:with-param name="pageclass" select="$pageclass"/> | ||
| 173 | <xsl:with-param name="sequence" select="$sequence"/> | ||
| 174 | <xsl:with-param name="position" select="'center'"/> | ||
| 175 | <xsl:with-param name="gentext-key" select="$gentext-key"/> | ||
| 176 | </xsl:call-template> | ||
| 177 | </fo:block> | ||
| 178 | </fo:table-cell> | ||
| 179 | </fo:table-row> | ||
| 180 | </fo:table-body> | ||
| 181 | </fo:table> | ||
| 182 | </xsl:variable> | ||
| 183 | <!-- Really output a footer? --> | ||
| 184 | <xsl:choose> | ||
| 185 | <xsl:when test="$pageclass='titlepage' and $gentext-key='book' | ||
| 186 | and $sequence='first'"> | ||
| 187 | <!-- no, book titlepages have no footers at all --> | ||
| 188 | </xsl:when> | ||
| 189 | <xsl:when test="$sequence = 'blank' and $footers.on.blank.pages = 0"> | ||
| 190 | <!-- no output --> | ||
| 191 | </xsl:when> | ||
| 192 | <xsl:otherwise> | ||
| 193 | <xsl:copy-of select="$candidate"/> | ||
| 194 | </xsl:otherwise> | ||
| 195 | </xsl:choose> | ||
| 196 | </xsl:template> | ||
| 197 | |||
| 198 | |||
| 199 | <!-- fix bug in headers: force right-align w/two 40|60 cols --> | ||
| 200 | <xsl:template name="header.table"> | ||
| 201 | <xsl:param name="pageclass" select="''"/> | ||
| 202 | <xsl:param name="sequence" select="''"/> | ||
| 203 | <xsl:param name="gentext-key" select="''"/> | ||
| 204 | <xsl:choose> | ||
| 205 | <xsl:when test="$pageclass = 'index'"> | ||
| 206 | <xsl:attribute name="margin-left">0pt</xsl:attribute> | ||
| 207 | </xsl:when> | ||
| 208 | </xsl:choose> | ||
| 209 | <xsl:variable name="candidate"> | ||
| 210 | <fo:table table-layout="fixed" width="100%"> | ||
| 211 | <xsl:call-template name="head.sep.rule"> | ||
| 212 | <xsl:with-param name="pageclass" select="$pageclass"/> | ||
| 213 | <xsl:with-param name="sequence" select="$sequence"/> | ||
| 214 | <xsl:with-param name="gentext-key" select="$gentext-key"/> | ||
| 215 | </xsl:call-template> | ||
| 216 | <fo:table-column column-number="1" column-width="40%"/> | ||
| 217 | <fo:table-column column-number="2" column-width="60%"/> | ||
| 218 | <fo:table-body> | ||
| 219 | <fo:table-row height="14pt"> | ||
| 220 | <fo:table-cell text-align="left" display-align="before"> | ||
| 221 | <xsl:attribute name="relative-align">baseline</xsl:attribute> | ||
| 222 | <fo:block> | ||
| 223 | <fo:block> </fo:block><!-- empty cell --> | ||
| 224 | </fo:block> | ||
| 225 | </fo:table-cell> | ||
| 226 | <fo:table-cell text-align="center" display-align="before"> | ||
| 227 | <xsl:attribute name="relative-align">baseline</xsl:attribute> | ||
| 228 | <fo:block> | ||
| 229 | <xsl:call-template name="header.content"> | ||
| 230 | <xsl:with-param name="pageclass" select="$pageclass"/> | ||
| 231 | <xsl:with-param name="sequence" select="$sequence"/> | ||
| 232 | <xsl:with-param name="position" select="'center'"/> | ||
| 233 | <xsl:with-param name="gentext-key" select="$gentext-key"/> | ||
| 234 | </xsl:call-template> | ||
| 235 | </fo:block> | ||
| 236 | </fo:table-cell> | ||
| 237 | </fo:table-row> | ||
| 238 | </fo:table-body> | ||
| 239 | </fo:table> | ||
| 240 | </xsl:variable> | ||
| 241 | <!-- Really output a header? --> | ||
| 242 | <xsl:choose> | ||
| 243 | <xsl:when test="$pageclass = 'titlepage' and $gentext-key = 'book' | ||
| 244 | and $sequence='first'"> | ||
| 245 | <!-- no, book titlepages have no headers at all --> | ||
| 246 | </xsl:when> | ||
| 247 | <xsl:when test="$sequence = 'blank' and $headers.on.blank.pages = 0"> | ||
| 248 | <!-- no output --> | ||
| 249 | </xsl:when> | ||
| 250 | <xsl:otherwise> | ||
| 251 | <xsl:copy-of select="$candidate"/> | ||
| 252 | </xsl:otherwise> | ||
| 253 | </xsl:choose> | ||
| 254 | </xsl:template> | ||
| 255 | |||
| 256 | |||
| 257 | </xsl:stylesheet> | ||
diff --git a/bz-html.xsl b/bz-html.xsl new file mode 100644 index 0000000..1785fff --- /dev/null +++ b/bz-html.xsl | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | <?xml version="1.0"?> <!-- -*- sgml -*- --> | ||
| 2 | <!DOCTYPE xsl:stylesheet [ <!ENTITY bz-css SYSTEM "./bzip.css"> ]> | ||
| 3 | |||
| 4 | <xsl:stylesheet | ||
| 5 | xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> | ||
| 6 | |||
| 7 | <xsl:import href="http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl"/> | ||
| 8 | <xsl:import href="bz-common.xsl"/> | ||
| 9 | |||
| 10 | <!-- use 8859-1 encoding --> | ||
| 11 | <xsl:output method="html" encoding="ISO-8859-1" indent="yes"/> | ||
| 12 | |||
| 13 | <!-- we include the css directly when generating one large file --> | ||
| 14 | <xsl:template name="user.head.content"> | ||
| 15 | <style type="text/css" media="screen"> | ||
| 16 | <xsl:text>&bz-css;</xsl:text> | ||
| 17 | </style> | ||
| 18 | </xsl:template> | ||
| 19 | |||
| 20 | </xsl:stylesheet> | ||
diff --git a/bzip.css b/bzip.css new file mode 100644 index 0000000..43193d8 --- /dev/null +++ b/bzip.css | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | /* Colours: | ||
| 2 | #74240f dark brown h1, h2, h3, h4 | ||
| 3 | #336699 medium blue links | ||
| 4 | #339999 turquoise link hover colour | ||
| 5 | #202020 almost black general text | ||
| 6 | #761596 purple md5sum text | ||
| 7 | #626262 dark gray pre border | ||
| 8 | #eeeeee very light gray pre background | ||
| 9 | #f2f2f9 very light blue nav table background | ||
| 10 | #3366cc medium blue nav table border | ||
| 11 | */ | ||
| 12 | |||
| 13 | a, a:link, a:visited, a:active { color: #336699; } | ||
| 14 | a:hover { color: #339999; } | ||
| 15 | |||
| 16 | body { font: 80%/126% sans-serif; } | ||
| 17 | h1, h2, h3, h4 { color: #74240f; } | ||
| 18 | |||
| 19 | dt { color: #336699; font-weight: bold } | ||
| 20 | dd { | ||
| 21 | margin-left: 1.5em; | ||
| 22 | padding-bottom: 0.8em; | ||
| 23 | } | ||
| 24 | |||
| 25 | /* -- ruler -- */ | ||
| 26 | div.hr_blue { | ||
| 27 | height: 3px; | ||
| 28 | background:#ffffff url("/images/hr_blue.png") repeat-x; } | ||
| 29 | div.hr_blue hr { display:none; } | ||
| 30 | |||
| 31 | /* release styles */ | ||
| 32 | #release p { margin-top: 0.4em; } | ||
| 33 | #release .md5sum { color: #761596; } | ||
| 34 | |||
| 35 | |||
| 36 | /* ------ styles for docs|manuals|howto ------ */ | ||
| 37 | /* -- lists -- */ | ||
| 38 | ul { | ||
| 39 | margin: 0px 4px 16px 16px; | ||
| 40 | padding: 0px; | ||
| 41 | list-style: url("/images/li-blue.png"); | ||
| 42 | } | ||
| 43 | ul li { | ||
| 44 | margin-bottom: 10px; | ||
| 45 | } | ||
| 46 | ul ul { | ||
| 47 | list-style-type: none; | ||
| 48 | list-style-image: none; | ||
| 49 | margin-left: 0px; | ||
| 50 | } | ||
| 51 | |||
| 52 | /* header / footer nav tables */ | ||
| 53 | table.nav { | ||
| 54 | border: solid 1px #3366cc; | ||
| 55 | background: #f2f2f9; | ||
| 56 | background-color: #f2f2f9; | ||
| 57 | margin-bottom: 0.5em; | ||
| 58 | } | ||
| 59 | /* don't have underlined links in chunked nav menus */ | ||
| 60 | table.nav a { text-decoration: none; } | ||
| 61 | table.nav a:hover { text-decoration: underline; } | ||
| 62 | table.nav td { font-size: 85%; } | ||
| 63 | |||
| 64 | code, tt, pre { font-size: 120%; } | ||
| 65 | code, tt { color: #761596; } | ||
| 66 | |||
| 67 | div.literallayout, pre.programlisting, pre.screen { | ||
| 68 | color: #000000; | ||
| 69 | padding: 0.5em; | ||
| 70 | background: #eeeeee; | ||
| 71 | border: 1px solid #626262; | ||
| 72 | background-color: #eeeeee; | ||
| 73 | margin: 4px 0px 4px 0px; | ||
| 74 | } | ||
| @@ -1,7 +1,7 @@ | |||
| 1 | .PU | 1 | .PU |
| 2 | .TH bzip2 1 | 2 | .TH bzip2 1 |
| 3 | .SH NAME | 3 | .SH NAME |
| 4 | bzip2, bunzip2 \- a block-sorting file compressor, v1.0.2 | 4 | bzip2, bunzip2 \- a block-sorting file compressor, v1.0.3 |
| 5 | .br | 5 | .br |
| 6 | bzcat \- decompresses files to stdout | 6 | bzcat \- decompresses files to stdout |
| 7 | .br | 7 | .br |
| @@ -405,19 +405,19 @@ I/O error messages are not as helpful as they could be. | |||
| 405 | tries hard to detect I/O errors and exit cleanly, but the details of | 405 | tries hard to detect I/O errors and exit cleanly, but the details of |
| 406 | what the problem is sometimes seem rather misleading. | 406 | what the problem is sometimes seem rather misleading. |
| 407 | 407 | ||
| 408 | This manual page pertains to version 1.0.2 of | 408 | This manual page pertains to version 1.0.3 of |
| 409 | .I bzip2. | 409 | .I bzip2. |
| 410 | Compressed data created by this version is entirely forwards and | 410 | Compressed data created by this version is entirely forwards and |
| 411 | backwards compatible with the previous public releases, versions | 411 | backwards compatible with the previous public releases, versions |
| 412 | 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, but with the following | 412 | 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1 and 1.0.2, but with the following |
| 413 | exception: 0.9.0 and above can correctly decompress multiple | 413 | exception: 0.9.0 and above can correctly decompress multiple |
| 414 | concatenated compressed files. 0.1pl2 cannot do this; it will stop | 414 | concatenated compressed files. 0.1pl2 cannot do this; it will stop |
| 415 | after decompressing just the first file in the stream. | 415 | after decompressing just the first file in the stream. |
| 416 | 416 | ||
| 417 | .I bzip2recover | 417 | .I bzip2recover |
| 418 | versions prior to this one, 1.0.2, used 32-bit integers to represent | 418 | versions prior to 1.0.2 used 32-bit integers to represent |
| 419 | bit positions in compressed files, so it could not handle compressed | 419 | bit positions in compressed files, so they could not handle compressed |
| 420 | files more than 512 megabytes long. Version 1.0.2 and above uses | 420 | files more than 512 megabytes long. Versions 1.0.2 and above use |
| 421 | 64-bit ints on some platforms which support them (GNU supported | 421 | 64-bit ints on some platforms which support them (GNU supported |
| 422 | targets, and Windows). To establish whether or not bzip2recover was | 422 | targets, and Windows). To establish whether or not bzip2recover was |
| 423 | built with such a limitation, run it without arguments. In any event | 423 | built with such a limitation, run it without arguments. In any event |
| @@ -427,9 +427,9 @@ with MaybeUInt64 set to be an unsigned 64-bit integer. | |||
| 427 | 427 | ||
| 428 | 428 | ||
| 429 | .SH AUTHOR | 429 | .SH AUTHOR |
| 430 | Julian Seward, jseward@acm.org. | 430 | Julian Seward, jseward@bzip.org. |
| 431 | 431 | ||
| 432 | http://sources.redhat.com/bzip2 | 432 | http://www.bzip.org |
| 433 | 433 | ||
| 434 | The ideas embodied in | 434 | The ideas embodied in |
| 435 | .I bzip2 | 435 | .I bzip2 |
| @@ -447,6 +447,7 @@ source distribution for pointers to sources of documentation. Christian | |||
| 447 | von Roques encouraged me to look for faster sorting algorithms, so as to | 447 | von Roques encouraged me to look for faster sorting algorithms, so as to |
| 448 | speed up compression. Bela Lubkin encouraged me to improve the | 448 | speed up compression. Bela Lubkin encouraged me to improve the |
| 449 | worst-case compression performance. | 449 | worst-case compression performance. |
| 450 | Donna Robinson XMLised the documentation. | ||
| 450 | The bz* scripts are derived from those of GNU gzip. | 451 | The bz* scripts are derived from those of GNU gzip. |
| 451 | Many people sent patches, helped | 452 | Many people sent patches, helped |
| 452 | with portability problems, lent machines, gave advice and were generally | 453 | with portability problems, lent machines, gave advice and were generally |
diff --git a/bzip2.1.preformatted b/bzip2.1.preformatted index 0f20cb5..129ca83 100644 --- a/bzip2.1.preformatted +++ b/bzip2.1.preformatted | |||
| @@ -3,43 +3,43 @@ bzip2(1) bzip2(1) | |||
| 3 | 3 | ||
| 4 | 4 | ||
| 5 | NNAAMMEE | 5 | NNAAMMEE |
| 6 | bzip2, bunzip2 - a block-sorting file compressor, v1.0.2 | 6 | bzip2, bunzip2 − a block‐sorting file compressor, v1.0.3 |
| 7 | bzcat - decompresses files to stdout | 7 | bzcat − decompresses files to stdout |
| 8 | bzip2recover - recovers data from damaged bzip2 files | 8 | bzip2recover − recovers data from damaged bzip2 files |
| 9 | 9 | ||
| 10 | 10 | ||
| 11 | SSYYNNOOPPSSIISS | 11 | SSYYNNOOPPSSIISS |
| 12 | bbzziipp22 [ --ccddffkkqqssttvvzzVVLL112233445566778899 ] [ _f_i_l_e_n_a_m_e_s _._._. ] | 12 | bbzziipp22 [ −−ccddffkkqqssttvvzzVVLL112233445566778899 ] [ _f_i_l_e_n_a_m_e_s _._._. ] |
| 13 | bbuunnzziipp22 [ --ffkkvvssVVLL ] [ _f_i_l_e_n_a_m_e_s _._._. ] | 13 | bbuunnzziipp22 [ −−ffkkvvssVVLL ] [ _f_i_l_e_n_a_m_e_s _._._. ] |
| 14 | bbzzccaatt [ --ss ] [ _f_i_l_e_n_a_m_e_s _._._. ] | 14 | bbzzccaatt [ −−ss ] [ _f_i_l_e_n_a_m_e_s _._._. ] |
| 15 | bbzziipp22rreeccoovveerr _f_i_l_e_n_a_m_e | 15 | bbzziipp22rreeccoovveerr _f_i_l_e_n_a_m_e |
| 16 | 16 | ||
| 17 | 17 | ||
| 18 | DDEESSCCRRIIPPTTIIOONN | 18 | DDEESSCCRRIIPPTTIIOONN |
| 19 | _b_z_i_p_2 compresses files using the Burrows-Wheeler block | 19 | _b_z_i_p_2 compresses files using the Burrowsâ€Wheeler block |
| 20 | sorting text compression algorithm, and Huffman coding. | 20 | sorting text compression algorithm, and Huffman coding. |
| 21 | Compression is generally considerably better than that | 21 | Compression is generally considerably better than that |
| 22 | achieved by more conventional LZ77/LZ78-based compressors, | 22 | achieved by more conventional LZ77/LZ78â€based compressors, |
| 23 | and approaches the performance of the PPM family of sta | 23 | and approaches the performance of the PPM family of sta |
| 24 | tistical compressors. | 24 | tistical compressors. |
| 25 | 25 | ||
| 26 | The command-line options are deliberately very similar to | 26 | The commandâ€line options are deliberately very similar to |
| 27 | those of _G_N_U _g_z_i_p_, but they are not identical. | 27 | those of _G_N_U _g_z_i_p_, but they are not identical. |
| 28 | 28 | ||
| 29 | _b_z_i_p_2 expects a list of file names to accompany the com | 29 | _b_z_i_p_2 expects a list of file names to accompany the com |
| 30 | mand-line flags. Each file is replaced by a compressed | 30 | mandâ€line flags. Each file is replaced by a compressed |
| 31 | version of itself, with the name "original_name.bz2". | 31 | version of itself, with the name "original_name.bz2". |
| 32 | Each compressed file has the same modification date, per | 32 | Each compressed file has the same modification date, per |
| 33 | missions, and, when possible, ownership as the correspond | 33 | missions, and, when possible, ownership as the correspond |
| 34 | ing original, so that these properties can be correctly | 34 | ing original, so that these properties can be correctly |
| 35 | restored at decompression time. File name handling is | 35 | restored at decompression time. File name handling is |
| 36 | naive in the sense that there is no mechanism for preserv | 36 | naive in the sense that there is no mechanism for preserv |
| 37 | ing original file names, permissions, ownerships or dates | 37 | ing original file names, permissions, ownerships or dates |
| 38 | in filesystems which lack these concepts, or have serious | 38 | in filesystems which lack these concepts, or have serious |
| 39 | file name length restrictions, such as MS-DOS. | 39 | file name length restrictions, such as MS‐DOS. |
| 40 | 40 | ||
| 41 | _b_z_i_p_2 and _b_u_n_z_i_p_2 will by default not overwrite existing | 41 | _b_z_i_p_2 and _b_u_n_z_i_p_2 will by default not overwrite existing |
| 42 | files. If you want this to happen, specify the -f flag. | 42 | files. If you want this to happen, specify the −f flag. |
| 43 | 43 | ||
| 44 | If no file names are specified, _b_z_i_p_2 compresses from | 44 | If no file names are specified, _b_z_i_p_2 compresses from |
| 45 | standard input to standard output. In this case, _b_z_i_p_2 | 45 | standard input to standard output. In this case, _b_z_i_p_2 |
| @@ -47,7 +47,7 @@ DDEESSCCRRIIPPTTIIOONN | |||
| 47 | this would be entirely incomprehensible and therefore | 47 | this would be entirely incomprehensible and therefore |
| 48 | pointless. | 48 | pointless. |
| 49 | 49 | ||
| 50 | _b_u_n_z_i_p_2 (or _b_z_i_p_2 _-_d_) decompresses all specified files. | 50 | _b_u_n_z_i_p_2 (or _b_z_i_p_2 _−_d_) decompresses all specified files. |
| 51 | Files which were not created by _b_z_i_p_2 will be detected and | 51 | Files which were not created by _b_z_i_p_2 will be detected and |
| 52 | ignored, and a warning issued. _b_z_i_p_2 attempts to guess | 52 | ignored, and a warning issued. _b_z_i_p_2 attempts to guess |
| 53 | the filename for the decompressed file from that of the | 53 | the filename for the decompressed file from that of the |
| @@ -64,26 +64,26 @@ DDEESSCCRRIIPPTTIIOONN | |||
| 64 | guess the name of the original file, and uses the original | 64 | guess the name of the original file, and uses the original |
| 65 | name with _._o_u_t appended. | 65 | name with _._o_u_t appended. |
| 66 | 66 | ||
| 67 | As with compression, supplying no filenames causes decom | 67 | As with compression, supplying no filenames causes decom |
| 68 | pression from standard input to standard output. | 68 | pression from standard input to standard output. |
| 69 | 69 | ||
| 70 | _b_u_n_z_i_p_2 will correctly decompress a file which is the con | 70 | _b_u_n_z_i_p_2 will correctly decompress a file which is the con |
| 71 | catenation of two or more compressed files. The result is | 71 | catenation of two or more compressed files. The result is |
| 72 | the concatenation of the corresponding uncompressed files. | 72 | the concatenation of the corresponding uncompressed files. |
| 73 | Integrity testing (-t) of concatenated compressed files is | 73 | Integrity testing (−t) of concatenated compressed files is |
| 74 | also supported. | 74 | also supported. |
| 75 | 75 | ||
| 76 | You can also compress or decompress files to the standard | 76 | You can also compress or decompress files to the standard |
| 77 | output by giving the -c flag. Multiple files may be com | 77 | output by giving the −c flag. Multiple files may be com |
| 78 | pressed and decompressed like this. The resulting outputs | 78 | pressed and decompressed like this. The resulting outputs |
| 79 | are fed sequentially to stdout. Compression of multiple | 79 | are fed sequentially to stdout. Compression of multiple |
| 80 | files in this manner generates a stream containing multi | 80 | files in this manner generates a stream containing multi |
| 81 | ple compressed file representations. Such a stream can be | 81 | ple compressed file representations. Such a stream can be |
| 82 | decompressed correctly only by _b_z_i_p_2 version 0.9.0 or | 82 | decompressed correctly only by _b_z_i_p_2 version 0.9.0 or |
| 83 | later. Earlier versions of _b_z_i_p_2 will stop after decom | 83 | later. Earlier versions of _b_z_i_p_2 will stop after decom |
| 84 | pressing the first file in the stream. | 84 | pressing the first file in the stream. |
| 85 | 85 | ||
| 86 | _b_z_c_a_t (or _b_z_i_p_2 _-_d_c_) decompresses all specified files to | 86 | _b_z_c_a_t (or _b_z_i_p_2 _â€_d_c_) decompresses all specified files to |
| 87 | the standard output. | 87 | the standard output. |
| 88 | 88 | ||
| 89 | _b_z_i_p_2 will read arguments from the environment variables | 89 | _b_z_i_p_2 will read arguments from the environment variables |
| @@ -99,15 +99,15 @@ DDEESSCCRRIIPPTTIIOONN | |||
| 99 | most file compressors) is coded at about 8.05 bits per | 99 | most file compressors) is coded at about 8.05 bits per |
| 100 | byte, giving an expansion of around 0.5%. | 100 | byte, giving an expansion of around 0.5%. |
| 101 | 101 | ||
| 102 | As a self-check for your protection, _b_z_i_p_2 uses 32-bit | 102 | As a selfâ€check for your protection, _b_z_i_p_2 uses 32â€bit |
| 103 | CRCs to make sure that the decompressed version of a file | 103 | CRCs to make sure that the decompressed version of a file |
| 104 | is identical to the original. This guards against corrup | 104 | is identical to the original. This guards against corrup |
| 105 | tion of the compressed data, and against undetected bugs | 105 | tion of the compressed data, and against undetected bugs |
| 106 | in _b_z_i_p_2 (hopefully very unlikely). The chances of data | 106 | in _b_z_i_p_2 (hopefully very unlikely). The chances of data |
| 107 | corruption going undetected is microscopic, about one | 107 | corruption going undetected is microscopic, about one |
| 108 | chance in four billion for each file processed. Be aware, | 108 | chance in four billion for each file processed. Be aware, |
| 109 | though, that the check occurs upon decompression, so it | 109 | though, that the check occurs upon decompression, so it |
| 110 | can only tell you that something is wrong. It can't help | 110 | can only tell you that something is wrong. It can’t help |
| 111 | you recover the original uncompressed data. You can use | 111 | you recover the original uncompressed data. You can use |
| 112 | _b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files. | 112 | _b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files. |
| 113 | 113 | ||
| @@ -118,41 +118,41 @@ DDEESSCCRRIIPPTTIIOONN | |||
| 118 | 118 | ||
| 119 | 119 | ||
| 120 | OOPPTTIIOONNSS | 120 | OOPPTTIIOONNSS |
| 121 | --cc ----ssttddoouutt | 121 | −−cc â€â€â€â€ssttddoouutt |
| 122 | Compress or decompress to standard output. | 122 | Compress or decompress to standard output. |
| 123 | 123 | ||
| 124 | --dd ----ddeeccoommpprreessss | 124 | −−dd â€â€â€â€ddeeccoommpprreessss |
| 125 | Force decompression. _b_z_i_p_2_, _b_u_n_z_i_p_2 and _b_z_c_a_t are | 125 | Force decompression. _b_z_i_p_2_, _b_u_n_z_i_p_2 and _b_z_c_a_t are |
| 126 | really the same program, and the decision about | 126 | really the same program, and the decision about |
| 127 | what actions to take is done on the basis of which | 127 | what actions to take is done on the basis of which |
| 128 | name is used. This flag overrides that mechanism, | 128 | name is used. This flag overrides that mechanism, |
| 129 | and forces _b_z_i_p_2 to decompress. | 129 | and forces _b_z_i_p_2 to decompress. |
| 130 | 130 | ||
| 131 | --zz ----ccoommpprreessss | 131 | −−zz â€â€â€â€ccoommpprreessss |
| 132 | The complement to -d: forces compression, | 132 | The complement to −d: forces compression, |
| 133 | regardless of the invocation name. | 133 | regardless of the invocation name. |
| 134 | 134 | ||
| 135 | --tt ----tteesstt | 135 | −−tt â€â€â€â€tteesstt |
| 136 | Check integrity of the specified file(s), but don't | 136 | Check integrity of the specified file(s), but don’t |
| 137 | decompress them. This really performs a trial | 137 | decompress them. This really performs a trial |
| 138 | decompression and throws away the result. | 138 | decompression and throws away the result. |
| 139 | 139 | ||
| 140 | --ff ----ffoorrccee | 140 | −−ff â€â€â€â€ffoorrccee |
| 141 | Force overwrite of output files. Normally, _b_z_i_p_2 | 141 | Force overwrite of output files. Normally, _b_z_i_p_2 |
| 142 | will not overwrite existing output files. Also | 142 | will not overwrite existing output files. Also |
| 143 | forces _b_z_i_p_2 to break hard links to files, which it | 143 | forces _b_z_i_p_2 to break hard links to files, which it |
| 144 | otherwise wouldn't do. | 144 | otherwise wouldn’t do. |
| 145 | 145 | ||
| 146 | bzip2 normally declines to decompress files which | 146 | bzip2 normally declines to decompress files which |
| 147 | don't have the correct magic header bytes. If | 147 | don’t have the correct magic header bytes. If |
| 148 | forced (-f), however, it will pass such files | 148 | forced (â€f), however, it will pass such files |
| 149 | through unmodified. This is how GNU gzip behaves. | 149 | through unmodified. This is how GNU gzip behaves. |
| 150 | 150 | ||
| 151 | --kk ----kkeeeepp | 151 | −−kk â€â€â€â€kkeeeepp |
| 152 | Keep (don't delete) input files during compression | 152 | Keep (don’t delete) input files during compression |
| 153 | or decompression. | 153 | or decompression. |
| 154 | 154 | ||
| 155 | --ss ----ssmmaallll | 155 | −−ss â€â€â€â€ssmmaallll |
| 156 | Reduce memory usage, for compression, decompression | 156 | Reduce memory usage, for compression, decompression |
| 157 | and testing. Files are decompressed and tested | 157 | and testing. Files are decompressed and tested |
| 158 | using a modified algorithm which only requires 2.5 | 158 | using a modified algorithm which only requires 2.5 |
| @@ -160,46 +160,46 @@ OOPPTTIIOONNSS | |||
| 160 | decompressed in 2300k of memory, albeit at about | 160 | decompressed in 2300k of memory, albeit at about |
| 161 | half the normal speed. | 161 | half the normal speed. |
| 162 | 162 | ||
| 163 | During compression, -s selects a block size of | 163 | During compression, −s selects a block size of |
| 164 | 200k, which limits memory use to around the same | 164 | 200k, which limits memory use to around the same |
| 165 | figure, at the expense of your compression ratio. | 165 | figure, at the expense of your compression ratio. |
| 166 | In short, if your machine is low on memory (8 | 166 | In short, if your machine is low on memory (8 |
| 167 | megabytes or less), use -s for everything. See | 167 | megabytes or less), use −s for everything. See |
| 168 | MEMORY MANAGEMENT below. | 168 | MEMORY MANAGEMENT below. |
| 169 | 169 | ||
| 170 | --qq ----qquuiieett | 170 | −−qq â€â€â€â€qquuiieett |
| 171 | Suppress non-essential warning messages. Messages | 171 | Suppress nonâ€essential warning messages. Messages |
| 172 | pertaining to I/O errors and other critical events | 172 | pertaining to I/O errors and other critical events |
| 173 | will not be suppressed. | 173 | will not be suppressed. |
| 174 | 174 | ||
| 175 | --vv ----vveerrbboossee | 175 | −−vv â€â€â€â€vveerrbboossee |
| 176 | Verbose mode -- show the compression ratio for each | 176 | Verbose mode â€â€ show the compression ratio for each |
| 177 | file processed. Further -v's increase the ver | 177 | file processed. Further −v’s increase the ver |
| 178 | bosity level, spewing out lots of information which | 178 | bosity level, spewing out lots of information which |
| 179 | is primarily of interest for diagnostic purposes. | 179 | is primarily of interest for diagnostic purposes. |
| 180 | 180 | ||
| 181 | --LL ----lliicceennssee --VV ----vveerrssiioonn | 181 | −−LL â€â€â€â€lliicceennssee â€â€VV â€â€â€â€vveerrssiioonn |
| 182 | Display the software version, license terms and | 182 | Display the software version, license terms and |
| 183 | conditions. | 183 | conditions. |
| 184 | 184 | ||
| 185 | --11 ((oorr ----ffaasstt)) ttoo --99 ((oorr ----bbeesstt)) | 185 | −−11 ((oorr −−−−ffaasstt)) ttoo −−99 ((oorr −−−−bbeesstt)) |
| 186 | Set the block size to 100 k, 200 k .. 900 k when | 186 | Set the block size to 100 k, 200 k .. 900 k when |
| 187 | compressing. Has no effect when decompressing. | 187 | compressing. Has no effect when decompressing. |
| 188 | See MEMORY MANAGEMENT below. The --fast and --best | 188 | See MEMORY MANAGEMENT below. The −−fast and −−best |
| 189 | aliases are primarily for GNU gzip compatibility. | 189 | aliases are primarily for GNU gzip compatibility. |
| 190 | In particular, --fast doesn't make things signifi | 190 | In particular, −−fast doesn’t make things signifi |
| 191 | cantly faster. And --best merely selects the | 191 | cantly faster. And −−best merely selects the |
| 192 | default behaviour. | 192 | default behaviour. |
| 193 | 193 | ||
| 194 | ---- Treats all subsequent arguments as file names, even | 194 | −−â€â€ Treats all subsequent arguments as file names, even |
| 195 | if they start with a dash. This is so you can han | 195 | if they start with a dash. This is so you can han |
| 196 | dle files with names beginning with a dash, for | 196 | dle files with names beginning with a dash, for |
| 197 | example: bzip2 -- -myfilename. | 197 | example: bzip2 −†−myfilename. |
| 198 | 198 | ||
| 199 | ----rreeppeettiittiivvee--ffaasstt ----rreeppeettiittiivvee--bbeesstt | 199 | −−â€â€rreeppeettiittiivveeâ€â€ffaasstt â€â€â€â€rreeppeettiittiivveeâ€â€bbeesstt |
| 200 | These flags are redundant in versions 0.9.5 and | 200 | These flags are redundant in versions 0.9.5 and |
| 201 | above. They provided some coarse control over the | 201 | above. They provided some coarse control over the |
| 202 | behaviour of the sorting algorithm in earlier ver | 202 | behaviour of the sorting algorithm in earlier ver |
| 203 | sions, which was sometimes useful. 0.9.5 and above | 203 | sions, which was sometimes useful. 0.9.5 and above |
| 204 | have an improved algorithm which renders these | 204 | have an improved algorithm which renders these |
| 205 | flags irrelevant. | 205 | flags irrelevant. |
| @@ -209,13 +209,13 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT | |||
| 209 | _b_z_i_p_2 compresses large files in blocks. The block size | 209 | _b_z_i_p_2 compresses large files in blocks. The block size |
| 210 | affects both the compression ratio achieved, and the | 210 | affects both the compression ratio achieved, and the |
| 211 | amount of memory needed for compression and decompression. | 211 | amount of memory needed for compression and decompression. |
| 212 | The flags -1 through -9 specify the block size to be | 212 | The flags −1 through −9 specify the block size to be |
| 213 | 100,000 bytes through 900,000 bytes (the default) respec | 213 | 100,000 bytes through 900,000 bytes (the default) respec |
| 214 | tively. At decompression time, the block size used for | 214 | tively. At decompression time, the block size used for |
| 215 | compression is read from the header of the compressed | 215 | compression is read from the header of the compressed |
| 216 | file, and _b_u_n_z_i_p_2 then allocates itself just enough memory | 216 | file, and _b_u_n_z_i_p_2 then allocates itself just enough memory |
| 217 | to decompress the file. Since block sizes are stored in | 217 | to decompress the file. Since block sizes are stored in |
| 218 | compressed files, it follows that the flags -1 to -9 are | 218 | compressed files, it follows that the flags -1 to -9 are |
| 219 | irrelevant to and so ignored during decompression. | 219 | irrelevant to and so ignored during decompression. |
| 220 | 220 | ||
| 221 | Compression and decompression requirements, in bytes, can | 221 | Compression and decompression requirements, in bytes, can |
| @@ -238,21 +238,21 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT | |||
| 238 | _b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To | 238 | _b_u_n_z_i_p_2 will require about 3700 kbytes to decompress. To |
| 239 | support decompression of any file on a 4 megabyte machine, | 239 | support decompression of any file on a 4 megabyte machine, |
| 240 | _b_u_n_z_i_p_2 has an option to decompress using approximately | 240 | _b_u_n_z_i_p_2 has an option to decompress using approximately |
| 241 | half this amount of memory, about 2300 kbytes. Decompres | 241 | half this amount of memory, about 2300 kbytes. Decompres |
| 242 | sion speed is also halved, so you should use this option | 242 | sion speed is also halved, so you should use this option |
| 243 | only where necessary. The relevant flag is -s. | 243 | only where necessary. The relevant flag is -s. |
| 244 | 244 | ||
| 245 | In general, try and use the largest block size memory con | 245 | In general, try and use the largest block size memory con |
| 246 | straints allow, since that maximises the compression | 246 | straints allow, since that maximises the compression |
| 247 | achieved. Compression and decompression speed are virtu | 247 | achieved. Compression and decompression speed are virtu |
| 248 | ally unaffected by block size. | 248 | ally unaffected by block size. |
| 249 | 249 | ||
| 250 | Another significant point applies to files which fit in a | 250 | Another significant point applies to files which fit in a |
| 251 | single block -- that means most files you'd encounter | 251 | single block -- that means most files you'd encounter |
| 252 | using a large block size. The amount of real memory | 252 | using a large block size. The amount of real memory |
| 253 | touched is proportional to the size of the file, since the | 253 | touched is proportional to the size of the file, since the |
| 254 | file is smaller than a block. For example, compressing a | 254 | file is smaller than a block. For example, compressing a |
| 255 | file 20,000 bytes long with the flag -9 will cause the | 255 | file 20,000 bytes long with the flag -9 will cause the |
| 256 | compressor to allocate around 7600k of memory, but only | 256 | compressor to allocate around 7600k of memory, but only |
| 257 | touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the | 257 | touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the |
| 258 | decompressor will allocate 3700k but only touch 100k + | 258 | decompressor will allocate 3700k but only touch 100k + |
| @@ -260,59 +260,59 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT | |||
| 260 | 260 | ||
| 261 | Here is a table which summarises the maximum memory usage | 261 | Here is a table which summarises the maximum memory usage |
| 262 | for different block sizes. Also recorded is the total | 262 | for different block sizes. Also recorded is the total |
| 263 | compressed size for 14 files of the Calgary Text Compres | 263 | compressed size for 14 files of the Calgary Text Compres |
| 264 | sion Corpus totalling 3,141,622 bytes. This column gives | 264 | sion Corpus totalling 3,141,622 bytes. This column gives |
| 265 | some feel for how compression varies with block size. | 265 | some feel for how compression varies with block size. |
| 266 | These figures tend to understate the advantage of larger | 266 | These figures tend to understate the advantage of larger |
| 267 | block sizes for larger files, since the Corpus is domi | 267 | block sizes for larger files, since the Corpus is domi |
| 268 | nated by smaller files. | 268 | nated by smaller files. |
| 269 | 269 | ||
| 270 | Compress Decompress Decompress Corpus | 270 | Compress Decompress Decompress Corpus |
| 271 | Flag usage usage -s usage Size | 271 | Flag usage usage -s usage Size |
| 272 | 272 | ||
| 273 | -1 1200k 500k 350k 914704 | 273 | -1 1200k 500k 350k 914704 |
| 274 | -2 2000k 900k 600k 877703 | 274 | -2 2000k 900k 600k 877703 |
| 275 | -3 2800k 1300k 850k 860338 | 275 | -3 2800k 1300k 850k 860338 |
| 276 | -4 3600k 1700k 1100k 846899 | 276 | -4 3600k 1700k 1100k 846899 |
| 277 | -5 4400k 2100k 1350k 845160 | 277 | -5 4400k 2100k 1350k 845160 |
| 278 | -6 5200k 2500k 1600k 838626 | 278 | -6 5200k 2500k 1600k 838626 |
| 279 | -7 6100k 2900k 1850k 834096 | 279 | -7 6100k 2900k 1850k 834096 |
| 280 | -8 6800k 3300k 2100k 828642 | 280 | -8 6800k 3300k 2100k 828642 |
| 281 | -9 7600k 3700k 2350k 828642 | 281 | -9 7600k 3700k 2350k 828642 |
| 282 | 282 | ||
| 283 | 283 | ||
| 284 | RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS | 284 | RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS |
| 285 | _b_z_i_p_2 compresses files in blocks, usually 900kbytes long. | 285 | _b_z_i_p_2 compresses files in blocks, usually 900kbytes long. |
| 286 | Each block is handled independently. If a media or trans | 286 | Each block is handled independently. If a media or trans |
| 287 | mission error causes a multi-block .bz2 file to become | 287 | mission error causes a multi-block .bz2 file to become |
| 288 | damaged, it may be possible to recover data from the | 288 | damaged, it may be possible to recover data from the |
| 289 | undamaged blocks in the file. | 289 | undamaged blocks in the file. |
| 290 | 290 | ||
| 291 | The compressed representation of each block is delimited | 291 | The compressed representation of each block is delimited |
| 292 | by a 48-bit pattern, which makes it possible to find the | 292 | by a 48-bit pattern, which makes it possible to find the |
| 293 | block boundaries with reasonable certainty. Each block | 293 | block boundaries with reasonable certainty. Each block |
| 294 | also carries its own 32-bit CRC, so damaged blocks can be | 294 | also carries its own 32-bit CRC, so damaged blocks can be |
| 295 | distinguished from undamaged ones. | 295 | distinguished from undamaged ones. |
| 296 | 296 | ||
| 297 | _b_z_i_p_2_r_e_c_o_v_e_r is a simple program whose purpose is to | 297 | _b_z_i_p_2_r_e_c_o_v_e_r is a simple program whose purpose is to |
| 298 | search for blocks in .bz2 files, and write each block out | 298 | search for blocks in .bz2 files, and write each block out |
| 299 | into its own .bz2 file. You can then use _b_z_i_p_2 -t to test | 299 | into its own .bz2 file. You can then use _b_z_i_p_2 -t to test |
| 300 | the integrity of the resulting files, and decompress those | 300 | the integrity of the resulting files, and decompress those |
| 301 | which are undamaged. | 301 | which are undamaged. |
| 302 | 302 | ||
| 303 | _b_z_i_p_2_r_e_c_o_v_e_r takes a single argument, the name of the dam | 303 | _b_z_i_p_2_r_e_c_o_v_e_r takes a single argument, the name of the dam |
| 304 | aged file, and writes a number of files | 304 | aged file, and writes a number of files |
| 305 | "rec00001file.bz2", "rec00002file.bz2", etc, containing | 305 | "rec00001file.bz2", "rec00002file.bz2", etc, containing |
| 306 | the extracted blocks. The output filenames are | 306 | the extracted blocks. The output filenames are |
| 307 | designed so that the use of wildcards in subsequent pro | 307 | designed so that the use of wildcards in subsequent pro |
| 308 | cessing -- for example, "bzip2 -dc rec*file.bz2 > recov | 308 | cessing -- for example, "bzip2 -dc rec*file.bz2 > recov |
| 309 | ered_data" -- processes the files in the correct order. | 309 | ered_data" -- processes the files in the correct order. |
| 310 | 310 | ||
| 311 | _b_z_i_p_2_r_e_c_o_v_e_r should be of most use dealing with large .bz2 | 311 | _b_z_i_p_2_r_e_c_o_v_e_r should be of most use dealing with large .bz2 |
| 312 | files, as these will contain many blocks. It is clearly | 312 | files, as these will contain many blocks. It is clearly |
| 313 | futile to use it on damaged single-block files, since a | 313 | futile to use it on damaged single-block files, since a |
| 314 | damaged block cannot be recovered. If you wish to min | 314 | damaged block cannot be recovered. If you wish to min |
| 315 | imise any potential data loss through media or transmis | 315 | imise any potential data loss through media or transmis |
| 316 | sion errors, you might consider compressing with a smaller | 316 | sion errors, you might consider compressing with a smaller |
| 317 | block size. | 317 | block size. |
| 318 | 318 | ||
| @@ -324,21 +324,21 @@ PPEERRFFOORRMMAANNCCEE NNOOTTEESS | |||
| 324 | ..." (repeated several hundred times) may compress more | 324 | ..." (repeated several hundred times) may compress more |
| 325 | slowly than normal. Versions 0.9.5 and above fare much | 325 | slowly than normal. Versions 0.9.5 and above fare much |
| 326 | better than previous versions in this respect. The ratio | 326 | better than previous versions in this respect. The ratio |
| 327 | between worst-case and average-case compression time is in | 327 | between worst-case and average-case compression time is in |
| 328 | the region of 10:1. For previous versions, this figure | 328 | the region of 10:1. For previous versions, this figure |
| 329 | was more like 100:1. You can use the -vvvv option to mon | 329 | was more like 100:1. You can use the -vvvv option to mon |
| 330 | itor progress in great detail, if you want. | 330 | itor progress in great detail, if you want. |
| 331 | 331 | ||
| 332 | Decompression speed is unaffected by these phenomena. | 332 | Decompression speed is unaffected by these phenomena. |
| 333 | 333 | ||
| 334 | _b_z_i_p_2 usually allocates several megabytes of memory to | 334 | _b_z_i_p_2 usually allocates several megabytes of memory to |
| 335 | operate in, and then charges all over it in a fairly ran | 335 | operate in, and then charges all over it in a fairly ran |
| 336 | dom fashion. This means that performance, both for com | 336 | dom fashion. This means that performance, both for com |
| 337 | pressing and decompressing, is largely determined by the | 337 | pressing and decompressing, is largely determined by the |
| 338 | speed at which your machine can service cache misses. | 338 | speed at which your machine can service cache misses. |
| 339 | Because of this, small changes to the code to reduce the | 339 | Because of this, small changes to the code to reduce the |
| 340 | miss rate have been observed to give disproportionately | 340 | miss rate have been observed to give disproportionately |
| 341 | large performance improvements. I imagine _b_z_i_p_2 will per | 341 | large performance improvements. I imagine _b_z_i_p_2 will per |
| 342 | form best on machines with very large caches. | 342 | form best on machines with very large caches. |
| 343 | 343 | ||
| 344 | 344 | ||
| @@ -348,50 +348,51 @@ CCAAVVEEAATTSS | |||
| 348 | but the details of what the problem is sometimes seem | 348 | but the details of what the problem is sometimes seem |
| 349 | rather misleading. | 349 | rather misleading. |
| 350 | 350 | ||
| 351 | This manual page pertains to version 1.0.2 of _b_z_i_p_2_. Com | 351 | This manual page pertains to version 1.0.3 of _b_z_i_p_2_. Com |
| 352 | pressed data created by this version is entirely forwards | 352 | pressed data created by this version is entirely forwards |
| 353 | and backwards compatible with the previous public | 353 | and backwards compatible with the previous public |
| 354 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, | 354 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1 and |
| 355 | but with the following exception: 0.9.0 and above can cor | 355 | 1.0.2, but with the following exception: 0.9.0 and above |
| 356 | rectly decompress multiple concatenated compressed files. | 356 | can correctly decompress multiple concatenated compressed |
| 357 | 0.1pl2 cannot do this; it will stop after decompressing | 357 | files. 0.1pl2 cannot do this; it will stop after decom |
| 358 | just the first file in the stream. | 358 | pressing just the first file in the stream. |
| 359 | 359 | ||
| 360 | _b_z_i_p_2_r_e_c_o_v_e_r versions prior to this one, 1.0.2, used | 360 | _b_z_i_p_2_r_e_c_o_v_e_r versions prior to 1.0.2 used 32-bit integers |
| 361 | 32-bit integers to represent bit positions in compressed | 361 | to represent bit positions in compressed files, so they |
| 362 | files, so it could not handle compressed files more than | 362 | could not handle compressed files more than 512 megabytes |
| 363 | 512 megabytes long. Version 1.0.2 and above uses 64-bit | 363 | long. Versions 1.0.2 and above use 64-bit ints on some |
| 364 | ints on some platforms which support them (GNU supported | 364 | platforms which support them (GNU supported targets, and |
| 365 | targets, and Windows). To establish whether or not | 365 | Windows). To establish whether or not bzip2recover was |
| 366 | bzip2recover was built with such a limitation, run it | 366 | built with such a limitation, run it without arguments. |
| 367 | without arguments. In any event you can build yourself an | 367 | In any event you can build yourself an unlimited version |
| 368 | unlimited version if you can recompile it with MaybeUInt64 | 368 | if you can recompile it with MaybeUInt64 set to be an |
| 369 | set to be an unsigned 64-bit integer. | 369 | unsigned 64-bit integer. |
| 370 | 370 | ||
| 371 | 371 | ||
| 372 | 372 | ||
| 373 | 373 | ||
| 374 | AAUUTTHHOORR | 374 | AAUUTTHHOORR |
| 375 | Julian Seward, jseward@acm.org. | 375 | Julian Seward, jseward@bzip.org. |
| 376 | 376 | ||
| 377 | http://sources.redhat.com/bzip2 | 377 | http://www.bzip.org |
| 378 | 378 | ||
| 379 | The ideas embodied in _b_z_i_p_2 are due to (at least) the fol | 379 | The ideas embodied in _b_z_i_p_2 are due to (at least) the fol |
| 380 | lowing people: Michael Burrows and David Wheeler (for the | 380 | lowing people: Michael Burrows and David Wheeler (for the |
| 381 | block sorting transformation), David Wheeler (again, for | 381 | block sorting transformation), David Wheeler (again, for |
| 382 | the Huffman coder), Peter Fenwick (for the structured cod | 382 | the Huffman coder), Peter Fenwick (for the structured cod |
| 383 | ing model in the original _b_z_i_p_, and many refinements), and | 383 | ing model in the original _b_z_i_p_, and many refinements), and |
| 384 | Alistair Moffat, Radford Neal and Ian Witten (for the | 384 | Alistair Moffat, Radford Neal and Ian Witten (for the |
| 385 | arithmetic coder in the original _b_z_i_p_)_. I am much | 385 | arithmetic coder in the original _b_z_i_p_)_. I am much |
| 386 | indebted for their help, support and advice. See the man | 386 | indebted for their help, support and advice. See the man |
| 387 | ual in the source distribution for pointers to sources of | 387 | ual in the source distribution for pointers to sources of |
| 388 | documentation. Christian von Roques encouraged me to look | 388 | documentation. Christian von Roques encouraged me to look |
| 389 | for faster sorting algorithms, so as to speed up compres | 389 | for faster sorting algorithms, so as to speed up compres |
| 390 | sion. Bela Lubkin encouraged me to improve the worst-case | 390 | sion. Bela Lubkin encouraged me to improve the worst-case |
| 391 | compression performance. The bz* scripts are derived from | 391 | compression performance. Donna Robinson XMLised the docu |
| 392 | those of GNU gzip. Many people sent patches, helped with | 392 | mentation. The bz* scripts are derived from those of GNU |
| 393 | portability problems, lent machines, gave advice and were | 393 | gzip. Many people sent patches, helped with portability |
| 394 | generally helpful. | 394 | problems, lent machines, gave advice and were generally |
| 395 | helpful. | ||
| 395 | 396 | ||
| 396 | 397 | ||
| 397 | 398 | ||
| @@ -7,7 +7,7 @@ | |||
| 7 | This file is a part of bzip2 and/or libbzip2, a program and | 7 | This file is a part of bzip2 and/or libbzip2, a program and |
| 8 | library for lossless, block-sorting data compression. | 8 | library for lossless, block-sorting data compression. |
| 9 | 9 | ||
| 10 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 10 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 11 | 11 | ||
| 12 | Redistribution and use in source and binary forms, with or without | 12 | Redistribution and use in source and binary forms, with or without |
| 13 | modification, are permitted provided that the following conditions | 13 | modification, are permitted provided that the following conditions |
| @@ -41,7 +41,7 @@ | |||
| 41 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 41 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 42 | 42 | ||
| 43 | Julian Seward, Cambridge, UK. | 43 | Julian Seward, Cambridge, UK. |
| 44 | jseward@acm.org | 44 | jseward@bzip.org |
| 45 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 45 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 46 | 46 | ||
| 47 | This program is based on (at least) the work of: | 47 | This program is based on (at least) the work of: |
| @@ -525,6 +525,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) | |||
| 525 | UChar obuf[5000]; | 525 | UChar obuf[5000]; |
| 526 | UChar unused[BZ_MAX_UNUSED]; | 526 | UChar unused[BZ_MAX_UNUSED]; |
| 527 | Int32 nUnused; | 527 | Int32 nUnused; |
| 528 | void* unusedTmpV; | ||
| 528 | UChar* unusedTmp; | 529 | UChar* unusedTmp; |
| 529 | 530 | ||
| 530 | nUnused = 0; | 531 | nUnused = 0; |
| @@ -554,9 +555,10 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) | |||
| 554 | } | 555 | } |
| 555 | if (bzerr != BZ_STREAM_END) goto errhandler; | 556 | if (bzerr != BZ_STREAM_END) goto errhandler; |
| 556 | 557 | ||
| 557 | BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); | 558 | BZ2_bzReadGetUnused ( &bzerr, bzf, &unusedTmpV, &nUnused ); |
| 558 | if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); | 559 | if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); |
| 559 | 560 | ||
| 561 | unusedTmp = (UChar*)unusedTmpV; | ||
| 560 | for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; | 562 | for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; |
| 561 | 563 | ||
| 562 | BZ2_bzReadClose ( &bzerr, bzf ); | 564 | BZ2_bzReadClose ( &bzerr, bzf ); |
| @@ -639,6 +641,7 @@ Bool testStream ( FILE *zStream ) | |||
| 639 | UChar obuf[5000]; | 641 | UChar obuf[5000]; |
| 640 | UChar unused[BZ_MAX_UNUSED]; | 642 | UChar unused[BZ_MAX_UNUSED]; |
| 641 | Int32 nUnused; | 643 | Int32 nUnused; |
| 644 | void* unusedTmpV; | ||
| 642 | UChar* unusedTmp; | 645 | UChar* unusedTmp; |
| 643 | 646 | ||
| 644 | nUnused = 0; | 647 | nUnused = 0; |
| @@ -662,9 +665,10 @@ Bool testStream ( FILE *zStream ) | |||
| 662 | } | 665 | } |
| 663 | if (bzerr != BZ_STREAM_END) goto errhandler; | 666 | if (bzerr != BZ_STREAM_END) goto errhandler; |
| 664 | 667 | ||
| 665 | BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); | 668 | BZ2_bzReadGetUnused ( &bzerr, bzf, &unusedTmpV, &nUnused ); |
| 666 | if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); | 669 | if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); |
| 667 | 670 | ||
| 671 | unusedTmp = (UChar*)unusedTmpV; | ||
| 668 | for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; | 672 | for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; |
| 669 | 673 | ||
| 670 | BZ2_bzReadClose ( &bzerr, bzf ); | 674 | BZ2_bzReadClose ( &bzerr, bzf ); |
| @@ -828,7 +832,7 @@ void panic ( Char* s ) | |||
| 828 | "\n%s: PANIC -- internal consistency error:\n" | 832 | "\n%s: PANIC -- internal consistency error:\n" |
| 829 | "\t%s\n" | 833 | "\t%s\n" |
| 830 | "\tThis is a BUG. Please report it to me at:\n" | 834 | "\tThis is a BUG. Please report it to me at:\n" |
| 831 | "\tjseward@acm.org\n", | 835 | "\tjseward@bzip.org\n", |
| 832 | progName, s ); | 836 | progName, s ); |
| 833 | showFileNames(); | 837 | showFileNames(); |
| 834 | cleanUpAndFail( 3 ); | 838 | cleanUpAndFail( 3 ); |
| @@ -908,7 +912,7 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n ) | |||
| 908 | " The user's manual, Section 4.3, has more info on (1) and (2).\n" | 912 | " The user's manual, Section 4.3, has more info on (1) and (2).\n" |
| 909 | " \n" | 913 | " \n" |
| 910 | " If you suspect this is a bug in bzip2, or are unsure about (1)\n" | 914 | " If you suspect this is a bug in bzip2, or are unsure about (1)\n" |
| 911 | " or (2), feel free to report it to me at: jseward@acm.org.\n" | 915 | " or (2), feel free to report it to me at: jseward@bzip.org.\n" |
| 912 | " Section 4.3 of the user's manual describes the info a useful\n" | 916 | " Section 4.3 of the user's manual describes the info a useful\n" |
| 913 | " bug report should have. If the manual is available on your\n" | 917 | " bug report should have. If the manual is available on your\n" |
| 914 | " system, please try and read it before mailing me. If you don't\n" | 918 | " system, please try and read it before mailing me. If you don't\n" |
| @@ -931,7 +935,7 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n ) | |||
| 931 | " The user's manual, Section 4.3, has more info on (2) and (3).\n" | 935 | " The user's manual, Section 4.3, has more info on (2) and (3).\n" |
| 932 | " \n" | 936 | " \n" |
| 933 | " If you suspect this is a bug in bzip2, or are unsure about (2)\n" | 937 | " If you suspect this is a bug in bzip2, or are unsure about (2)\n" |
| 934 | " or (3), feel free to report it to me at: jseward@acm.org.\n" | 938 | " or (3), feel free to report it to me at: jseward@bzip.org.\n" |
| 935 | " Section 4.3 of the user's manual describes the info a useful\n" | 939 | " Section 4.3 of the user's manual describes the info a useful\n" |
| 936 | " bug report should have. If the manual is available on your\n" | 940 | " bug report should have. If the manual is available on your\n" |
| 937 | " system, please try and read it before mailing me. If you don't\n" | 941 | " system, please try and read it before mailing me. If you don't\n" |
| @@ -1674,7 +1678,7 @@ void license ( void ) | |||
| 1674 | "bzip2, a block-sorting file compressor. " | 1678 | "bzip2, a block-sorting file compressor. " |
| 1675 | "Version %s.\n" | 1679 | "Version %s.\n" |
| 1676 | " \n" | 1680 | " \n" |
| 1677 | " Copyright (C) 1996-2002 by Julian Seward.\n" | 1681 | " Copyright (C) 1996-2005 by Julian Seward.\n" |
| 1678 | " \n" | 1682 | " \n" |
| 1679 | " This program is free software; you can redistribute it and/or modify\n" | 1683 | " This program is free software; you can redistribute it and/or modify\n" |
| 1680 | " it under the terms set out in the LICENSE file, which is included\n" | 1684 | " it under the terms set out in the LICENSE file, which is included\n" |
| @@ -1,6 +1,6 @@ | |||
| 1 | 1 | ||
| 2 | NAME | 2 | NAME |
| 3 | bzip2, bunzip2 - a block-sorting file compressor, v1.0.2 | 3 | bzip2, bunzip2 - a block-sorting file compressor, v1.0.3 |
| 4 | bzcat - decompresses files to stdout | 4 | bzcat - decompresses files to stdout |
| 5 | bzip2recover - recovers data from damaged bzip2 files | 5 | bzip2recover - recovers data from damaged bzip2 files |
| 6 | 6 | ||
| @@ -17,20 +17,20 @@ DESCRIPTION | |||
| 17 | sorting text compression algorithm, and Huffman coding. | 17 | sorting text compression algorithm, and Huffman coding. |
| 18 | Compression is generally considerably better than that | 18 | Compression is generally considerably better than that |
| 19 | achieved by more conventional LZ77/LZ78-based compressors, | 19 | achieved by more conventional LZ77/LZ78-based compressors, |
| 20 | and approaches the performance of the PPM family of sta | 20 | and approaches the performance of the PPM family of sta- |
| 21 | tistical compressors. | 21 | tistical compressors. |
| 22 | 22 | ||
| 23 | The command-line options are deliberately very similar to | 23 | The command-line options are deliberately very similar to |
| 24 | those of GNU gzip, but they are not identical. | 24 | those of GNU gzip, but they are not identical. |
| 25 | 25 | ||
| 26 | bzip2 expects a list of file names to accompany the com | 26 | bzip2 expects a list of file names to accompany the com- |
| 27 | mand-line flags. Each file is replaced by a compressed | 27 | mand-line flags. Each file is replaced by a compressed |
| 28 | version of itself, with the name "original_name.bz2". | 28 | version of itself, with the name "original_name.bz2". |
| 29 | Each compressed file has the same modification date, per | 29 | Each compressed file has the same modification date, per- |
| 30 | missions, and, when possible, ownership as the correspond | 30 | missions, and, when possible, ownership as the correspond- |
| 31 | ing original, so that these properties can be correctly | 31 | ing original, so that these properties can be correctly |
| 32 | restored at decompression time. File name handling is | 32 | restored at decompression time. File name handling is |
| 33 | naive in the sense that there is no mechanism for preserv | 33 | naive in the sense that there is no mechanism for preserv- |
| 34 | ing original file names, permissions, ownerships or dates | 34 | ing original file names, permissions, ownerships or dates |
| 35 | in filesystems which lack these concepts, or have serious | 35 | in filesystems which lack these concepts, or have serious |
| 36 | file name length restrictions, such as MS-DOS. | 36 | file name length restrictions, such as MS-DOS. |
| @@ -61,23 +61,23 @@ DESCRIPTION | |||
| 61 | guess the name of the original file, and uses the original | 61 | guess the name of the original file, and uses the original |
| 62 | name with .out appended. | 62 | name with .out appended. |
| 63 | 63 | ||
| 64 | As with compression, supplying no filenames causes decom | 64 | As with compression, supplying no filenames causes decom- |
| 65 | pression from standard input to standard output. | 65 | pression from standard input to standard output. |
| 66 | 66 | ||
| 67 | bunzip2 will correctly decompress a file which is the con | 67 | bunzip2 will correctly decompress a file which is the con- |
| 68 | catenation of two or more compressed files. The result is | 68 | catenation of two or more compressed files. The result is |
| 69 | the concatenation of the corresponding uncompressed files. | 69 | the concatenation of the corresponding uncompressed files. |
| 70 | Integrity testing (-t) of concatenated compressed files is | 70 | Integrity testing (-t) of concatenated compressed files is |
| 71 | also supported. | 71 | also supported. |
| 72 | 72 | ||
| 73 | You can also compress or decompress files to the standard | 73 | You can also compress or decompress files to the standard |
| 74 | output by giving the -c flag. Multiple files may be com | 74 | output by giving the -c flag. Multiple files may be com- |
| 75 | pressed and decompressed like this. The resulting outputs | 75 | pressed and decompressed like this. The resulting outputs |
| 76 | are fed sequentially to stdout. Compression of multiple | 76 | are fed sequentially to stdout. Compression of multiple |
| 77 | files in this manner generates a stream containing multi | 77 | files in this manner generates a stream containing multi- |
| 78 | ple compressed file representations. Such a stream can be | 78 | ple compressed file representations. Such a stream can be |
| 79 | decompressed correctly only by bzip2 version 0.9.0 or | 79 | decompressed correctly only by bzip2 version 0.9.0 or |
| 80 | later. Earlier versions of bzip2 will stop after decom | 80 | later. Earlier versions of bzip2 will stop after decom- |
| 81 | pressing the first file in the stream. | 81 | pressing the first file in the stream. |
| 82 | 82 | ||
| 83 | bzcat (or bzip2 -dc) decompresses all specified files to | 83 | bzcat (or bzip2 -dc) decompresses all specified files to |
| @@ -98,7 +98,7 @@ DESCRIPTION | |||
| 98 | 98 | ||
| 99 | As a self-check for your protection, bzip2 uses 32-bit | 99 | As a self-check for your protection, bzip2 uses 32-bit |
| 100 | CRCs to make sure that the decompressed version of a file | 100 | CRCs to make sure that the decompressed version of a file |
| 101 | is identical to the original. This guards against corrup | 101 | is identical to the original. This guards against corrup- |
| 102 | tion of the compressed data, and against undetected bugs | 102 | tion of the compressed data, and against undetected bugs |
| 103 | in bzip2 (hopefully very unlikely). The chances of data | 103 | in bzip2 (hopefully very unlikely). The chances of data |
| 104 | corruption going undetected is microscopic, about one | 104 | corruption going undetected is microscopic, about one |
| @@ -171,7 +171,7 @@ OPTIONS | |||
| 171 | 171 | ||
| 172 | -v --verbose | 172 | -v --verbose |
| 173 | Verbose mode -- show the compression ratio for each | 173 | Verbose mode -- show the compression ratio for each |
| 174 | file processed. Further -v's increase the ver | 174 | file processed. Further -v's increase the ver- |
| 175 | bosity level, spewing out lots of information which | 175 | bosity level, spewing out lots of information which |
| 176 | is primarily of interest for diagnostic purposes. | 176 | is primarily of interest for diagnostic purposes. |
| 177 | 177 | ||
| @@ -184,19 +184,19 @@ OPTIONS | |||
| 184 | compressing. Has no effect when decompressing. | 184 | compressing. Has no effect when decompressing. |
| 185 | See MEMORY MANAGEMENT below. The --fast and --best | 185 | See MEMORY MANAGEMENT below. The --fast and --best |
| 186 | aliases are primarily for GNU gzip compatibility. | 186 | aliases are primarily for GNU gzip compatibility. |
| 187 | In particular, --fast doesn't make things signifi | 187 | In particular, --fast doesn't make things signifi- |
| 188 | cantly faster. And --best merely selects the | 188 | cantly faster. And --best merely selects the |
| 189 | default behaviour. | 189 | default behaviour. |
| 190 | 190 | ||
| 191 | -- Treats all subsequent arguments as file names, even | 191 | -- Treats all subsequent arguments as file names, even |
| 192 | if they start with a dash. This is so you can han | 192 | if they start with a dash. This is so you can han- |
| 193 | dle files with names beginning with a dash, for | 193 | dle files with names beginning with a dash, for |
| 194 | example: bzip2 -- -myfilename. | 194 | example: bzip2 -- -myfilename. |
| 195 | 195 | ||
| 196 | --repetitive-fast --repetitive-best | 196 | --repetitive-fast --repetitive-best |
| 197 | These flags are redundant in versions 0.9.5 and | 197 | These flags are redundant in versions 0.9.5 and |
| 198 | above. They provided some coarse control over the | 198 | above. They provided some coarse control over the |
| 199 | behaviour of the sorting algorithm in earlier ver | 199 | behaviour of the sorting algorithm in earlier ver- |
| 200 | sions, which was sometimes useful. 0.9.5 and above | 200 | sions, which was sometimes useful. 0.9.5 and above |
| 201 | have an improved algorithm which renders these | 201 | have an improved algorithm which renders these |
| 202 | flags irrelevant. | 202 | flags irrelevant. |
| @@ -207,7 +207,7 @@ MEMORY MANAGEMENT | |||
| 207 | affects both the compression ratio achieved, and the | 207 | affects both the compression ratio achieved, and the |
| 208 | amount of memory needed for compression and decompression. | 208 | amount of memory needed for compression and decompression. |
| 209 | The flags -1 through -9 specify the block size to be | 209 | The flags -1 through -9 specify the block size to be |
| 210 | 100,000 bytes through 900,000 bytes (the default) respec | 210 | 100,000 bytes through 900,000 bytes (the default) respec- |
| 211 | tively. At decompression time, the block size used for | 211 | tively. At decompression time, the block size used for |
| 212 | compression is read from the header of the compressed | 212 | compression is read from the header of the compressed |
| 213 | file, and bunzip2 then allocates itself just enough memory | 213 | file, and bunzip2 then allocates itself just enough memory |
| @@ -235,13 +235,13 @@ MEMORY MANAGEMENT | |||
| 235 | bunzip2 will require about 3700 kbytes to decompress. To | 235 | bunzip2 will require about 3700 kbytes to decompress. To |
| 236 | support decompression of any file on a 4 megabyte machine, | 236 | support decompression of any file on a 4 megabyte machine, |
| 237 | bunzip2 has an option to decompress using approximately | 237 | bunzip2 has an option to decompress using approximately |
| 238 | half this amount of memory, about 2300 kbytes. Decompres | 238 | half this amount of memory, about 2300 kbytes. Decompres- |
| 239 | sion speed is also halved, so you should use this option | 239 | sion speed is also halved, so you should use this option |
| 240 | only where necessary. The relevant flag is -s. | 240 | only where necessary. The relevant flag is -s. |
| 241 | 241 | ||
| 242 | In general, try and use the largest block size memory con | 242 | In general, try and use the largest block size memory con- |
| 243 | straints allow, since that maximises the compression | 243 | straints allow, since that maximises the compression |
| 244 | achieved. Compression and decompression speed are virtu | 244 | achieved. Compression and decompression speed are virtu- |
| 245 | ally unaffected by block size. | 245 | ally unaffected by block size. |
| 246 | 246 | ||
| 247 | Another significant point applies to files which fit in a | 247 | Another significant point applies to files which fit in a |
| @@ -257,11 +257,11 @@ MEMORY MANAGEMENT | |||
| 257 | 257 | ||
| 258 | Here is a table which summarises the maximum memory usage | 258 | Here is a table which summarises the maximum memory usage |
| 259 | for different block sizes. Also recorded is the total | 259 | for different block sizes. Also recorded is the total |
| 260 | compressed size for 14 files of the Calgary Text Compres | 260 | compressed size for 14 files of the Calgary Text Compres- |
| 261 | sion Corpus totalling 3,141,622 bytes. This column gives | 261 | sion Corpus totalling 3,141,622 bytes. This column gives |
| 262 | some feel for how compression varies with block size. | 262 | some feel for how compression varies with block size. |
| 263 | These figures tend to understate the advantage of larger | 263 | These figures tend to understate the advantage of larger |
| 264 | block sizes for larger files, since the Corpus is domi | 264 | block sizes for larger files, since the Corpus is domi- |
| 265 | nated by smaller files. | 265 | nated by smaller files. |
| 266 | 266 | ||
| 267 | Compress Decompress Decompress Corpus | 267 | Compress Decompress Decompress Corpus |
| @@ -280,7 +280,7 @@ MEMORY MANAGEMENT | |||
| 280 | 280 | ||
| 281 | RECOVERING DATA FROM DAMAGED FILES | 281 | RECOVERING DATA FROM DAMAGED FILES |
| 282 | bzip2 compresses files in blocks, usually 900kbytes long. | 282 | bzip2 compresses files in blocks, usually 900kbytes long. |
| 283 | Each block is handled independently. If a media or trans | 283 | Each block is handled independently. If a media or trans- |
| 284 | mission error causes a multi-block .bz2 file to become | 284 | mission error causes a multi-block .bz2 file to become |
| 285 | damaged, it may be possible to recover data from the | 285 | damaged, it may be possible to recover data from the |
| 286 | undamaged blocks in the file. | 286 | undamaged blocks in the file. |
| @@ -297,19 +297,19 @@ RECOVERING DATA FROM DAMAGED FILES | |||
| 297 | the integrity of the resulting files, and decompress those | 297 | the integrity of the resulting files, and decompress those |
| 298 | which are undamaged. | 298 | which are undamaged. |
| 299 | 299 | ||
| 300 | bzip2recover takes a single argument, the name of the dam | 300 | bzip2recover takes a single argument, the name of the dam- |
| 301 | aged file, and writes a number of files | 301 | aged file, and writes a number of files |
| 302 | "rec00001file.bz2", "rec00002file.bz2", etc, containing | 302 | "rec00001file.bz2", "rec00002file.bz2", etc, containing |
| 303 | the extracted blocks. The output filenames are | 303 | the extracted blocks. The output filenames are |
| 304 | designed so that the use of wildcards in subsequent pro | 304 | designed so that the use of wildcards in subsequent pro- |
| 305 | cessing -- for example, "bzip2 -dc rec*file.bz2 > recov | 305 | cessing -- for example, "bzip2 -dc rec*file.bz2 > recov- |
| 306 | ered_data" -- processes the files in the correct order. | 306 | ered_data" -- processes the files in the correct order. |
| 307 | 307 | ||
| 308 | bzip2recover should be of most use dealing with large .bz2 | 308 | bzip2recover should be of most use dealing with large .bz2 |
| 309 | files, as these will contain many blocks. It is clearly | 309 | files, as these will contain many blocks. It is clearly |
| 310 | futile to use it on damaged single-block files, since a | 310 | futile to use it on damaged single-block files, since a |
| 311 | damaged block cannot be recovered. If you wish to min | 311 | damaged block cannot be recovered. If you wish to min- |
| 312 | imise any potential data loss through media or transmis | 312 | imise any potential data loss through media or transmis- |
| 313 | sion errors, you might consider compressing with a smaller | 313 | sion errors, you might consider compressing with a smaller |
| 314 | block size. | 314 | block size. |
| 315 | 315 | ||
| @@ -323,19 +323,19 @@ PERFORMANCE NOTES | |||
| 323 | better than previous versions in this respect. The ratio | 323 | better than previous versions in this respect. The ratio |
| 324 | between worst-case and average-case compression time is in | 324 | between worst-case and average-case compression time is in |
| 325 | the region of 10:1. For previous versions, this figure | 325 | the region of 10:1. For previous versions, this figure |
| 326 | was more like 100:1. You can use the -vvvv option to mon | 326 | was more like 100:1. You can use the -vvvv option to mon- |
| 327 | itor progress in great detail, if you want. | 327 | itor progress in great detail, if you want. |
| 328 | 328 | ||
| 329 | Decompression speed is unaffected by these phenomena. | 329 | Decompression speed is unaffected by these phenomena. |
| 330 | 330 | ||
| 331 | bzip2 usually allocates several megabytes of memory to | 331 | bzip2 usually allocates several megabytes of memory to |
| 332 | operate in, and then charges all over it in a fairly ran | 332 | operate in, and then charges all over it in a fairly ran- |
| 333 | dom fashion. This means that performance, both for com | 333 | dom fashion. This means that performance, both for com- |
| 334 | pressing and decompressing, is largely determined by the | 334 | pressing and decompressing, is largely determined by the |
| 335 | speed at which your machine can service cache misses. | 335 | speed at which your machine can service cache misses. |
| 336 | Because of this, small changes to the code to reduce the | 336 | Because of this, small changes to the code to reduce the |
| 337 | miss rate have been observed to give disproportionately | 337 | miss rate have been observed to give disproportionately |
| 338 | large performance improvements. I imagine bzip2 will per | 338 | large performance improvements. I imagine bzip2 will per- |
| 339 | form best on machines with very large caches. | 339 | form best on machines with very large caches. |
| 340 | 340 | ||
| 341 | 341 | ||
| @@ -345,46 +345,47 @@ CAVEATS | |||
| 345 | but the details of what the problem is sometimes seem | 345 | but the details of what the problem is sometimes seem |
| 346 | rather misleading. | 346 | rather misleading. |
| 347 | 347 | ||
| 348 | This manual page pertains to version 1.0.2 of bzip2. Com | 348 | This manual page pertains to version 1.0.3 of bzip2. Com- |
| 349 | pressed data created by this version is entirely forwards | 349 | pressed data created by this version is entirely forwards |
| 350 | and backwards compatible with the previous public | 350 | and backwards compatible with the previous public |
| 351 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0 and 1.0.1, | 351 | releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1 and |
| 352 | but with the following exception: 0.9.0 and above can cor | 352 | 1.0.2, but with the following exception: 0.9.0 and above |
| 353 | rectly decompress multiple concatenated compressed files. | 353 | can correctly decompress multiple concatenated compressed |
| 354 | 0.1pl2 cannot do this; it will stop after decompressing | 354 | files. 0.1pl2 cannot do this; it will stop after decom- |
| 355 | just the first file in the stream. | 355 | pressing just the first file in the stream. |
| 356 | 356 | ||
| 357 | bzip2recover versions prior to this one, 1.0.2, used | 357 | bzip2recover versions prior to 1.0.2 used 32-bit integers |
| 358 | 32-bit integers to represent bit positions in compressed | 358 | to represent bit positions in compressed files, so they |
| 359 | files, so it could not handle compressed files more than | 359 | could not handle compressed files more than 512 megabytes |
| 360 | 512 megabytes long. Version 1.0.2 and above uses 64-bit | 360 | long. Versions 1.0.2 and above use 64-bit ints on some |
| 361 | ints on some platforms which support them (GNU supported | 361 | platforms which support them (GNU supported targets, and |
| 362 | targets, and Windows). To establish whether or not | 362 | Windows). To establish whether or not bzip2recover was |
| 363 | bzip2recover was built with such a limitation, run it | 363 | built with such a limitation, run it without arguments. |
| 364 | without arguments. In any event you can build yourself an | 364 | In any event you can build yourself an unlimited version |
| 365 | unlimited version if you can recompile it with MaybeUInt64 | 365 | if you can recompile it with MaybeUInt64 set to be an |
| 366 | set to be an unsigned 64-bit integer. | 366 | unsigned 64-bit integer. |
| 367 | 367 | ||
| 368 | 368 | ||
| 369 | AUTHOR | 369 | AUTHOR |
| 370 | Julian Seward, jseward@acm.org. | 370 | Julian Seward, jseward@bzip.org. |
| 371 | 371 | ||
| 372 | http://sources.redhat.com/bzip2 | 372 | http://www.bzip.org |
| 373 | 373 | ||
| 374 | The ideas embodied in bzip2 are due to (at least) the fol | 374 | The ideas embodied in bzip2 are due to (at least) the fol- |
| 375 | lowing people: Michael Burrows and David Wheeler (for the | 375 | lowing people: Michael Burrows and David Wheeler (for the |
| 376 | block sorting transformation), David Wheeler (again, for | 376 | block sorting transformation), David Wheeler (again, for |
| 377 | the Huffman coder), Peter Fenwick (for the structured cod | 377 | the Huffman coder), Peter Fenwick (for the structured cod- |
| 378 | ing model in the original bzip, and many refinements), and | 378 | ing model in the original bzip, and many refinements), and |
| 379 | Alistair Moffat, Radford Neal and Ian Witten (for the | 379 | Alistair Moffat, Radford Neal and Ian Witten (for the |
| 380 | arithmetic coder in the original bzip). I am much | 380 | arithmetic coder in the original bzip). I am much |
| 381 | indebted for their help, support and advice. See the man | 381 | indebted for their help, support and advice. See the man- |
| 382 | ual in the source distribution for pointers to sources of | 382 | ual in the source distribution for pointers to sources of |
| 383 | documentation. Christian von Roques encouraged me to look | 383 | documentation. Christian von Roques encouraged me to look |
| 384 | for faster sorting algorithms, so as to speed up compres | 384 | for faster sorting algorithms, so as to speed up compres- |
| 385 | sion. Bela Lubkin encouraged me to improve the worst-case | 385 | sion. Bela Lubkin encouraged me to improve the worst-case |
| 386 | compression performance. The bz* scripts are derived from | 386 | compression performance. Donna Robinson XMLised the docu- |
| 387 | those of GNU gzip. Many people sent patches, helped with | 387 | mentation. The bz* scripts are derived from those of GNU |
| 388 | portability problems, lent machines, gave advice and were | 388 | gzip. Many people sent patches, helped with portability |
| 389 | generally helpful. | 389 | problems, lent machines, gave advice and were generally |
| 390 | helpful. | ||
| 390 | 391 | ||
diff --git a/bzip2recover.c b/bzip2recover.c index 286873b..5cd405d 100644 --- a/bzip2recover.c +++ b/bzip2recover.c | |||
| @@ -7,9 +7,9 @@ | |||
| 7 | /*-- | 7 | /*-- |
| 8 | This program is bzip2recover, a program to attempt data | 8 | This program is bzip2recover, a program to attempt data |
| 9 | salvage from damaged files created by the accompanying | 9 | salvage from damaged files created by the accompanying |
| 10 | bzip2-1.0 program. | 10 | bzip2-1.0.3 program. |
| 11 | 11 | ||
| 12 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 12 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 13 | 13 | ||
| 14 | Redistribution and use in source and binary forms, with or without | 14 | Redistribution and use in source and binary forms, with or without |
| 15 | modification, are permitted provided that the following conditions | 15 | modification, are permitted provided that the following conditions |
| @@ -43,8 +43,8 @@ | |||
| 43 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 43 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 44 | 44 | ||
| 45 | Julian Seward, Cambridge, UK. | 45 | Julian Seward, Cambridge, UK. |
| 46 | jseward@acm.org | 46 | jseward@bzip.org |
| 47 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 47 | bzip2/libbzip2 version 1.0.3 of 15 February 2005 |
| 48 | --*/ | 48 | --*/ |
| 49 | 49 | ||
| 50 | /*-- | 50 | /*-- |
| @@ -345,7 +345,7 @@ Int32 main ( Int32 argc, Char** argv ) | |||
| 345 | inFileName[0] = outFileName[0] = 0; | 345 | inFileName[0] = outFileName[0] = 0; |
| 346 | 346 | ||
| 347 | fprintf ( stderr, | 347 | fprintf ( stderr, |
| 348 | "bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" ); | 348 | "bzip2recover 1.0.3: extracts blocks from damaged .bz2 files.\n" ); |
| 349 | 349 | ||
| 350 | if (argc != 2) { | 350 | if (argc != 2) { |
| 351 | fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", | 351 | fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", |
| @@ -374,7 +374,7 @@ Int32 main ( Int32 argc, Char** argv ) | |||
| 374 | if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { | 374 | if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { |
| 375 | fprintf ( stderr, | 375 | fprintf ( stderr, |
| 376 | "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n", | 376 | "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n", |
| 377 | progName, strlen(argv[1]) ); | 377 | progName, (int)strlen(argv[1]) ); |
| 378 | exit(1); | 378 | exit(1); |
| 379 | } | 379 | } |
| 380 | 380 | ||
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
| @@ -88,12 +88,12 @@ void BZ2_bz__AssertH__fail ( int errcode ) | |||
| 88 | fprintf(stderr, | 88 | fprintf(stderr, |
| 89 | "\n\nbzip2/libbzip2: internal error number %d.\n" | 89 | "\n\nbzip2/libbzip2: internal error number %d.\n" |
| 90 | "This is a bug in bzip2/libbzip2, %s.\n" | 90 | "This is a bug in bzip2/libbzip2, %s.\n" |
| 91 | "Please report it to me at: jseward@acm.org. If this happened\n" | 91 | "Please report it to me at: jseward@bzip.org. If this happened\n" |
| 92 | "when you were using some program which uses libbzip2 as a\n" | 92 | "when you were using some program which uses libbzip2 as a\n" |
| 93 | "component, you should also report this bug to the author(s)\n" | 93 | "component, you should also report this bug to the author(s)\n" |
| 94 | "of that program. Please make an effort to report this bug;\n" | 94 | "of that program. Please make an effort to report this bug;\n" |
| 95 | "timely and accurate bug reports eventually lead to higher\n" | 95 | "timely and accurate bug reports eventually lead to higher\n" |
| 96 | "quality software. Thanks. Julian Seward, 30 December 2001.\n\n", | 96 | "quality software. Thanks. Julian Seward, 15 February 2005.\n\n", |
| 97 | errcode, | 97 | errcode, |
| 98 | BZ2_bzlibVersion() | 98 | BZ2_bzlibVersion() |
| 99 | ); | 99 | ); |
| @@ -574,8 +574,11 @@ int BZ_API(BZ2_bzDecompressInit) | |||
| 574 | 574 | ||
| 575 | 575 | ||
| 576 | /*---------------------------------------------------*/ | 576 | /*---------------------------------------------------*/ |
| 577 | /* Return True iff data corruption is discovered. | ||
| 578 | Returns False if there is no problem. | ||
| 579 | */ | ||
| 577 | static | 580 | static |
| 578 | void unRLE_obuf_to_output_FAST ( DState* s ) | 581 | Bool unRLE_obuf_to_output_FAST ( DState* s ) |
| 579 | { | 582 | { |
| 580 | UChar k1; | 583 | UChar k1; |
| 581 | 584 | ||
| @@ -584,7 +587,7 @@ void unRLE_obuf_to_output_FAST ( DState* s ) | |||
| 584 | while (True) { | 587 | while (True) { |
| 585 | /* try to finish existing run */ | 588 | /* try to finish existing run */ |
| 586 | while (True) { | 589 | while (True) { |
| 587 | if (s->strm->avail_out == 0) return; | 590 | if (s->strm->avail_out == 0) return False; |
| 588 | if (s->state_out_len == 0) break; | 591 | if (s->state_out_len == 0) break; |
| 589 | *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; | 592 | *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; |
| 590 | BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); | 593 | BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); |
| @@ -594,10 +597,13 @@ void unRLE_obuf_to_output_FAST ( DState* s ) | |||
| 594 | s->strm->total_out_lo32++; | 597 | s->strm->total_out_lo32++; |
| 595 | if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; | 598 | if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; |
| 596 | } | 599 | } |
| 597 | 600 | ||
| 598 | /* can a new run be started? */ | 601 | /* can a new run be started? */ |
| 599 | if (s->nblock_used == s->save_nblock+1) return; | 602 | if (s->nblock_used == s->save_nblock+1) return False; |
| 600 | 603 | ||
| 604 | /* Only caused by corrupt data stream? */ | ||
| 605 | if (s->nblock_used > s->save_nblock+1) | ||
| 606 | return True; | ||
| 601 | 607 | ||
| 602 | s->state_out_len = 1; | 608 | s->state_out_len = 1; |
| 603 | s->state_out_ch = s->k0; | 609 | s->state_out_ch = s->k0; |
| @@ -667,6 +673,10 @@ void unRLE_obuf_to_output_FAST ( DState* s ) | |||
| 667 | cs_avail_out--; | 673 | cs_avail_out--; |
| 668 | } | 674 | } |
| 669 | } | 675 | } |
| 676 | /* Only caused by corrupt data stream? */ | ||
| 677 | if (c_nblock_used > s_save_nblockPP) | ||
| 678 | return True; | ||
| 679 | |||
| 670 | /* can a new run be started? */ | 680 | /* can a new run be started? */ |
| 671 | if (c_nblock_used == s_save_nblockPP) { | 681 | if (c_nblock_used == s_save_nblockPP) { |
| 672 | c_state_out_len = 0; goto return_notr; | 682 | c_state_out_len = 0; goto return_notr; |
| @@ -712,6 +722,7 @@ void unRLE_obuf_to_output_FAST ( DState* s ) | |||
| 712 | s->strm->avail_out = cs_avail_out; | 722 | s->strm->avail_out = cs_avail_out; |
| 713 | /* end save */ | 723 | /* end save */ |
| 714 | } | 724 | } |
| 725 | return False; | ||
| 715 | } | 726 | } |
| 716 | 727 | ||
| 717 | 728 | ||
| @@ -732,8 +743,11 @@ __inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab ) | |||
| 732 | 743 | ||
| 733 | 744 | ||
| 734 | /*---------------------------------------------------*/ | 745 | /*---------------------------------------------------*/ |
| 746 | /* Return True iff data corruption is discovered. | ||
| 747 | Returns False if there is no problem. | ||
| 748 | */ | ||
| 735 | static | 749 | static |
| 736 | void unRLE_obuf_to_output_SMALL ( DState* s ) | 750 | Bool unRLE_obuf_to_output_SMALL ( DState* s ) |
| 737 | { | 751 | { |
| 738 | UChar k1; | 752 | UChar k1; |
| 739 | 753 | ||
| @@ -742,7 +756,7 @@ void unRLE_obuf_to_output_SMALL ( DState* s ) | |||
| 742 | while (True) { | 756 | while (True) { |
| 743 | /* try to finish existing run */ | 757 | /* try to finish existing run */ |
| 744 | while (True) { | 758 | while (True) { |
| 745 | if (s->strm->avail_out == 0) return; | 759 | if (s->strm->avail_out == 0) return False; |
| 746 | if (s->state_out_len == 0) break; | 760 | if (s->state_out_len == 0) break; |
| 747 | *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; | 761 | *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; |
| 748 | BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); | 762 | BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); |
| @@ -754,8 +768,11 @@ void unRLE_obuf_to_output_SMALL ( DState* s ) | |||
| 754 | } | 768 | } |
| 755 | 769 | ||
| 756 | /* can a new run be started? */ | 770 | /* can a new run be started? */ |
| 757 | if (s->nblock_used == s->save_nblock+1) return; | 771 | if (s->nblock_used == s->save_nblock+1) return False; |
| 758 | 772 | ||
| 773 | /* Only caused by corrupt data stream? */ | ||
| 774 | if (s->nblock_used > s->save_nblock+1) | ||
| 775 | return True; | ||
| 759 | 776 | ||
| 760 | s->state_out_len = 1; | 777 | s->state_out_len = 1; |
| 761 | s->state_out_ch = s->k0; | 778 | s->state_out_ch = s->k0; |
| @@ -788,7 +805,7 @@ void unRLE_obuf_to_output_SMALL ( DState* s ) | |||
| 788 | while (True) { | 805 | while (True) { |
| 789 | /* try to finish existing run */ | 806 | /* try to finish existing run */ |
| 790 | while (True) { | 807 | while (True) { |
| 791 | if (s->strm->avail_out == 0) return; | 808 | if (s->strm->avail_out == 0) return False; |
| 792 | if (s->state_out_len == 0) break; | 809 | if (s->state_out_len == 0) break; |
| 793 | *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; | 810 | *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; |
| 794 | BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); | 811 | BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); |
| @@ -800,7 +817,11 @@ void unRLE_obuf_to_output_SMALL ( DState* s ) | |||
| 800 | } | 817 | } |
| 801 | 818 | ||
| 802 | /* can a new run be started? */ | 819 | /* can a new run be started? */ |
| 803 | if (s->nblock_used == s->save_nblock+1) return; | 820 | if (s->nblock_used == s->save_nblock+1) return False; |
| 821 | |||
| 822 | /* Only caused by corrupt data stream? */ | ||
| 823 | if (s->nblock_used > s->save_nblock+1) | ||
| 824 | return True; | ||
| 804 | 825 | ||
| 805 | s->state_out_len = 1; | 826 | s->state_out_len = 1; |
| 806 | s->state_out_ch = s->k0; | 827 | s->state_out_ch = s->k0; |
| @@ -830,6 +851,7 @@ void unRLE_obuf_to_output_SMALL ( DState* s ) | |||
| 830 | /*---------------------------------------------------*/ | 851 | /*---------------------------------------------------*/ |
| 831 | int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) | 852 | int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) |
| 832 | { | 853 | { |
| 854 | Bool corrupt; | ||
| 833 | DState* s; | 855 | DState* s; |
| 834 | if (strm == NULL) return BZ_PARAM_ERROR; | 856 | if (strm == NULL) return BZ_PARAM_ERROR; |
| 835 | s = strm->state; | 857 | s = strm->state; |
| @@ -840,12 +862,13 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) | |||
| 840 | if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR; | 862 | if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR; |
| 841 | if (s->state == BZ_X_OUTPUT) { | 863 | if (s->state == BZ_X_OUTPUT) { |
| 842 | if (s->smallDecompress) | 864 | if (s->smallDecompress) |
| 843 | unRLE_obuf_to_output_SMALL ( s ); else | 865 | corrupt = unRLE_obuf_to_output_SMALL ( s ); else |
| 844 | unRLE_obuf_to_output_FAST ( s ); | 866 | corrupt = unRLE_obuf_to_output_FAST ( s ); |
| 867 | if (corrupt) return BZ_DATA_ERROR; | ||
| 845 | if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { | 868 | if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { |
| 846 | BZ_FINALISE_CRC ( s->calculatedBlockCRC ); | 869 | BZ_FINALISE_CRC ( s->calculatedBlockCRC ); |
| 847 | if (s->verbosity >= 3) | 870 | if (s->verbosity >= 3) |
| 848 | VPrintf2 ( " {0x%x, 0x%x}", s->storedBlockCRC, | 871 | VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, |
| 849 | s->calculatedBlockCRC ); | 872 | s->calculatedBlockCRC ); |
| 850 | if (s->verbosity >= 2) VPrintf0 ( "]" ); | 873 | if (s->verbosity >= 2) VPrintf0 ( "]" ); |
| 851 | if (s->calculatedBlockCRC != s->storedBlockCRC) | 874 | if (s->calculatedBlockCRC != s->storedBlockCRC) |
| @@ -863,7 +886,7 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) | |||
| 863 | Int32 r = BZ2_decompress ( s ); | 886 | Int32 r = BZ2_decompress ( s ); |
| 864 | if (r == BZ_STREAM_END) { | 887 | if (r == BZ_STREAM_END) { |
| 865 | if (s->verbosity >= 3) | 888 | if (s->verbosity >= 3) |
| 866 | VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x", | 889 | VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x", |
| 867 | s->storedCombinedCRC, s->calculatedCombinedCRC ); | 890 | s->storedCombinedCRC, s->calculatedCombinedCRC ); |
| 868 | if (s->calculatedCombinedCRC != s->storedCombinedCRC) | 891 | if (s->calculatedCombinedCRC != s->storedCombinedCRC) |
| 869 | return BZ_DATA_ERROR; | 892 | return BZ_DATA_ERROR; |
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
| @@ -110,8 +110,10 @@ typedef | |||
| 110 | #define BZ_EXPORT | 110 | #define BZ_EXPORT |
| 111 | #endif | 111 | #endif |
| 112 | 112 | ||
| 113 | #ifndef BZ_NO_STDIO | ||
| 113 | /* Need a definition for FILE */ | 114 | /* Need a definition for FILE */ |
| 114 | #include <stdio.h> | 115 | #include <stdio.h> |
| 116 | #endif | ||
| 115 | 117 | ||
| 116 | #ifdef _WIN32 | 118 | #ifdef _WIN32 |
| 117 | # include <windows.h> | 119 | # include <windows.h> |
diff --git a/bzlib_private.h b/bzlib_private.h index ff973c3..ca76fe6 100644 --- a/bzlib_private.h +++ b/bzlib_private.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
| @@ -76,7 +76,7 @@ | |||
| 76 | 76 | ||
| 77 | /*-- General stuff. --*/ | 77 | /*-- General stuff. --*/ |
| 78 | 78 | ||
| 79 | #define BZ_VERSION "1.0.2, 30-Dec-2001" | 79 | #define BZ_VERSION "1.0.3, 15-Feb-2005" |
| 80 | 80 | ||
| 81 | typedef char Char; | 81 | typedef char Char; |
| 82 | typedef unsigned char Bool; | 82 | typedef unsigned char Bool; |
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
| @@ -488,9 +488,11 @@ void sendMTFValues ( EState* s ) | |||
| 488 | /*-- | 488 | /*-- |
| 489 | Recompute the tables based on the accumulated frequencies. | 489 | Recompute the tables based on the accumulated frequencies. |
| 490 | --*/ | 490 | --*/ |
| 491 | /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See | ||
| 492 | comment in huffman.c for details. */ | ||
| 491 | for (t = 0; t < nGroups; t++) | 493 | for (t = 0; t < nGroups; t++) |
| 492 | BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), | 494 | BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), |
| 493 | alphaSize, 20 ); | 495 | alphaSize, 17 /*20*/ ); |
| 494 | } | 496 | } |
| 495 | 497 | ||
| 496 | 498 | ||
| @@ -527,7 +529,7 @@ void sendMTFValues ( EState* s ) | |||
| 527 | if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; | 529 | if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; |
| 528 | if (s->len[t][i] < minLen) minLen = s->len[t][i]; | 530 | if (s->len[t][i] < minLen) minLen = s->len[t][i]; |
| 529 | } | 531 | } |
| 530 | AssertH ( !(maxLen > 20), 3004 ); | 532 | AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); |
| 531 | AssertH ( !(minLen < 1), 3005 ); | 533 | AssertH ( !(minLen < 1), 3005 ); |
| 532 | BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), | 534 | BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), |
| 533 | minLen, maxLen, alphaSize ); | 535 | minLen, maxLen, alphaSize ); |
| @@ -651,8 +653,8 @@ void BZ2_compressBlock ( EState* s, Bool is_last_block ) | |||
| 651 | if (s->blockNo > 1) s->numZ = 0; | 653 | if (s->blockNo > 1) s->numZ = 0; |
| 652 | 654 | ||
| 653 | if (s->verbosity >= 2) | 655 | if (s->verbosity >= 2) |
| 654 | VPrintf4( " block %d: crc = 0x%8x, " | 656 | VPrintf4( " block %d: crc = 0x%08x, " |
| 655 | "combined CRC = 0x%8x, size = %d\n", | 657 | "combined CRC = 0x%08x, size = %d\n", |
| 656 | s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); | 658 | s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); |
| 657 | 659 | ||
| 658 | BZ2_blockSort ( s ); | 660 | BZ2_blockSort ( s ); |
| @@ -703,7 +705,7 @@ void BZ2_compressBlock ( EState* s, Bool is_last_block ) | |||
| 703 | bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); | 705 | bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); |
| 704 | bsPutUInt32 ( s, s->combinedCRC ); | 706 | bsPutUInt32 ( s, s->combinedCRC ); |
| 705 | if (s->verbosity >= 2) | 707 | if (s->verbosity >= 2) |
| 706 | VPrintf1( " final combined CRC = 0x%x\n ", s->combinedCRC ); | 708 | VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); |
| 707 | bsFinishWrite ( s ); | 709 | bsFinishWrite ( s ); |
| 708 | } | 710 | } |
| 709 | } | 711 | } |
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
diff --git a/decompress.c b/decompress.c index e921347..81c3d2c 100644 --- a/decompress.c +++ b/decompress.c | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
| @@ -524,17 +524,23 @@ Int32 BZ2_decompress ( DState* s ) | |||
| 524 | if (s->origPtr < 0 || s->origPtr >= nblock) | 524 | if (s->origPtr < 0 || s->origPtr >= nblock) |
| 525 | RETURN(BZ_DATA_ERROR); | 525 | RETURN(BZ_DATA_ERROR); |
| 526 | 526 | ||
| 527 | /*-- Set up cftab to facilitate generation of T^(-1) --*/ | ||
| 528 | s->cftab[0] = 0; | ||
| 529 | for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; | ||
| 530 | for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; | ||
| 531 | for (i = 0; i <= 256; i++) { | ||
| 532 | if (s->cftab[i] < 0 || s->cftab[i] > nblock) { | ||
| 533 | /* s->cftab[i] can legitimately be == nblock */ | ||
| 534 | RETURN(BZ_DATA_ERROR); | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 527 | s->state_out_len = 0; | 538 | s->state_out_len = 0; |
| 528 | s->state_out_ch = 0; | 539 | s->state_out_ch = 0; |
| 529 | BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); | 540 | BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); |
| 530 | s->state = BZ_X_OUTPUT; | 541 | s->state = BZ_X_OUTPUT; |
| 531 | if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); | 542 | if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); |
| 532 | 543 | ||
| 533 | /*-- Set up cftab to facilitate generation of T^(-1) --*/ | ||
| 534 | s->cftab[0] = 0; | ||
| 535 | for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; | ||
| 536 | for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; | ||
| 537 | |||
| 538 | if (s->smallDecompress) { | 544 | if (s->smallDecompress) { |
| 539 | 545 | ||
| 540 | /*-- Make a copy of cftab, used in generation of T --*/ | 546 | /*-- Make a copy of cftab, used in generation of T --*/ |
diff --git a/entities.xml b/entities.xml new file mode 100644 index 0000000..6d0975f --- /dev/null +++ b/entities.xml | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | <!-- misc. strings: project URL, contact e-mail address, copyright year range --> | ||
| 2 | <!ENTITY bz-url "http://www.bzip.org"> | ||
| 3 | <!ENTITY bz-email "jseward@bzip.org"> | ||
| 4 | <!ENTITY bz-lifespan "1996-2005"> | ||
| 5 | |||
| 6 | <!ENTITY bz-version "1.0.3"> | ||
| 7 | <!ENTITY bz-date "15 February 2005"> | ||
| 8 | |||
| 9 | <!ENTITY manual-title "bzip2 Manual"> | ||
diff --git a/format.pl b/format.pl new file mode 100755 index 0000000..8ab47ac --- /dev/null +++ b/format.pl | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | #!/usr/bin/perl -w | ||
| 2 | use strict; | ||
| 3 | |||
| 4 | # Copy xml_infile to xml_outfile, wrapping the contents of each | ||
| 5 | # <programlisting> and <screen> element in a CDATA section. | ||
| 6 | |||
| 7 | # get command line values: exactly two file names are required | ||
| 8 | if ( @ARGV != 2 ) { | ||
| 9 | die "Usage: $0 xml_infile xml_outfile\n"; | ||
| 10 | } | ||
| 11 | my ( $infile, $outfile ) = @ARGV; | ||
| 12 | |||
| 13 | # check infile exists and is a plain file | ||
| 14 | die "Can't find file \"$infile\"" | ||
| 15 | unless -f $infile; | ||
| 16 | # check we can read infile | ||
| 17 | die "Can't read input $infile\n" | ||
| 18 | unless -r $infile; | ||
| 19 | # 3-arg open: the file name is never parsed for mode characters | ||
| 20 | open( INFILE, '<', $infile ) or | ||
| 21 | die "Can't open $infile for reading: $!\n"; | ||
| 22 | open( OUTFILE, '>', $outfile ) or | ||
| 23 | die "Can't open $outfile for writing: $!\n"; | ||
| 24 | |||
| 25 | # $curr holds the newest pending line and $prev the one before it. | ||
| 26 | # Output lags the input by two lines so that a start tag can be | ||
| 27 | # joined to the line after it, and an end tag to the line before it. | ||
| 28 | my ( $prev, $curr ) = ( '', '' ); | ||
| 29 | while ( <INFILE> ) { | ||
| 30 | print OUTFILE $prev; | ||
| 31 | $prev = $curr; | ||
| 32 | $curr = $_; | ||
| 33 | |||
| 34 | if ( $prev =~ /<programlisting>$|<screen>$/ ) { | ||
| 35 | # start tag ends $prev: emit "tag<![CDATA[next line" as one line | ||
| 36 | chomp $prev; | ||
| 37 | $curr = join( '', $prev, "<![CDATA[", $curr ); | ||
| 38 | $prev = ''; | ||
| 39 | next; | ||
| 40 | } | ||
| 41 | elsif ( $curr =~ /<\/programlisting>|<\/screen>/ ) { | ||
| 42 | # end tag in $curr: emit "previous line]]>end-tag line" as one line | ||
| 43 | chomp $prev; | ||
| 44 | $curr = join( '', $prev, "]]>", $curr ); | ||
| 45 | $prev = ''; | ||
| 46 | next; | ||
| 47 | } | ||
| 48 | } | ||
| 49 | # flush both pending lines; printing only $curr would silently | ||
| 50 | # drop the second-to-last line of the input | ||
| 51 | print OUTFILE $prev, $curr; | ||
| 52 | close INFILE; | ||
| 53 | close OUTFILE or die "Can't finish writing $outfile: $!\n"; | ||
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
| @@ -162,7 +162,24 @@ void BZ2_hbMakeCodeLengths ( UChar *len, | |||
| 162 | 162 | ||
| 163 | if (! tooLong) break; | 163 | if (! tooLong) break; |
| 164 | 164 | ||
| 165 | for (i = 1; i < alphaSize; i++) { | 165 | /* 17 Oct 04: keep-going condition for the following loop used |
| 166 | to be 'i < alphaSize', which missed the last element, | ||
| 167 | theoretically leading to the possibility of the compressor | ||
| 168 | looping. However, this count-scaling step is only needed if | ||
| 169 | one of the generated Huffman code words is longer than | ||
| 170 | maxLen, which up to and including version 1.0.2 was 20 bits, | ||
| 171 | which is extremely unlikely. In version 1.0.3 maxLen was | ||
| 172 | changed to 17 bits, which has minimal effect on compression | ||
| 173 | ratio, but does mean this scaling step is used from time to | ||
| 174 | time, enough to verify that it works. | ||
| 175 | |||
| 176 | This means that bzip2-1.0.3 and later will only produce | ||
| 177 | Huffman codes with a maximum length of 17 bits. However, in | ||
| 178 | order to preserve backwards compatibility with bitstreams | ||
| 179 | produced by versions pre-1.0.3, the decompressor must still | ||
| 180 | handle lengths of up to 20. */ | ||
| 181 | |||
| 182 | for (i = 1; i <= alphaSize; i++) { | ||
| 166 | j = weight[i] >> 8; | 183 | j = weight[i] >> 8; |
| 167 | j = 1 + (j / 2); | 184 | j = 1 + (j / 2); |
| 168 | weight[i] = j << 8; | 185 | weight[i] = j << 8; |
diff --git a/manual.texi b/manual.texi deleted file mode 100644 index 5bc27d5..0000000 --- a/manual.texi +++ /dev/null | |||
| @@ -1,2243 +0,0 @@ | |||
| 1 | \input texinfo @c -*- Texinfo -*- | ||
| 2 | @setfilename bzip2.info | ||
| 3 | |||
| 4 | @ignore | ||
| 5 | This file documents bzip2 version 1.0.2, and associated library | ||
| 6 | libbzip2, written by Julian Seward (jseward@acm.org). | ||
| 7 | |||
| 8 | Copyright (C) 1996-2002 Julian R Seward | ||
| 9 | |||
| 10 | Permission is granted to make and distribute verbatim copies of | ||
| 11 | this manual provided the copyright notice and this permission notice | ||
| 12 | are preserved on all copies. | ||
| 13 | |||
| 14 | Permission is granted to copy and distribute translations of this manual | ||
| 15 | into another language, under the above conditions for verbatim copies. | ||
| 16 | @end ignore | ||
| 17 | |||
| 18 | @ifinfo | ||
| 19 | @format | ||
| 20 | START-INFO-DIR-ENTRY | ||
| 21 | * Bzip2: (bzip2). A program and library for data compression. | ||
| 22 | END-INFO-DIR-ENTRY | ||
| 23 | @end format | ||
| 24 | |||
| 25 | @end ifinfo | ||
| 26 | |||
| 27 | @iftex | ||
| 28 | @c @finalout | ||
| 29 | @settitle bzip2 and libbzip2 | ||
| 30 | @titlepage | ||
| 31 | @title bzip2 and libbzip2 | ||
| 32 | @subtitle a program and library for data compression | ||
| 33 | @subtitle copyright (C) 1996-2002 Julian Seward | ||
| 34 | @subtitle version 1.0.2 of 30 December 2001 | ||
| 35 | @author Julian Seward | ||
| 36 | |||
| 37 | @end titlepage | ||
| 38 | |||
| 39 | @parindent 0mm | ||
| 40 | @parskip 2mm | ||
| 41 | |||
| 42 | @end iftex | ||
| 43 | @node Top,,, (dir) | ||
| 44 | |||
| 45 | The following text is the License for this software. You should | ||
| 46 | find it identical to that contained in the file LICENSE in the | ||
| 47 | source distribution. | ||
| 48 | |||
| 49 | @bf{------------------ START OF THE LICENSE ------------------} | ||
| 50 | |||
| 51 | This program, @code{bzip2}, | ||
| 52 | and associated library @code{libbzip2}, are | ||
| 53 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | ||
| 54 | |||
| 55 | Redistribution and use in source and binary forms, with or without | ||
| 56 | modification, are permitted provided that the following conditions | ||
| 57 | are met: | ||
| 58 | @itemize @bullet | ||
| 59 | @item | ||
| 60 | Redistributions of source code must retain the above copyright | ||
| 61 | notice, this list of conditions and the following disclaimer. | ||
| 62 | @item | ||
| 63 | The origin of this software must not be misrepresented; you must | ||
| 64 | not claim that you wrote the original software. If you use this | ||
| 65 | software in a product, an acknowledgment in the product | ||
| 66 | documentation would be appreciated but is not required. | ||
| 67 | @item | ||
| 68 | Altered source versions must be plainly marked as such, and must | ||
| 69 | not be misrepresented as being the original software. | ||
| 70 | @item | ||
| 71 | The name of the author may not be used to endorse or promote | ||
| 72 | products derived from this software without specific prior written | ||
| 73 | permission. | ||
| 74 | @end itemize | ||
| 75 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | ||
| 76 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 77 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 78 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | ||
| 79 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 80 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | ||
| 81 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
| 82 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||
| 83 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 84 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 85 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 86 | |||
| 87 | Julian Seward, Cambridge, UK. | ||
| 88 | |||
| 89 | @code{jseward@@acm.org} | ||
| 90 | |||
| 91 | @code{bzip2}/@code{libbzip2} version 1.0.2 of 30 December 2001. | ||
| 92 | |||
| 93 | @bf{------------------ END OF THE LICENSE ------------------} | ||
| 94 | |||
| 95 | Web sites: | ||
| 96 | |||
| 97 | @code{http://sources.redhat.com/bzip2} | ||
| 98 | |||
| 99 | @code{http://www.cacheprof.org} | ||
| 100 | |||
| 101 | PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented | ||
| 102 | algorithms. However, I do not have the resources available to carry out | ||
| 103 | a full patent search. Therefore I cannot give any guarantee of the | ||
| 104 | above statement. | ||
| 105 | |||
| 106 | |||
| 107 | |||
| 108 | |||
| 109 | |||
| 110 | |||
| 111 | |||
| 112 | @chapter Introduction | ||
| 113 | |||
| 114 | @code{bzip2} compresses files using the Burrows-Wheeler | ||
| 115 | block-sorting text compression algorithm, and Huffman coding. | ||
| 116 | Compression is generally considerably better than that | ||
| 117 | achieved by more conventional LZ77/LZ78-based compressors, | ||
| 118 | and approaches the performance of the PPM family of statistical compressors. | ||
| 119 | |||
| 120 | @code{bzip2} is built on top of @code{libbzip2}, a flexible library | ||
| 121 | for handling compressed data in the @code{bzip2} format. This manual | ||
| 122 | describes both how to use the program and | ||
| 123 | how to work with the library interface. Most of the | ||
| 124 | manual is devoted to this library, not the program, | ||
| 125 | which is good news if your interest is only in the program. | ||
| 126 | |||
| 127 | Chapter 2 describes how to use @code{bzip2}; this is the only part | ||
| 128 | you need to read if you just want to know how to operate the program. | ||
| 129 | Chapter 3 describes the programming interfaces in detail, and | ||
| 130 | Chapter 4 records some miscellaneous notes which I thought | ||
| 131 | ought to be recorded somewhere. | ||
| 132 | |||
| 133 | |||
| 134 | @chapter How to use @code{bzip2} | ||
| 135 | |||
| 136 | This chapter contains a copy of the @code{bzip2} man page, | ||
| 137 | and nothing else. | ||
| 138 | |||
| 139 | @quotation | ||
| 140 | |||
| 141 | @unnumberedsubsubsec NAME | ||
| 142 | @itemize | ||
| 143 | @item @code{bzip2}, @code{bunzip2} | ||
| 144 | - a block-sorting file compressor, v1.0.2 | ||
| 145 | @item @code{bzcat} | ||
| 146 | - decompresses files to stdout | ||
| 147 | @item @code{bzip2recover} | ||
| 148 | - recovers data from damaged bzip2 files | ||
| 149 | @end itemize | ||
| 150 | |||
| 151 | @unnumberedsubsubsec SYNOPSIS | ||
| 152 | @itemize | ||
| 153 | @item @code{bzip2} [ -cdfkqstvzVL123456789 ] [ filenames ... ] | ||
| 154 | @item @code{bunzip2} [ -fkvsVL ] [ filenames ... ] | ||
| 155 | @item @code{bzcat} [ -s ] [ filenames ... ] | ||
| 156 | @item @code{bzip2recover} filename | ||
| 157 | @end itemize | ||
| 158 | |||
| 159 | @unnumberedsubsubsec DESCRIPTION | ||
| 160 | |||
| 161 | @code{bzip2} compresses files using the Burrows-Wheeler block sorting | ||
| 162 | text compression algorithm, and Huffman coding. Compression is | ||
| 163 | generally considerably better than that achieved by more conventional | ||
| 164 | LZ77/LZ78-based compressors, and approaches the performance of the PPM | ||
| 165 | family of statistical compressors. | ||
| 166 | |||
| 167 | The command-line options are deliberately very similar to those of GNU | ||
| 168 | @code{gzip}, but they are not identical. | ||
| 169 | |||
| 170 | @code{bzip2} expects a list of file names to accompany the command-line | ||
| 171 | flags. Each file is replaced by a compressed version of itself, with | ||
| 172 | the name @code{original_name.bz2}. Each compressed file has the same | ||
| 173 | modification date, permissions, and, when possible, ownership as the | ||
| 174 | corresponding original, so that these properties can be correctly | ||
| 175 | restored at decompression time. File name handling is naive in the | ||
| 176 | sense that there is no mechanism for preserving original file names, | ||
| 177 | permissions, ownerships or dates in filesystems which lack these | ||
| 178 | concepts, or have serious file name length restrictions, such as MS-DOS. | ||
| 179 | |||
| 180 | @code{bzip2} and @code{bunzip2} will by default not overwrite existing | ||
| 181 | files. If you want this to happen, specify the @code{-f} flag. | ||
| 182 | |||
| 183 | If no file names are specified, @code{bzip2} compresses from standard | ||
| 184 | input to standard output. In this case, @code{bzip2} will decline to | ||
| 185 | write compressed output to a terminal, as this would be entirely | ||
| 186 | incomprehensible and therefore pointless. | ||
| 187 | |||
| 188 | @code{bunzip2} (or @code{bzip2 -d}) decompresses all | ||
| 189 | specified files. Files which were not created by @code{bzip2} | ||
| 190 | will be detected and ignored, and a warning issued. | ||
| 191 | @code{bzip2} attempts to guess the filename for the decompressed file | ||
| 192 | from that of the compressed file as follows: | ||
| 193 | @itemize | ||
| 194 | @item @code{filename.bz2 } becomes @code{filename} | ||
| 195 | @item @code{filename.bz } becomes @code{filename} | ||
| 196 | @item @code{filename.tbz2} becomes @code{filename.tar} | ||
| 197 | @item @code{filename.tbz } becomes @code{filename.tar} | ||
| 198 | @item @code{anyothername } becomes @code{anyothername.out} | ||
| 199 | @end itemize | ||
| 200 | If the file does not end in one of the recognised endings, | ||
| 201 | @code{.bz2}, @code{.bz}, | ||
| 202 | @code{.tbz2} or @code{.tbz}, @code{bzip2} complains that it cannot | ||
| 203 | guess the name of the original file, and uses the original name | ||
| 204 | with @code{.out} appended. | ||
| 205 | |||
| 206 | As with compression, supplying no | ||
| 207 | filenames causes decompression from standard input to standard output. | ||
| 208 | |||
| 209 | @code{bunzip2} will correctly decompress a file which is the | ||
| 210 | concatenation of two or more compressed files. The result is the | ||
| 211 | concatenation of the corresponding uncompressed files. Integrity | ||
| 212 | testing (@code{-t}) of concatenated compressed files is also supported. | ||
| 213 | |||
| 214 | You can also compress or decompress files to the standard output by | ||
| 215 | giving the @code{-c} flag. Multiple files may be compressed and | ||
| 216 | decompressed like this. The resulting outputs are fed sequentially to | ||
| 217 | stdout. Compression of multiple files in this manner generates a stream | ||
| 218 | containing multiple compressed file representations. Such a stream | ||
| 219 | can be decompressed correctly only by @code{bzip2} version 0.9.0 or | ||
| 220 | later. Earlier versions of @code{bzip2} will stop after decompressing | ||
| 221 | the first file in the stream. | ||
| 222 | |||
| 223 | @code{bzcat} (or @code{bzip2 -dc}) decompresses all specified files to | ||
| 224 | the standard output. | ||
| 225 | |||
| 226 | @code{bzip2} will read arguments from the environment variables | ||
| 227 | @code{BZIP2} and @code{BZIP}, in that order, and will process them | ||
| 228 | before any arguments read from the command line. This gives a | ||
| 229 | convenient way to supply default arguments. | ||
| 230 | |||
| 231 | Compression is always performed, even if the compressed file is slightly | ||
| 232 | larger than the original. Files of less than about one hundred bytes | ||
| 233 | tend to get larger, since the compression mechanism has a constant | ||
| 234 | overhead in the region of 50 bytes. Random data (including the output | ||
| 235 | of most file compressors) is coded at about 8.05 bits per byte, giving | ||
| 236 | an expansion of around 0.5%. | ||
| 237 | |||
| 238 | As a self-check for your protection, @code{bzip2} uses 32-bit CRCs to | ||
| 239 | make sure that the decompressed version of a file is identical to the | ||
| 240 | original. This guards against corruption of the compressed data, and | ||
| 241 | against undetected bugs in @code{bzip2} (hopefully very unlikely). The | ||
| 242 | chances of data corruption going undetected are microscopic, about one | ||
| 243 | chance in four billion for each file processed. Be aware, though, that | ||
| 244 | the check occurs upon decompression, so it can only tell you that | ||
| 245 | something is wrong. It can't help you recover the original uncompressed | ||
| 246 | data. You can use @code{bzip2recover} to try to recover data from | ||
| 247 | damaged files. | ||
| 248 | |||
| 249 | Return values: 0 for a normal exit, 1 for environmental problems (file | ||
| 250 | not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt | ||
| 251 | compressed file, 3 for an internal consistency error (eg, bug) which | ||
| 252 | caused @code{bzip2} to panic. | ||
| 253 | |||
| 254 | |||
| 255 | @unnumberedsubsubsec OPTIONS | ||
| 256 | @table @code | ||
| 257 | @item -c --stdout | ||
| 258 | Compress or decompress to standard output. | ||
| 259 | @item -d --decompress | ||
| 260 | Force decompression. @code{bzip2}, @code{bunzip2} and @code{bzcat} are | ||
| 261 | really the same program, and the decision about what actions to take is | ||
| 262 | done on the basis of which name is used. This flag overrides that | ||
| 263 | mechanism, and forces bzip2 to decompress. | ||
| 264 | @item -z --compress | ||
| 265 | The complement to @code{-d}: forces compression, regardless of the | ||
| 266 | invocation name. | ||
| 267 | @item -t --test | ||
| 268 | Check integrity of the specified file(s), but don't decompress them. | ||
| 269 | This really performs a trial decompression and throws away the result. | ||
| 270 | @item -f --force | ||
| 271 | Force overwrite of output files. Normally, @code{bzip2} will not overwrite | ||
| 272 | existing output files. Also forces @code{bzip2} to break hard links | ||
| 273 | to files, which it otherwise wouldn't do. | ||
| 274 | |||
| 275 | @code{bzip2} normally declines to decompress files which don't have the | ||
| 276 | correct magic header bytes. If forced (@code{-f}), however, it will | ||
| 277 | pass such files through unmodified. This is how GNU @code{gzip} | ||
| 278 | behaves. | ||
| 279 | @item -k --keep | ||
| 280 | Keep (don't delete) input files during compression | ||
| 281 | or decompression. | ||
| 282 | @item -s --small | ||
| 283 | Reduce memory usage, for compression, decompression and testing. Files | ||
| 284 | are decompressed and tested using a modified algorithm which only | ||
| 285 | requires 2.5 bytes per block byte. This means any file can be | ||
| 286 | decompressed in 2300k of memory, albeit at about half the normal speed. | ||
| 287 | |||
| 288 | During compression, @code{-s} selects a block size of 200k, which limits | ||
| 289 | memory use to around the same figure, at the expense of your compression | ||
| 290 | ratio. In short, if your machine is low on memory (8 megabytes or | ||
| 291 | less), use -s for everything. See MEMORY MANAGEMENT below. | ||
| 292 | @item -q --quiet | ||
| 293 | Suppress non-essential warning messages. Messages pertaining to | ||
| 294 | I/O errors and other critical events will not be suppressed. | ||
| 295 | @item -v --verbose | ||
| 296 | Verbose mode -- show the compression ratio for each file processed. | ||
| 297 | Further @code{-v}'s increase the verbosity level, spewing out lots of | ||
| 298 | information which is primarily of interest for diagnostic purposes. | ||
| 299 | @item -L --license -V --version | ||
| 300 | Display the software version, license terms and conditions. | ||
| 301 | @item -1 (or --fast) to -9 (or --best) | ||
| 302 | Set the block size to 100 k, 200 k .. 900 k when compressing. Has no | ||
| 303 | effect when decompressing. See MEMORY MANAGEMENT below. | ||
| 304 | The @code{--fast} and @code{--best} aliases are primarily for GNU | ||
| 305 | @code{gzip} compatibility. In particular, @code{--fast} doesn't make | ||
| 306 | things significantly faster. And @code{--best} merely selects the | ||
| 307 | default behaviour. | ||
| 308 | @item -- | ||
| 309 | Treats all subsequent arguments as file names, even if they start | ||
| 310 | with a dash. This is so you can handle files with names beginning | ||
| 311 | with a dash, for example: @code{bzip2 -- -myfilename}. | ||
| 312 | @item --repetitive-fast | ||
| 313 | @item --repetitive-best | ||
| 314 | These flags are redundant in versions 0.9.5 and above. They provided | ||
| 315 | some coarse control over the behaviour of the sorting algorithm in | ||
| 316 | earlier versions, which was sometimes useful. 0.9.5 and above have an | ||
| 317 | improved algorithm which renders these flags irrelevant. | ||
| 318 | @end table | ||
| 319 | |||
| 320 | |||
| 321 | @unnumberedsubsubsec MEMORY MANAGEMENT | ||
| 322 | |||
| 323 | @code{bzip2} compresses large files in blocks. The block size affects | ||
| 324 | both the compression ratio achieved, and the amount of memory needed for | ||
| 325 | compression and decompression. The flags @code{-1} through @code{-9} | ||
| 326 | specify the block size to be 100,000 bytes through 900,000 bytes (the | ||
| 327 | default) respectively. At decompression time, the block size used for | ||
| 328 | compression is read from the header of the compressed file, and | ||
| 329 | @code{bunzip2} then allocates itself just enough memory to decompress | ||
| 330 | the file. Since block sizes are stored in compressed files, it follows | ||
| 331 | that the flags @code{-1} to @code{-9} are irrelevant to and so ignored | ||
| 332 | during decompression. | ||
| 333 | |||
| 334 | Compression and decompression requirements, in bytes, can be estimated | ||
| 335 | as: | ||
| 336 | @example | ||
| 337 | Compression: 400k + ( 8 x block size ) | ||
| 338 | |||
| 339 | Decompression: 100k + ( 4 x block size ), or | ||
| 340 | 100k + ( 2.5 x block size ) | ||
| 341 | @end example | ||
| 342 | Larger block sizes give rapidly diminishing marginal returns. Most of | ||
| 343 | the compression comes from the first two or three hundred k of block | ||
| 344 | size, a fact worth bearing in mind when using @code{bzip2} on small machines. | ||
| 345 | It is also important to appreciate that the decompression memory | ||
| 346 | requirement is set at compression time by the choice of block size. | ||
| 347 | |||
| 348 | For files compressed with the default 900k block size, @code{bunzip2} | ||
| 349 | will require about 3700 kbytes to decompress. To support decompression | ||
| 350 | of any file on a 4 megabyte machine, @code{bunzip2} has an option to | ||
| 351 | decompress using approximately half this amount of memory, about 2300 | ||
| 352 | kbytes. Decompression speed is also halved, so you should use this | ||
| 353 | option only where necessary. The relevant flag is @code{-s}. | ||
| 354 | |||
| 355 | In general, try to use the largest block size memory constraints allow, | ||
| 356 | since that maximises the compression achieved. Compression and | ||
| 357 | decompression speed are virtually unaffected by block size. | ||
| 358 | |||
| 359 | Another significant point applies to files which fit in a single block | ||
| 360 | -- that means most files you'd encounter using a large block size. The | ||
| 361 | amount of real memory touched is proportional to the size of the file, | ||
| 362 | since the file is smaller than a block. For example, compressing a file | ||
| 363 | 20,000 bytes long with the flag @code{-9} will cause the compressor to | ||
| 364 | allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 | ||
| 365 | kbytes of it. Similarly, the decompressor will allocate 3700k but only | ||
| 366 | touch 100k + 20000 * 4 = 180 kbytes. | ||
| 367 | |||
| 368 | Here is a table which summarises the maximum memory usage for different | ||
| 369 | block sizes. Also recorded is the total compressed size for 14 files of | ||
| 370 | the Calgary Text Compression Corpus totalling 3,141,622 bytes. This | ||
| 371 | column gives some feel for how compression varies with block size. | ||
| 372 | These figures tend to understate the advantage of larger block sizes for | ||
| 373 | larger files, since the Corpus is dominated by smaller files. | ||
| 374 | @example | ||
| 375 | Compress Decompress Decompress Corpus | ||
| 376 | Flag usage usage -s usage Size | ||
| 377 | |||
| 378 | -1 1200k 500k 350k 914704 | ||
| 379 | -2 2000k 900k 600k 877703 | ||
| 380 | -3 2800k 1300k 850k 860338 | ||
| 381 | -4 3600k 1700k 1100k 846899 | ||
| 382 | -5 4400k 2100k 1350k 845160 | ||
| 383 | -6 5200k 2500k 1600k 838626 | ||
| 384 | -7 6100k 2900k 1850k 834096 | ||
| 385 | -8 6800k 3300k 2100k 828642 | ||
| 386 | -9 7600k 3700k 2350k 828642 | ||
| 387 | @end example | ||
| 388 | |||
| 389 | @unnumberedsubsubsec RECOVERING DATA FROM DAMAGED FILES | ||
| 390 | |||
| 391 | @code{bzip2} compresses files in blocks, usually 900kbytes long. Each | ||
| 392 | block is handled independently. If a media or transmission error causes | ||
| 393 | a multi-block @code{.bz2} file to become damaged, it may be possible to | ||
| 394 | recover data from the undamaged blocks in the file. | ||
| 395 | |||
| 396 | The compressed representation of each block is delimited by a 48-bit | ||
| 397 | pattern, which makes it possible to find the block boundaries with | ||
| 398 | reasonable certainty. Each block also carries its own 32-bit CRC, so | ||
| 399 | damaged blocks can be distinguished from undamaged ones. | ||
| 400 | |||
| 401 | @code{bzip2recover} is a simple program whose purpose is to search for | ||
| 402 | blocks in @code{.bz2} files, and write each block out into its own | ||
| 403 | @code{.bz2} file. You can then use @code{bzip2 -t} to test the | ||
| 404 | integrity of the resulting files, and decompress those which are | ||
| 405 | undamaged. | ||
| 406 | |||
| 407 | @code{bzip2recover} | ||
| 408 | takes a single argument, the name of the damaged file, and writes a | ||
| 409 | number of files @code{rec00001file.bz2}, @code{rec00002file.bz2}, etc, | ||
| 410 | containing the extracted blocks. The output filenames are designed so | ||
| 411 | that the use of wildcards in subsequent processing -- for example, | ||
| 412 | @code{bzip2 -dc rec*file.bz2 > recovered_data} -- processes the files in | ||
| 413 | the correct order. | ||
| 414 | |||
| 415 | @code{bzip2recover} should be of most use dealing with large @code{.bz2} | ||
| 416 | files, as these will contain many blocks. It is clearly futile to use | ||
| 417 | it on damaged single-block files, since a damaged block cannot be | ||
| 418 | recovered. If you wish to minimise any potential data loss through | ||
| 419 | media or transmission errors, you might consider compressing with a | ||
| 420 | smaller block size. | ||
| 421 | |||
| 422 | |||
| 423 | @unnumberedsubsubsec PERFORMANCE NOTES | ||
| 424 | |||
| 425 | The sorting phase of compression gathers together similar strings in the | ||
| 426 | file. Because of this, files containing very long runs of repeated | ||
| 427 | symbols, like "aabaabaabaab ..." (repeated several hundred times) may | ||
| 428 | compress more slowly than normal. Versions 0.9.5 and above fare much | ||
| 429 | better than previous versions in this respect. The ratio between | ||
| 430 | worst-case and average-case compression time is in the region of 10:1. | ||
| 431 | For previous versions, this figure was more like 100:1. You can use the | ||
| 432 | @code{-vvvv} option to monitor progress in great detail, if you want. | ||
| 433 | |||
| 434 | Decompression speed is unaffected by these phenomena. | ||
| 435 | |||
| 436 | @code{bzip2} usually allocates several megabytes of memory to operate | ||
| 437 | in, and then charges all over it in a fairly random fashion. This means | ||
| 438 | that performance, both for compressing and decompressing, is largely | ||
| 439 | determined by the speed at which your machine can service cache misses. | ||
| 440 | Because of this, small changes to the code to reduce the miss rate have | ||
| 441 | been observed to give disproportionately large performance improvements. | ||
| 442 | I imagine @code{bzip2} will perform best on machines with very large | ||
| 443 | caches. | ||
| 444 | |||
| 445 | |||
| 446 | @unnumberedsubsubsec CAVEATS | ||
| 447 | |||
| 448 | I/O error messages are not as helpful as they could be. @code{bzip2} | ||
| 449 | tries hard to detect I/O errors and exit cleanly, but the details of | ||
| 450 | what the problem is sometimes seem rather misleading. | ||
| 451 | |||
| 452 | This manual page pertains to version 1.0.2 of @code{bzip2}. Compressed | ||
| 453 | data created by this version is entirely forwards and backwards | ||
| 454 | compatible with the previous public releases, versions 0.1pl2, 0.9.0, | ||
| 455 | 0.9.5, 1.0.0 and 1.0.1, but with the following exception: 0.9.0 and | ||
| 456 | above can correctly decompress multiple concatenated compressed files. | ||
| 457 | 0.1pl2 cannot do this; it will stop after decompressing just the first | ||
| 458 | file in the stream. | ||
| 459 | |||
| 460 | @code{bzip2recover} versions prior to this one, 1.0.2, used 32-bit | ||
| 461 | integers to represent bit positions in compressed files, so it could not | ||
| 462 | handle compressed files more than 512 megabytes long. Version 1.0.2 and | ||
| 463 | above uses 64-bit ints on some platforms which support them (GNU | ||
| 464 | supported targets, and Windows). To establish whether or not | ||
| 465 | @code{bzip2recover} was built with such a limitation, run it without | ||
| 466 | arguments. In any event you can build yourself an unlimited version if | ||
| 467 | you can recompile it with @code{MaybeUInt64} set to be an unsigned | ||
| 468 | 64-bit integer. | ||
| 469 | |||
| 470 | |||
| 471 | |||
| 472 | @unnumberedsubsubsec AUTHOR | ||
| 473 | Julian Seward, @code{jseward@@acm.org}. | ||
| 474 | |||
| 475 | @code{http://sources.redhat.com/bzip2} | ||
| 476 | |||
| 477 | The ideas embodied in @code{bzip2} are due to (at least) the following | ||
| 478 | people: Michael Burrows and David Wheeler (for the block sorting | ||
| 479 | transformation), David Wheeler (again, for the Huffman coder), Peter | ||
| 480 | Fenwick (for the structured coding model in the original @code{bzip}, | ||
| 481 | and many refinements), and Alistair Moffat, Radford Neal and Ian Witten | ||
| 482 | (for the arithmetic coder in the original @code{bzip}). I am much | ||
| 483 | indebted for their help, support and advice. See the manual in the | ||
| 484 | source distribution for pointers to sources of documentation. Christian | ||
| 485 | von Roques encouraged me to look for faster sorting algorithms, so as to | ||
| 486 | speed up compression. Bela Lubkin encouraged me to improve the | ||
| 487 | worst-case compression performance. The @code{bz*} scripts are derived | ||
| 488 | from those of GNU @code{gzip}. Many people sent patches, helped with | ||
| 489 | portability problems, lent machines, gave advice and were generally | ||
| 490 | helpful. | ||
| 491 | |||
| 492 | @end quotation | ||
| 493 | |||
| 494 | |||
| 495 | |||
| 496 | |||
| 497 | @chapter Programming with @code{libbzip2} | ||
| 498 | |||
| 499 | This chapter describes the programming interface to @code{libbzip2}. | ||
| 500 | |||
| 501 | For general background information, particularly about memory | ||
| 502 | use and performance aspects, you'd be well advised to read Chapter 2 | ||
| 503 | as well. | ||
| 504 | |||
| 505 | @section Top-level structure | ||
| 506 | |||
| 507 | @code{libbzip2} is a flexible library for compressing and decompressing | ||
| 508 | data in the @code{bzip2} data format. Although packaged as a single | ||
| 509 | entity, it helps to regard the library as three separate parts: the low | ||
| 510 | level interface, the high level interface, and some utility | ||
| 511 | functions. | ||
| 512 | |||
| 513 | The structure of @code{libbzip2}'s interfaces is similar to | ||
| 514 | that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib} | ||
| 515 | library. | ||
| 516 | |||
| 517 | All externally visible symbols have names beginning @code{BZ2_}. | ||
| 518 | This is new in version 1.0. The intention is to minimise pollution | ||
| 519 | of the namespaces of library clients. | ||
| 520 | |||
| 521 | @subsection Low-level summary | ||
| 522 | |||
| 523 | This interface provides services for compressing and decompressing | ||
| 524 | data in memory. There's no provision for dealing with files, streams | ||
| 525 | or any other I/O mechanisms, just straight memory-to-memory work. | ||
| 526 | In fact, this part of the library can be compiled without inclusion | ||
| 527 | of @code{stdio.h}, which may be helpful for embedded applications. | ||
| 528 | |||
| 529 | The low-level part of the library has no global variables and | ||
| 530 | is therefore thread-safe. | ||
| 531 | |||
| 532 | Six routines make up the low level interface: | ||
| 533 | @code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd} | ||
| 534 | for compression, | ||
| 535 | and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress} | ||
| 536 | and @code{BZ2_bzDecompressEnd} for decompression. | ||
| 537 | The @code{*Init} functions allocate | ||
| 538 | memory for compression/decompression and do other | ||
| 539 | initialisations, whilst the @code{*End} functions close down operations | ||
| 540 | and release memory. | ||
| 541 | |||
| 542 | The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}. | ||
| 543 | These compress and decompress data from a user-supplied input buffer | ||
| 544 | to a user-supplied output buffer. These buffers can be any size; | ||
| 545 | arbitrary quantities of data are handled by making repeated calls | ||
| 546 | to these functions. This is a flexible mechanism allowing a | ||
| 547 | consumer-pull style of activity, or producer-push, or a mixture of | ||
| 548 | both. | ||
| 549 | |||
| 550 | |||
| 551 | |||
| 552 | @subsection High-level summary | ||
| 553 | |||
| 554 | This interface provides some handy wrappers around the low-level | ||
| 555 | interface to facilitate reading and writing @code{bzip2} format | ||
| 556 | files (@code{.bz2} files). The routines provide hooks to facilitate | ||
| 557 | reading files in which the @code{bzip2} data stream is embedded | ||
| 558 | within some larger-scale file structure, or where there are | ||
| 559 | multiple @code{bzip2} data streams concatenated end-to-end. | ||
| 560 | |||
| 561 | For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead}, | ||
| 562 | @code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied. For | ||
| 563 | writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and | ||
| 564 | @code{BZ2_bzWriteClose} are available. | ||
| 565 | |||
| 566 | As with the low-level library, no global variables are used | ||
| 567 | so the library is per se thread-safe. However, if I/O errors | ||
| 568 | occur whilst reading or writing the underlying compressed files, | ||
| 569 | you may have to consult @code{errno} to determine the cause of | ||
| 570 | the error. In that case, you'd need a C library which correctly | ||
| 571 | supports @code{errno} in a multithreaded environment. | ||
| 572 | |||
| 573 | To make the library a little simpler and more portable, | ||
| 574 | @code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file | ||
| 575 | handles (@code{FILE*}s) which have previously been opened for reading or | ||
| 576 | writing respectively. That avoids portability problems associated with | ||
| 577 | file operations and file attributes, whilst not being much of an | ||
| 578 | imposition on the programmer. | ||
| 579 | |||
| 580 | |||
| 581 | |||
| 582 | @subsection Utility functions summary | ||
| 583 | For very simple needs, @code{BZ2_bzBuffToBuffCompress} and | ||
| 584 | @code{BZ2_bzBuffToBuffDecompress} are provided. These compress | ||
| 585 | data in memory from one buffer to another buffer in a single | ||
| 586 | function call. You should assess whether these functions | ||
| 587 | fulfill your memory-to-memory compression/decompression | ||
| 588 | requirements before investing effort in understanding the more | ||
| 589 | general but more complex low-level interface. | ||
| 590 | |||
| 591 | Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} / | ||
| 592 | @code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to | ||
| 593 | give better @code{zlib} compatibility. These functions are | ||
| 594 | @code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, | ||
| 595 | @code{BZ2_bzclose}, | ||
| 596 | @code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. You may find these functions | ||
| 597 | more convenient for simple file reading and writing, than those in the | ||
| 598 | high-level interface. These functions are not (yet) officially part of | ||
| 599 | the library, and are minimally documented here. If they break, you | ||
| 600 | get to keep all the pieces. I hope to document them properly when time | ||
| 601 | permits. | ||
| 602 | |||
| 603 | Yoshioka also contributed modifications to allow the library to be | ||
| 604 | built as a Windows DLL. | ||
| 605 | |||
| 606 | |||
| 607 | @section Error handling | ||
| 608 | |||
| 609 | The library is designed to recover cleanly in all situations, including | ||
| 610 | the worst-case situation of decompressing random data. I'm not | ||
| 611 | 100% sure that it can always do this, so you might want to add | ||
| 612 | a signal handler to catch segmentation violations during decompression | ||
| 613 | if you are feeling especially paranoid. I would be interested in | ||
| 614 | hearing more about the robustness of the library to corrupted | ||
| 615 | compressed data. | ||
| 616 | |||
| 617 | Version 1.0 is much more robust in this respect than | ||
| 618 | 0.9.0 or 0.9.5. Investigations with Checker (a tool for | ||
| 619 | detecting problems with memory management, similar to Purify) | ||
| 620 | indicate that, at least for the few files I tested, all single-bit | ||
| 621 | errors in the compressed data are caught properly, with no | ||
| 622 | segmentation faults, no reads of uninitialised data and no | ||
| 623 | out of range reads or writes. So it's certainly much improved, | ||
| 624 | although I wouldn't claim it to be totally bombproof. | ||
| 625 | |||
| 626 | The file @code{bzlib.h} contains all definitions needed to use | ||
| 627 | the library. In particular, you should definitely not include | ||
| 628 | @code{bzlib_private.h}. | ||
| 629 | |||
| 630 | In @code{bzlib.h}, the various return values are defined. The following | ||
| 631 | list is not intended as an exhaustive description of the circumstances | ||
| 632 | in which a given value may be returned -- those descriptions are given | ||
| 633 | later. Rather, it is intended to convey the rough meaning of each | ||
| 634 | return value. The first five values are normal and not intended to | ||
| 635 | denote an error situation. | ||
| 636 | @table @code | ||
| 637 | @item BZ_OK | ||
| 638 | The requested action was completed successfully. | ||
| 639 | @item BZ_RUN_OK | ||
| 640 | @itemx BZ_FLUSH_OK | ||
| 641 | @itemx BZ_FINISH_OK | ||
| 642 | In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action | ||
| 643 | was completed successfully. | ||
| 644 | @item BZ_STREAM_END | ||
| 645 | Compression of data was completed, or the logical stream end was | ||
| 646 | detected during decompression. | ||
| 647 | @end table | ||
| 648 | |||
| 649 | The following return values indicate an error of some kind. | ||
| 650 | @table @code | ||
| 651 | @item BZ_CONFIG_ERROR | ||
| 652 | Indicates that the library has been improperly compiled on your | ||
| 653 | platform -- a major configuration error. Specifically, it means | ||
| 654 | that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)} | ||
| 655 | are not 1, 2 and 4 respectively, as they should be. Note that the | ||
| 656 | library should still work properly on 64-bit platforms which follow | ||
| 657 | the LP64 programming model -- that is, where @code{sizeof(long)} | ||
| 658 | and @code{sizeof(void*)} are 8. Under LP64, @code{sizeof(int)} is | ||
| 659 | still 4, so @code{libbzip2}, which doesn't use the @code{long} type, | ||
| 660 | is OK. | ||
| 661 | @item BZ_SEQUENCE_ERROR | ||
| 662 | When using the library, it is important to call the functions in the | ||
| 663 | correct sequence and with data structures (buffers etc) in the correct | ||
| 664 | states. @code{libbzip2} checks as much as it can to ensure this is | ||
| 665 | happening, and returns @code{BZ_SEQUENCE_ERROR} if not. Code which | ||
| 666 | complies precisely with the function semantics, as detailed below, | ||
| 667 | should never receive this value; such an event denotes buggy code | ||
| 668 | which you should investigate. | ||
| 669 | @item BZ_PARAM_ERROR | ||
| 670 | Returned when a parameter to a function call is out of range | ||
| 671 | or otherwise manifestly incorrect. As with @code{BZ_SEQUENCE_ERROR}, | ||
| 672 | this denotes a bug in the client code. The distinction between | ||
| 673 | @code{BZ_PARAM_ERROR} and @code{BZ_SEQUENCE_ERROR} is a bit hazy, but still worth | ||
| 674 | making. | ||
| 675 | @item BZ_MEM_ERROR | ||
| 676 | Returned when a request to allocate memory failed. Note that the | ||
| 677 | quantity of memory needed to decompress a stream cannot be determined | ||
| 678 | until the stream's header has been read. So @code{BZ2_bzDecompress} and | ||
| 679 | @code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of | ||
| 680 | the compressed data has been read. The same is not true for | ||
| 681 | compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have | ||
| 682 | successfully completed, @code{BZ_MEM_ERROR} cannot occur. | ||
| 683 | @item BZ_DATA_ERROR | ||
| 684 | Returned when a data integrity error is detected during decompression. | ||
| 685 | Most importantly, this means when stored and computed CRCs for the | ||
| 686 | data do not match. This value is also returned upon detection of any | ||
| 687 | other anomaly in the compressed data. | ||
| 688 | @item BZ_DATA_ERROR_MAGIC | ||
| 689 | As a special case of @code{BZ_DATA_ERROR}, it is sometimes useful to | ||
| 690 | know when the compressed stream does not start with the correct | ||
| 691 | magic bytes (@code{'B' 'Z' 'h'}). | ||
| 692 | @item BZ_IO_ERROR | ||
| 693 | Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error | ||
| 694 | reading or writing in the compressed file, and by @code{BZ2_bzReadOpen} | ||
| 695 | and @code{BZ2_bzWriteOpen} for attempts to use a file for which the | ||
| 696 | error indicator (viz, @code{ferror(f)}) is set. | ||
| 697 | On receipt of @code{BZ_IO_ERROR}, the caller should consult | ||
| 698 | @code{errno} and/or @code{perror} to acquire operating-system | ||
| 699 | specific information about the problem. | ||
| 700 | @item BZ_UNEXPECTED_EOF | ||
| 701 | Returned by @code{BZ2_bzRead} when the compressed file finishes | ||
| 702 | before the logical end of stream is detected. | ||
| 703 | @item BZ_OUTBUFF_FULL | ||
| 704 | Returned by @code{BZ2_bzBuffToBuffCompress} and | ||
| 705 | @code{BZ2_bzBuffToBuffDecompress} to indicate that the output data | ||
| 706 | will not fit into the output buffer provided. | ||
| 707 | @end table | ||
| 708 | |||
| 709 | |||
| 710 | |||
| 711 | @section Low-level interface | ||
| 712 | |||
| 713 | @subsection @code{BZ2_bzCompressInit} | ||
| 714 | @example | ||
| 715 | typedef | ||
| 716 | struct @{ | ||
| 717 | char *next_in; | ||
| 718 | unsigned int avail_in; | ||
| 719 | unsigned int total_in_lo32; | ||
| 720 | unsigned int total_in_hi32; | ||
| 721 | |||
| 722 | char *next_out; | ||
| 723 | unsigned int avail_out; | ||
| 724 | unsigned int total_out_lo32; | ||
| 725 | unsigned int total_out_hi32; | ||
| 726 | |||
| 727 | void *state; | ||
| 728 | |||
| 729 | void *(*bzalloc)(void *,int,int); | ||
| 730 | void (*bzfree)(void *,void *); | ||
| 731 | void *opaque; | ||
| 732 | @} | ||
| 733 | bz_stream; | ||
| 734 | |||
| 735 | int BZ2_bzCompressInit ( bz_stream *strm, | ||
| 736 | int blockSize100k, | ||
| 737 | int verbosity, | ||
| 738 | int workFactor ); | ||
| 739 | |||
| 740 | @end example | ||
| 741 | |||
| 742 | Prepares for compression. The @code{bz_stream} structure | ||
| 743 | holds all data pertaining to the compression activity. | ||
| 744 | A @code{bz_stream} structure should be allocated and initialised | ||
| 745 | prior to the call. | ||
| 746 | The fields of @code{bz_stream} | ||
| 747 | comprise the entirety of the user-visible data. @code{state} | ||
| 748 | is a pointer to the private data structures required for compression. | ||
| 749 | |||
| 750 | Custom memory allocators are supported, via fields @code{bzalloc}, | ||
| 751 | @code{bzfree}, | ||
| 752 | and @code{opaque}. The value | ||
| 753 | @code{opaque} is passed as the first argument to | ||
| 754 | all calls to @code{bzalloc} and @code{bzfree}, but is | ||
| 755 | otherwise ignored by the library. | ||
| 756 | The call @code{bzalloc ( opaque, n, m )} is expected to return a | ||
| 757 | pointer @code{p} to | ||
| 758 | @code{n * m} bytes of memory, and @code{bzfree ( opaque, p )} | ||
| 759 | should free | ||
| 760 | that memory. | ||
| 761 | |||
| 762 | If you don't want to use a custom memory allocator, set @code{bzalloc}, | ||
| 763 | @code{bzfree} and | ||
| 764 | @code{opaque} to @code{NULL}, | ||
| 765 | and the library will then use the standard @code{malloc}/@code{free} | ||
| 766 | routines. | ||
| 767 | |||
| 768 | Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc}, | ||
| 769 | @code{bzfree} and @code{opaque} should | ||
| 770 | be filled appropriately, as just described. Upon return, the internal | ||
| 771 | state will have been allocated and initialised, and @code{total_in_lo32}, | ||
| 772 | @code{total_in_hi32}, @code{total_out_lo32} and | ||
| 773 | @code{total_out_hi32} will have been set to zero. | ||
| 774 | These four fields are used by the library | ||
| 775 | to inform the caller of the total amount of data passed into and out of | ||
| 776 | the library, respectively. You should not try to change them. | ||
| 777 | As of version 1.0, 64-bit counts are maintained, even on 32-bit | ||
| 778 | platforms, using the @code{_hi32} fields to store the upper 32 bits | ||
| 779 | of the count. So, for example, the total amount of data in | ||
| 780 | is @code{(total_in_hi32 << 32) + total_in_lo32}. | ||
| 781 | |||
| 782 | Parameter @code{blockSize100k} specifies the block size to be used for | ||
| 783 | compression. It should be a value between 1 and 9 inclusive, and the | ||
| 784 | actual block size used is 100000 x this figure. 9 gives the best | ||
| 785 | compression but takes most memory. | ||
| 786 | |||
| 787 | Parameter @code{verbosity} should be set to a number between 0 and 4 | ||
| 788 | inclusive. 0 is silent, and greater numbers give increasingly verbose | ||
| 789 | monitoring/debugging output. If the library has been compiled with | ||
| 790 | @code{-DBZ_NO_STDIO}, no such output will appear for any verbosity | ||
| 791 | setting. | ||
| 792 | |||
| 793 | Parameter @code{workFactor} controls how the compression phase behaves | ||
| 794 | when presented with worst case, highly repetitive, input data. If | ||
| 795 | compression runs into difficulties caused by repetitive data, the | ||
| 796 | library switches from the standard sorting algorithm to a fallback | ||
| 797 | algorithm. The fallback is slower than the standard algorithm by | ||
| 798 | perhaps a factor of three, but always behaves reasonably, no matter how | ||
| 799 | bad the input. | ||
| 800 | |||
| 801 | Lower values of @code{workFactor} reduce the amount of effort the | ||
| 802 | standard algorithm will expend before resorting to the fallback. You | ||
| 803 | should set this parameter carefully; too low, and many inputs will be | ||
| 804 | handled by the fallback algorithm and so compress rather slowly, too | ||
| 805 | high, and your average-to-worst case compression times can become very | ||
| 806 | large. The default value of 30 gives reasonable behaviour over a wide | ||
| 807 | range of circumstances. | ||
| 808 | |||
| 809 | Allowable values range from 0 to 250 inclusive. 0 is a special case, | ||
| 810 | equivalent to using the default value of 30. | ||
| 811 | |||
| 812 | Note that the compressed output generated is the same regardless of | ||
| 813 | whether or not the fallback algorithm is used. | ||
| 814 | |||
| 815 | Be aware also that this parameter may disappear entirely in future | ||
| 816 | versions of the library. In principle it should be possible to devise a | ||
| 817 | good way to automatically choose which algorithm to use. Such a | ||
| 818 | mechanism would render the parameter obsolete. | ||
| 819 | |||
| 820 | Possible return values: | ||
| 821 | @display | ||
| 822 | @code{BZ_CONFIG_ERROR} | ||
| 823 | if the library has been mis-compiled | ||
| 824 | @code{BZ_PARAM_ERROR} | ||
| 825 | if @code{strm} is @code{NULL} | ||
| 826 | or @code{blockSize100k} < 1 or @code{blockSize100k} > 9 | ||
| 827 | or @code{verbosity} < 0 or @code{verbosity} > 4 | ||
| 828 | or @code{workFactor} < 0 or @code{workFactor} > 250 | ||
| 829 | @code{BZ_MEM_ERROR} | ||
| 830 | if not enough memory is available | ||
| 831 | @code{BZ_OK} | ||
| 832 | otherwise | ||
| 833 | @end display | ||
| 834 | Allowable next actions: | ||
| 835 | @display | ||
| 836 | @code{BZ2_bzCompress} | ||
| 837 | if @code{BZ_OK} is returned | ||
| 838 | no specific action needed in case of error | ||
| 839 | @end display | ||
| 840 | |||
| 841 | @subsection @code{BZ2_bzCompress} | ||
| 842 | @example | ||
| 843 | int BZ2_bzCompress ( bz_stream *strm, int action ); | ||
| 844 | @end example | ||
| 845 | Provides more input and/or output buffer space for the library. The | ||
| 846 | caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to | ||
| 847 | transfer data between them. | ||
| 848 | |||
| 849 | Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at | ||
| 850 | the data to be compressed, and @code{avail_in} should indicate how many | ||
| 851 | bytes the library may read. @code{BZ2_bzCompress} updates @code{next_in}, | ||
| 852 | @code{avail_in} and @code{total_in} to reflect the number of bytes it | ||
| 853 | has read. | ||
| 854 | |||
| 855 | Similarly, @code{next_out} should point to a buffer in which the | ||
| 856 | compressed data is to be placed, with @code{avail_out} indicating how | ||
| 857 | much output space is available. @code{BZ2_bzCompress} updates | ||
| 858 | @code{next_out}, @code{avail_out} and @code{total_out} to reflect the | ||
| 859 | number of bytes output. | ||
| 860 | |||
| 861 | You may provide and remove as little or as much data as you like on each | ||
| 862 | call of @code{BZ2_bzCompress}. In the limit, it is acceptable to supply and | ||
| 863 | remove data one byte at a time, although this would be terribly | ||
| 864 | inefficient. You should always ensure that at least one byte of output | ||
| 865 | space is available at each call. | ||
| 866 | |||
| 867 | A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the | ||
| 868 | compressed stream. | ||
| 869 | |||
| 870 | Conceptually, a compressed stream can be in one of four states: IDLE, | ||
| 871 | RUNNING, FLUSHING and FINISHING. Before initialisation | ||
| 872 | (@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a | ||
| 873 | stream is regarded as IDLE. | ||
| 874 | |||
| 875 | Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the | ||
| 876 | RUNNING state. Subsequent calls to @code{BZ2_bzCompress} should pass | ||
| 877 | @code{BZ_RUN} as the requested action; other actions are illegal and | ||
| 878 | will result in @code{BZ_SEQUENCE_ERROR}. | ||
| 879 | |||
| 880 | At some point, the calling program will have provided all the input data | ||
| 881 | it wants to. It will then want to finish up -- in effect, asking the | ||
| 882 | library to process any data it might have buffered internally. In this | ||
| 883 | state, @code{BZ2_bzCompress} will no longer attempt to read data from | ||
| 884 | @code{next_in}, but it will want to write data to @code{next_out}. | ||
| 885 | Because the output buffer supplied by the user can be arbitrarily small, | ||
| 886 | the finishing-up operation cannot necessarily be done with a single call | ||
| 887 | of @code{BZ2_bzCompress}. | ||
| 888 | |||
| 889 | Instead, the calling program passes @code{BZ_FINISH} as an action to | ||
| 890 | @code{BZ2_bzCompress}. This changes the stream's state to FINISHING. Any | ||
| 891 | remaining input (ie, @code{next_in[0 .. avail_in-1]}) is compressed and | ||
| 892 | transferred to the output buffer. To do this, @code{BZ2_bzCompress} must be | ||
| 893 | called repeatedly until all the output has been consumed. At that | ||
| 894 | point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's | ||
| 895 | state is set back to IDLE. @code{BZ2_bzCompressEnd} should then be | ||
| 896 | called. | ||
| 897 | |||
| 898 | Just to make sure the calling program does not cheat, the library makes | ||
| 899 | a note of @code{avail_in} at the time of the first call to | ||
| 900 | @code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the | ||
| 901 | time the program has announced its intention to not supply any more | ||
| 902 | input). By comparing this value with that of @code{avail_in} over | ||
| 903 | subsequent calls to @code{BZ2_bzCompress}, the library can detect any | ||
| 904 | attempts to slip in more data to compress. Any calls for which this is | ||
| 905 | detected will return @code{BZ_SEQUENCE_ERROR}. This indicates a | ||
| 906 | programming mistake which should be corrected. | ||
| 907 | |||
| 908 | Instead of asking to finish, the calling program may ask | ||
| 909 | @code{BZ2_bzCompress} to take all the remaining input, compress it and | ||
| 910 | terminate the current (Burrows-Wheeler) compression block. This could | ||
| 911 | be useful for error control purposes. The mechanism is analogous to | ||
| 912 | that for finishing: call @code{BZ2_bzCompress} with an action of | ||
| 913 | @code{BZ_FLUSH}, remove output data, and persist with the | ||
| 914 | @code{BZ_FLUSH} action until the value @code{BZ_RUN_OK} is returned. As | ||
| 915 | with finishing, @code{BZ2_bzCompress} detects any attempt to provide more | ||
| 916 | input data once the flush has begun. | ||
| 917 | |||
| 918 | Once the flush is complete, the stream returns to the normal RUNNING | ||
| 919 | state. | ||
| 920 | |||
| 921 | This all sounds pretty complex, but isn't really. Here's a table | ||
| 922 | which shows which actions are allowable in each state, what action | ||
| 923 | will be taken, what the next state is, and what the non-error return | ||
| 924 | values are. Note that you can't explicitly ask what state the | ||
| 925 | stream is in, but nor do you need to -- it can be inferred from the | ||
| 926 | values returned by @code{BZ2_bzCompress}. | ||
| 927 | @display | ||
| 928 | IDLE/@code{any} | ||
| 929 | Illegal. IDLE state only exists after @code{BZ2_bzCompressEnd} or | ||
| 930 | before @code{BZ2_bzCompressInit}. | ||
| 931 | Return value = @code{BZ_SEQUENCE_ERROR} | ||
| 932 | |||
| 933 | RUNNING/@code{BZ_RUN} | ||
| 934 | Compress from @code{next_in} to @code{next_out} as much as possible. | ||
| 935 | Next state = RUNNING | ||
| 936 | Return value = @code{BZ_RUN_OK} | ||
| 937 | |||
| 938 | RUNNING/@code{BZ_FLUSH} | ||
| 939 | Remember current value of @code{avail_in}. Compress from @code{next_in} | ||
| 940 | to @code{next_out} as much as possible, but do not accept any more input. | ||
| 941 | Next state = FLUSHING | ||
| 942 | Return value = @code{BZ_FLUSH_OK} | ||
| 943 | |||
| 944 | RUNNING/@code{BZ_FINISH} | ||
| 945 | Remember current value of @code{next_in}. Compress from @code{next_in} | ||
| 946 | to @code{next_out} as much as possible, but do not accept any more input. | ||
| 947 | Next state = FINISHING | ||
| 948 | Return value = @code{BZ_FINISH_OK} | ||
| 949 | |||
| 950 | FLUSHING/@code{BZ_FLUSH} | ||
| 951 | Compress from @code{next_in} to @code{next_out} as much as possible, | ||
| 952 | but do not accept any more input. | ||
| 953 | If all the existing input has been used up and all compressed | ||
| 954 | output has been removed | ||
| 955 | Next state = RUNNING; Return value = @code{BZ_RUN_OK} | ||
| 956 | else | ||
| 957 | Next state = FLUSHING; Return value = @code{BZ_FLUSH_OK} | ||
| 958 | |||
| 959 | FLUSHING/other | ||
| 960 | Illegal. | ||
| 961 | Return value = @code{BZ_SEQUENCE_ERROR} | ||
| 962 | |||
| 963 | FINISHING/@code{BZ_FINISH} | ||
| 964 | Compress from @code{next_in} to @code{next_out} as much as possible, | ||
| 965 | but do not accept any more input. | ||
| 966 | If all the existing input has been used up and all compressed | ||
| 967 | output has been removed | ||
| 968 | Next state = IDLE; Return value = @code{BZ_STREAM_END} | ||
| 969 | else | ||
| 970 | Next state = FINISHING; Return value = @code{BZ_FINISH_OK} | ||
| 971 | |||
| 972 | FINISHING/other | ||
| 973 | Illegal. | ||
| 974 | Return value = @code{BZ_SEQUENCE_ERROR} | ||
| 975 | @end display | ||
| 976 | |||
| 977 | That still looks complicated? Well, fair enough. The usual sequence | ||
| 978 | of calls for compressing a load of data is: | ||
| 979 | @itemize @bullet | ||
| 980 | @item Get started with @code{BZ2_bzCompressInit}. | ||
| 981 | @item Shovel data in and shlurp out its compressed form using zero or more | ||
| 982 | calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}. | ||
| 983 | @item Finish up. | ||
| 984 | Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH}, | ||
| 985 | copying out the compressed output, until @code{BZ_STREAM_END} is returned. | ||
| 986 | @item Close up and go home. Call @code{BZ2_bzCompressEnd}. | ||
| 987 | @end itemize | ||
| 988 | If the data you want to compress fits into your input buffer all | ||
| 989 | at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and | ||
| 990 | just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls. | ||
| 991 | |||
| 992 | All required memory is allocated by @code{BZ2_bzCompressInit}. The | ||
| 993 | compression library can accept any data at all (obviously). So you | ||
| 994 | shouldn't get any error return values from the @code{BZ2_bzCompress} calls. | ||
| 995 | If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in | ||
| 996 | your programming. | ||
| 997 | |||
| 998 | Trivial other possible return values: | ||
| 999 | @display | ||
| 1000 | @code{BZ_PARAM_ERROR} | ||
| 1001 | if @code{strm} is @code{NULL}, or @code{strm->s} is @code{NULL} | ||
| 1002 | @end display | ||
| 1003 | |||
| 1004 | @subsection @code{BZ2_bzCompressEnd} | ||
| 1005 | @example | ||
| 1006 | int BZ2_bzCompressEnd ( bz_stream *strm ); | ||
| 1007 | @end example | ||
| 1008 | Releases all memory associated with a compression stream. | ||
| 1009 | |||
| 1010 | Possible return values: | ||
| 1011 | @display | ||
| 1012 | @code{BZ_PARAM_ERROR} if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} | ||
| 1013 | @code{BZ_OK} otherwise | ||
| 1014 | @end display | ||
| 1015 | |||
| 1016 | |||
| 1017 | @subsection @code{BZ2_bzDecompressInit} | ||
| 1018 | @example | ||
| 1019 | int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small ); | ||
| 1020 | @end example | ||
| 1021 | Prepares for decompression. As with @code{BZ2_bzCompressInit}, a | ||
| 1022 | @code{bz_stream} record should be allocated and initialised before the | ||
| 1023 | call. Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be | ||
| 1024 | set if a custom memory allocator is required, or made @code{NULL} for | ||
| 1025 | the normal @code{malloc}/@code{free} routines. Upon return, the internal | ||
| 1026 | state will have been initialised, and @code{total_in} and | ||
| 1027 | @code{total_out} will be zero. | ||
| 1028 | |||
| 1029 | For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}. | ||
| 1030 | |||
| 1031 | If @code{small} is nonzero, the library will use an alternative | ||
| 1032 | decompression algorithm which uses less memory but at the cost of | ||
| 1033 | decompressing more slowly (roughly speaking, half the speed, but the | ||
| 1034 | maximum memory requirement drops to around 2300k). See Chapter 2 for | ||
| 1035 | more information on memory management. | ||
| 1036 | |||
| 1037 | Note that the amount of memory needed to decompress | ||
| 1038 | a stream cannot be determined until the stream's header has been read, | ||
| 1039 | so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent | ||
| 1040 | @code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}. | ||
| 1041 | |||
| 1042 | Possible return values: | ||
| 1043 | @display | ||
| 1044 | @code{BZ_CONFIG_ERROR} | ||
| 1045 | if the library has been mis-compiled | ||
| 1046 | @code{BZ_PARAM_ERROR} | ||
| 1047 | if @code{(small != 0 && small != 1)} | ||
| 1048 | or @code{(verbosity < 0 || verbosity > 4)} | ||
| 1049 | @code{BZ_MEM_ERROR} | ||
| 1050 | if insufficient memory is available | ||
| 1051 | @end display | ||
| 1052 | |||
| 1053 | Allowable next actions: | ||
| 1054 | @display | ||
| 1055 | @code{BZ2_bzDecompress} | ||
| 1056 | if @code{BZ_OK} was returned | ||
| 1057 | no specific action required in case of error | ||
| 1058 | @end display | ||
| 1059 | |||
| 1060 | |||
| 1061 | |||
| 1062 | @subsection @code{BZ2_bzDecompress} | ||
| 1063 | @example | ||
| 1064 | int BZ2_bzDecompress ( bz_stream *strm ); | ||
| 1065 | @end example | ||
| 1066 | Provides more input and/or output buffer space for the library. The | ||
| 1067 | caller maintains input and output buffers, and uses @code{BZ2_bzDecompress} | ||
| 1068 | to transfer data between them. | ||
| 1069 | |||
| 1070 | Before each call to @code{BZ2_bzDecompress}, @code{next_in} | ||
| 1071 | should point at the compressed data, | ||
| 1072 | and @code{avail_in} should indicate how many bytes the library | ||
| 1073 | may read. @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in} | ||
| 1074 | and @code{total_in} | ||
| 1075 | to reflect the number of bytes it has read. | ||
| 1076 | |||
| 1077 | Similarly, @code{next_out} should point to a buffer in which the uncompressed | ||
| 1078 | output is to be placed, with @code{avail_out} indicating how much output space | ||
| 1079 | is available. @code{BZ2_bzDecompress} updates @code{next_out}, | ||
| 1080 | @code{avail_out} and @code{total_out} to reflect | ||
| 1081 | the number of bytes output. | ||
| 1082 | |||
| 1083 | You may provide and remove as little or as much data as you like on | ||
| 1084 | each call of @code{BZ2_bzDecompress}. | ||
| 1085 | In the limit, it is acceptable to | ||
| 1086 | supply and remove data one byte at a time, although this would be | ||
| 1087 | terribly inefficient. You should always ensure that at least one | ||
| 1088 | byte of output space is available at each call. | ||
| 1089 | |||
| 1090 | Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}. | ||
| 1091 | |||
| 1092 | You should provide input and remove output as described above, and | ||
| 1093 | repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is | ||
| 1094 | returned. Appearance of @code{BZ_STREAM_END} denotes that | ||
| 1095 | @code{BZ2_bzDecompress} has detected the logical end of the compressed | ||
| 1096 | stream. @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until | ||
| 1097 | all output data has been placed into the output buffer, so once | ||
| 1098 | @code{BZ_STREAM_END} appears, you are guaranteed to have available all | ||
| 1099 | the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be | ||
| 1100 | called. | ||
| 1101 | |||
| 1102 | In case of an error return value, you should call @code{BZ2_bzDecompressEnd} | ||
| 1103 | to clean up and release memory. | ||
| 1104 | |||
| 1105 | Possible return values: | ||
| 1106 | @display | ||
| 1107 | @code{BZ_PARAM_ERROR} | ||
| 1108 | if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} | ||
| 1109 | or @code{strm->avail_out < 1} | ||
| 1110 | @code{BZ_DATA_ERROR} | ||
| 1111 | if a data integrity error is detected in the compressed stream | ||
| 1112 | @code{BZ_DATA_ERROR_MAGIC} | ||
| 1113 | if the compressed stream doesn't begin with the right magic bytes | ||
| 1114 | @code{BZ_MEM_ERROR} | ||
| 1115 | if there wasn't enough memory available | ||
| 1116 | @code{BZ_STREAM_END} | ||
| 1117 | if the logical end of the data stream was detected and all | ||
| 1118 | output has been consumed, eg @code{s->avail_out > 0} | ||
| 1119 | @code{BZ_OK} | ||
| 1120 | otherwise | ||
| 1121 | @end display | ||
| 1122 | Allowable next actions: | ||
| 1123 | @display | ||
| 1124 | @code{BZ2_bzDecompress} | ||
| 1125 | if @code{BZ_OK} was returned | ||
| 1126 | @code{BZ2_bzDecompressEnd} | ||
| 1127 | otherwise | ||
| 1128 | @end display | ||
| 1129 | |||
| 1130 | |||
| 1131 | @subsection @code{BZ2_bzDecompressEnd} | ||
| 1132 | @example | ||
| 1133 | int BZ2_bzDecompressEnd ( bz_stream *strm ); | ||
| 1134 | @end example | ||
| 1135 | Releases all memory associated with a decompression stream. | ||
| 1136 | |||
| 1137 | Possible return values: | ||
| 1138 | @display | ||
| 1139 | @code{BZ_PARAM_ERROR} | ||
| 1140 | if @code{strm} is @code{NULL} or @code{strm->s} is @code{NULL} | ||
| 1141 | @code{BZ_OK} | ||
| 1142 | otherwise | ||
| 1143 | @end display | ||
| 1144 | |||
| 1145 | Allowable next actions: | ||
| 1146 | @display | ||
| 1147 | None. | ||
| 1148 | @end display | ||
| 1149 | |||
| 1150 | |||
| 1151 | @section High-level interface | ||
| 1152 | |||
| 1153 | This interface provides functions for reading and writing | ||
| 1154 | @code{bzip2} format files. First, some general points. | ||
| 1155 | |||
| 1156 | @itemize @bullet | ||
| 1157 | @item All of the functions take an @code{int*} first argument, | ||
| 1158 | @code{bzerror}. | ||
| 1159 | After each call, @code{bzerror} should be consulted first to determine | ||
| 1160 | the outcome of the call. If @code{bzerror} is @code{BZ_OK}, | ||
| 1161 | the call completed | ||
| 1162 | successfully, and only then should the return value of the function | ||
| 1163 | (if any) be consulted. If @code{bzerror} is @code{BZ_IO_ERROR}, | ||
| 1164 | there was an error | ||
| 1165 | reading/writing the underlying compressed file, and you should | ||
| 1166 | then consult @code{errno}/@code{perror} to determine the | ||
| 1167 | cause of the difficulty. | ||
| 1168 | @code{bzerror} may also be set to various other values; precise details are | ||
| 1169 | given on a per-function basis below. | ||
| 1170 | @item If @code{bzerror} indicates an error | ||
| 1171 | (ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}), | ||
| 1172 | you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose}, | ||
| 1173 | depending on whether you are attempting to read or to write) | ||
| 1174 | to free up all resources associated | ||
| 1175 | with the stream. Once an error has been indicated, behaviour of all calls | ||
| 1176 | except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined. | ||
| 1177 | The implication is that (1) @code{bzerror} should | ||
| 1178 | be checked after each call, and (2) if @code{bzerror} indicates an error, | ||
| 1179 | @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up. | ||
| 1180 | @item The @code{FILE*} arguments passed to | ||
| 1181 | @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen} | ||
| 1182 | should be set to binary mode. | ||
| 1183 | Most Unix systems will do this by default, but other platforms, | ||
| 1184 | including Windows and Mac, will not. If you omit this, you may | ||
| 1185 | encounter problems when moving code to new platforms. | ||
| 1186 | @item Memory allocation requests are handled by | ||
| 1187 | @code{malloc}/@code{free}. | ||
| 1188 | At present | ||
| 1189 | there is no facility for user-defined memory allocators in the file I/O | ||
| 1190 | functions (could easily be added, though). | ||
| 1191 | @end itemize | ||
| 1192 | |||
| 1193 | |||
| 1194 | |||
| 1195 | @subsection @code{BZ2_bzReadOpen} | ||
| 1196 | @example | ||
| 1197 | typedef void BZFILE; | ||
| 1198 | |||
| 1199 | BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f, | ||
| 1200 | int verbosity, int small, | ||
| 1201 | void *unused, int nUnused ); | ||
| 1202 | @end example | ||
| 1203 | Prepare to read compressed data from file handle @code{f}. @code{f} | ||
| 1204 | should refer to a file which has been opened for reading, and for which | ||
| 1205 | the error indicator (@code{ferror(f)}) is not set. If @code{small} is 1, | ||
| 1206 | the library will try to decompress using less memory, at the expense of | ||
| 1207 | speed. | ||
| 1208 | |||
| 1209 | For reasons explained below, @code{BZ2_bzRead} will decompress the | ||
| 1210 | @code{nUnused} bytes starting at @code{unused}, before starting to read | ||
| 1211 | from the file @code{f}. At most @code{BZ_MAX_UNUSED} bytes may be | ||
| 1212 | supplied like this. If this facility is not required, you should pass | ||
| 1213 | @code{NULL} and @code{0} for @code{unused} and @code{nUnused} | ||
| 1214 | respectively. | ||
| 1215 | |||
| 1216 | For the meaning of parameters @code{small} and @code{verbosity}, | ||
| 1217 | see @code{BZ2_bzDecompressInit}. | ||
| 1218 | |||
| 1219 | The amount of memory needed to decompress a file cannot be determined | ||
| 1220 | until the file's header has been read. So it is possible that | ||
| 1221 | @code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of | ||
| 1222 | @code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}. | ||
| 1223 | |||
| 1224 | Possible assignments to @code{bzerror}: | ||
| 1225 | @display | ||
| 1226 | @code{BZ_CONFIG_ERROR} | ||
| 1227 | if the library has been mis-compiled | ||
| 1228 | @code{BZ_PARAM_ERROR} | ||
| 1229 | if @code{f} is @code{NULL} | ||
| 1230 | or @code{small} is neither @code{0} nor @code{1} | ||
| 1231 | or @code{(unused == NULL && nUnused != 0)} | ||
| 1232 | or @code{(unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED))} | ||
| 1233 | @code{BZ_IO_ERROR} | ||
| 1234 | if @code{ferror(f)} is nonzero | ||
| 1235 | @code{BZ_MEM_ERROR} | ||
| 1236 | if insufficient memory is available | ||
| 1237 | @code{BZ_OK} | ||
| 1238 | otherwise. | ||
| 1239 | @end display | ||
| 1240 | |||
| 1241 | Possible return values: | ||
| 1242 | @display | ||
| 1243 | Pointer to an abstract @code{BZFILE} | ||
| 1244 | if @code{bzerror} is @code{BZ_OK} | ||
| 1245 | @code{NULL} | ||
| 1246 | otherwise | ||
| 1247 | @end display | ||
| 1248 | |||
| 1249 | Allowable next actions: | ||
| 1250 | @display | ||
| 1251 | @code{BZ2_bzRead} | ||
| 1252 | if @code{bzerror} is @code{BZ_OK} | ||
| 1253 | @code{BZ2_bzReadClose} | ||
| 1254 | otherwise | ||
| 1255 | @end display | ||
| 1256 | |||
| 1257 | |||
| 1258 | @subsection @code{BZ2_bzRead} | ||
| 1259 | @example | ||
| 1260 | int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len ); | ||
| 1261 | @end example | ||
| 1262 | Reads up to @code{len} (uncompressed) bytes from the compressed file | ||
| 1263 | @code{b} into | ||
| 1264 | the buffer @code{buf}. If the read was successful, | ||
| 1265 | @code{bzerror} is set to @code{BZ_OK} | ||
| 1266 | and the number of bytes read is returned. If the logical end-of-stream | ||
| 1267 | was detected, @code{bzerror} will be set to @code{BZ_STREAM_END}, | ||
| 1268 | and the number | ||
| 1269 | of bytes read is returned. All other @code{bzerror} values denote an error. | ||
| 1270 | |||
| 1271 | @code{BZ2_bzRead} will supply @code{len} bytes, | ||
| 1272 | unless the logical stream end is detected | ||
| 1273 | or an error occurs. Because of this, it is possible to detect the | ||
| 1274 | stream end by observing when the number of bytes returned is | ||
| 1275 | less than the number | ||
| 1276 | requested. Nevertheless, this is regarded as inadvisable; you should | ||
| 1277 | instead check @code{bzerror} after every call and watch out for | ||
| 1278 | @code{BZ_STREAM_END}. | ||
| 1279 | |||
| 1280 | Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks | ||
| 1281 | of size @code{BZ_MAX_UNUSED} bytes | ||
| 1282 | before decompressing it. If the file contains more bytes than strictly | ||
| 1283 | needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly | ||
| 1284 | read some of the trailing data before signalling @code{BZ_STREAM_END}. | ||
| 1285 | To collect the read but unused data once @code{BZ_STREAM_END} has | ||
| 1286 | appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}. | ||
| 1287 | |||
| 1288 | Possible assignments to @code{bzerror}: | ||
| 1289 | @display | ||
| 1290 | @code{BZ_PARAM_ERROR} | ||
| 1291 | if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} | ||
| 1292 | @code{BZ_SEQUENCE_ERROR} | ||
| 1293 | if @code{b} was opened with @code{BZ2_bzWriteOpen} | ||
| 1294 | @code{BZ_IO_ERROR} | ||
| 1295 | if there is an error reading from the compressed file | ||
| 1296 | @code{BZ_UNEXPECTED_EOF} | ||
| 1297 | if the compressed file ended before the logical end-of-stream was detected | ||
| 1298 | @code{BZ_DATA_ERROR} | ||
| 1299 | if a data integrity error was detected in the compressed stream | ||
| 1300 | @code{BZ_DATA_ERROR_MAGIC} | ||
| 1301 | if the stream does not begin with the requisite header bytes (ie, is not | ||
| 1302 | a @code{bzip2} data file). This is really a special case of @code{BZ_DATA_ERROR}. | ||
| 1303 | @code{BZ_MEM_ERROR} | ||
| 1304 | if insufficient memory was available | ||
| 1305 | @code{BZ_STREAM_END} | ||
| 1306 | if the logical end of stream was detected. | ||
| 1307 | @code{BZ_OK} | ||
| 1308 | otherwise. | ||
| 1309 | @end display | ||
| 1310 | |||
| 1311 | Possible return values: | ||
| 1312 | @display | ||
| 1313 | number of bytes read | ||
| 1314 | if @code{bzerror} is @code{BZ_OK} or @code{BZ_STREAM_END} | ||
| 1315 | undefined | ||
| 1316 | otherwise | ||
| 1317 | @end display | ||
| 1318 | |||
| 1319 | Allowable next actions: | ||
| 1320 | @display | ||
| 1321 | collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose} | ||
| 1322 | if @code{bzerror} is @code{BZ_OK} | ||
| 1323 | collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused} | ||
| 1324 | if @code{bzerror} is @code{BZ_STREAM_END} | ||
| 1325 | @code{BZ2_bzReadClose} | ||
| 1326 | otherwise | ||
| 1327 | @end display | ||
| 1328 | |||
| 1329 | |||
| 1330 | |||
| 1331 | @subsection @code{BZ2_bzReadGetUnused} | ||
| 1332 | @example | ||
| 1333 | void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b, | ||
| 1334 | void** unused, int* nUnused ); | ||
| 1335 | @end example | ||
| 1336 | Returns data which was read from the compressed file but was not needed | ||
| 1337 | to get to the logical end-of-stream. @code{*unused} is set to the address | ||
| 1338 | of the data, and @code{*nUnused} to the number of bytes. @code{*nUnused} will | ||
| 1339 | be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive. | ||
| 1340 | |||
| 1341 | This function may only be called once @code{BZ2_bzRead} has signalled | ||
| 1342 | @code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}. | ||
| 1343 | |||
| 1344 | Possible assignments to @code{bzerror}: | ||
| 1345 | @display | ||
| 1346 | @code{BZ_PARAM_ERROR} | ||
| 1347 | if @code{b} is @code{NULL} | ||
| 1348 | or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL} | ||
| 1349 | @code{BZ_SEQUENCE_ERROR} | ||
| 1350 | if @code{BZ_STREAM_END} has not been signalled | ||
| 1351 | or if @code{b} was opened with @code{BZ2_bzWriteOpen} | ||
| 1352 | @code{BZ_OK} | ||
| 1353 | otherwise | ||
| 1354 | @end display | ||
| 1355 | |||
| 1356 | Allowable next actions: | ||
| 1357 | @display | ||
| 1358 | @code{BZ2_bzReadClose} | ||
| 1359 | @end display | ||
| 1360 | |||
| 1361 | |||
| 1362 | @subsection @code{BZ2_bzReadClose} | ||
| 1363 | @example | ||
| 1364 | void BZ2_bzReadClose ( int *bzerror, BZFILE *b ); | ||
| 1365 | @end example | ||
| 1366 | Releases all memory pertaining to the compressed file @code{b}. | ||
| 1367 | @code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file | ||
| 1368 | handle, so you should do that yourself if appropriate. | ||
| 1369 | @code{BZ2_bzReadClose} should be called to clean up after all error | ||
| 1370 | situations. | ||
| 1371 | |||
| 1372 | Possible assignments to @code{bzerror}: | ||
| 1373 | @display | ||
| 1374 | @code{BZ_SEQUENCE_ERROR} | ||
| 1375 | if @code{b} was opened with @code{BZ2_bzWriteOpen} | ||
| 1376 | @code{BZ_OK} | ||
| 1377 | otherwise | ||
| 1378 | @end display | ||
| 1379 | |||
| 1380 | Allowable next actions: | ||
| 1381 | @display | ||
| 1382 | none | ||
| 1383 | @end display | ||
| 1384 | |||
| 1385 | |||
| 1386 | |||
| 1387 | @subsection @code{BZ2_bzWriteOpen} | ||
| 1388 | @example | ||
| 1389 | BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f, | ||
| 1390 | int blockSize100k, int verbosity, | ||
| 1391 | int workFactor ); | ||
| 1392 | @end example | ||
| 1393 | Prepare to write compressed data to file handle @code{f}. | ||
| 1394 | @code{f} should refer to | ||
| 1395 | a file which has been opened for writing, and for which the error | ||
| 1396 | indicator (@code{ferror(f)}) is not set. | ||
| 1397 | |||
| 1398 | For the meaning of parameters @code{blockSize100k}, | ||
| 1399 | @code{verbosity} and @code{workFactor}, see | ||
| 1400 | @* @code{BZ2_bzCompressInit}. | ||
| 1401 | |||
| 1402 | All required memory is allocated at this stage, so if the call | ||
| 1403 | completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a | ||
| 1404 | subsequent call to @code{BZ2_bzWrite}. | ||
| 1405 | |||
| 1406 | Possible assignments to @code{bzerror}: | ||
| 1407 | @display | ||
| 1408 | @code{BZ_CONFIG_ERROR} | ||
| 1409 | if the library has been mis-compiled | ||
| 1410 | @code{BZ_PARAM_ERROR} | ||
| 1411 | if @code{f} is @code{NULL} | ||
| 1412 | or @code{blockSize100k < 1} or @code{blockSize100k > 9} | ||
| 1413 | @code{BZ_IO_ERROR} | ||
| 1414 | if @code{ferror(f)} is nonzero | ||
| 1415 | @code{BZ_MEM_ERROR} | ||
| 1416 | if insufficient memory is available | ||
| 1417 | @code{BZ_OK} | ||
| 1418 | otherwise | ||
| 1419 | @end display | ||
| 1420 | |||
| 1421 | Possible return values: | ||
| 1422 | @display | ||
| 1423 | Pointer to an abstract @code{BZFILE} | ||
| 1424 | if @code{bzerror} is @code{BZ_OK} | ||
| 1425 | @code{NULL} | ||
| 1426 | otherwise | ||
| 1427 | @end display | ||
| 1428 | |||
| 1429 | Allowable next actions: | ||
| 1430 | @display | ||
| 1431 | @code{BZ2_bzWrite} | ||
| 1432 | if @code{bzerror} is @code{BZ_OK} | ||
| 1433 | (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless) | ||
| 1434 | @code{BZ2_bzWriteClose} | ||
| 1435 | otherwise | ||
| 1436 | @end display | ||
| 1437 | |||
| 1438 | |||
| 1439 | |||
| 1440 | @subsection @code{BZ2_bzWrite} | ||
| 1441 | @example | ||
| 1442 | void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len ); | ||
| 1443 | @end example | ||
| 1444 | Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be | ||
| 1445 | compressed and written to the file. | ||
| 1446 | |||
| 1447 | Possible assignments to @code{bzerror}: | ||
| 1448 | @display | ||
| 1449 | @code{BZ_PARAM_ERROR} | ||
| 1450 | if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} | ||
| 1451 | @code{BZ_SEQUENCE_ERROR} | ||
| 1452 | if @code{b} was opened with @code{BZ2_bzReadOpen} | ||
| 1453 | @code{BZ_IO_ERROR} | ||
| 1454 | if there is an error writing the compressed file. | ||
| 1455 | @code{BZ_OK} | ||
| 1456 | otherwise | ||
| 1457 | @end display | ||
| 1458 | |||
| 1459 | |||
| 1460 | |||
| 1461 | |||
| 1462 | @subsection @code{BZ2_bzWriteClose} | ||
| 1463 | @example | ||
| 1464 | void BZ2_bzWriteClose ( int *bzerror, BZFILE* b, | ||
| 1465 | int abandon, | ||
| 1466 | unsigned int* nbytes_in, | ||
| 1467 | unsigned int* nbytes_out ); | ||
| 1468 | |||
| 1469 | void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* b, | ||
| 1470 | int abandon, | ||
| 1471 | unsigned int* nbytes_in_lo32, | ||
| 1472 | unsigned int* nbytes_in_hi32, | ||
| 1473 | unsigned int* nbytes_out_lo32, | ||
| 1474 | unsigned int* nbytes_out_hi32 ); | ||
| 1475 | @end example | ||
| 1476 | |||
| 1477 | Compresses and flushes to the compressed file all data so far supplied | ||
| 1478 | by @code{BZ2_bzWrite}. The logical end-of-stream markers are also written, so | ||
| 1479 | subsequent calls to @code{BZ2_bzWrite} are illegal. All memory associated | ||
| 1480 | with the compressed file @code{b} is released. | ||
| 1481 | @code{fflush} is called on the | ||
| 1482 | compressed file, but it is not @code{fclose}'d. | ||
| 1483 | |||
| 1484 | If @code{BZ2_bzWriteClose} is called to clean up after an error, the only | ||
| 1485 | action is to release the memory. The library records the error codes | ||
| 1486 | issued by previous calls, so this situation will be detected | ||
| 1487 | automatically. There is no attempt to complete the compression | ||
| 1488 | operation, nor to @code{fflush} the compressed file. You can force this | ||
| 1489 | behaviour to happen even in the case of no error, by passing a nonzero | ||
| 1490 | value to @code{abandon}. | ||
| 1491 | |||
| 1492 | If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the | ||
| 1493 | total volume of uncompressed data handled. Similarly, @code{nbytes_out} | ||
| 1494 | will be set to the total volume of compressed data written. For | ||
| 1495 | compatibility with older versions of the library, @code{BZ2_bzWriteClose} | ||
| 1496 | only yields the lower 32 bits of these counts. Use | ||
| 1497 | @code{BZ2_bzWriteClose64} if you want the full 64 bit counts. These | ||
| 1498 | two functions are otherwise absolutely identical. | ||
| 1499 | |||
| 1500 | |||
| 1501 | Possible assignments to @code{bzerror}: | ||
| 1502 | @display | ||
| 1503 | @code{BZ_SEQUENCE_ERROR} | ||
| 1504 | if @code{b} was opened with @code{BZ2_bzReadOpen} | ||
| 1505 | @code{BZ_IO_ERROR} | ||
| 1506 | if there is an error writing the compressed file | ||
| 1507 | @code{BZ_OK} | ||
| 1508 | otherwise | ||
| 1509 | @end display | ||
| 1510 | |||
| 1511 | @subsection Handling embedded compressed data streams | ||
| 1512 | |||
| 1513 | The high-level library facilitates use of | ||
| 1514 | @code{bzip2} data streams which form some part of a surrounding, larger | ||
| 1515 | data stream. | ||
| 1516 | @itemize @bullet | ||
| 1517 | @item For writing, the library takes an open file handle, writes | ||
| 1518 | compressed data to it, @code{fflush}es it but does not @code{fclose} it. | ||
| 1519 | The calling application can write its own data before and after the | ||
| 1520 | compressed data stream, using that same file handle. | ||
| 1521 | @item Reading is more complex, and the facilities are not as general | ||
| 1522 | as they could be since generality is hard to reconcile with efficiency. | ||
| 1523 | @code{BZ2_bzRead} reads from the compressed file in blocks of size | ||
| 1524 | @code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot | ||
| 1525 | the logical end of compressed stream. | ||
| 1526 | To recover this data once decompression has | ||
| 1527 | ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead} | ||
| 1528 | (the one returning @code{BZ_STREAM_END}) but before calling | ||
| 1529 | @code{BZ2_bzReadClose}. | ||
| 1530 | @end itemize | ||
| 1531 | |||
| 1532 | This mechanism makes it easy to decompress multiple @code{bzip2} | ||
| 1533 | streams placed end-to-end. At the end of one stream, when @code{BZ2_bzRead} | ||
| 1534 | returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the | ||
| 1535 | unused data (copy it into your own buffer somewhere). | ||
| 1536 | That data forms the start of the next compressed stream. | ||
| 1537 | To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again, | ||
| 1538 | feeding in the unused data via the @code{unused}/@code{nUnused} | ||
| 1539 | parameters. | ||
| 1540 | Keep doing this until @code{BZ_STREAM_END} return coincides with the | ||
| 1541 | physical end of file (@code{feof(f)}). In this situation | ||
| 1542 | @code{BZ2_bzReadGetUnused} | ||
| 1543 | will of course return no data. | ||
| 1544 | |||
| 1545 | This should give some feel for how the high-level interface can be used. | ||
| 1546 | If you require extra flexibility, you'll have to bite the bullet and get | ||
| 1547 | to grips with the low-level interface. | ||
| 1548 | |||
| 1549 | @subsection Standard file-reading/writing code | ||
| 1550 | Here's how you'd write data to a compressed file: | ||
| 1551 | @example | ||
| 1552 | FILE* f; | ||
| 1553 | BZFILE* b; | ||
| 1554 | int nBuf; | ||
| 1555 | char buf[ /* whatever size you like */ ]; | ||
| 1556 | int bzerror; | ||
| 1557 | int nWritten; | ||
| 1558 | |||
| 1559 | f = fopen ( "myfile.bz2", "wb" ); | ||
| 1560 | if (!f) @{ | ||
| 1561 | /* handle error */ | ||
| 1562 | @} | ||
| 1563 | b = BZ2_bzWriteOpen ( &bzerror, f, 9, 0, 0 ); | ||
| 1564 | if (bzerror != BZ_OK) @{ | ||
| 1565 | BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL ); | ||
| 1566 | /* handle error */ | ||
| 1567 | @} | ||
| 1568 | |||
| 1569 | while ( /* condition */ ) @{ | ||
| 1570 | /* get data to write into buf, and set nBuf appropriately */ | ||
| 1571 | BZ2_bzWrite ( &bzerror, b, buf, nBuf ); | ||
| 1572 | if (bzerror == BZ_IO_ERROR) @{ | ||
| 1573 | BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL ); | ||
| 1574 | /* handle error */ | ||
| 1575 | @} | ||
| 1576 | @} | ||
| 1577 | |||
| 1578 | BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL ); | ||
| 1579 | if (bzerror == BZ_IO_ERROR) @{ | ||
| 1580 | /* handle error */ | ||
| 1581 | @} | ||
| 1582 | @end example | ||
| 1583 | And to read from a compressed file: | ||
| 1584 | @example | ||
| 1585 | FILE* f; | ||
| 1586 | BZFILE* b; | ||
| 1587 | int nBuf; | ||
| 1588 | char buf[ /* whatever size you like */ ]; | ||
| 1589 | int bzerror; | ||
| 1590 | int nWritten; | ||
| 1591 | |||
| 1592 | f = fopen ( "myfile.bz2", "rb" ); | ||
| 1593 | if (!f) @{ | ||
| 1594 | /* handle error */ | ||
| 1595 | @} | ||
| 1596 | b = BZ2_bzReadOpen ( &bzerror, f, 0, 0, NULL, 0 ); | ||
| 1597 | if (bzerror != BZ_OK) @{ | ||
| 1598 | BZ2_bzReadClose ( &bzerror, b ); | ||
| 1599 | /* handle error */ | ||
| 1600 | @} | ||
| 1601 | |||
| 1602 | bzerror = BZ_OK; | ||
| 1603 | while (bzerror == BZ_OK && /* arbitrary other conditions */) @{ | ||
| 1604 | nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ ); | ||
| 1605 | if (bzerror == BZ_OK || bzerror == BZ_STREAM_END) @{ | ||
| 1606 | /* do something with buf[0 .. nBuf-1] */ | ||
| 1607 | @} | ||
| 1608 | @} | ||
| 1609 | if (bzerror != BZ_STREAM_END) @{ | ||
| 1610 | BZ2_bzReadClose ( &bzerror, b ); | ||
| 1611 | /* handle error */ | ||
| 1612 | @} else @{ | ||
| 1613 | BZ2_bzReadClose ( &bzerror, b ); | ||
| 1614 | @} | ||
| 1615 | @end example | ||
| 1616 | |||
| 1617 | |||
| 1618 | |||
| 1619 | @section Utility functions | ||
| 1620 | @subsection @code{BZ2_bzBuffToBuffCompress} | ||
| 1621 | @example | ||
| 1622 | int BZ2_bzBuffToBuffCompress( char* dest, | ||
| 1623 | unsigned int* destLen, | ||
| 1624 | char* source, | ||
| 1625 | unsigned int sourceLen, | ||
| 1626 | int blockSize100k, | ||
| 1627 | int verbosity, | ||
| 1628 | int workFactor ); | ||
| 1629 | @end example | ||
| 1630 | Attempts to compress the data in @code{source[0 .. sourceLen-1]} | ||
| 1631 | into the destination buffer, @code{dest[0 .. *destLen-1]}. | ||
| 1632 | If the destination buffer is big enough, @code{*destLen} is | ||
| 1633 | set to the size of the compressed data, and @code{BZ_OK} is | ||
| 1634 | returned. If the compressed data won't fit, @code{*destLen} | ||
| 1635 | is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. | ||
| 1636 | |||
| 1637 | Compression in this manner is a one-shot event, done with a single call | ||
| 1638 | to this function. The resulting compressed data is a complete | ||
| 1639 | @code{bzip2} format data stream. There is no mechanism for making | ||
| 1640 | additional calls to provide extra input data. If you want that kind of | ||
| 1641 | mechanism, use the low-level interface. | ||
| 1642 | |||
| 1643 | For the meaning of parameters @code{blockSize100k}, @code{verbosity} | ||
| 1644 | and @code{workFactor}, @* see @code{BZ2_bzCompressInit}. | ||
| 1645 | |||
| 1646 | To guarantee that the compressed data will fit in its buffer, allocate | ||
| 1647 | an output buffer of size 1% larger than the uncompressed data, plus | ||
| 1648 | six hundred extra bytes. | ||
| 1649 | |||
| 1650 | @code{BZ2_bzBuffToBuffCompress} will not write data at or | ||
| 1651 | beyond @code{dest[*destLen]}, even in case of buffer overflow. | ||
| 1652 | |||
| 1653 | Possible return values: | ||
| 1654 | @display | ||
| 1655 | @code{BZ_CONFIG_ERROR} | ||
| 1656 | if the library has been mis-compiled | ||
| 1657 | @code{BZ_PARAM_ERROR} | ||
| 1658 | if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} | ||
| 1659 | or @code{blockSize100k < 1} or @code{blockSize100k > 9} | ||
| 1660 | or @code{verbosity < 0} or @code{verbosity > 4} | ||
| 1661 | or @code{workFactor < 0} or @code{workFactor > 250} | ||
| 1662 | @code{BZ_MEM_ERROR} | ||
| 1663 | if insufficient memory is available | ||
| 1664 | @code{BZ_OUTBUFF_FULL} | ||
| 1665 | if the size of the compressed data exceeds @code{*destLen} | ||
| 1666 | @code{BZ_OK} | ||
| 1667 | otherwise | ||
| 1668 | @end display | ||
| 1669 | |||
| 1670 | |||
| 1671 | |||
| 1672 | @subsection @code{BZ2_bzBuffToBuffDecompress} | ||
| 1673 | @example | ||
| 1674 | int BZ2_bzBuffToBuffDecompress ( char* dest, | ||
| 1675 | unsigned int* destLen, | ||
| 1676 | char* source, | ||
| 1677 | unsigned int sourceLen, | ||
| 1678 | int small, | ||
| 1679 | int verbosity ); | ||
| 1680 | @end example | ||
| 1681 | Attempts to decompress the data in @code{source[0 .. sourceLen-1]} | ||
| 1682 | into the destination buffer, @code{dest[0 .. *destLen-1]}. | ||
| 1683 | If the destination buffer is big enough, @code{*destLen} is | ||
| 1684 | set to the size of the uncompressed data, and @code{BZ_OK} is | ||
| 1685 | returned. If the uncompressed data won't fit, @code{*destLen} | ||
| 1686 | is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. | ||
| 1687 | |||
| 1688 | @code{source} is assumed to hold a complete @code{bzip2} format | ||
| 1689 | data stream. @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress | ||
| 1690 | the entirety of the stream into the output buffer. | ||
| 1691 | |||
| 1692 | For the meaning of parameters @code{small} and @code{verbosity}, | ||
| 1693 | see @code{BZ2_bzDecompressInit}. | ||
| 1694 | |||
| 1695 | Because the compression ratio of the compressed data cannot be known in | ||
| 1696 | advance, there is no easy way to guarantee that the output buffer will | ||
| 1697 | be big enough. You may of course make arrangements in your code to | ||
| 1698 | record the size of the uncompressed data, but such a mechanism is beyond | ||
| 1699 | the scope of this library. | ||
| 1700 | |||
| 1701 | @code{BZ2_bzBuffToBuffDecompress} will not write data at or | ||
| 1702 | beyond @code{dest[*destLen]}, even in case of buffer overflow. | ||
| 1703 | |||
| 1704 | Possible return values: | ||
| 1705 | @display | ||
| 1706 | @code{BZ_CONFIG_ERROR} | ||
| 1707 | if the library has been mis-compiled | ||
| 1708 | @code{BZ_PARAM_ERROR} | ||
| 1709 | if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} | ||
| 1710 | or @code{small != 0 && small != 1} | ||
| 1711 | or @code{verbosity < 0} or @code{verbosity > 4} | ||
| 1712 | @code{BZ_MEM_ERROR} | ||
| 1713 | if insufficient memory is available | ||
| 1714 | @code{BZ_OUTBUFF_FULL} | ||
| 1715 | if the size of the uncompressed data exceeds @code{*destLen} | ||
| 1716 | @code{BZ_DATA_ERROR} | ||
| 1717 | if a data integrity error was detected in the compressed data | ||
| 1718 | @code{BZ_DATA_ERROR_MAGIC} | ||
| 1719 | if the compressed data doesn't begin with the right magic bytes | ||
| 1720 | @code{BZ_UNEXPECTED_EOF} | ||
| 1721 | if the compressed data ends unexpectedly | ||
| 1722 | @code{BZ_OK} | ||
| 1723 | otherwise | ||
| 1724 | @end display | ||
| 1725 | |||
| 1726 | |||
| 1727 | |||
| 1728 | @section @code{zlib} compatibility functions | ||
| 1729 | Yoshioka Tsuneo has contributed some functions to | ||
| 1730 | give better @code{zlib} compatibility. These functions are | ||
| 1731 | @code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, | ||
| 1732 | @code{BZ2_bzclose}, | ||
| 1733 | @code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. | ||
| 1734 | These functions are not (yet) officially part of | ||
| 1735 | the library. If they break, you get to keep all the pieces. | ||
| 1736 | Nevertheless, I think they work ok. | ||
| 1737 | @example | ||
| 1738 | typedef void BZFILE; | ||
| 1739 | |||
| 1740 | const char * BZ2_bzlibVersion ( void ); | ||
| 1741 | @end example | ||
| 1742 | Returns a string indicating the library version. | ||
| 1743 | @example | ||
| 1744 | BZFILE * BZ2_bzopen ( const char *path, const char *mode ); | ||
| 1745 | BZFILE * BZ2_bzdopen ( int fd, const char *mode ); | ||
| 1746 | @end example | ||
| 1747 | Opens a @code{.bz2} file for reading or writing, using either its name | ||
| 1748 | or a pre-existing file descriptor. | ||
| 1749 | Analogous to @code{fopen} and @code{fdopen}. | ||
| 1750 | @example | ||
| 1751 | int BZ2_bzread ( BZFILE* b, void* buf, int len ); | ||
| 1752 | int BZ2_bzwrite ( BZFILE* b, void* buf, int len ); | ||
| 1753 | @end example | ||
| 1754 | Reads/writes data from/to a previously opened @code{BZFILE}. | ||
| 1755 | Analogous to @code{fread} and @code{fwrite}. | ||
| 1756 | @example | ||
| 1757 | int BZ2_bzflush ( BZFILE* b ); | ||
| 1758 | void BZ2_bzclose ( BZFILE* b ); | ||
| 1759 | @end example | ||
| 1760 | Flushes/closes a @code{BZFILE}. @code{BZ2_bzflush} doesn't actually do | ||
| 1761 | anything. Analogous to @code{fflush} and @code{fclose}. | ||
| 1762 | |||
| 1763 | @example | ||
| 1764 | const char * BZ2_bzerror ( BZFILE *b, int *errnum ); | ||
| 1765 | @end example | ||
| 1766 | Returns a string describing the most recent error status of | ||
| 1767 | @code{b}, and also sets @code{*errnum} to its numerical value. | ||
| 1768 | |||
| 1769 | |||
| 1770 | @section Using the library in a @code{stdio}-free environment | ||
| 1771 | |||
| 1772 | @subsection Getting rid of @code{stdio} | ||
| 1773 | |||
| 1774 | In a deeply embedded application, you might want to use just | ||
| 1775 | the memory-to-memory functions. You can do this conveniently | ||
| 1776 | by compiling the library with preprocessor symbol @code{BZ_NO_STDIO} | ||
| 1777 | defined. Doing this gives you a library containing only the following | ||
| 1778 | eight functions: | ||
| 1779 | |||
| 1780 | @code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @* | ||
| 1781 | @code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @* | ||
| 1782 | @code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress} | ||
| 1783 | |||
| 1784 | When compiled like this, all functions will ignore @code{verbosity} | ||
| 1785 | settings. | ||
| 1786 | |||
| 1787 | @subsection Critical error handling | ||
| 1788 | @code{libbzip2} contains a number of internal assertion checks which | ||
| 1789 | should, needless to say, never be activated. Nevertheless, if an | ||
| 1790 | assertion should fail, behaviour depends on whether or not the library | ||
| 1791 | was compiled with @code{BZ_NO_STDIO} set. | ||
| 1792 | |||
| 1793 | For a normal compile, an assertion failure yields the message | ||
| 1794 | @example | ||
| 1795 | bzip2/libbzip2: internal error number N. | ||
| 1796 | This is a bug in bzip2/libbzip2, 1.0.2, 30-Dec-2001. | ||
| 1797 | Please report it to me at: jseward@@acm.org. If this happened | ||
| 1798 | when you were using some program which uses libbzip2 as a | ||
| 1799 | component, you should also report this bug to the author(s) | ||
| 1800 | of that program. Please make an effort to report this bug; | ||
| 1801 | timely and accurate bug reports eventually lead to higher | ||
| 1802 | quality software. Thanks. Julian Seward, 30 December 2001. | ||
| 1803 | @end example | ||
| 1804 | where @code{N} is some error code number. If @code{N == 1007}, it also | ||
| 1805 | prints some extra text advising the reader that unreliable memory is | ||
| 1806 | often associated with internal error 1007. (This is a | ||
| 1807 | frequently-observed-phenomenon with versions 1.0.0/1.0.1). | ||
| 1808 | |||
| 1809 | @code{exit(3)} is then called. | ||
| 1810 | |||
| 1811 | For a @code{stdio}-free library, assertion failures result | ||
| 1812 | in a call to a function declared as: | ||
| 1813 | @example | ||
| 1814 | extern void bz_internal_error ( int errcode ); | ||
| 1815 | @end example | ||
| 1816 | The relevant code is passed as a parameter. You should supply | ||
| 1817 | such a function. | ||
| 1818 | |||
| 1819 | In either case, once an assertion failure has occurred, any | ||
| 1820 | @code{bz_stream} records involved can be regarded as invalid. | ||
| 1821 | You should not attempt to resume normal operation with them. | ||
| 1822 | |||
| 1823 | You may, of course, change critical error handling to suit | ||
| 1824 | your needs. As I said above, critical errors indicate bugs | ||
| 1825 | in the library and should not occur. All "normal" error | ||
| 1826 | situations are indicated via error return codes from functions, | ||
| 1827 | and can be recovered from. | ||
| 1828 | |||
| 1829 | |||
| 1830 | @section Making a Windows DLL | ||
| 1831 | Everything related to Windows has been contributed by Yoshioka Tsuneo | ||
| 1832 | @* (@code{QWF00133@@niftyserve.or.jp} / | ||
| 1833 | @code{tsuneo-y@@is.aist-nara.ac.jp}), so you should send your queries to | ||
| 1834 | him (but perhaps Cc: me, @code{jseward@@acm.org}). | ||
| 1835 | |||
| 1836 | My vague understanding of what to do is: using Visual C++ 5.0, | ||
| 1837 | open the project file @code{libbz2.dsp}, and build. That's all. | ||
| 1838 | |||
| 1839 | If you can't | ||
| 1840 | open the project file for some reason, make a new one, naming these files: | ||
| 1841 | @code{blocksort.c}, @code{bzlib.c}, @code{compress.c}, | ||
| 1842 | @code{crctable.c}, @code{decompress.c}, @code{huffman.c}, @* | ||
| 1843 | @code{randtable.c} and @code{libbz2.def}. You will also need | ||
| 1844 | to name the header files @code{bzlib.h} and @code{bzlib_private.h}. | ||
| 1845 | |||
| 1846 | If you don't use VC++, you may need to define the preprocessor symbol | ||
| 1847 | @code{_WIN32}. | ||
| 1848 | |||
| 1849 | Finally, @code{dlltest.c} is a sample program using the DLL. It has a | ||
| 1850 | project file, @code{dlltest.dsp}. | ||
| 1851 | |||
| 1852 | If you just want a makefile for Visual C, have a look at | ||
| 1853 | @code{makefile.msc}. | ||
| 1854 | |||
| 1855 | Be aware that if you compile @code{bzip2} itself on Win32, you must set | ||
| 1856 | @code{BZ_UNIX} to 0 and @code{BZ_LCCWIN32} to 1, in the file | ||
| 1857 | @code{bzip2.c}, before compiling. Otherwise the resulting binary won't | ||
| 1858 | work correctly. | ||
| 1859 | |||
| 1860 | I haven't tried any of this stuff myself, but it all looks plausible. | ||
| 1861 | |||
| 1862 | |||
| 1863 | |||
| 1864 | @chapter Miscellanea | ||
| 1865 | |||
| 1866 | These are just some random thoughts of mine. Your mileage may | ||
| 1867 | vary. | ||
| 1868 | |||
| 1869 | @section Limitations of the compressed file format | ||
| 1870 | @code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0} | ||
| 1871 | use exactly the same file format as the previous | ||
| 1872 | version, @code{bzip2-0.1}. This decision was made in the interests of | ||
| 1873 | stability. Creating yet another incompatible compressed file format | ||
| 1874 | would create further confusion and disruption for users. | ||
| 1875 | |||
| 1876 | Nevertheless, this is not a painless decision. Development | ||
| 1877 | work since the release of @code{bzip2-0.1} in August 1997 | ||
| 1878 | has shown complexities in the file format which slow down | ||
| 1879 | decompression and, in retrospect, are unnecessary. These are: | ||
| 1880 | @itemize @bullet | ||
| 1881 | @item The run-length encoder, which is the first of the | ||
| 1882 | compression transformations, is entirely irrelevant. | ||
| 1883 | The original purpose was to protect the sorting algorithm | ||
| 1884 | from the very worst case input: a string of repeated | ||
| 1885 | symbols. But algorithm steps Q6a and Q6b in the original | ||
| 1886 | Burrows-Wheeler technical report (SRC-124) show how | ||
| 1887 | repeats can be handled without difficulty in block | ||
| 1888 | sorting. | ||
| 1889 | @item The randomisation mechanism doesn't really need to be | ||
| 1890 | there. Udi Manber and Gene Myers published a suffix | ||
| 1891 | array construction algorithm a few years back, which | ||
| 1892 | can be employed to sort any block, no matter how | ||
| 1893 | repetitive, in O(N log N) time. Subsequent work by | ||
| 1894 | Kunihiko Sadakane has produced a derivative O(N (log N)^2) | ||
| 1895 | algorithm which usually outperforms the Manber-Myers | ||
| 1896 | algorithm. | ||
| 1897 | |||
| 1898 | I could have changed to Sadakane's algorithm, but I find | ||
| 1899 | it to be slower than @code{bzip2}'s existing algorithm for | ||
| 1900 | most inputs, and the randomisation mechanism protects | ||
| 1901 | adequately against bad cases. I didn't think it was | ||
| 1902 | a good tradeoff to make. Partly this is due to the fact | ||
| 1903 | that I was not flooded with email complaints about | ||
| 1904 | @code{bzip2-0.1}'s performance on repetitive data, so | ||
| 1905 | perhaps it isn't a problem for real inputs. | ||
| 1906 | |||
| 1907 | Probably the best long-term solution, | ||
| 1908 | and the one I have incorporated into 0.9.5 and above, | ||
| 1909 | is to use the existing sorting | ||
| 1910 | algorithm initially, and fall back to an O(N (log N)^2) | ||
| 1911 | algorithm if the standard algorithm gets into difficulties. | ||
| 1912 | @item The compressed file format was never designed to be | ||
| 1913 | handled by a library, and I have had to jump through | ||
| 1914 | some hoops to produce an efficient implementation of | ||
| 1915 | decompression. It's a bit hairy. Try passing | ||
| 1916 | @code{decompress.c} through the C preprocessor | ||
| 1917 | and you'll see what I mean. Much of this complexity | ||
| 1918 | could have been avoided if the compressed size of | ||
| 1919 | each block of data was recorded in the data stream. | ||
| 1920 | @item An Adler-32 checksum, rather than a CRC32 checksum, | ||
| 1921 | would be faster to compute. | ||
| 1922 | @end itemize | ||
| 1923 | It would be fair to say that the @code{bzip2} format was frozen | ||
| 1924 | before I properly and fully understood the performance | ||
| 1925 | consequences of doing so. | ||
| 1926 | |||
| 1927 | Improvements which I was able to incorporate into | ||
| 1928 | 0.9.0, despite using the same file format, are: | ||
| 1929 | @itemize @bullet | ||
| 1930 | @item Single array implementation of the inverse BWT. This | ||
| 1931 | significantly speeds up decompression, presumably | ||
| 1932 | because it reduces the number of cache misses. | ||
| 1933 | @item Faster inverse MTF transform for large MTF values. The | ||
| 1934 | new implementation is based on the notion of sliding blocks | ||
| 1935 | of values. | ||
| 1936 | @item @code{bzip2-0.9.0} now reads and writes files with @code{fread} | ||
| 1937 | and @code{fwrite}; version 0.1 used @code{putc} and @code{getc}. | ||
| 1938 | Duh! Well, you live and learn. | ||
| 1939 | |||
| 1940 | @end itemize | ||
| 1941 | Further ahead, it would be nice | ||
| 1942 | to be able to do random access into files. This will | ||
| 1943 | require some careful design of compressed file formats. | ||
| 1944 | |||
| 1945 | |||
| 1946 | |||
| 1947 | @section Portability issues | ||
| 1948 | After some consideration, I have decided not to use | ||
| 1949 | GNU @code{autoconf} to configure 0.9.5 or 1.0. | ||
| 1950 | |||
| 1951 | @code{autoconf}, admirable and wonderful though it is, | ||
| 1952 | mainly assists with portability problems between Unix-like | ||
| 1953 | platforms. But @code{bzip2} doesn't have much in the way | ||
| 1954 | of portability problems on Unix; most of the difficulties appear | ||
| 1955 | when porting to the Mac, or to Microsoft's operating systems. | ||
| 1956 | @code{autoconf} doesn't help in those cases, and brings in a | ||
| 1957 | whole load of new complexity. | ||
| 1958 | |||
| 1959 | Most people should be able to compile the library and program | ||
| 1960 | under Unix straight out-of-the-box, so to speak, especially | ||
| 1961 | if you have a version of GNU C available. | ||
| 1962 | |||
| 1963 | There are a couple of @code{__inline__} directives in the code. GNU C | ||
| 1964 | (@code{gcc}) should be able to handle them. If you're not using | ||
| 1965 | GNU C, your C compiler shouldn't see them at all. | ||
| 1966 | If your compiler does, for some reason, see them and doesn't | ||
| 1967 | like them, just @code{#define} @code{__inline__} to be @code{/* */}. One | ||
| 1968 | easy way to do this is to compile with the flag @code{-D__inline__=}, | ||
| 1969 | which should be understood by most Unix compilers. | ||
| 1970 | |||
| 1971 | If you still have difficulties, try compiling with the macro | ||
| 1972 | @code{BZ_STRICT_ANSI} defined. This should enable you to build the | ||
| 1973 | library in a strictly ANSI compliant environment. Building the program | ||
| 1974 | itself like this is dangerous and not supported, since you remove | ||
| 1975 | @code{bzip2}'s checks against compressing directories, symbolic links, | ||
| 1976 | devices, and other not-really-a-file entities. This could cause | ||
| 1977 | filesystem corruption! | ||
| 1978 | |||
| 1979 | One other thing: if you create a @code{bzip2} binary for public | ||
| 1980 | distribution, please try and link it statically (@code{gcc -static}). This | ||
| 1981 | avoids all sorts of library-version issues that others may encounter | ||
| 1982 | later on. | ||
| 1983 | |||
| 1984 | If you build @code{bzip2} on Win32, you must set @code{BZ_UNIX} to 0 and | ||
| 1985 | @code{BZ_LCCWIN32} to 1, in the file @code{bzip2.c}, before compiling. | ||
| 1986 | Otherwise the resulting binary won't work correctly. | ||
| 1987 | |||
| 1988 | |||
| 1989 | |||
| 1990 | @section Reporting bugs | ||
| 1991 | I tried pretty hard to make sure @code{bzip2} is | ||
| 1992 | bug free, both by design and by testing. Hopefully | ||
| 1993 | you'll never need to read this section for real. | ||
| 1994 | |||
| 1995 | Nevertheless, if @code{bzip2} dies with a segmentation | ||
| 1996 | fault, a bus error or an internal assertion failure, it | ||
| 1997 | will ask you to email me a bug report. Experience with | ||
| 1998 | version 0.1 shows that almost all these problems can | ||
| 1999 | be traced to either compiler bugs or hardware problems. | ||
| 2000 | @itemize @bullet | ||
| 2001 | @item | ||
| 2002 | Recompile the program with no optimisation, and see if it | ||
| 2003 | works. And/or try a different compiler. | ||
| 2004 | I heard all sorts of stories about various flavours | ||
| 2005 | of GNU C (and other compilers) generating bad code for | ||
| 2006 | @code{bzip2}, and I've run across two such examples myself. | ||
| 2007 | |||
| 2008 | 2.7.X versions of GNU C are known to generate bad code from | ||
| 2009 | time to time, at high optimisation levels. | ||
| 2010 | If you get problems, try using the flags | ||
| 2011 | @code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}. | ||
| 2012 | You should specifically @emph{not} use @code{-funroll-loops}. | ||
| 2013 | |||
| 2014 | You may notice that the Makefile runs six tests as part of | ||
| 2015 | the build process. If the program passes all of these, it's | ||
| 2016 | a pretty good (but not 100%) indication that the compiler has | ||
| 2017 | done its job correctly. | ||
| 2018 | @item | ||
| 2019 | If @code{bzip2} crashes randomly, and the crashes are not | ||
| 2020 | repeatable, you may have a flaky memory subsystem. @code{bzip2} | ||
| 2021 | really hammers your memory hierarchy, and if it's a bit marginal, | ||
| 2022 | you may get these problems. Ditto if your disk or I/O subsystem | ||
| 2023 | is slowly failing. Yup, this really does happen. | ||
| 2024 | |||
| 2025 | Try using a different machine of the same type, and see if | ||
| 2026 | you can repeat the problem. | ||
| 2027 | @item This isn't really a bug, but ... If @code{bzip2} tells | ||
| 2028 | you your file is corrupted on decompression, and you | ||
| 2029 | obtained the file via FTP, there is a possibility that you | ||
| 2030 | forgot to tell FTP to do a binary mode transfer. That absolutely | ||
| 2031 | will cause the file to be non-decompressible. You'll have to transfer | ||
| 2032 | it again. | ||
| 2033 | @end itemize | ||
| 2034 | |||
| 2035 | If you've incorporated @code{libbzip2} into your own program | ||
| 2036 | and are getting problems, please, please, please, check that the | ||
| 2037 | parameters you are passing in calls to the library, are | ||
| 2038 | correct, and in accordance with what the documentation says | ||
| 2039 | is allowable. I have tried to make the library robust against | ||
| 2040 | such problems, but I'm sure I haven't succeeded. | ||
| 2041 | |||
| 2042 | Finally, if the above comments don't help, you'll have to send | ||
| 2043 | me a bug report. Now, it's just amazing how many people will | ||
| 2044 | send me a bug report saying something like | ||
| 2045 | @display | ||
| 2046 | bzip2 crashed with segmentation fault on my machine | ||
| 2047 | @end display | ||
| 2048 | and absolutely nothing else. Needless to say, such a report | ||
| 2049 | is @emph{totally, utterly, completely and comprehensively 100% useless; | ||
| 2050 | a waste of your time, my time, and net bandwidth}. | ||
| 2051 | With no details at all, there's no way I can possibly begin | ||
| 2052 | to figure out what the problem is. | ||
| 2053 | |||
| 2054 | The rules of the game are: facts, facts, facts. Don't omit | ||
| 2055 | them because "oh, they won't be relevant". At the bare | ||
| 2056 | minimum: | ||
| 2057 | @display | ||
| 2058 | Machine type. Operating system version. | ||
| 2059 | Exact version of @code{bzip2} (do @code{bzip2 -V}). | ||
| 2060 | Exact version of the compiler used. | ||
| 2061 | Flags passed to the compiler. | ||
| 2062 | @end display | ||
| 2063 | However, the most important single thing that will help me is | ||
| 2064 | the file that you were trying to compress or decompress at the | ||
| 2065 | time the problem happened. Without that, my ability to do anything | ||
| 2066 | more than speculate about the cause, is limited. | ||
| 2067 | |||
| 2068 | Please remember that I connect to the Internet with a modem, so | ||
| 2069 | you should contact me before mailing me huge files. | ||
| 2070 | |||
| 2071 | |||
| 2072 | @section Did you get the right package? | ||
| 2073 | |||
| 2074 | @code{bzip2} is a resource hog. It soaks up large amounts of CPU cycles | ||
| 2075 | and memory. Also, it gives very large latencies. In the worst case, you | ||
| 2076 | can feed many megabytes of uncompressed data into the library before | ||
| 2077 | getting any compressed output, so this probably rules out applications | ||
| 2078 | requiring interactive behaviour. | ||
| 2079 | |||
| 2080 | These aren't faults of my implementation, I hope, but more | ||
| 2081 | an intrinsic property of the Burrows-Wheeler transform (unfortunately). | ||
| 2082 | Maybe this isn't what you want. | ||
| 2083 | |||
| 2084 | If you want a compressor and/or library which is faster, uses less | ||
| 2085 | memory but gets pretty good compression, and has minimal latency, | ||
| 2086 | consider Jean-loup | ||
| 2087 | Gailly's and Mark Adler's work, @code{zlib-1.1.3} and | ||
| 2088 | @code{gzip-1.2.4}. Look for them at | ||
| 2089 | |||
| 2090 | @code{http://www.zlib.org} and | ||
| 2091 | @code{http://www.gzip.org} respectively. | ||
| 2092 | |||
| 2093 | For something faster and lighter still, you might try Markus F X J | ||
| 2094 | Oberhumer's @code{LZO} real-time compression/decompression library, at | ||
| 2095 | @* @code{http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html}. | ||
| 2096 | |||
| 2097 | If you want to use the @code{bzip2} algorithms to compress small blocks | ||
| 2098 | of data, 64k bytes or smaller, for example on an on-the-fly disk | ||
| 2099 | compressor, you'd be well advised not to use this library. Instead, | ||
| 2100 | I've made a special library tuned for that kind of use. It's part of | ||
| 2101 | @code{e2compr-0.40}, an on-the-fly disk compressor for the Linux | ||
| 2102 | @code{ext2} filesystem. Look at | ||
| 2103 | @code{http://www.netspace.net.au/~reiter/e2compr}. | ||
| 2104 | |||
| 2105 | |||
| 2106 | |||
| 2107 | @section Testing | ||
| 2108 | |||
| 2109 | A record of the tests I've done. | ||
| 2110 | |||
| 2111 | First, some data sets: | ||
| 2112 | @itemize @bullet | ||
| 2113 | @item B: a directory containing 6001 files, one for every length in the | ||
| 2114 | range 0 to 6000 bytes. The files contain random lowercase | ||
| 2115 | letters. 18.7 megabytes. | ||
| 2116 | @item H: my home directory tree. Documents, source code, mail files, | ||
| 2117 | compressed data. H contains B, and also a directory of | ||
| 2118 | files designed as boundary cases for the sorting; mostly very | ||
| 2119 | repetitive, nasty files. 565 megabytes. | ||
| 2120 | @item A: directory tree holding various applications built from source: | ||
| 2121 | @code{egcs}, @code{gcc-2.8.1}, KDE, GTK, Octave, etc. | ||
| 2122 | 2200 megabytes. | ||
| 2123 | @end itemize | ||
| 2124 | The tests conducted are as follows. Each test means compressing | ||
| 2125 | (a copy of) each file in the data set, decompressing it and | ||
| 2126 | comparing it against the original. | ||
| 2127 | |||
| 2128 | First, a bunch of tests with block sizes and internal buffer | ||
| 2129 | sizes set very small, | ||
| 2130 | to detect any problems with the | ||
| 2131 | blocking and buffering mechanisms. | ||
| 2132 | This required modifying the source code so as to try to | ||
| 2133 | break it. | ||
| 2134 | @enumerate | ||
| 2135 | @item Data set H, with | ||
| 2136 | buffer size of 1 byte, and block size of 23 bytes. | ||
| 2137 | @item Data set B, buffer sizes 1 byte, block size 1 byte. | ||
| 2138 | @item As (2) but small-mode decompression. | ||
| 2139 | @item As (2) with block size 2 bytes. | ||
| 2140 | @item As (2) with block size 3 bytes. | ||
| 2141 | @item As (2) with block size 4 bytes. | ||
| 2142 | @item As (2) with block size 5 bytes. | ||
| 2143 | @item As (2) with block size 6 bytes and small-mode decompression. | ||
| 2144 | @item H with buffer size of 1 byte, but normal block | ||
| 2145 | size (up to 900000 bytes). | ||
| 2146 | @end enumerate | ||
| 2147 | Then some tests with unmodified source code. | ||
| 2148 | @enumerate | ||
| 2149 | @item H, all settings normal. | ||
| 2150 | @item As (1), with small-mode decompress. | ||
| 2151 | @item H, compress with flag @code{-1}. | ||
| 2152 | @item H, compress with flag @code{-s}, decompress with flag @code{-s}. | ||
| 2153 | @item Forwards compatibility: H, @code{bzip2-0.1pl2} compressing, | ||
| 2154 | @code{bzip2-0.9.5} decompressing, all settings normal. | ||
| 2155 | @item Backwards compatibility: H, @code{bzip2-0.9.5} compressing, | ||
| 2156 | @code{bzip2-0.1pl2} decompressing, all settings normal. | ||
| 2157 | @item Bigger tests: A, all settings normal. | ||
| 2158 | @item As (7), using the fallback (Sadakane-like) sorting algorithm. | ||
| 2159 | @item As (8), compress with flag @code{-1}, decompress with flag | ||
| 2160 | @code{-s}. | ||
| 2161 | @item H, using the fallback sorting algorithm. | ||
| 2162 | @item Forwards compatibility: A, @code{bzip2-0.1pl2} compressing, | ||
| 2163 | @code{bzip2-0.9.5} decompressing, all settings normal. | ||
| 2164 | @item Backwards compatibility: A, @code{bzip2-0.9.5} compressing, | ||
| 2165 | @code{bzip2-0.1pl2} decompressing, all settings normal. | ||
| 2166 | @item Misc test: about 400 megabytes of @code{.tar} files with | ||
| 2167 | @code{bzip2} compiled with Checker (a memory access error | ||
| 2168 | detector, like Purify). | ||
| 2169 | @item Misc tests to make sure it builds and runs ok on non-Linux/x86 | ||
| 2170 | platforms. | ||
| 2171 | @end enumerate | ||
| 2172 | These tests were conducted on a 225 MHz IDT WinChip machine, running | ||
| 2173 | Linux 2.0.36. They represent nearly a week of continuous computation. | ||
| 2174 | All tests completed successfully. | ||
| 2175 | |||
| 2176 | |||
| 2177 | @section Further reading | ||
| 2178 | @code{bzip2} is not research work, in the sense that it doesn't present | ||
| 2179 | any new ideas. Rather, it's an engineering exercise based on existing | ||
| 2180 | ideas. | ||
| 2181 | |||
| 2182 | Four documents describe essentially all the ideas behind @code{bzip2}: | ||
| 2183 | @example | ||
| 2184 | Michael Burrows and D. J. Wheeler: | ||
| 2185 | "A block-sorting lossless data compression algorithm" | ||
| 2186 | 10th May 1994. | ||
| 2187 | Digital SRC Research Report 124. | ||
| 2188 | ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz | ||
| 2189 | If you have trouble finding it, try searching at the | ||
| 2190 | New Zealand Digital Library, http://www.nzdl.org. | ||
| 2191 | |||
| 2192 | Daniel S. Hirschberg and Debra A. LeLewer | ||
| 2193 | "Efficient Decoding of Prefix Codes" | ||
| 2194 | Communications of the ACM, April 1990, Vol 33, Number 4. | ||
| 2195 | You might be able to get an electronic copy of this | ||
| 2196 | from the ACM Digital Library. | ||
| 2197 | |||
| 2198 | David J. Wheeler | ||
| 2199 | Program bred3.c and accompanying document bred3.ps. | ||
| 2200 | This contains the idea behind the multi-table Huffman | ||
| 2201 | coding scheme. | ||
| 2202 | ftp://ftp.cl.cam.ac.uk/users/djw3/ | ||
| 2203 | |||
| 2204 | Jon L. Bentley and Robert Sedgewick | ||
| 2205 | "Fast Algorithms for Sorting and Searching Strings" | ||
| 2206 | Available from Sedgewick's web page, | ||
| 2207 | www.cs.princeton.edu/~rs | ||
| 2208 | @end example | ||
| 2209 | The following paper gives valuable additional insights into the | ||
| 2210 | algorithm, but is not immediately the basis of any code | ||
| 2211 | used in bzip2. | ||
| 2212 | @example | ||
| 2213 | Peter Fenwick: | ||
| 2214 | Block Sorting Text Compression | ||
| 2215 | Proceedings of the 19th Australasian Computer Science Conference, | ||
| 2216 | Melbourne, Australia. Jan 31 - Feb 2, 1996. | ||
| 2217 | ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps | ||
| 2218 | @end example | ||
| 2219 | Kunihiko Sadakane's sorting algorithm, mentioned above, | ||
| 2220 | is available from: | ||
| 2221 | @example | ||
| 2222 | http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz | ||
| 2223 | @end example | ||
| 2224 | The Manber-Myers suffix array construction | ||
| 2225 | algorithm is described in a paper | ||
| 2226 | available from: | ||
| 2227 | @example | ||
| 2228 | http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps | ||
| 2229 | @end example | ||
| 2230 | Finally, the following paper documents some recent investigations | ||
| 2231 | I made into the performance of sorting algorithms: | ||
| 2232 | @example | ||
| 2233 | Julian Seward: | ||
| 2234 | On the Performance of BWT Sorting Algorithms | ||
| 2235 | Proceedings of the IEEE Data Compression Conference 2000 | ||
| 2236 | Snowbird, Utah. 28-30 March 2000. | ||
| 2237 | @end example | ||
| 2238 | |||
| 2239 | |||
| 2240 | @contents | ||
| 2241 | |||
| 2242 | @bye | ||
| 2243 | |||
diff --git a/manual.xml b/manual.xml new file mode 100644 index 0000000..1ab5bd7 --- /dev/null +++ b/manual.xml | |||
| @@ -0,0 +1,2966 @@ | |||
| 1 | <?xml version="1.0"?> <!-- -*- sgml -*- --> | ||
| 2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
| 3 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"[ | ||
| 4 | |||
| 5 | <!-- various strings, dates etc. common to all docs --> | ||
| 6 | <!ENTITY % common-ents SYSTEM "entities.xml"> %common-ents; | ||
| 7 | ]> | ||
| 8 | |||
| 9 | <book lang="en" id="userman" xreflabel="bzip2 Manual"> | ||
| 10 | |||
| 11 | <bookinfo> | ||
| 12 | <title>bzip2 and libbzip2, version 1.0.3</title> | ||
| 13 | <subtitle>A program and library for data compression</subtitle> | ||
| 14 | <copyright> | ||
| 15 | <year>&bz-lifespan;</year> | ||
| 16 | <holder>Julian Seward</holder> | ||
| 17 | </copyright> | ||
| 18 | <releaseinfo>Version &bz-version; of &bz-date;</releaseinfo> | ||
| 19 | |||
| 20 | <authorgroup> | ||
| 21 | <author> | ||
| 22 | <firstname>Julian</firstname> | ||
| 23 | <surname>Seward</surname> | ||
| 24 | <affiliation> | ||
| 25 | <orgname>&bz-url;</orgname> | ||
| 26 | </affiliation> | ||
| 27 | </author> | ||
| 28 | </authorgroup> | ||
| 29 | |||
| 30 | <legalnotice> | ||
| 31 | |||
| 32 | <para>This program, <computeroutput>bzip2</computeroutput>, the | ||
| 33 | associated library <computeroutput>libbzip2</computeroutput>, and | ||
| 34 | all documentation, are copyright © &bz-lifespan; Julian Seward. | ||
| 35 | All rights reserved.</para> | ||
| 36 | |||
| 37 | <para>Redistribution and use in source and binary forms, with | ||
| 38 | or without modification, are permitted provided that the | ||
| 39 | following conditions are met:</para> | ||
| 40 | |||
| 41 | <itemizedlist mark='bullet'> | ||
| 42 | |||
| 43 | <listitem><para>Redistributions of source code must retain the | ||
| 44 | above copyright notice, this list of conditions and the | ||
| 45 | following disclaimer.</para></listitem> | ||
| 46 | |||
| 47 | <listitem><para>The origin of this software must not be | ||
| 48 | misrepresented; you must not claim that you wrote the original | ||
| 49 | software. If you use this software in a product, an | ||
| 50 | acknowledgment in the product documentation would be | ||
| 51 | appreciated but is not required.</para></listitem> | ||
| 52 | |||
| 53 | <listitem><para>Altered source versions must be plainly marked | ||
| 54 | as such, and must not be misrepresented as being the original | ||
| 55 | software.</para></listitem> | ||
| 56 | |||
| 57 | <listitem><para>The name of the author may not be used to | ||
| 58 | endorse or promote products derived from this software without | ||
| 59 | specific prior written permission.</para></listitem> | ||
| 60 | |||
| 61 | </itemizedlist> | ||
| 62 | |||
| 63 | <para>THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY | ||
| 64 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | ||
| 65 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
| 66 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| 67 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
| 68 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
| 69 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 70 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
| 71 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
| 72 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | ||
| 73 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | ||
| 74 | THE POSSIBILITY OF SUCH DAMAGE.</para> | ||
| 75 | |||
| 76 | <para>PATENTS: To the best of my knowledge, | ||
| 77 | <computeroutput>bzip2</computeroutput> and | ||
| 78 | <computeroutput>libbzip2</computeroutput> do not use any patented | ||
| 79 | algorithms. However, I do not have the resources to carry | ||
| 80 | out a patent search. Therefore I cannot give any guarantee of | ||
| 81 | the above statement. | ||
| 82 | </para> | ||
| 83 | |||
| 84 | </legalnotice> | ||
| 85 | |||
| 86 | </bookinfo> | ||
| 87 | |||
| 88 | |||
| 89 | |||
| 90 | <chapter id="intro" xreflabel="Introduction"> | ||
| 91 | <title>Introduction</title> | ||
| 92 | |||
| 93 | <para><computeroutput>bzip2</computeroutput> compresses files | ||
| 94 | using the Burrows-Wheeler block-sorting text compression | ||
| 95 | algorithm, and Huffman coding. Compression is generally | ||
| 96 | considerably better than that achieved by more conventional | ||
| 97 | LZ77/LZ78-based compressors, and approaches the performance of | ||
| 98 | the PPM family of statistical compressors.</para> | ||
| 99 | |||
| 100 | <para><computeroutput>bzip2</computeroutput> is built on top of | ||
| 101 | <computeroutput>libbzip2</computeroutput>, a flexible library for | ||
| 102 | handling compressed data in the | ||
| 103 | <computeroutput>bzip2</computeroutput> format. This manual | ||
| 104 | describes both how to use the program and how to work with the | ||
| 105 | library interface. Most of the manual is devoted to this | ||
| 106 | library, not the program, which is good news if your interest is | ||
| 107 | only in the program.</para> | ||
| 108 | |||
| 109 | <itemizedlist mark='bullet'> | ||
| 110 | |||
| 111 | <listitem><para><xref linkend="using"/> describes how to use | ||
| 112 | <computeroutput>bzip2</computeroutput>; this is the only part | ||
| 113 | you need to read if you just want to know how to operate the | ||
| 114 | program.</para></listitem> | ||
| 115 | |||
| 116 | <listitem><para><xref linkend="libprog"/> describes the | ||
| 117 | programming interfaces in detail, and</para></listitem> | ||
| 118 | |||
| 119 | <listitem><para><xref linkend="misc"/> records some | ||
| 120 | miscellaneous notes which I thought ought to be recorded | ||
| 121 | somewhere.</para></listitem> | ||
| 122 | |||
| 123 | </itemizedlist> | ||
| 124 | |||
| 125 | </chapter> | ||
| 126 | |||
| 127 | |||
| 128 | <chapter id="using" xreflabel="How to use bzip2"> | ||
| 129 | <title>How to use bzip2</title> | ||
| 130 | |||
| 131 | <para>This chapter contains a copy of the | ||
| 132 | <computeroutput>bzip2</computeroutput> man page, and nothing | ||
| 133 | else.</para> | ||
| 134 | |||
| 135 | <sect1 id="name" xreflabel="NAME"> | ||
| 136 | <title>NAME</title> | ||
| 137 | |||
| 138 | <itemizedlist mark='bullet'> | ||
| 139 | |||
| 140 | <listitem><para><computeroutput>bzip2</computeroutput>, | ||
| 141 | <computeroutput>bunzip2</computeroutput> - a block-sorting file | ||
| 142 | compressor, v1.0.3</para></listitem> | ||
| 143 | |||
| 144 | <listitem><para><computeroutput>bzcat</computeroutput> - | ||
| 145 | decompresses files to stdout</para></listitem> | ||
| 146 | |||
| 147 | <listitem><para><computeroutput>bzip2recover</computeroutput> - | ||
| 148 | recovers data from damaged bzip2 files</para></listitem> | ||
| 149 | |||
| 150 | </itemizedlist> | ||
| 151 | |||
| 152 | </sect1> | ||
| 153 | |||
| 154 | |||
| 155 | <sect1 id="synopsis" xreflabel="SYNOPSIS"> | ||
| 156 | <title>SYNOPSIS</title> | ||
| 157 | |||
| 158 | <itemizedlist mark='bullet'> | ||
| 159 | |||
| 160 | <listitem><para><computeroutput>bzip2</computeroutput> [ | ||
| 161 | -cdfkqstvzVL123456789 ] [ filenames ... ]</para></listitem> | ||
| 162 | |||
| 163 | <listitem><para><computeroutput>bunzip2</computeroutput> [ | ||
| 164 | -fkvsVL ] [ filenames ... ]</para></listitem> | ||
| 165 | |||
| 166 | <listitem><para><computeroutput>bzcat</computeroutput> [ -s ] [ | ||
| 167 | filenames ... ]</para></listitem> | ||
| 168 | |||
| 169 | <listitem><para><computeroutput>bzip2recover</computeroutput> | ||
| 170 | filename</para></listitem> | ||
| 171 | |||
| 172 | </itemizedlist> | ||
| 173 | |||
| 174 | </sect1> | ||
| 175 | |||
| 176 | |||
| 177 | <sect1 id="description" xreflabel="DESCRIPTION"> | ||
| 178 | <title>DESCRIPTION</title> | ||
| 179 | |||
| 180 | <para><computeroutput>bzip2</computeroutput> compresses files | ||
| 181 | using the Burrows-Wheeler block sorting text compression | ||
| 182 | algorithm, and Huffman coding. Compression is generally | ||
| 183 | considerably better than that achieved by more conventional | ||
| 184 | LZ77/LZ78-based compressors, and approaches the performance of | ||
| 185 | the PPM family of statistical compressors.</para> | ||
| 186 | |||
| 187 | <para>The command-line options are deliberately very similar to | ||
| 188 | those of GNU <computeroutput>gzip</computeroutput>, but they are | ||
| 189 | not identical.</para> | ||
| 190 | |||
| 191 | <para><computeroutput>bzip2</computeroutput> expects a list of | ||
| 192 | file names to accompany the command-line flags. Each file is | ||
| 193 | replaced by a compressed version of itself, with the name | ||
| 194 | <computeroutput>original_name.bz2</computeroutput>. Each | ||
| 195 | compressed file has the same modification date, permissions, and, | ||
| 196 | when possible, ownership as the corresponding original, so that | ||
| 197 | these properties can be correctly restored at decompression time. | ||
| 198 | File name handling is naive in the sense that there is no | ||
| 199 | mechanism for preserving original file names, permissions, | ||
| 200 | ownerships or dates in filesystems which lack these concepts, or | ||
| 201 | have serious file name length restrictions, such as | ||
| 202 | MS-DOS.</para> | ||
| 203 | |||
| 204 | <para><computeroutput>bzip2</computeroutput> and | ||
| 205 | <computeroutput>bunzip2</computeroutput> will by default not | ||
| 206 | overwrite existing files. If you want this to happen, specify | ||
| 207 | the <computeroutput>-f</computeroutput> flag.</para> | ||
| 208 | |||
| 209 | <para>If no file names are specified, | ||
| 210 | <computeroutput>bzip2</computeroutput> compresses from standard | ||
| 211 | input to standard output. In this case, | ||
| 212 | <computeroutput>bzip2</computeroutput> will decline to write | ||
| 213 | compressed output to a terminal, as this would be entirely | ||
| 214 | incomprehensible and therefore pointless.</para> | ||
| 215 | |||
| 216 | <para><computeroutput>bunzip2</computeroutput> (or | ||
| 217 | <computeroutput>bzip2 -d</computeroutput>) decompresses all | ||
| 218 | specified files. Files which were not created by | ||
| 219 | <computeroutput>bzip2</computeroutput> will be detected and | ||
| 220 | ignored, and a warning issued. | ||
| 221 | <computeroutput>bzip2</computeroutput> attempts to guess the | ||
| 222 | filename for the decompressed file from that of the compressed | ||
| 223 | file as follows:</para> | ||
| 224 | |||
| 225 | <itemizedlist mark='bullet'> | ||
| 226 | |||
| 227 | <listitem><para><computeroutput>filename.bz2 </computeroutput> | ||
| 228 | becomes | ||
| 229 | <computeroutput>filename</computeroutput></para></listitem> | ||
| 230 | |||
| 231 | <listitem><para><computeroutput>filename.bz </computeroutput> | ||
| 232 | becomes | ||
| 233 | <computeroutput>filename</computeroutput></para></listitem> | ||
| 234 | |||
| 235 | <listitem><para><computeroutput>filename.tbz2</computeroutput> | ||
| 236 | becomes | ||
| 237 | <computeroutput>filename.tar</computeroutput></para></listitem> | ||
| 238 | |||
| 239 | <listitem><para><computeroutput>filename.tbz </computeroutput> | ||
| 240 | becomes | ||
| 241 | <computeroutput>filename.tar</computeroutput></para></listitem> | ||
| 242 | |||
| 243 | <listitem><para><computeroutput>anyothername </computeroutput> | ||
| 244 | becomes | ||
| 245 | <computeroutput>anyothername.out</computeroutput></para></listitem> | ||
| 246 | |||
| 247 | </itemizedlist> | ||
| 248 | |||
| 249 | <para>If the file does not end in one of the recognised endings, | ||
| 250 | <computeroutput>.bz2</computeroutput>, | ||
| 251 | <computeroutput>.bz</computeroutput>, | ||
| 252 | <computeroutput>.tbz2</computeroutput> or | ||
| 253 | <computeroutput>.tbz</computeroutput>, | ||
| 254 | <computeroutput>bzip2</computeroutput> complains that it cannot | ||
| 255 | guess the name of the original file, and uses the original name | ||
| 256 | with <computeroutput>.out</computeroutput> appended.</para> | ||
| 257 | |||
| 258 | <para>As with compression, supplying no filenames causes | ||
| 259 | decompression from standard input to standard output.</para> | ||
| 260 | |||
| 261 | <para><computeroutput>bunzip2</computeroutput> will correctly | ||
| 262 | decompress a file which is the concatenation of two or more | ||
| 263 | compressed files. The result is the concatenation of the | ||
| 264 | corresponding uncompressed files. Integrity testing | ||
| 265 | (<computeroutput>-t</computeroutput>) of concatenated compressed | ||
| 266 | files is also supported.</para> | ||
| 267 | |||
| 268 | <para>You can also compress or decompress files to the standard | ||
| 269 | output by giving the <computeroutput>-c</computeroutput> flag. | ||
| 270 | Multiple files may be compressed and decompressed like this. The | ||
| 271 | resulting outputs are fed sequentially to stdout. Compression of | ||
| 272 | multiple files in this manner generates a stream containing | ||
| 273 | multiple compressed file representations. Such a stream can be | ||
| 274 | decompressed correctly only by | ||
| 275 | <computeroutput>bzip2</computeroutput> version 0.9.0 or later. | ||
| 276 | Earlier versions of <computeroutput>bzip2</computeroutput> will | ||
| 277 | stop after decompressing the first file in the stream.</para> | ||
| 278 | |||
| 279 | <para><computeroutput>bzcat</computeroutput> (or | ||
| 280 | <computeroutput>bzip2 -dc</computeroutput>) decompresses all | ||
| 281 | specified files to the standard output.</para> | ||
| 282 | |||
| 283 | <para><computeroutput>bzip2</computeroutput> will read arguments | ||
| 284 | from the environment variables | ||
| 285 | <computeroutput>BZIP2</computeroutput> and | ||
| 286 | <computeroutput>BZIP</computeroutput>, in that order, and will | ||
| 287 | process them before any arguments read from the command line. | ||
| 288 | This gives a convenient way to supply default arguments.</para> | ||
| 289 | |||
| 290 | <para>Compression is always performed, even if the compressed | ||
| 291 | file is slightly larger than the original. Files of less than | ||
| 292 | about one hundred bytes tend to get larger, since the compression | ||
| 293 | mechanism has a constant overhead in the region of 50 bytes. | ||
| 294 | Random data (including the output of most file compressors) is | ||
| 295 | coded at about 8.05 bits per byte, giving an expansion of around | ||
| 296 | 0.5%.</para> | ||
| 297 | |||
| 298 | <para>As a self-check for your protection, | ||
| 299 | <computeroutput>bzip2</computeroutput> uses 32-bit CRCs to make | ||
| 300 | sure that the decompressed version of a file is identical to the | ||
| 301 | original. This guards against corruption of the compressed data, | ||
| 302 | and against undetected bugs in | ||
| 303 | <computeroutput>bzip2</computeroutput> (hopefully very unlikely). | ||
| 304 | The chance of data corruption going undetected is microscopic, | ||
| 305 | about one chance in four billion for each file processed. Be | ||
| 306 | aware, though, that the check occurs upon decompression, so it | ||
| 307 | can only tell you that something is wrong. It can't help you | ||
| 308 | recover the original uncompressed data. You can use | ||
| 309 | <computeroutput>bzip2recover</computeroutput> to try to recover | ||
| 310 | data from damaged files.</para> | ||
| 311 | |||
| 312 | <para>Return values: 0 for a normal exit, 1 for environmental | ||
| 313 | problems (file not found, invalid flags, I/O errors, etc.), 2 | ||
| 314 | to indicate a corrupt compressed file, 3 for an internal | ||
| 315 | consistency error (e.g., a bug) which caused | ||
| 316 | <computeroutput>bzip2</computeroutput> to panic.</para> | ||
| 317 | |||
| 318 | </sect1> | ||
| 319 | |||
| 320 | |||
| 321 | <sect1 id="options" xreflabel="OPTIONS"> | ||
| 322 | <title>OPTIONS</title> | ||
| 323 | |||
| 324 | <variablelist> | ||
| 325 | |||
| 326 | <varlistentry> | ||
| 327 | <term><computeroutput>-c --stdout</computeroutput></term> | ||
| 328 | <listitem><para>Compress or decompress to standard | ||
| 329 | output.</para></listitem> | ||
| 330 | </varlistentry> | ||
| 331 | |||
| 332 | <varlistentry> | ||
| 333 | <term><computeroutput>-d --decompress</computeroutput></term> | ||
| 334 | <listitem><para>Force decompression. | ||
| 335 | <computeroutput>bzip2</computeroutput>, | ||
| 336 | <computeroutput>bunzip2</computeroutput> and | ||
| 337 | <computeroutput>bzcat</computeroutput> are really the same | ||
| 338 | program, and the decision about what actions to take is made on | ||
| 339 | the basis of which name is used. This flag overrides that | ||
| 340 | mechanism, and forces bzip2 to decompress.</para></listitem> | ||
| 341 | </varlistentry> | ||
| 342 | |||
| 343 | <varlistentry> | ||
| 344 | <term><computeroutput>-z --compress</computeroutput></term> | ||
| 345 | <listitem><para>The complement to | ||
| 346 | <computeroutput>-d</computeroutput>: forces compression, | ||
| 347 | regardless of the invocation name.</para></listitem> | ||
| 348 | </varlistentry> | ||
| 349 | |||
| 350 | <varlistentry> | ||
| 351 | <term><computeroutput>-t --test</computeroutput></term> | ||
| 352 | <listitem><para>Check integrity of the specified file(s), but | ||
| 353 | don't decompress them. This really performs a trial | ||
| 354 | decompression and throws away the result.</para></listitem> | ||
| 355 | </varlistentry> | ||
| 356 | |||
| 357 | <varlistentry> | ||
| 358 | <term><computeroutput>-f --force</computeroutput></term> | ||
| 359 | <listitem><para>Force overwrite of output files. Normally, | ||
| 360 | <computeroutput>bzip2</computeroutput> will not overwrite | ||
| 361 | existing output files. Also forces | ||
| 362 | <computeroutput>bzip2</computeroutput> to break hard links to | ||
| 363 | files, which it otherwise wouldn't do.</para> | ||
| 364 | <para><computeroutput>bzip2</computeroutput> normally declines | ||
| 365 | to decompress files which don't have the correct magic header | ||
| 366 | bytes. If forced (<computeroutput>-f</computeroutput>), | ||
| 367 | however, it will pass such files through unmodified. This is | ||
| 368 | how GNU <computeroutput>gzip</computeroutput> behaves.</para> | ||
| 369 | </listitem> | ||
| 370 | </varlistentry> | ||
| 371 | |||
| 372 | <varlistentry> | ||
| 373 | <term><computeroutput>-k --keep</computeroutput></term> | ||
| 374 | <listitem><para>Keep (don't delete) input files during | ||
| 375 | compression or decompression.</para></listitem> | ||
| 376 | </varlistentry> | ||
| 377 | |||
| 378 | <varlistentry> | ||
| 379 | <term><computeroutput>-s --small</computeroutput></term> | ||
| 380 | <listitem><para>Reduce memory usage, for compression, | ||
| 381 | decompression and testing. Files are decompressed and tested | ||
| 382 | using a modified algorithm which only requires 2.5 bytes per | ||
| 383 | block byte. This means any file can be decompressed in 2300k | ||
| 384 | of memory, albeit at about half the normal speed.</para> | ||
| 385 | <para>During compression, <computeroutput>-s</computeroutput> | ||
| 386 | selects a block size of 200k, which limits memory use to around | ||
| 387 | the same figure, at the expense of your compression ratio. In | ||
| 388 | short, if your machine is low on memory (8 megabytes or less), | ||
| 389 | use <computeroutput>-s</computeroutput> for everything. See | ||
| 390 | <xref linkend="memory-management"/> below.</para></listitem> | ||
| 391 | </varlistentry> | ||
| 392 | |||
| 393 | <varlistentry> | ||
| 394 | <term><computeroutput>-q --quiet</computeroutput></term> | ||
| 395 | <listitem><para>Suppress non-essential warning messages. | ||
| 396 | Messages pertaining to I/O errors and other critical events | ||
| 397 | will not be suppressed.</para></listitem> | ||
| 398 | </varlistentry> | ||
| 399 | |||
| 400 | <varlistentry> | ||
| 401 | <term><computeroutput>-v --verbose</computeroutput></term> | ||
| 402 | <listitem><para>Verbose mode -- show the compression ratio for | ||
| 403 | each file processed. Further | ||
| 404 | <computeroutput>-v</computeroutput>'s increase the verbosity | ||
| 405 | level, spewing out lots of information which is primarily of | ||
| 406 | interest for diagnostic purposes.</para></listitem> | ||
| 407 | </varlistentry> | ||
| 408 | |||
| 409 | <varlistentry> | ||
| 410 | <term><computeroutput>-L --license -V --version</computeroutput></term> | ||
| 411 | <listitem><para>Display the software version, license terms and | ||
| 412 | conditions.</para></listitem> | ||
| 413 | </varlistentry> | ||
| 414 | |||
| 415 | <varlistentry> | ||
| 416 | <term><computeroutput>-1</computeroutput> (or | ||
| 417 | <computeroutput>--fast</computeroutput>) to | ||
| 418 | <computeroutput>-9</computeroutput> (or | ||
| 419 | <computeroutput>--best</computeroutput>)</term> | ||
| 420 | <listitem><para>Set the block size to 100 k, 200 k ... 900 k | ||
| 421 | when compressing. Has no effect when decompressing. See <xref | ||
| 422 | linkend="memory-management" /> below. The | ||
| 423 | <computeroutput>--fast</computeroutput> and | ||
| 424 | <computeroutput>--best</computeroutput> aliases are primarily | ||
| 425 | for GNU <computeroutput>gzip</computeroutput> compatibility. | ||
| 426 | In particular, <computeroutput>--fast</computeroutput> doesn't | ||
| 427 | make things significantly faster. And | ||
| 428 | <computeroutput>--best</computeroutput> merely selects the | ||
| 429 | default behaviour.</para></listitem> | ||
| 430 | </varlistentry> | ||
| 431 | |||
| 432 | <varlistentry> | ||
| 433 | <term><computeroutput>--</computeroutput></term> | ||
| 434 | <listitem><para>Treats all subsequent arguments as file names, | ||
| 435 | even if they start with a dash. This is so you can handle | ||
| 436 | files with names beginning with a dash, for example: | ||
| 437 | <computeroutput>bzip2 -- | ||
| 438 | -myfilename</computeroutput>.</para></listitem> | ||
| 439 | </varlistentry> | ||
| 440 | |||
| 441 | <varlistentry> | ||
| 442 | <term><computeroutput>--repetitive-fast</computeroutput></term> | ||
| 443 | <term><computeroutput>--repetitive-best</computeroutput></term> | ||
| 444 | <listitem><para>These flags are redundant in versions 0.9.5 and | ||
| 445 | above. They provided some coarse control over the behaviour of | ||
| 446 | the sorting algorithm in earlier versions, which was sometimes | ||
| 447 | useful. 0.9.5 and above have an improved algorithm which | ||
| 448 | renders these flags irrelevant.</para></listitem> | ||
| 449 | </varlistentry> | ||
| 450 | |||
| 451 | </variablelist> | ||
| 452 | |||
| 453 | </sect1> | ||
| 454 | |||
| 455 | |||
| 456 | <sect1 id="memory-management" xreflabel="MEMORY MANAGEMENT"> | ||
| 457 | <title>MEMORY MANAGEMENT</title> | ||
| 458 | |||
| 459 | <para><computeroutput>bzip2</computeroutput> compresses large | ||
| 460 | files in blocks. The block size affects both the compression | ||
| 461 | ratio achieved, and the amount of memory needed for compression | ||
| 462 | and decompression. The flags <computeroutput>-1</computeroutput> | ||
| 463 | through <computeroutput>-9</computeroutput> specify the block | ||
| 464 | size to be 100,000 bytes through 900,000 bytes (the default) | ||
| 465 | respectively. At decompression time, the block size used for | ||
| 466 | compression is read from the header of the compressed file, and | ||
| 467 | <computeroutput>bunzip2</computeroutput> then allocates itself | ||
| 468 | just enough memory to decompress the file. Since block sizes are | ||
| 469 | stored in compressed files, it follows that the flags | ||
| 470 | <computeroutput>-1</computeroutput> to | ||
| 471 | <computeroutput>-9</computeroutput> are irrelevant to and so | ||
| 472 | ignored during decompression.</para> | ||
| 473 | |||
| 474 | <para>Compression and decompression requirements, in bytes, can be | ||
| 475 | estimated as:</para> | ||
| 476 | <programlisting> | ||
| 477 | Compression: 400k + ( 8 x block size ) | ||
| 478 | |||
| 479 | Decompression: 100k + ( 4 x block size ), or | ||
| 480 | 100k + ( 2.5 x block size ) | ||
| 481 | </programlisting> | ||
| 482 | |||
| 483 | <para>Larger block sizes give rapidly diminishing marginal | ||
| 484 | returns. Most of the compression comes from the first two or | ||
| 485 | three hundred k of block size, a fact worth bearing in mind when | ||
| 486 | using <computeroutput>bzip2</computeroutput> on small machines. | ||
| 487 | It is also important to appreciate that the decompression memory | ||
| 488 | requirement is set at compression time by the choice of block | ||
| 489 | size.</para> | ||
| 490 | |||
| 491 | <para>For files compressed with the default 900k block size, | ||
| 492 | <computeroutput>bunzip2</computeroutput> will require about 3700 | ||
| 493 | kbytes to decompress. To support decompression of any file on a | ||
| 494 | 4 megabyte machine, <computeroutput>bunzip2</computeroutput> has | ||
| 495 | an option to decompress using approximately half this amount of | ||
| 496 | memory, about 2300 kbytes. Decompression speed is also halved, | ||
| 497 | so you should use this option only where necessary. The relevant | ||
| 498 | flag is <computeroutput>-s</computeroutput>.</para> | ||
| 499 | |||
| 500 | <para>In general, try and use the largest block size memory | ||
| 501 | constraints allow, since that maximises the compression achieved. | ||
| 502 | Compression and decompression speed are virtually unaffected by | ||
| 503 | block size.</para> | ||
| 504 | |||
| 505 | <para>Another significant point applies to files which fit in a | ||
| 506 | single block -- that means most files you'd encounter using a | ||
| 507 | large block size. The amount of real memory touched is | ||
| 508 | proportional to the size of the file, since the file is smaller | ||
| 509 | than a block. For example, compressing a file 20,000 bytes long | ||
| 510 | with the flag <computeroutput>-9</computeroutput> will cause the | ||
| 511 | compressor to allocate around 7600k of memory, but only touch | ||
| 512 | 400k + 20000 * 8 = 560 kbytes of it. Similarly, the decompressor | ||
| 513 | will allocate 3700k but only touch 100k + 20000 * 4 = 180 | ||
| 514 | kbytes.</para> | ||
| 515 | |||
| 516 | <para>Here is a table which summarises the maximum memory usage | ||
| 517 | for different block sizes. Also recorded is the total compressed | ||
| 518 | size for 14 files of the Calgary Text Compression Corpus | ||
| 519 | totalling 3,141,622 bytes. This column gives some feel for how | ||
| 520 | compression varies with block size. These figures tend to | ||
| 521 | understate the advantage of larger block sizes for larger files, | ||
| 522 | since the Corpus is dominated by smaller files.</para> | ||
| 523 | |||
| 524 | <programlisting> | ||
| 525 | Compress Decompress Decompress Corpus | ||
| 526 | Flag usage usage -s usage Size | ||
| 527 | |||
| 528 | -1 1200k 500k 350k 914704 | ||
| 529 | -2 2000k 900k 600k 877703 | ||
| 530 | -3 2800k 1300k 850k 860338 | ||
| 531 | -4 3600k 1700k 1100k 846899 | ||
| 532 | -5 4400k 2100k 1350k 845160 | ||
| 533 | -6 5200k 2500k 1600k 838626 | ||
| 534 | -7 6000k 2900k 1850k 834096 | ||
| 535 | -8 6800k 3300k 2100k 828642 | ||
| 536 | -9 7600k 3700k 2350k 828642 | ||
| 537 | </programlisting> | ||
| 538 | |||
| 539 | </sect1> | ||
| 540 | |||
| 541 | |||
| 542 | <sect1 id="recovering" xreflabel="RECOVERING DATA FROM DAMAGED FILES"> | ||
| 543 | <title>RECOVERING DATA FROM DAMAGED FILES</title> | ||
| 544 | |||
| 545 | <para><computeroutput>bzip2</computeroutput> compresses files in | ||
| 546 | blocks, usually 900kbytes long. Each block is handled | ||
| 547 | independently. If a media or transmission error causes a | ||
| 548 | multi-block <computeroutput>.bz2</computeroutput> file to become | ||
| 549 | damaged, it may be possible to recover data from the undamaged | ||
| 550 | blocks in the file.</para> | ||
| 551 | |||
| 552 | <para>The compressed representation of each block is delimited by | ||
| 553 | a 48-bit pattern, which makes it possible to find the block | ||
| 554 | boundaries with reasonable certainty. Each block also carries | ||
| 555 | its own 32-bit CRC, so damaged blocks can be distinguished from | ||
| 556 | undamaged ones.</para> | ||
| 557 | |||
| 558 | <para><computeroutput>bzip2recover</computeroutput> is a simple | ||
| 559 | program whose purpose is to search for blocks in | ||
| 560 | <computeroutput>.bz2</computeroutput> files, and write each block | ||
| 561 | out into its own <computeroutput>.bz2</computeroutput> file. You | ||
| 562 | can then use <computeroutput>bzip2 -t</computeroutput> to test | ||
| 563 | the integrity of the resulting files, and decompress those which | ||
| 564 | are undamaged.</para> | ||
| 565 | |||
| 566 | <para><computeroutput>bzip2recover</computeroutput> takes a | ||
| 567 | single argument, the name of the damaged file, and writes a | ||
| 568 | number of files <computeroutput>rec0001file.bz2</computeroutput>, | ||
| 569 | <computeroutput>rec0002file.bz2</computeroutput>, etc, containing | ||
| 570 | the extracted blocks. The output filenames are designed so that | ||
| 571 | the use of wildcards in subsequent processing -- for example, | ||
| 572 | <computeroutput>bzip2 -dc rec*file.bz2 > | ||
| 573 | recovered_data</computeroutput> -- lists the files in the correct | ||
| 574 | order.</para> | ||
| 575 | |||
| 576 | <para><computeroutput>bzip2recover</computeroutput> should be of | ||
| 577 | most use dealing with large <computeroutput>.bz2</computeroutput> | ||
| 578 | files, as these will contain many blocks. It is clearly futile | ||
| 579 | to use it on damaged single-block files, since a damaged block | ||
| 580 | cannot be recovered. If you wish to minimise any potential data | ||
| 581 | loss through media or transmission errors, you might consider | ||
| 582 | compressing with a smaller block size.</para> | ||
| 583 | |||
| 584 | </sect1> | ||
| 585 | |||
| 586 | |||
| 587 | <sect1 id="performance" xreflabel="PERFORMANCE NOTES"> | ||
| 588 | <title>PERFORMANCE NOTES</title> | ||
| 589 | |||
| 590 | <para>The sorting phase of compression gathers together similar | ||
| 591 | strings in the file. Because of this, files containing very long | ||
| 592 | runs of repeated symbols, like "aabaabaabaab ..." (repeated | ||
| 593 | several hundred times) may compress more slowly than normal. | ||
| 594 | Versions 0.9.5 and above fare much better than previous versions | ||
| 595 | in this respect. The ratio between worst-case and average-case | ||
| 596 | compression time is in the region of 10:1. For previous | ||
| 597 | versions, this figure was more like 100:1. You can use the | ||
| 598 | <computeroutput>-vvvv</computeroutput> option to monitor progress | ||
| 599 | in great detail, if you want.</para> | ||
| 600 | |||
| 601 | <para>Decompression speed is unaffected by these | ||
| 602 | phenomena.</para> | ||
| 603 | |||
| 604 | <para><computeroutput>bzip2</computeroutput> usually allocates | ||
| 605 | several megabytes of memory to operate in, and then charges all | ||
| 606 | over it in a fairly random fashion. This means that performance, | ||
| 607 | both for compressing and decompressing, is largely determined by | ||
| 608 | the speed at which your machine can service cache misses. | ||
| 609 | Because of this, small changes to the code to reduce the miss | ||
| 610 | rate have been observed to give disproportionately large | ||
| 611 | performance improvements. I imagine | ||
| 612 | <computeroutput>bzip2</computeroutput> will perform best on | ||
| 613 | machines with very large caches.</para> | ||
| 614 | |||
| 615 | </sect1> | ||
| 616 | |||
| 617 | |||
| 618 | |||
| 619 | <sect1 id="caveats" xreflabel="CAVEATS"> | ||
| 620 | <title>CAVEATS</title> | ||
| 621 | |||
| 622 | <para>I/O error messages are not as helpful as they could be. | ||
| 623 | <computeroutput>bzip2</computeroutput> tries hard to detect I/O | ||
| 624 | errors and exit cleanly, but the details of what the problem is | ||
| 625 | sometimes seem rather misleading.</para> | ||
| 626 | |||
| 627 | <para>This manual page pertains to version &bz-version; of | ||
| 628 | <computeroutput>bzip2</computeroutput>. Compressed data created | ||
| 629 | by this version is entirely forwards and backwards compatible | ||
| 630 | with the previous public releases, versions 0.1pl2, 0.9.0, | ||
| 631 | 0.9.5, 1.0.0, 1.0.1 and 1.0.2, but with the following exception: 0.9.0 | ||
| 632 | and above can correctly decompress multiple concatenated | ||
| 633 | compressed files. 0.1pl2 cannot do this; it will stop after | ||
| 634 | decompressing just the first file in the stream.</para> | ||
| 635 | |||
| 636 | <para><computeroutput>bzip2recover</computeroutput> versions | ||
| 637 | prior to 1.0.2 used 32-bit integers to represent bit positions in | ||
| 638 | compressed files, so it could not handle compressed files more | ||
| 639 | than 512 megabytes long. Versions 1.0.2 and above use 64-bit ints | ||
| 640 | on some platforms which support them (GNU supported targets, and | ||
| 641 | Windows). To establish whether or not | ||
| 642 | <computeroutput>bzip2recover</computeroutput> was built with such | ||
| 643 | a limitation, run it without arguments. In any event you can | ||
| 644 | build yourself an unlimited version if you can recompile it with | ||
| 645 | <computeroutput>MaybeUInt64</computeroutput> set to be an | ||
| 646 | unsigned 64-bit integer.</para> | ||
| 647 | |||
| 648 | </sect1> | ||
| 649 | |||
| 650 | |||
| 651 | |||
| 652 | <sect1 id="author" xreflabel="AUTHOR"> | ||
| 653 | <title>AUTHOR</title> | ||
| 654 | |||
| 655 | <para>Julian Seward, | ||
| 656 | <computeroutput>&bz-email;</computeroutput></para> | ||
| 657 | |||
| 658 | <para>The ideas embodied in | ||
| 659 | <computeroutput>bzip2</computeroutput> are due to (at least) the | ||
| 660 | following people: Michael Burrows and David Wheeler (for the | ||
| 661 | block sorting transformation), David Wheeler (again, for the | ||
| 662 | Huffman coder), Peter Fenwick (for the structured coding model in | ||
| 663 | the original <computeroutput>bzip</computeroutput>, and many | ||
| 664 | refinements), and Alistair Moffat, Radford Neal and Ian Witten | ||
| 665 | (for the arithmetic coder in the original | ||
| 666 | <computeroutput>bzip</computeroutput>). I am much indebted for | ||
| 667 | their help, support and advice. See the manual in the source | ||
| 668 | distribution for pointers to sources of documentation. Christian | ||
| 669 | von Roques encouraged me to look for faster sorting algorithms, | ||
| 670 | so as to speed up compression. Bela Lubkin encouraged me to | ||
| 671 | improve the worst-case compression performance. | ||
| 672 | Donna Robinson XMLised the documentation. | ||
| 673 | Many people sent | ||
| 674 | patches, helped with portability problems, lent machines, gave | ||
| 675 | advice and were generally helpful.</para> | ||
| 676 | |||
| 677 | </sect1> | ||
| 678 | |||
| 679 | </chapter> | ||
| 680 | |||
| 681 | |||
| 682 | |||
| 683 | <chapter id="libprog" xreflabel="Programming with libbzip2"> | ||
| 684 | <title> | ||
| 685 | Programming with <computeroutput>libbzip2</computeroutput> | ||
| 686 | </title> | ||
| 687 | |||
| 688 | <para>This chapter describes the programming interface to | ||
| 689 | <computeroutput>libbzip2</computeroutput>.</para> | ||
| 690 | |||
| 691 | <para>For general background information, particularly about | ||
| 692 | memory use and performance aspects, you'd be well advised to read | ||
| 693 | <xref linkend="using"/> as well.</para> | ||
| 694 | |||
| 695 | |||
| 696 | <sect1 id="top-level" xreflabel="Top-level structure"> | ||
| 697 | <title>Top-level structure</title> | ||
| 698 | |||
| 699 | <para><computeroutput>libbzip2</computeroutput> is a flexible | ||
| 700 | library for compressing and decompressing data in the | ||
| 701 | <computeroutput>bzip2</computeroutput> data format. Although | ||
| 702 | packaged as a single entity, it helps to regard the library as | ||
| 703 | three separate parts: the low level interface, the high level | ||
| 704 | interface, and some utility functions.</para> | ||
| 705 | |||
| 706 | <para>The structure of | ||
| 707 | <computeroutput>libbzip2</computeroutput>'s interfaces is similar | ||
| 708 | to that of Jean-loup Gailly's and Mark Adler's excellent | ||
| 709 | <computeroutput>zlib</computeroutput> library.</para> | ||
| 710 | |||
| 711 | <para>All externally visible symbols have names beginning | ||
| 712 | <computeroutput>BZ2_</computeroutput>. This is new in version | ||
| 713 | 1.0. The intention is to minimise pollution of the namespaces of | ||
| 714 | library clients.</para> | ||
| 715 | |||
| 716 | <para>To use any part of the library, you need to | ||
| 717 | <computeroutput>#include <bzlib.h></computeroutput> | ||
| 718 | into your sources.</para> | ||
| 719 | |||
| 720 | |||
| 721 | |||
| 722 | <sect2 id="ll-summary" xreflabel="Low-level summary"> | ||
| 723 | <title>Low-level summary</title> | ||
| 724 | |||
| 725 | <para>This interface provides services for compressing and | ||
| 726 | decompressing data in memory. There's no provision for dealing | ||
| 727 | with files, streams or any other I/O mechanisms, just straight | ||
| 728 | memory-to-memory work. In fact, this part of the library can be | ||
| 729 | compiled without inclusion of | ||
| 730 | <computeroutput>stdio.h</computeroutput>, which may be helpful | ||
| 731 | for embedded applications.</para> | ||
| 732 | |||
| 733 | <para>The low-level part of the library has no global variables | ||
| 734 | and is therefore thread-safe.</para> | ||
| 735 | |||
| 736 | <para>Six routines make up the low level interface: | ||
| 737 | <computeroutput>BZ2_bzCompressInit</computeroutput>, | ||
| 738 | <computeroutput>BZ2_bzCompress</computeroutput>, and | ||
| 739 | <computeroutput>BZ2_bzCompressEnd</computeroutput> for | ||
| 740 | compression, and a corresponding trio | ||
| 741 | <computeroutput>BZ2_bzDecompressInit</computeroutput>, | ||
| 742 | <computeroutput>BZ2_bzDecompress</computeroutput> and | ||
| 743 | <computeroutput>BZ2_bzDecompressEnd</computeroutput> for | ||
| 744 | decompression. The <computeroutput>*Init</computeroutput> | ||
| 745 | functions allocate memory for compression/decompression and do | ||
| 746 | other initialisations, whilst the | ||
| 747 | <computeroutput>*End</computeroutput> functions close down | ||
| 748 | operations and release memory.</para> | ||
| 749 | |||
| 750 | <para>The real work is done by | ||
| 751 | <computeroutput>BZ2_bzCompress</computeroutput> and | ||
| 752 | <computeroutput>BZ2_bzDecompress</computeroutput>. These | ||
| 753 | compress and decompress data from a user-supplied input buffer to | ||
| 754 | a user-supplied output buffer. These buffers can be any size; | ||
| 755 | arbitrary quantities of data are handled by making repeated calls | ||
| 756 | to these functions. This is a flexible mechanism allowing a | ||
| 757 | consumer-pull style of activity, or producer-push, or a mixture | ||
| 758 | of both.</para> | ||
| 759 | |||
| 760 | </sect2> | ||
| 761 | |||
| 762 | |||
| 763 | <sect2 id="hl-summary" xreflabel="High-level summary"> | ||
| 764 | <title>High-level summary</title> | ||
| 765 | |||
| 766 | <para>This interface provides some handy wrappers around the | ||
| 767 | low-level interface to facilitate reading and writing | ||
| 768 | <computeroutput>bzip2</computeroutput> format files | ||
| 769 | (<computeroutput>.bz2</computeroutput> files). The routines | ||
| 770 | provide hooks to facilitate reading files in which the | ||
| 771 | <computeroutput>bzip2</computeroutput> data stream is embedded | ||
| 772 | within some larger-scale file structure, or where there are | ||
| 773 | multiple <computeroutput>bzip2</computeroutput> data streams | ||
| 774 | concatenated end-to-end.</para> | ||
| 775 | |||
| 776 | <para>For reading files, | ||
| 777 | <computeroutput>BZ2_bzReadOpen</computeroutput>, | ||
| 778 | <computeroutput>BZ2_bzRead</computeroutput>, | ||
| 779 | <computeroutput>BZ2_bzReadClose</computeroutput> and | ||
| 780 | <computeroutput>BZ2_bzReadGetUnused</computeroutput> are | ||
| 781 | supplied. For writing files, | ||
| 782 | <computeroutput>BZ2_bzWriteOpen</computeroutput>, | ||
| 783 | <computeroutput>BZ2_bzWrite</computeroutput> and | ||
| 784 | <computeroutput>BZ2_bzWriteClose</computeroutput> are | ||
| 785 | available.</para> | ||
| 786 | |||
| 787 | <para>As with the low-level library, no global variables are used | ||
| 788 | so the library is per se thread-safe. However, if I/O errors | ||
| 789 | occur whilst reading or writing the underlying compressed files, | ||
| 790 | you may have to consult <computeroutput>errno</computeroutput> to | ||
| 791 | determine the cause of the error. In that case, you'd need a C | ||
| 792 | library which correctly supports | ||
| 793 | <computeroutput>errno</computeroutput> in a multithreaded | ||
| 794 | environment.</para> | ||
| 795 | |||
| 796 | <para>To make the library a little simpler and more portable, | ||
| 797 | <computeroutput>BZ2_bzReadOpen</computeroutput> and | ||
| 798 | <computeroutput>BZ2_bzWriteOpen</computeroutput> require you to | ||
| 799 | pass them file handles (<computeroutput>FILE*</computeroutput>s) | ||
| 800 | which have previously been opened for reading or writing | ||
| 801 | respectively. That avoids portability problems associated with | ||
| 802 | file operations and file attributes, whilst not being much of an | ||
| 803 | imposition on the programmer.</para> | ||
| 804 | |||
| 805 | </sect2> | ||
| 806 | |||
| 807 | |||
| 808 | <sect2 id="util-fns-summary" xreflabel="Utility functions summary"> | ||
| 809 | <title>Utility functions summary</title> | ||
| 810 | |||
| 811 | <para>For very simple needs, | ||
| 812 | <computeroutput>BZ2_bzBuffToBuffCompress</computeroutput> and | ||
| 813 | <computeroutput>BZ2_bzBuffToBuffDecompress</computeroutput> are | ||
| 814 | provided. These compress data in memory from one buffer to | ||
| 815 | another buffer in a single function call. You should assess | ||
| 816 | whether these functions fulfill your memory-to-memory | ||
| 817 | compression/decompression requirements before investing effort in | ||
| 818 | understanding the more general but more complex low-level | ||
| 819 | interface.</para> | ||
| 820 | |||
| 821 | <para>Yoshioka Tsuneo | ||
| 822 | (<computeroutput>QWF00133@niftyserve.or.jp</computeroutput> / | ||
| 823 | <computeroutput>tsuneo-y@is.aist-nara.ac.jp</computeroutput>) has | ||
| 824 | contributed some functions to give better | ||
| 825 | <computeroutput>zlib</computeroutput> compatibility. These | ||
| 826 | functions are <computeroutput>BZ2_bzopen</computeroutput>, | ||
| 827 | <computeroutput>BZ2_bzread</computeroutput>, | ||
| 828 | <computeroutput>BZ2_bzwrite</computeroutput>, | ||
| 829 | <computeroutput>BZ2_bzflush</computeroutput>, | ||
| 830 | <computeroutput>BZ2_bzclose</computeroutput>, | ||
| 831 | <computeroutput>BZ2_bzerror</computeroutput> and | ||
| 832 | <computeroutput>BZ2_bzlibVersion</computeroutput>. You may find | ||
| 833 | these functions more convenient for simple file reading and | ||
| 834 | writing, than those in the high-level interface. These functions | ||
| 835 | are not (yet) officially part of the library, and are minimally | ||
| 836 | documented here. If they break, you get to keep all the pieces. | ||
| 837 | I hope to document them properly when time permits.</para> | ||
| 838 | |||
| 839 | <para>Yoshioka also contributed modifications to allow the | ||
| 840 | library to be built as a Windows DLL.</para> | ||
| 841 | |||
| 842 | </sect2> | ||
| 843 | |||
| 844 | </sect1> | ||
| 845 | |||
| 846 | |||
| 847 | <sect1 id="err-handling" xreflabel="Error handling"> | ||
| 848 | <title>Error handling</title> | ||
| 849 | |||
| 850 | <para>The library is designed to recover cleanly in all | ||
| 851 | situations, including the worst-case situation of decompressing | ||
| 852 | random data. I'm not 100% sure that it can always do this, so | ||
| 853 | you might want to add a signal handler to catch segmentation | ||
| 854 | violations during decompression if you are feeling especially | ||
| 855 | paranoid. I would be interested in hearing more about the | ||
| 856 | robustness of the library to corrupted compressed data.</para> | ||
| 857 | |||
| 858 | <para>Version 1.0.3 is more robust in this respect than any | ||
| 859 | previous version. Investigations with Valgrind (a tool for detecting | ||
| 860 | problems with memory management) indicate | ||
| 861 | that, at least for the few files I tested, all single-bit errors | ||
| 862 | in the compressed data are caught properly, with no | ||
| 863 | segmentation faults, no uses of uninitialised data, no out of | ||
| 864 | range reads or writes, and no infinite looping in the decompressor. | ||
| 865 | So it's certainly pretty robust, although | ||
| 866 | I wouldn't claim it to be totally bombproof.</para> | ||
| 867 | |||
| 868 | <para>The file <computeroutput>bzlib.h</computeroutput> contains | ||
| 869 | all definitions needed to use the library. In particular, you | ||
| 870 | should definitely not include | ||
| 871 | <computeroutput>bzlib_private.h</computeroutput>.</para> | ||
| 872 | |||
| 873 | <para>In <computeroutput>bzlib.h</computeroutput>, the various | ||
| 874 | return values are defined. The following list is not intended as | ||
| 875 | an exhaustive description of the circumstances in which a given | ||
| 876 | value may be returned -- those descriptions are given later. | ||
| 877 | Rather, it is intended to convey the rough meaning of each return | ||
| 878 | value. The first five values are normal and not intended to | ||
| 879 | denote an error situation.</para> | ||
| 880 | |||
| 881 | <variablelist> | ||
| 882 | |||
| 883 | <varlistentry> | ||
| 884 | <term><computeroutput>BZ_OK</computeroutput></term> | ||
| 885 | <listitem><para>The requested action was completed | ||
| 886 | successfully.</para></listitem> | ||
| 887 | </varlistentry> | ||
| 888 | |||
| 889 | <varlistentry> | ||
| 890 | <term><computeroutput>BZ_RUN_OK, BZ_FLUSH_OK, | ||
| 891 | BZ_FINISH_OK</computeroutput></term> | ||
| 892 | <listitem><para>In | ||
| 893 | <computeroutput>BZ2_bzCompress</computeroutput>, the requested | ||
| 894 | flush/finish/nothing-special action was completed | ||
| 895 | successfully.</para></listitem> | ||
| 896 | </varlistentry> | ||
| 897 | |||
| 898 | <varlistentry> | ||
| 899 | <term><computeroutput>BZ_STREAM_END</computeroutput></term> | ||
| 900 | <listitem><para>Compression of data was completed, or the | ||
| 901 | logical stream end was detected during | ||
| 902 | decompression.</para></listitem> | ||
| 903 | </varlistentry> | ||
| 904 | |||
| 905 | </variablelist> | ||
| 906 | |||
| 907 | <para>The following return values indicate an error of some | ||
| 908 | kind.</para> | ||
| 909 | |||
| 910 | <variablelist> | ||
| 911 | |||
| 912 | <varlistentry> | ||
| 913 | <term><computeroutput>BZ_CONFIG_ERROR</computeroutput></term> | ||
| 914 | <listitem><para>Indicates that the library has been improperly | ||
| 915 | compiled on your platform -- a major configuration error. | ||
| 916 | Specifically, it means that | ||
| 917 | <computeroutput>sizeof(char)</computeroutput>, | ||
| 918 | <computeroutput>sizeof(short)</computeroutput> and | ||
| 919 | <computeroutput>sizeof(int)</computeroutput> are not 1, 2 and | ||
| 920 | 4 respectively, as they should be. Note that the library | ||
| 921 | should still work properly on 64-bit platforms which follow | ||
| 922 | the LP64 programming model -- that is, where | ||
| 923 | <computeroutput>sizeof(long)</computeroutput> and | ||
| 924 | <computeroutput>sizeof(void*)</computeroutput> are 8. Under | ||
| 925 | LP64, <computeroutput>sizeof(int)</computeroutput> is still 4, | ||
| 926 | so <computeroutput>libbzip2</computeroutput>, which doesn't | ||
| 927 | use the <computeroutput>long</computeroutput> type, is | ||
| 928 | OK.</para></listitem> | ||
| 929 | </varlistentry> | ||
| 930 | |||
| 931 | <varlistentry> | ||
| 932 | <term><computeroutput>BZ_SEQUENCE_ERROR</computeroutput></term> | ||
| 933 | <listitem><para>When using the library, it is important to call | ||
| 934 | the functions in the correct sequence and with data structures | ||
| 935 | (buffers etc) in the correct states. | ||
| 936 | <computeroutput>libbzip2</computeroutput> checks as much as it | ||
| 937 | can to ensure this is happening, and returns | ||
| 938 | <computeroutput>BZ_SEQUENCE_ERROR</computeroutput> if not. | ||
| 939 | Code which complies precisely with the function semantics, as | ||
| 940 | detailed below, should never receive this value; such an event | ||
| 941 | denotes buggy code which you should | ||
| 942 | investigate.</para></listitem> | ||
| 943 | </varlistentry> | ||
| 944 | |||
| 945 | <varlistentry> | ||
| 946 | <term><computeroutput>BZ_PARAM_ERROR</computeroutput></term> | ||
| 947 | <listitem><para>Returned when a parameter to a function call is | ||
| 948 | out of range or otherwise manifestly incorrect. As with | ||
| 949 | <computeroutput>BZ_SEQUENCE_ERROR</computeroutput>, this | ||
| 950 | denotes a bug in the client code. The distinction between | ||
| 951 | <computeroutput>BZ_PARAM_ERROR</computeroutput> and | ||
| 952 | <computeroutput>BZ_SEQUENCE_ERROR</computeroutput> is a bit | ||
| 953 | hazy, but still worth making.</para></listitem> | ||
| 954 | </varlistentry> | ||
| 955 | |||
| 956 | <varlistentry> | ||
| 957 | <term><computeroutput>BZ_MEM_ERROR</computeroutput></term> | ||
| 958 | <listitem><para>Returned when a request to allocate memory | ||
| 959 | failed. Note that the quantity of memory needed to decompress | ||
| 960 | a stream cannot be determined until the stream's header has | ||
| 961 | been read. So | ||
| 962 | <computeroutput>BZ2_bzDecompress</computeroutput> and | ||
| 963 | <computeroutput>BZ2_bzRead</computeroutput> may return | ||
| 964 | <computeroutput>BZ_MEM_ERROR</computeroutput> even though some | ||
| 965 | of the compressed data has been read. The same is not true | ||
| 966 | for compression; once | ||
| 967 | <computeroutput>BZ2_bzCompressInit</computeroutput> or | ||
| 968 | <computeroutput>BZ2_bzWriteOpen</computeroutput> have | ||
| 969 | successfully completed, | ||
| 970 | <computeroutput>BZ_MEM_ERROR</computeroutput> cannot | ||
| 971 | occur.</para></listitem> | ||
| 972 | </varlistentry> | ||
| 973 | |||
| 974 | <varlistentry> | ||
| 975 | <term><computeroutput>BZ_DATA_ERROR</computeroutput></term> | ||
| 976 | <listitem><para>Returned when a data integrity error is | ||
| 977 | detected during decompression. Most importantly, this means | ||
| 978 | when stored and computed CRCs for the data do not match. This | ||
| 979 | value is also returned upon detection of any other anomaly in | ||
| 980 | the compressed data.</para></listitem> | ||
| 981 | </varlistentry> | ||
| 982 | |||
| 983 | <varlistentry> | ||
| 984 | <term><computeroutput>BZ_DATA_ERROR_MAGIC</computeroutput></term> | ||
| 985 | <listitem><para>As a special case of | ||
| 986 | <computeroutput>BZ_DATA_ERROR</computeroutput>, it is | ||
| 987 | sometimes useful to know when the compressed stream does not | ||
| 988 | start with the correct magic bytes (<computeroutput>'B' 'Z' | ||
| 989 | 'h'</computeroutput>).</para></listitem> | ||
| 990 | </varlistentry> | ||
| 991 | |||
| 992 | <varlistentry> | ||
| 993 | <term><computeroutput>BZ_IO_ERROR</computeroutput></term> | ||
| 994 | <listitem><para>Returned by | ||
| 995 | <computeroutput>BZ2_bzRead</computeroutput> and | ||
| 996 | <computeroutput>BZ2_bzWrite</computeroutput> when there is an | ||
| 997 | error reading or writing in the compressed file, and by | ||
| 998 | <computeroutput>BZ2_bzReadOpen</computeroutput> and | ||
| 999 | <computeroutput>BZ2_bzWriteOpen</computeroutput> for attempts | ||
| 1000 | to use a file for which the error indicator (viz, | ||
| 1001 | <computeroutput>ferror(f)</computeroutput>) is set. On | ||
| 1002 | receipt of <computeroutput>BZ_IO_ERROR</computeroutput>, the | ||
| 1003 | caller should consult <computeroutput>errno</computeroutput> | ||
| 1004 | and/or <computeroutput>perror</computeroutput> to acquire | ||
| 1005 | operating-system specific information about the | ||
| 1006 | problem.</para></listitem> | ||
| 1007 | </varlistentry> | ||
| 1008 | |||
| 1009 | <varlistentry> | ||
| 1010 | <term><computeroutput>BZ_UNEXPECTED_EOF</computeroutput></term> | ||
| 1011 | <listitem><para>Returned by | ||
| 1012 | <computeroutput>BZ2_bzRead</computeroutput> when the | ||
| 1013 | compressed file finishes before the logical end of stream is | ||
| 1014 | detected.</para></listitem> | ||
| 1015 | </varlistentry> | ||
| 1016 | |||
| 1017 | <varlistentry> | ||
| 1018 | <term><computeroutput>BZ_OUTBUFF_FULL</computeroutput></term> | ||
| 1019 | <listitem><para>Returned by | ||
| 1020 | <computeroutput>BZ2_bzBuffToBuffCompress</computeroutput> and | ||
| 1021 | <computeroutput>BZ2_bzBuffToBuffDecompress</computeroutput> to | ||
| 1022 | indicate that the output data will not fit into the output | ||
| 1023 | buffer provided.</para></listitem> | ||
| 1024 | </varlistentry> | ||
| 1025 | |||
| 1026 | </variablelist> | ||
| 1027 | |||
| 1028 | </sect1> | ||
| 1029 | |||
| 1030 | |||
| 1031 | |||
| 1032 | <sect1 id="low-level" xreflabel="Low-level interface"> | ||
| 1033 | <title>Low-level interface</title> | ||
| 1034 | |||
| 1035 | |||
| 1036 | <sect2 id="bzcompress-init" xreflabel="BZ2_bzCompressInit"> | ||
| 1037 | <title><computeroutput>BZ2_bzCompressInit</computeroutput></title> | ||
| 1038 | |||
| 1039 | <programlisting> | ||
| 1040 | typedef struct { | ||
| 1041 | char *next_in; | ||
| 1042 | unsigned int avail_in; | ||
| 1043 | unsigned int total_in_lo32; | ||
| 1044 | unsigned int total_in_hi32; | ||
| 1045 | |||
| 1046 | char *next_out; | ||
| 1047 | unsigned int avail_out; | ||
| 1048 | unsigned int total_out_lo32; | ||
| 1049 | unsigned int total_out_hi32; | ||
| 1050 | |||
| 1051 | void *state; | ||
| 1052 | |||
| 1053 | void *(*bzalloc)(void *,int,int); | ||
| 1054 | void (*bzfree)(void *,void *); | ||
| 1055 | void *opaque; | ||
| 1056 | } bz_stream; | ||
| 1057 | |||
| 1058 | int BZ2_bzCompressInit ( bz_stream *strm, | ||
| 1059 | int blockSize100k, | ||
| 1060 | int verbosity, | ||
| 1061 | int workFactor ); | ||
| 1062 | </programlisting> | ||
| 1063 | |||
| 1064 | <para>Prepares for compression. The | ||
| 1065 | <computeroutput>bz_stream</computeroutput> structure holds all | ||
| 1066 | data pertaining to the compression activity. A | ||
| 1067 | <computeroutput>bz_stream</computeroutput> structure should be | ||
| 1068 | allocated and initialised prior to the call. The fields of | ||
| 1069 | <computeroutput>bz_stream</computeroutput> comprise the entirety | ||
| 1070 | of the user-visible data. <computeroutput>state</computeroutput> | ||
| 1071 | is a pointer to the private data structures required for | ||
| 1072 | compression.</para> | ||
| 1073 | |||
| 1074 | <para>Custom memory allocators are supported, via fields | ||
| 1075 | <computeroutput>bzalloc</computeroutput>, | ||
| 1076 | <computeroutput>bzfree</computeroutput>, and | ||
| 1077 | <computeroutput>opaque</computeroutput>. The value | ||
| 1078 | <computeroutput>opaque</computeroutput> is passed as the first | ||
| 1079 | argument to all calls to <computeroutput>bzalloc</computeroutput> | ||
| 1080 | and <computeroutput>bzfree</computeroutput>, but is otherwise | ||
| 1081 | ignored by the library. The call <computeroutput>bzalloc ( | ||
| 1082 | opaque, n, m )</computeroutput> is expected to return a pointer | ||
| 1083 | <computeroutput>p</computeroutput> to <computeroutput>n * | ||
| 1084 | m</computeroutput> bytes of memory, and <computeroutput>bzfree ( | ||
| 1085 | opaque, p )</computeroutput> should free that memory.</para> | ||
| 1086 | |||
| 1087 | <para>If you don't want to use a custom memory allocator, set | ||
| 1088 | <computeroutput>bzalloc</computeroutput>, | ||
| 1089 | <computeroutput>bzfree</computeroutput> and | ||
| 1090 | <computeroutput>opaque</computeroutput> to | ||
| 1091 | <computeroutput>NULL</computeroutput>, and the library will then | ||
| 1092 | use the standard <computeroutput>malloc</computeroutput> / | ||
| 1093 | <computeroutput>free</computeroutput> routines.</para> | ||
| 1094 | |||
| 1095 | <para>Before calling | ||
| 1096 | <computeroutput>BZ2_bzCompressInit</computeroutput>, fields | ||
| 1097 | <computeroutput>bzalloc</computeroutput>, | ||
| 1098 | <computeroutput>bzfree</computeroutput> and | ||
| 1099 | <computeroutput>opaque</computeroutput> should be filled | ||
| 1100 | appropriately, as just described. Upon return, the internal | ||
| 1101 | state will have been allocated and initialised, and | ||
| 1102 | <computeroutput>total_in_lo32</computeroutput>, | ||
| 1103 | <computeroutput>total_in_hi32</computeroutput>, | ||
| 1104 | <computeroutput>total_out_lo32</computeroutput> and | ||
| 1105 | <computeroutput>total_out_hi32</computeroutput> will have been | ||
| 1106 | set to zero. These four fields are used by the library to inform | ||
| 1107 | the caller of the total amount of data passed into and out of the | ||
| 1108 | library, respectively. You should not try to change them. As of | ||
| 1109 | version 1.0, 64-bit counts are maintained, even on 32-bit | ||
| 1110 | platforms, using the <computeroutput>_hi32</computeroutput> | ||
| 1111 | fields to store the upper 32 bits of the count. So, for example, | ||
| 1112 | the total amount of data in is <computeroutput>(total_in_hi32 | ||
| 1113 | << 32) + total_in_lo32</computeroutput>.</para> | ||
| 1114 | |||
| 1115 | <para>Parameter <computeroutput>blockSize100k</computeroutput> | ||
| 1116 | specifies the block size to be used for compression. It should | ||
| 1117 | be a value between 1 and 9 inclusive, and the actual block size | ||
| 1118 | used is 100000 x this figure. 9 gives the best compression but | ||
| 1119 | takes most memory.</para> | ||
| 1120 | |||
| 1121 | <para>Parameter <computeroutput>verbosity</computeroutput> should | ||
| 1122 | be set to a number between 0 and 4 inclusive. 0 is silent, and | ||
| 1123 | greater numbers give increasingly verbose monitoring/debugging | ||
| 1124 | output. If the library has been compiled with | ||
| 1125 | <computeroutput>-DBZ_NO_STDIO</computeroutput>, no such output | ||
| 1126 | will appear for any verbosity setting.</para> | ||
| 1127 | |||
| 1128 | <para>Parameter <computeroutput>workFactor</computeroutput> | ||
| 1129 | controls how the compression phase behaves when presented with | ||
| 1130 | worst case, highly repetitive, input data. If compression runs | ||
| 1131 | into difficulties caused by repetitive data, the library switches | ||
| 1132 | from the standard sorting algorithm to a fallback algorithm. The | ||
| 1133 | fallback is slower than the standard algorithm by perhaps a | ||
| 1134 | factor of three, but always behaves reasonably, no matter how bad | ||
| 1135 | the input.</para> | ||
| 1136 | |||
| 1137 | <para>Lower values of <computeroutput>workFactor</computeroutput> | ||
| 1138 | reduce the amount of effort the standard algorithm will expend | ||
| 1139 | before resorting to the fallback. You should set this parameter | ||
| 1140 | carefully; too low, and many inputs will be handled by the | ||
| 1141 | fallback algorithm and so compress rather slowly; too high, and | ||
| 1142 | your average-to-worst case compression times can become very | ||
| 1143 | large. The default value of 30 gives reasonable behaviour over a | ||
| 1144 | wide range of circumstances.</para> | ||
| 1145 | |||
| 1146 | <para>Allowable values range from 0 to 250 inclusive. 0 is a | ||
| 1147 | special case, equivalent to using the default value of 30.</para> | ||
| 1148 | |||
| 1149 | <para>Note that the compressed output generated is the same | ||
| 1150 | regardless of whether or not the fallback algorithm is | ||
| 1151 | used.</para> | ||
| 1152 | |||
| 1153 | <para>Be aware also that this parameter may disappear entirely in | ||
| 1154 | future versions of the library. In principle it should be | ||
| 1155 | possible to devise a good way to automatically choose which | ||
| 1156 | algorithm to use. Such a mechanism would render the parameter | ||
| 1157 | obsolete.</para> | ||
| 1158 | |||
| 1159 | <para>Possible return values:</para> | ||
| 1160 | |||
| 1161 | <programlisting> | ||
| 1162 | BZ_CONFIG_ERROR | ||
| 1163 | if the library has been mis-compiled | ||
| 1164 | BZ_PARAM_ERROR | ||
| 1165 | if strm is NULL | ||
| 1166 | or blockSize < 1 or blockSize > 9 | ||
| 1167 | or verbosity < 0 or verbosity > 4 | ||
| 1168 | or workFactor < 0 or workFactor > 250 | ||
| 1169 | BZ_MEM_ERROR | ||
| 1170 | if not enough memory is available | ||
| 1171 | BZ_OK | ||
| 1172 | otherwise | ||
| 1173 | </programlisting> | ||
| 1174 | |||
| 1175 | <para>Allowable next actions:</para> | ||
| 1176 | |||
| 1177 | <programlisting> | ||
| 1178 | BZ2_bzCompress | ||
| 1179 | if BZ_OK is returned | ||
| 1180 | no specific action needed in case of error | ||
| 1181 | </programlisting> | ||
| 1182 | |||
| 1183 | </sect2> | ||
| 1184 | |||
| 1185 | |||
| 1186 | <sect2 id="bzCompress" xreflabel="BZ2_bzCompress"> | ||
| 1187 | <title><computeroutput>BZ2_bzCompress</computeroutput></title> | ||
| 1188 | |||
| 1189 | <programlisting> | ||
| 1190 | int BZ2_bzCompress ( bz_stream *strm, int action ); | ||
| 1191 | </programlisting> | ||
| 1192 | |||
| 1193 | <para>Provides more input and/or output buffer space for the | ||
| 1194 | library. The caller maintains input and output buffers, and | ||
| 1195 | calls <computeroutput>BZ2_bzCompress</computeroutput> to transfer | ||
| 1196 | data between them.</para> | ||
| 1197 | |||
| 1198 | <para>Before each call to | ||
| 1199 | <computeroutput>BZ2_bzCompress</computeroutput>, | ||
| 1200 | <computeroutput>next_in</computeroutput> should point at the data | ||
| 1201 | to be compressed, and <computeroutput>avail_in</computeroutput> | ||
| 1202 | should indicate how many bytes the library may read. | ||
| 1203 | <computeroutput>BZ2_bzCompress</computeroutput> updates | ||
| 1204 | <computeroutput>next_in</computeroutput>, | ||
| 1205 | <computeroutput>avail_in</computeroutput> and | ||
| 1206 | <computeroutput>total_in</computeroutput> to reflect the number | ||
| 1207 | of bytes it has read.</para> | ||
| 1208 | |||
| 1209 | <para>Similarly, <computeroutput>next_out</computeroutput> should | ||
| 1210 | point to a buffer in which the compressed data is to be placed, | ||
| 1211 | with <computeroutput>avail_out</computeroutput> indicating how | ||
| 1212 | much output space is available. | ||
| 1213 | <computeroutput>BZ2_bzCompress</computeroutput> updates | ||
| 1214 | <computeroutput>next_out</computeroutput>, | ||
| 1215 | <computeroutput>avail_out</computeroutput> and | ||
| 1216 | <computeroutput>total_out</computeroutput> to reflect the number | ||
| 1217 | of bytes output.</para> | ||
| 1218 | |||
| 1219 | <para>You may provide and remove as little or as much data as you | ||
| 1220 | like on each call of | ||
| 1221 | <computeroutput>BZ2_bzCompress</computeroutput>. In the limit, | ||
| 1222 | it is acceptable to supply and remove data one byte at a time, | ||
| 1223 | although this would be terribly inefficient. You should always | ||
| 1224 | ensure that at least one byte of output space is available at | ||
| 1225 | each call.</para> | ||
| 1226 | |||
| 1227 | <para>A second purpose of | ||
| 1228 | <computeroutput>BZ2_bzCompress</computeroutput> is to request a | ||
| 1229 | change of mode of the compressed stream.</para> | ||
| 1230 | |||
| 1231 | <para>Conceptually, a compressed stream can be in one of four | ||
| 1232 | states: IDLE, RUNNING, FLUSHING and FINISHING. Before | ||
| 1233 | initialisation | ||
| 1234 | (<computeroutput>BZ2_bzCompressInit</computeroutput>) and after | ||
| 1235 | termination (<computeroutput>BZ2_bzCompressEnd</computeroutput>), | ||
| 1236 | a stream is regarded as IDLE.</para> | ||
| 1237 | |||
| 1238 | <para>Upon initialisation | ||
| 1239 | (<computeroutput>BZ2_bzCompressInit</computeroutput>), the stream | ||
| 1240 | is placed in the RUNNING state. Subsequent calls to | ||
| 1241 | <computeroutput>BZ2_bzCompress</computeroutput> should pass | ||
| 1242 | <computeroutput>BZ_RUN</computeroutput> as the requested action; | ||
| 1243 | other actions are illegal and will result in | ||
| 1244 | <computeroutput>BZ_SEQUENCE_ERROR</computeroutput>.</para> | ||
| 1245 | |||
| 1246 | <para>At some point, the calling program will have provided all | ||
| 1247 | the input data it wants to. It will then want to finish up -- in | ||
| 1248 | effect, asking the library to process any data it might have | ||
| 1249 | buffered internally. In this state, | ||
| 1250 | <computeroutput>BZ2_bzCompress</computeroutput> will no longer | ||
| 1251 | attempt to read data from | ||
| 1252 | <computeroutput>next_in</computeroutput>, but it will want to | ||
| 1253 | write data to <computeroutput>next_out</computeroutput>. Because | ||
| 1254 | the output buffer supplied by the user can be arbitrarily small, | ||
| 1255 | the finishing-up operation cannot necessarily be done with a | ||
| 1256 | single call of | ||
| 1257 | <computeroutput>BZ2_bzCompress</computeroutput>.</para> | ||
| 1258 | |||
| 1259 | <para>Instead, the calling program passes | ||
| 1260 | <computeroutput>BZ_FINISH</computeroutput> as an action to | ||
| 1261 | <computeroutput>BZ2_bzCompress</computeroutput>. This changes | ||
| 1262 | the stream's state to FINISHING. Any remaining input (ie, | ||
| 1263 | <computeroutput>next_in[0 .. avail_in-1]</computeroutput>) is | ||
| 1264 | compressed and transferred to the output buffer. To do this, | ||
| 1265 | <computeroutput>BZ2_bzCompress</computeroutput> must be called | ||
| 1266 | repeatedly until all the output has been consumed. At that | ||
| 1267 | point, <computeroutput>BZ2_bzCompress</computeroutput> returns | ||
| 1268 | <computeroutput>BZ_STREAM_END</computeroutput>, and the stream's | ||
| 1269 | state is set back to IDLE. | ||
| 1270 | <computeroutput>BZ2_bzCompressEnd</computeroutput> should then be | ||
| 1271 | called.</para> | ||
| 1272 | |||
| 1273 | <para>Just to make sure the calling program does not cheat, the | ||
| 1274 | library makes a note of <computeroutput>avail_in</computeroutput> | ||
| 1275 | at the time of the first call to | ||
| 1276 | <computeroutput>BZ2_bzCompress</computeroutput> which has | ||
| 1277 | <computeroutput>BZ_FINISH</computeroutput> as an action (ie, at | ||
| 1278 | the time the program has announced its intention to not supply | ||
| 1279 | any more input). By comparing this value with that of | ||
| 1280 | <computeroutput>avail_in</computeroutput> over subsequent calls | ||
| 1281 | to <computeroutput>BZ2_bzCompress</computeroutput>, the library | ||
| 1282 | can detect any attempts to slip in more data to compress. Any | ||
| 1283 | calls for which this is detected will return | ||
| 1284 | <computeroutput>BZ_SEQUENCE_ERROR</computeroutput>. This | ||
| 1285 | indicates a programming mistake which should be corrected.</para> | ||
| 1286 | |||
| 1287 | <para>Instead of asking to finish, the calling program may ask | ||
| 1288 | <computeroutput>BZ2_bzCompress</computeroutput> to take all the | ||
| 1289 | remaining input, compress it and terminate the current | ||
| 1290 | (Burrows-Wheeler) compression block. This could be useful for | ||
| 1291 | error control purposes. The mechanism is analogous to that for | ||
| 1292 | finishing: call <computeroutput>BZ2_bzCompress</computeroutput> | ||
| 1293 | with an action of <computeroutput>BZ_FLUSH</computeroutput>, | ||
| 1294 | remove output data, and persist with the | ||
| 1295 | <computeroutput>BZ_FLUSH</computeroutput> action until the value | ||
| 1296 | <computeroutput>BZ_RUN_OK</computeroutput> is returned. As with | ||
| 1297 | finishing, <computeroutput>BZ2_bzCompress</computeroutput> | ||
| 1298 | detects any attempt to provide more input data once the flush has | ||
| 1299 | begun.</para> | ||
| 1300 | |||
| 1301 | <para>Once the flush is complete, the stream returns to the | ||
| 1302 | normal RUNNING state.</para> | ||
| 1303 | |||
| 1304 | <para>This all sounds pretty complex, but isn't really. Here's a | ||
| 1305 | table which shows which actions are allowable in each state, what | ||
| 1306 | action will be taken, what the next state is, and what the | ||
| 1307 | non-error return values are. Note that you can't explicitly ask | ||
| 1308 | what state the stream is in, but nor do you need to -- it can be | ||
| 1309 | inferred from the values returned by | ||
| 1310 | <computeroutput>BZ2_bzCompress</computeroutput>.</para> | ||
| 1311 | |||
| 1312 | <programlisting> | ||
| 1313 | IDLE/any | ||
| 1314 | Illegal. IDLE state only exists after BZ2_bzCompressEnd or | ||
| 1315 | before BZ2_bzCompressInit. | ||
| 1316 | Return value = BZ_SEQUENCE_ERROR | ||
| 1317 | |||
| 1318 | RUNNING/BZ_RUN | ||
| 1319 | Compress from next_in to next_out as much as possible. | ||
| 1320 | Next state = RUNNING | ||
| 1321 | Return value = BZ_RUN_OK | ||
| 1322 | |||
| 1323 | RUNNING/BZ_FLUSH | ||
| 1324 | Remember current value of avail_in. Compress from next_in | ||
| 1325 | to next_out as much as possible, but do not accept any more input. | ||
| 1326 | Next state = FLUSHING | ||
| 1327 | Return value = BZ_FLUSH_OK | ||
| 1328 | |||
| 1329 | RUNNING/BZ_FINISH | ||
| 1330 | Remember current value of avail_in. Compress from next_in | ||
| 1331 | to next_out as much as possible, but do not accept any more input. | ||
| 1332 | Next state = FINISHING | ||
| 1333 | Return value = BZ_FINISH_OK | ||
| 1334 | |||
| 1335 | FLUSHING/BZ_FLUSH | ||
| 1336 | Compress from next_in to next_out as much as possible, | ||
| 1337 | but do not accept any more input. | ||
| 1338 | If all the existing input has been used up and all compressed | ||
| 1339 | output has been removed | ||
| 1340 | Next state = RUNNING; Return value = BZ_RUN_OK | ||
| 1341 | else | ||
| 1342 | Next state = FLUSHING; Return value = BZ_FLUSH_OK | ||
| 1343 | |||
| 1344 | FLUSHING/other | ||
| 1345 | Illegal. | ||
| 1346 | Return value = BZ_SEQUENCE_ERROR | ||
| 1347 | |||
| 1348 | FINISHING/BZ_FINISH | ||
| 1349 | Compress from next_in to next_out as much as possible, | ||
| 1350 | but do not accept any more input. | ||
| 1351 | If all the existing input has been used up and all compressed | ||
| 1352 | output has been removed | ||
| 1353 | Next state = IDLE; Return value = BZ_STREAM_END | ||
| 1354 | else | ||
| 1355 | Next state = FINISHING; Return value = BZ_FINISH_OK | ||
| 1356 | |||
| 1357 | FINISHING/other | ||
| 1358 | Illegal. | ||
| 1359 | Return value = BZ_SEQUENCE_ERROR | ||
| 1360 | </programlisting> | ||
| 1361 | |||
| 1362 | |||
| 1363 | <para>That still looks complicated? Well, fair enough. The | ||
| 1364 | usual sequence of calls for compressing a load of data is:</para> | ||
| 1365 | |||
| 1366 | <orderedlist> | ||
| 1367 | |||
| 1368 | <listitem><para>Get started with | ||
| 1369 | <computeroutput>BZ2_bzCompressInit</computeroutput>.</para></listitem> | ||
| 1370 | |||
| 1371 | <listitem><para>Shovel data in and shlurp out its compressed form | ||
| 1372 | using zero or more calls of | ||
| 1373 | <computeroutput>BZ2_bzCompress</computeroutput> with action = | ||
| 1374 | <computeroutput>BZ_RUN</computeroutput>.</para></listitem> | ||
| 1375 | |||
| 1376 | <listitem><para>Finish up. Repeatedly call | ||
| 1377 | <computeroutput>BZ2_bzCompress</computeroutput> with action = | ||
| 1378 | <computeroutput>BZ_FINISH</computeroutput>, copying out the | ||
| 1379 | compressed output, until | ||
| 1380 | <computeroutput>BZ_STREAM_END</computeroutput> is | ||
| 1381 | returned.</para></listitem> <listitem><para>Close up and go home. Call | ||
| 1382 | <computeroutput>BZ2_bzCompressEnd</computeroutput>.</para></listitem> | ||
| 1383 | |||
| 1384 | </orderedlist> | ||
| 1385 | |||
| 1386 | <para>If the data you want to compress fits into your input | ||
| 1387 | buffer all at once, you can skip the calls of | ||
| 1388 | <computeroutput>BZ2_bzCompress ( ..., BZ_RUN )</computeroutput> | ||
| 1389 | and just do the <computeroutput>BZ2_bzCompress ( ..., BZ_FINISH | ||
| 1390 | )</computeroutput> calls.</para> | ||
| 1391 | |||
| 1392 | <para>All required memory is allocated by | ||
| 1393 | <computeroutput>BZ2_bzCompressInit</computeroutput>. The | ||
| 1394 | compression library can accept any data at all (obviously). So | ||
| 1395 | you shouldn't get any error return values from the | ||
| 1396 | <computeroutput>BZ2_bzCompress</computeroutput> calls. If you | ||
| 1397 | do, they will be | ||
| 1398 | <computeroutput>BZ_SEQUENCE_ERROR</computeroutput>, and indicate | ||
| 1399 | a bug in your programming.</para> | ||
| 1400 | |||
| 1401 | <para>Trivial other possible return values:</para> | ||
| 1402 | |||
| 1403 | <programlisting> | ||
| 1404 | BZ_PARAM_ERROR | ||
| 1405 | if strm is NULL, or strm->s is NULL | ||
| 1406 | </programlisting> | ||
| 1407 | |||
| 1408 | </sect2> | ||
| 1409 | |||
| 1410 | |||
| 1411 | <sect2 id="bzCompress-end" xreflabel="BZ2_bzCompressEnd"> | ||
| 1412 | <title><computeroutput>BZ2_bzCompressEnd</computeroutput></title> | ||
| 1413 | |||
| 1414 | <programlisting> | ||
| 1415 | int BZ2_bzCompressEnd ( bz_stream *strm ); | ||
| 1416 | </programlisting> | ||
| 1417 | |||
| 1418 | <para>Releases all memory associated with a compression | ||
| 1419 | stream.</para> | ||
| 1420 | |||
| 1421 | <para>Possible return values:</para> | ||
| 1422 | |||
| 1423 | <programlisting> | ||
| 1424 | BZ_PARAM_ERROR if strm is NULL or strm->s is NULL | ||
| 1425 | BZ_OK otherwise | ||
| 1426 | </programlisting> | ||
| 1427 | |||
| 1428 | </sect2> | ||
| 1429 | |||
| 1430 | |||
| 1431 | <sect2 id="bzDecompress-init" xreflabel="BZ2_bzDecompressInit"> | ||
| 1432 | <title><computeroutput>BZ2_bzDecompressInit</computeroutput></title> | ||
| 1433 | |||
| 1434 | <programlisting> | ||
| 1435 | int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small ); | ||
| 1436 | </programlisting> | ||
| 1437 | |||
| 1438 | <para>Prepares for decompression. As with | ||
| 1439 | <computeroutput>BZ2_bzCompressInit</computeroutput>, a | ||
| 1440 | <computeroutput>bz_stream</computeroutput> record should be | ||
| 1441 | allocated and initialised before the call. Fields | ||
| 1442 | <computeroutput>bzalloc</computeroutput>, | ||
| 1443 | <computeroutput>bzfree</computeroutput> and | ||
| 1444 | <computeroutput>opaque</computeroutput> should be set if a custom | ||
| 1445 | memory allocator is required, or made | ||
| 1446 | <computeroutput>NULL</computeroutput> for the normal | ||
| 1447 | <computeroutput>malloc</computeroutput> / | ||
| 1448 | <computeroutput>free</computeroutput> routines. Upon return, the | ||
| 1449 | internal state will have been initialised, and | ||
| 1450 | <computeroutput>total_in</computeroutput> and | ||
| 1451 | <computeroutput>total_out</computeroutput> will be zero.</para> | ||
| 1452 | |||
| 1453 | <para>For the meaning of parameter | ||
| 1454 | <computeroutput>verbosity</computeroutput>, see | ||
| 1455 | <computeroutput>BZ2_bzCompressInit</computeroutput>.</para> | ||
| 1456 | |||
| 1457 | <para>If <computeroutput>small</computeroutput> is nonzero, the | ||
| 1458 | library will use an alternative decompression algorithm which | ||
| 1459 | uses less memory but at the cost of decompressing more slowly | ||
| 1460 | (roughly speaking, half the speed, but the maximum memory | ||
| 1461 | requirement drops to around 2300k). See <xref linkend="using"/> | ||
| 1462 | for more information on memory management.</para> | ||
| 1463 | |||
| 1464 | <para>Note that the amount of memory needed to decompress a | ||
| 1465 | stream cannot be determined until the stream's header has been | ||
| 1466 | read, so even if | ||
| 1467 | <computeroutput>BZ2_bzDecompressInit</computeroutput> succeeds, a | ||
| 1468 | subsequent <computeroutput>BZ2_bzDecompress</computeroutput> | ||
| 1469 | could fail with | ||
| 1470 | <computeroutput>BZ_MEM_ERROR</computeroutput>.</para> | ||
| 1471 | |||
| 1472 | <para>Possible return values:</para> | ||
| 1473 | |||
| 1474 | <programlisting> | ||
| 1475 | BZ_CONFIG_ERROR | ||
| 1476 | if the library has been mis-compiled | ||
| 1477 | BZ_PARAM_ERROR | ||
| 1478 | if ( small != 0 && small != 1 ) | ||
| 1479 | or (verbosity < 0 || verbosity > 4) | ||
| 1480 | BZ_MEM_ERROR | ||
| 1481 | if insufficient memory is available | ||
| 1482 | </programlisting> | ||
| 1483 | |||
| 1484 | <para>Allowable next actions:</para> | ||
| 1485 | |||
| 1486 | <programlisting> | ||
| 1487 | BZ2_bzDecompress | ||
| 1488 | if BZ_OK was returned | ||
| 1489 | no specific action required in case of error | ||
| 1490 | </programlisting> | ||
| 1491 | |||
| 1492 | </sect2> | ||
| 1493 | |||
| 1494 | |||
| 1495 | <sect2 id="bzDecompress" xreflabel="BZ2_bzDecompress"> | ||
| 1496 | <title><computeroutput>BZ2_bzDecompress</computeroutput></title> | ||
| 1497 | |||
| 1498 | <programlisting> | ||
| 1499 | int BZ2_bzDecompress ( bz_stream *strm ); | ||
| 1500 | </programlisting> | ||
| 1501 | |||
| 1502 | <para>Provides more input and/or output buffer space for the | ||
| 1503 | library. The caller maintains input and output buffers, and uses | ||
| 1504 | <computeroutput>BZ2_bzDecompress</computeroutput> to transfer | ||
| 1505 | data between them.</para> | ||
| 1506 | |||
| 1507 | <para>Before each call to | ||
| 1508 | <computeroutput>BZ2_bzDecompress</computeroutput>, | ||
| 1509 | <computeroutput>next_in</computeroutput> should point at the | ||
| 1510 | compressed data, and <computeroutput>avail_in</computeroutput> | ||
| 1511 | should indicate how many bytes the library may read. | ||
| 1512 | <computeroutput>BZ2_bzDecompress</computeroutput> updates | ||
| 1513 | <computeroutput>next_in</computeroutput>, | ||
| 1514 | <computeroutput>avail_in</computeroutput> and | ||
| 1515 | <computeroutput>total_in</computeroutput> to reflect the number | ||
| 1516 | of bytes it has read.</para> | ||
| 1517 | |||
| 1518 | <para>Similarly, <computeroutput>next_out</computeroutput> should | ||
| 1519 | point to a buffer in which the uncompressed output is to be | ||
| 1520 | placed, with <computeroutput>avail_out</computeroutput> | ||
| 1521 | indicating how much output space is available. | ||
| 1522 | <computeroutput>BZ2_bzDecompress</computeroutput> updates | ||
| 1523 | <computeroutput>next_out</computeroutput>, | ||
| 1524 | <computeroutput>avail_out</computeroutput> and | ||
| 1525 | <computeroutput>total_out</computeroutput> to reflect the number | ||
| 1526 | of bytes output.</para> | ||
| 1527 | |||
| 1528 | <para>You may provide and remove as little or as much data as you | ||
| 1529 | like on each call of | ||
| 1530 | <computeroutput>BZ2_bzDecompress</computeroutput>. In the limit, | ||
| 1531 | it is acceptable to supply and remove data one byte at a time, | ||
| 1532 | although this would be terribly inefficient. You should always | ||
| 1533 | ensure that at least one byte of output space is available at | ||
| 1534 | each call.</para> | ||
| 1535 | |||
| 1536 | <para>Use of <computeroutput>BZ2_bzDecompress</computeroutput> is | ||
| 1537 | simpler than | ||
| 1538 | <computeroutput>BZ2_bzCompress</computeroutput>.</para> | ||
| 1539 | |||
| 1540 | <para>You should provide input and remove output as described | ||
| 1541 | above, and repeatedly call | ||
| 1542 | <computeroutput>BZ2_bzDecompress</computeroutput> until | ||
| 1543 | <computeroutput>BZ_STREAM_END</computeroutput> is returned. | ||
| 1544 | Appearance of <computeroutput>BZ_STREAM_END</computeroutput> | ||
| 1545 | denotes that <computeroutput>BZ2_bzDecompress</computeroutput> | ||
| 1546 | has detected the logical end of the compressed stream. | ||
| 1547 | <computeroutput>BZ2_bzDecompress</computeroutput> will not | ||
| 1548 | produce <computeroutput>BZ_STREAM_END</computeroutput> until all | ||
| 1549 | output data has been placed into the output buffer, so once | ||
| 1550 | <computeroutput>BZ_STREAM_END</computeroutput> appears, you are | ||
| 1551 | guaranteed to have available all the decompressed output, and | ||
| 1552 | <computeroutput>BZ2_bzDecompressEnd</computeroutput> can safely | ||
| 1553 | be called.</para> | ||
| 1554 | |||
| 1555 | <para>In case of an error return value, you should call | ||
| 1556 | <computeroutput>BZ2_bzDecompressEnd</computeroutput> to clean up | ||
| 1557 | and release memory.</para> | ||
| 1558 | |||
| 1559 | <para>Possible return values:</para> | ||
| 1560 | |||
| 1561 | <programlisting> | ||
| 1562 | BZ_PARAM_ERROR | ||
| 1563 | if strm is NULL or strm->s is NULL | ||
| 1564 | or strm->avail_out < 1 | ||
| 1565 | BZ_DATA_ERROR | ||
| 1566 | if a data integrity error is detected in the compressed stream | ||
| 1567 | BZ_DATA_ERROR_MAGIC | ||
| 1568 | if the compressed stream doesn't begin with the right magic bytes | ||
| 1569 | BZ_MEM_ERROR | ||
| 1570 | if there wasn't enough memory available | ||
| 1571 | BZ_STREAM_END | ||
| 1572 | if the logical end of the data stream was detected and all | ||
| 1573 | output has been consumed, eg strm->avail_out > 0 | ||
| 1574 | BZ_OK | ||
| 1575 | otherwise | ||
| 1576 | </programlisting> | ||
| 1577 | |||
| 1578 | <para>Allowable next actions:</para> | ||
| 1579 | |||
| 1580 | <programlisting> | ||
| 1581 | BZ2_bzDecompress | ||
| 1582 | if BZ_OK was returned | ||
| 1583 | BZ2_bzDecompressEnd | ||
| 1584 | otherwise | ||
| 1585 | </programlisting> | ||
| 1586 | |||
| 1587 | </sect2> | ||
| 1588 | |||
| 1589 | |||
| 1590 | <sect2 id="bzDecompress-end" xreflabel="BZ2_bzDecompressEnd"> | ||
| 1591 | <title><computeroutput>BZ2_bzDecompressEnd</computeroutput></title> | ||
| 1592 | |||
| 1593 | <programlisting> | ||
| 1594 | int BZ2_bzDecompressEnd ( bz_stream *strm ); | ||
| 1595 | </programlisting> | ||
| 1596 | |||
| 1597 | <para>Releases all memory associated with a decompression | ||
| 1598 | stream.</para> | ||
| 1599 | |||
| 1600 | <para>Possible return values:</para> | ||
| 1601 | |||
| 1602 | <programlisting> | ||
| 1603 | BZ_PARAM_ERROR | ||
| 1604 | if strm is NULL or strm->s is NULL | ||
| 1605 | BZ_OK | ||
| 1606 | otherwise | ||
| 1607 | </programlisting> | ||
| 1608 | |||
| 1609 | <para>Allowable next actions:</para> | ||
| 1610 | |||
| 1611 | <programlisting> | ||
| 1612 | None. | ||
| 1613 | </programlisting> | ||
| 1614 | |||
| 1615 | </sect2> | ||
| 1616 | |||
| 1617 | </sect1> | ||
| 1618 | |||
| 1619 | |||
| 1620 | <sect1 id="hl-interface" xreflabel="High-level interface"> | ||
| 1621 | <title>High-level interface</title> | ||
| 1622 | |||
| 1623 | <para>This interface provides functions for reading and writing | ||
| 1624 | <computeroutput>bzip2</computeroutput> format files. First, some | ||
| 1625 | general points.</para> | ||
| 1626 | |||
| 1627 | <itemizedlist mark='bullet'> | ||
| 1628 | |||
| 1629 | <listitem><para>All of the functions take an | ||
| 1630 | <computeroutput>int*</computeroutput> first argument, | ||
| 1631 | <computeroutput>bzerror</computeroutput>. After each call, | ||
| 1632 | <computeroutput>bzerror</computeroutput> should be consulted | ||
| 1633 | first to determine the outcome of the call. If | ||
| 1634 | <computeroutput>bzerror</computeroutput> is | ||
| 1635 | <computeroutput>BZ_OK</computeroutput>, the call completed | ||
| 1636 | successfully, and only then should the return value of the | ||
| 1637 | function (if any) be consulted. If | ||
| 1638 | <computeroutput>bzerror</computeroutput> is | ||
| 1639 | <computeroutput>BZ_IO_ERROR</computeroutput>, there was an | ||
| 1640 | error reading/writing the underlying compressed file, and you | ||
| 1641 | should then consult <computeroutput>errno</computeroutput> / | ||
| 1642 | <computeroutput>perror</computeroutput> to determine the cause | ||
| 1643 | of the difficulty. <computeroutput>bzerror</computeroutput> | ||
| 1644 | may also be set to various other values; precise details are | ||
| 1645 | given on a per-function basis below.</para></listitem> | ||
| 1646 | |||
| 1647 | <listitem><para>If <computeroutput>bzerror</computeroutput> indicates | ||
| 1648 | an error (ie, anything except | ||
| 1649 | <computeroutput>BZ_OK</computeroutput> and | ||
| 1650 | <computeroutput>BZ_STREAM_END</computeroutput>), you should | ||
| 1651 | immediately call | ||
| 1652 | <computeroutput>BZ2_bzReadClose</computeroutput> (or | ||
| 1653 | <computeroutput>BZ2_bzWriteClose</computeroutput>, depending on | ||
| 1654 | whether you are attempting to read or to write) to free up all | ||
| 1655 | resources associated with the stream. Once an error has been | ||
| 1656 | indicated, behaviour of all calls except | ||
| 1657 | <computeroutput>BZ2_bzReadClose</computeroutput> | ||
| 1658 | (<computeroutput>BZ2_bzWriteClose</computeroutput>) is | ||
| 1659 | undefined. The implication is that (1) | ||
| 1660 | <computeroutput>bzerror</computeroutput> should be checked | ||
| 1661 | after each call, and (2) if | ||
| 1662 | <computeroutput>bzerror</computeroutput> indicates an error, | ||
| 1663 | <computeroutput>BZ2_bzReadClose</computeroutput> | ||
| 1664 | (<computeroutput>BZ2_bzWriteClose</computeroutput>) should then | ||
| 1665 | be called to clean up.</para></listitem> | ||
| 1666 | |||
| 1667 | <listitem><para>The <computeroutput>FILE*</computeroutput> arguments | ||
| 1668 | passed to <computeroutput>BZ2_bzReadOpen</computeroutput> / | ||
| 1669 | <computeroutput>BZ2_bzWriteOpen</computeroutput> should be set | ||
| 1670 | to binary mode. Most Unix systems will do this by default, but | ||
| 1671 | other platforms, including Windows and Mac, will not. If you | ||
| 1672 | omit this, you may encounter problems when moving code to new | ||
| 1673 | platforms.</para></listitem> | ||
| 1674 | |||
| 1675 | <listitem><para>Memory allocation requests are handled by | ||
| 1676 | <computeroutput>malloc</computeroutput> / | ||
| 1677 | <computeroutput>free</computeroutput>. At present there is no | ||
| 1678 | facility for user-defined memory allocators in the file I/O | ||
| 1679 | functions (could easily be added, though).</para></listitem> | ||
| 1680 | |||
| 1681 | </itemizedlist> | ||
| 1682 | |||
| 1683 | |||
| 1684 | |||
| 1685 | <sect2 id="bzreadopen" xreflabel="BZ2_bzReadOpen"> | ||
| 1686 | <title><computeroutput>BZ2_bzReadOpen</computeroutput></title> | ||
| 1687 | |||
| 1688 | <programlisting> | ||
| 1689 | typedef void BZFILE; | ||
| 1690 | |||
| 1691 | BZFILE *BZ2_bzReadOpen( int *bzerror, FILE *f, | ||
| 1692 | int verbosity, int small, | ||
| 1693 | void *unused, int nUnused ); | ||
| 1694 | </programlisting> | ||
| 1695 | |||
| 1696 | <para>Prepare to read compressed data from file handle | ||
| 1697 | <computeroutput>f</computeroutput>. | ||
| 1698 | <computeroutput>f</computeroutput> should refer to a file which | ||
| 1699 | has been opened for reading, and for which the error indicator | ||
| 1700 | (<computeroutput>ferror(f)</computeroutput>) is not set. If | ||
| 1701 | <computeroutput>small</computeroutput> is 1, the library will try | ||
| 1702 | to decompress using less memory, at the expense of speed.</para> | ||
| 1703 | |||
| 1704 | <para>For reasons explained below, | ||
| 1705 | <computeroutput>BZ2_bzRead</computeroutput> will decompress the | ||
| 1706 | <computeroutput>nUnused</computeroutput> bytes starting at | ||
| 1707 | <computeroutput>unused</computeroutput>, before starting to read | ||
| 1708 | from the file <computeroutput>f</computeroutput>. At most | ||
| 1709 | <computeroutput>BZ_MAX_UNUSED</computeroutput> bytes may be | ||
| 1710 | supplied like this. If this facility is not required, you should | ||
| 1711 | pass <computeroutput>NULL</computeroutput> and | ||
| 1712 | <computeroutput>0</computeroutput> for | ||
| 1713 | <computeroutput>unused</computeroutput> and | ||
| 1714 | <computeroutput>nUnused</computeroutput> respectively.</para> | ||
| 1715 | |||
| 1716 | <para>For the meaning of parameters | ||
| 1717 | <computeroutput>small</computeroutput> and | ||
| 1718 | <computeroutput>verbosity</computeroutput>, see | ||
| 1719 | <computeroutput>BZ2_bzDecompressInit</computeroutput>.</para> | ||
| 1720 | |||
| 1721 | <para>The amount of memory needed to decompress a file cannot be | ||
| 1722 | determined until the file's header has been read. So it is | ||
| 1723 | possible that <computeroutput>BZ2_bzReadOpen</computeroutput> | ||
| 1724 | returns <computeroutput>BZ_OK</computeroutput> but a subsequent | ||
| 1725 | call of <computeroutput>BZ2_bzRead</computeroutput> will return | ||
| 1726 | <computeroutput>BZ_MEM_ERROR</computeroutput>.</para> | ||
| 1727 | |||
| 1728 | <para>Possible assignments to | ||
| 1729 | <computeroutput>bzerror</computeroutput>:</para> | ||
| 1730 | |||
| 1731 | <programlisting> | ||
| 1732 | BZ_CONFIG_ERROR | ||
| 1733 | if the library has been mis-compiled | ||
| 1734 | BZ_PARAM_ERROR | ||
| 1735 | if f is NULL | ||
| 1736 | or small is neither 0 nor 1 | ||
| 1737 | or ( unused == NULL && nUnused != 0 ) | ||
| 1738 | or ( unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED) ) | ||
| 1739 | BZ_IO_ERROR | ||
| 1740 | if ferror(f) is nonzero | ||
| 1741 | BZ_MEM_ERROR | ||
| 1742 | if insufficient memory is available | ||
| 1743 | BZ_OK | ||
| 1744 | otherwise. | ||
| 1745 | </programlisting> | ||
| 1746 | |||
| 1747 | <para>Possible return values:</para> | ||
| 1748 | |||
| 1749 | <programlisting> | ||
| 1750 | Pointer to an abstract BZFILE | ||
| 1751 | if bzerror is BZ_OK | ||
| 1752 | NULL | ||
| 1753 | otherwise | ||
| 1754 | </programlisting> | ||
| 1755 | |||
| 1756 | <para>Allowable next actions:</para> | ||
| 1757 | |||
| 1758 | <programlisting> | ||
| 1759 | BZ2_bzRead | ||
| 1760 | if bzerror is BZ_OK | ||
| 1761 | BZ2_bzReadClose | ||
| 1762 | otherwise | ||
| 1763 | </programlisting> | ||
| 1764 | |||
| 1765 | </sect2> | ||
| 1766 | |||
| 1767 | |||
| 1768 | <sect2 id="bzread" xreflabel="BZ2_bzRead"> | ||
| 1769 | <title><computeroutput>BZ2_bzRead</computeroutput></title> | ||
| 1770 | |||
| 1771 | <programlisting> | ||
| 1772 | int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len ); | ||
| 1773 | </programlisting> | ||
| 1774 | |||
| 1775 | <para>Reads up to <computeroutput>len</computeroutput> | ||
| 1776 | (uncompressed) bytes from the compressed file | ||
| 1777 | <computeroutput>b</computeroutput> into the buffer | ||
| 1778 | <computeroutput>buf</computeroutput>. If the read was | ||
| 1779 | successful, <computeroutput>bzerror</computeroutput> is set to | ||
| 1780 | <computeroutput>BZ_OK</computeroutput> and the number of bytes | ||
| 1781 | read is returned. If the logical end-of-stream was detected, | ||
| 1782 | <computeroutput>bzerror</computeroutput> will be set to | ||
| 1783 | <computeroutput>BZ_STREAM_END</computeroutput>, and the number of | ||
| 1784 | bytes read is returned. All other | ||
| 1785 | <computeroutput>bzerror</computeroutput> values denote an | ||
| 1786 | error.</para> | ||
| 1787 | |||
| 1788 | <para><computeroutput>BZ2_bzRead</computeroutput> will supply | ||
| 1789 | <computeroutput>len</computeroutput> bytes, unless the logical | ||
| 1790 | stream end is detected or an error occurs. Because of this, it | ||
| 1791 | is possible to detect the stream end by observing when the number | ||
| 1792 | of bytes returned is less than the number requested. | ||
| 1793 | Nevertheless, this is regarded as inadvisable; you should instead | ||
| 1794 | check <computeroutput>bzerror</computeroutput> after every call | ||
| 1795 | and watch out for | ||
| 1796 | <computeroutput>BZ_STREAM_END</computeroutput>.</para> | ||
| 1797 | |||
| 1798 | <para>Internally, <computeroutput>BZ2_bzRead</computeroutput> | ||
| 1799 | copies data from the compressed file in chunks of size | ||
| 1800 | <computeroutput>BZ_MAX_UNUSED</computeroutput> bytes before | ||
| 1801 | decompressing it. If the file contains more bytes than strictly | ||
| 1802 | needed to reach the logical end-of-stream, | ||
| 1803 | <computeroutput>BZ2_bzRead</computeroutput> will almost certainly | ||
| 1804 | read some of the trailing data before signalling | ||
| 1805 | <computeroutput>BZ_STREAM_END</computeroutput>. To collect the | ||
| 1806 | read but unused data once | ||
| 1807 | <computeroutput>BZ_STREAM_END</computeroutput> has appeared, | ||
| 1808 | call <computeroutput>BZ2_bzReadGetUnused</computeroutput> | ||
| 1809 | immediately before | ||
| 1810 | <computeroutput>BZ2_bzReadClose</computeroutput>.</para> | ||
| 1811 | |||
| 1812 | <para>Possible assignments to | ||
| 1813 | <computeroutput>bzerror</computeroutput>:</para> | ||
| 1814 | |||
| 1815 | <programlisting> | ||
| 1816 | BZ_PARAM_ERROR | ||
| 1817 | if b is NULL or buf is NULL or len < 0 | ||
| 1818 | BZ_SEQUENCE_ERROR | ||
| 1819 | if b was opened with BZ2_bzWriteOpen | ||
| 1820 | BZ_IO_ERROR | ||
| 1821 | if there is an error reading from the compressed file | ||
| 1822 | BZ_UNEXPECTED_EOF | ||
| 1823 | if the compressed file ended before | ||
| 1824 | the logical end-of-stream was detected | ||
| 1825 | BZ_DATA_ERROR | ||
| 1826 | if a data integrity error was detected in the compressed stream | ||
| 1827 | BZ_DATA_ERROR_MAGIC | ||
| 1828 | if the stream does not begin with the requisite header bytes | ||
| 1829 | (ie, is not a bzip2 data file). This is really | ||
| 1830 | a special case of BZ_DATA_ERROR. | ||
| 1831 | BZ_MEM_ERROR | ||
| 1832 | if insufficient memory was available | ||
| 1833 | BZ_STREAM_END | ||
| 1834 | if the logical end of stream was detected. | ||
| 1835 | BZ_OK | ||
| 1836 | otherwise. | ||
| 1837 | </programlisting> | ||
| 1838 | |||
| 1839 | <para>Possible return values:</para> | ||
| 1840 | |||
| 1841 | <programlisting> | ||
| 1842 | number of bytes read | ||
| 1843 | if bzerror is BZ_OK or BZ_STREAM_END | ||
| 1844 | undefined | ||
| 1845 | otherwise | ||
| 1846 | </programlisting> | ||
| 1847 | |||
| 1848 | <para>Allowable next actions:</para> | ||
| 1849 | |||
| 1850 | <programlisting> | ||
| 1851 | collect data from buf, then BZ2_bzRead or BZ2_bzReadClose | ||
| 1852 | if bzerror is BZ_OK | ||
| 1853 | collect data from buf, then BZ2_bzReadClose or BZ2_bzReadGetUnused | ||
| 1854 | if bzerror is BZ_STREAM_END | ||
| 1855 | BZ2_bzReadClose | ||
| 1856 | otherwise | ||
| 1857 | </programlisting> | ||
| 1858 | |||
| 1859 | </sect2> | ||
| 1860 | |||
| 1861 | |||
| 1862 | <sect2 id="bzreadgetunused" xreflabel="BZ2_bzReadGetUnused"> | ||
| 1863 | <title><computeroutput>BZ2_bzReadGetUnused</computeroutput></title> | ||
| 1864 | |||
| 1865 | <programlisting> | ||
| 1866 | void BZ2_bzReadGetUnused( int* bzerror, BZFILE *b, | ||
| 1867 | void** unused, int* nUnused ); | ||
| 1868 | </programlisting> | ||
| 1869 | |||
| 1870 | <para>Returns data which was read from the compressed file but | ||
| 1871 | was not needed to get to the logical end-of-stream. | ||
| 1872 | <computeroutput>*unused</computeroutput> is set to the address of | ||
| 1873 | the data, and <computeroutput>*nUnused</computeroutput> to the | ||
| 1874 | number of bytes. <computeroutput>*nUnused</computeroutput> will | ||
| 1875 | be set to a value between <computeroutput>0</computeroutput> and | ||
| 1876 | <computeroutput>BZ_MAX_UNUSED</computeroutput> inclusive.</para> | ||
| 1877 | |||
| 1878 | <para>This function may only be called once | ||
| 1879 | <computeroutput>BZ2_bzRead</computeroutput> has signalled | ||
| 1880 | <computeroutput>BZ_STREAM_END</computeroutput> but before | ||
| 1881 | <computeroutput>BZ2_bzReadClose</computeroutput>.</para> | ||
| 1882 | |||
| 1883 | <para>Possible assignments to | ||
| 1884 | <computeroutput>bzerror</computeroutput>:</para> | ||
| 1885 | |||
| 1886 | <programlisting> | ||
| 1887 | BZ_PARAM_ERROR | ||
| 1888 | if b is NULL | ||
| 1889 | or unused is NULL or nUnused is NULL | ||
| 1890 | BZ_SEQUENCE_ERROR | ||
| 1891 | if BZ_STREAM_END has not been signalled | ||
| 1892 | or if b was opened with BZ2_bzWriteOpen | ||
| 1893 | BZ_OK | ||
| 1894 | otherwise | ||
| 1895 | </programlisting> | ||
| 1896 | |||
| 1897 | <para>Allowable next actions:</para> | ||
| 1898 | |||
| 1899 | <programlisting> | ||
| 1900 | BZ2_bzReadClose | ||
| 1901 | </programlisting> | ||
| 1902 | |||
| 1903 | </sect2> | ||
| 1904 | |||
| 1905 | |||
| 1906 | <sect2 id="bzreadclose" xreflabel="BZ2_bzReadClose"> | ||
| 1907 | <title><computeroutput>BZ2_bzReadClose</computeroutput></title> | ||
| 1908 | |||
| 1909 | <programlisting> | ||
| 1910 | void BZ2_bzReadClose ( int *bzerror, BZFILE *b ); | ||
| 1911 | </programlisting> | ||
| 1912 | |||
| 1913 | <para>Releases all memory pertaining to the compressed file | ||
| 1914 | <computeroutput>b</computeroutput>. | ||
| 1915 | <computeroutput>BZ2_bzReadClose</computeroutput> does not call | ||
| 1916 | <computeroutput>fclose</computeroutput> on the underlying file | ||
| 1917 | handle, so you should do that yourself if appropriate. | ||
| 1918 | <computeroutput>BZ2_bzReadClose</computeroutput> should be called | ||
| 1919 | to clean up after all error situations.</para> | ||
| 1920 | |||
| 1921 | <para>Possible assignments to | ||
| 1922 | <computeroutput>bzerror</computeroutput>:</para> | ||
| 1923 | |||
| 1924 | <programlisting> | ||
| 1925 | BZ_SEQUENCE_ERROR | ||
| 1926 | if b was opened with BZ2_bzWriteOpen | ||
| 1927 | BZ_OK | ||
| 1928 | otherwise | ||
| 1929 | </programlisting> | ||
| 1930 | |||
| 1931 | <para>Allowable next actions:</para> | ||
| 1932 | |||
| 1933 | <programlisting> | ||
| 1934 | none | ||
| 1935 | </programlisting> | ||
| 1936 | |||
| 1937 | </sect2> | ||
| 1938 | |||
| 1939 | |||
| 1940 | <sect2 id="bzwriteopen" xreflabel="BZ2_bzWriteOpen"> | ||
| 1941 | <title><computeroutput>BZ2_bzWriteOpen</computeroutput></title> | ||
| 1942 | |||
| 1943 | <programlisting> | ||
| 1944 | BZFILE *BZ2_bzWriteOpen( int *bzerror, FILE *f, | ||
| 1945 | int blockSize100k, int verbosity, | ||
| 1946 | int workFactor ); | ||
| 1947 | </programlisting> | ||
| 1948 | |||
| 1949 | <para>Prepare to write compressed data to file handle | ||
| 1950 | <computeroutput>f</computeroutput>. | ||
| 1951 | <computeroutput>f</computeroutput> should refer to a file which | ||
| 1952 | has been opened for writing, and for which the error indicator | ||
| 1953 | (<computeroutput>ferror(f)</computeroutput>) is not set.</para> | ||
| 1954 | |||
| 1955 | <para>For the meaning of parameters | ||
| 1956 | <computeroutput>blockSize100k</computeroutput>, | ||
| 1957 | <computeroutput>verbosity</computeroutput> and | ||
| 1958 | <computeroutput>workFactor</computeroutput>, see | ||
| 1959 | <computeroutput>BZ2_bzCompressInit</computeroutput>.</para> | ||
| 1960 | |||
| 1961 | <para>All required memory is allocated at this stage, so if the | ||
| 1962 | call completes successfully, | ||
| 1963 | <computeroutput>BZ_MEM_ERROR</computeroutput> cannot be signalled | ||
| 1964 | by a subsequent call to | ||
| 1965 | <computeroutput>BZ2_bzWrite</computeroutput>.</para> | ||
| 1966 | |||
| 1967 | <para>Possible assignments to | ||
| 1968 | <computeroutput>bzerror</computeroutput>:</para> | ||
| 1969 | |||
| 1970 | <programlisting> | ||
| 1971 | BZ_CONFIG_ERROR | ||
| 1972 | if the library has been mis-compiled | ||
| 1973 | BZ_PARAM_ERROR | ||
| 1974 | if f is NULL | ||
| 1975 | or blockSize100k < 1 or blockSize100k > 9 | ||
| 1976 | BZ_IO_ERROR | ||
| 1977 | if ferror(f) is nonzero | ||
| 1978 | BZ_MEM_ERROR | ||
| 1979 | if insufficient memory is available | ||
| 1980 | BZ_OK | ||
| 1981 | otherwise | ||
| 1982 | </programlisting> | ||
| 1983 | |||
| 1984 | <para>Possible return values:</para> | ||
| 1985 | |||
| 1986 | <programlisting> | ||
| 1987 | Pointer to an abstract BZFILE | ||
| 1988 | if bzerror is BZ_OK | ||
| 1989 | NULL | ||
| 1990 | otherwise | ||
| 1991 | </programlisting> | ||
| 1992 | |||
| 1993 | <para>Allowable next actions:</para> | ||
| 1994 | |||
| 1995 | <programlisting> | ||
| 1996 | BZ2_bzWrite | ||
| 1997 | if bzerror is BZ_OK | ||
| 1998 | (you could go directly to BZ2_bzWriteClose, but this would be pretty pointless) | ||
| 1999 | BZ2_bzWriteClose | ||
| 2000 | otherwise | ||
| 2001 | </programlisting> | ||
| 2002 | |||
| 2003 | </sect2> | ||
| 2004 | |||
| 2005 | |||
| 2006 | <sect2 id="bzwrite" xreflabel="BZ2_bzWrite"> | ||
| 2007 | <title><computeroutput>BZ2_bzWrite</computeroutput></title> | ||
| 2008 | |||
| 2009 | <programlisting> | ||
| 2010 | void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len ); | ||
| 2011 | </programlisting> | ||
| 2012 | |||
| 2013 | <para>Absorbs <computeroutput>len</computeroutput> bytes from the | ||
| 2014 | buffer <computeroutput>buf</computeroutput>, eventually to be | ||
| 2015 | compressed and written to the file.</para> | ||
| 2016 | |||
| 2017 | <para>Possible assignments to | ||
| 2018 | <computeroutput>bzerror</computeroutput>:</para> | ||
| 2019 | |||
| 2020 | <programlisting> | ||
| 2021 | BZ_PARAM_ERROR | ||
| 2022 | if b is NULL or buf is NULL or len < 0 | ||
| 2023 | BZ_SEQUENCE_ERROR | ||
| 2024 | if b was opened with BZ2_bzReadOpen | ||
| 2025 | BZ_IO_ERROR | ||
| 2026 | if there is an error writing the compressed file. | ||
| 2027 | BZ_OK | ||
| 2028 | otherwise | ||
| 2029 | </programlisting> | ||
| 2030 | |||
| 2031 | </sect2> | ||
| 2032 | |||
| 2033 | |||
| 2034 | <sect2 id="bzwriteclose" xreflabel="BZ2_bzWriteClose"> | ||
| 2035 | <title><computeroutput>BZ2_bzWriteClose</computeroutput></title> | ||
| 2036 | |||
| 2037 | <programlisting> | ||
| 2038 | void BZ2_bzWriteClose( int *bzerror, BZFILE* b, | ||
| 2039 | int abandon, | ||
| 2040 | unsigned int* nbytes_in, | ||
| 2041 | unsigned int* nbytes_out ); | ||
| 2042 | |||
| 2043 | void BZ2_bzWriteClose64( int *bzerror, BZFILE* b, | ||
| 2044 | int abandon, | ||
| 2045 | unsigned int* nbytes_in_lo32, | ||
| 2046 | unsigned int* nbytes_in_hi32, | ||
| 2047 | unsigned int* nbytes_out_lo32, | ||
| 2048 | unsigned int* nbytes_out_hi32 ); | ||
| 2049 | </programlisting> | ||
| 2050 | |||
| 2051 | <para>Compresses and flushes to the compressed file all data so | ||
| 2052 | far supplied by <computeroutput>BZ2_bzWrite</computeroutput>. | ||
| 2053 | The logical end-of-stream markers are also written, so subsequent | ||
| 2054 | calls to <computeroutput>BZ2_bzWrite</computeroutput> are | ||
| 2055 | illegal. All memory associated with the compressed file | ||
| 2056 | <computeroutput>b</computeroutput> is released. | ||
| 2057 | <computeroutput>fflush</computeroutput> is called on the | ||
| 2058 | compressed file, but it is not | ||
| 2059 | <computeroutput>fclose</computeroutput>'d.</para> | ||
| 2060 | |||
| 2061 | <para>If <computeroutput>BZ2_bzWriteClose</computeroutput> is | ||
| 2062 | called to clean up after an error, the only action is to release | ||
| 2063 | the memory. The library records the error codes issued by | ||
| 2064 | previous calls, so this situation will be detected automatically. | ||
| 2065 | There is no attempt to complete the compression operation, nor to | ||
| 2066 | <computeroutput>fflush</computeroutput> the compressed file. You | ||
| 2067 | can force this behaviour to happen even in the case of no error, | ||
| 2068 | by passing a nonzero value to | ||
| 2069 | <computeroutput>abandon</computeroutput>.</para> | ||
| 2070 | |||
| 2071 | <para>If <computeroutput>nbytes_in</computeroutput> is non-null, | ||
| 2072 | <computeroutput>*nbytes_in</computeroutput> will be set to be the | ||
| 2073 | total volume of uncompressed data handled. Similarly, | ||
| 2074 | <computeroutput>nbytes_out</computeroutput> will be set to the | ||
| 2075 | total volume of compressed data written. For compatibility with | ||
| 2076 | older versions of the library, | ||
| 2077 | <computeroutput>BZ2_bzWriteClose</computeroutput> only yields the | ||
| 2078 | lower 32 bits of these counts. Use | ||
| 2079 | <computeroutput>BZ2_bzWriteClose64</computeroutput> if you want | ||
| 2080 | the full 64 bit counts. These two functions are otherwise | ||
| 2081 | absolutely identical.</para> | ||
| 2082 | |||
| 2083 | <para>Possible assignments to | ||
| 2084 | <computeroutput>bzerror</computeroutput>:</para> | ||
| 2085 | |||
| 2086 | <programlisting> | ||
| 2087 | BZ_SEQUENCE_ERROR | ||
| 2088 | if b was opened with BZ2_bzReadOpen | ||
| 2089 | BZ_IO_ERROR | ||
| 2090 | if there is an error writing the compressed file | ||
| 2091 | BZ_OK | ||
| 2092 | otherwise | ||
| 2093 | </programlisting> | ||
| 2094 | |||
| 2095 | </sect2> | ||
| 2096 | |||
| 2097 | |||
| 2098 | <sect2 id="embed" xreflabel="Handling embedded compressed data streams"> | ||
| 2099 | <title>Handling embedded compressed data streams</title> | ||
| 2100 | |||
| 2101 | <para>The high-level library facilitates use of | ||
| 2102 | <computeroutput>bzip2</computeroutput> data streams which form | ||
| 2103 | some part of a surrounding, larger data stream.</para> | ||
| 2104 | |||
| 2105 | <itemizedlist mark='bullet'> | ||
| 2106 | |||
| 2107 | <listitem><para>For writing, the library takes an open file handle, | ||
| 2108 | writes compressed data to it, | ||
| 2109 | <computeroutput>fflush</computeroutput>es it but does not | ||
| 2110 | <computeroutput>fclose</computeroutput> it. The calling | ||
| 2111 | application can write its own data before and after the | ||
| 2112 | compressed data stream, using that same file handle.</para></listitem> | ||
| 2113 | |||
| 2114 | <listitem><para>Reading is more complex, and the facilities are not as | ||
| 2115 | general as they could be since generality is hard to reconcile | ||
| 2116 | with efficiency. <computeroutput>BZ2_bzRead</computeroutput> | ||
| 2117 | reads from the compressed file in blocks of size | ||
| 2118 | <computeroutput>BZ_MAX_UNUSED</computeroutput> bytes, and in | ||
| 2119 | doing so probably will overshoot the logical end of compressed | ||
| 2120 | stream. To recover this data once decompression has ended, | ||
| 2121 | call <computeroutput>BZ2_bzReadGetUnused</computeroutput> after | ||
| 2122 | the last call of <computeroutput>BZ2_bzRead</computeroutput> | ||
| 2123 | (the one returning | ||
| 2124 | <computeroutput>BZ_STREAM_END</computeroutput>) but before | ||
| 2125 | calling | ||
| 2126 | <computeroutput>BZ2_bzReadClose</computeroutput>.</para></listitem> | ||
| 2127 | |||
| 2128 | </itemizedlist> | ||
| 2129 | |||
| 2130 | <para>This mechanism makes it easy to decompress multiple | ||
| 2131 | <computeroutput>bzip2</computeroutput> streams placed end-to-end. | ||
| 2132 | At the end of one stream, when | ||
| 2133 | <computeroutput>BZ2_bzRead</computeroutput> returns | ||
| 2134 | <computeroutput>BZ_STREAM_END</computeroutput>, call | ||
| 2135 | <computeroutput>BZ2_bzReadGetUnused</computeroutput> to collect | ||
| 2136 | the unused data (copy it into your own buffer somewhere). That | ||
| 2137 | data forms the start of the next compressed stream. To start | ||
| 2138 | uncompressing that next stream, call | ||
| 2139 | <computeroutput>BZ2_bzReadOpen</computeroutput> again, feeding in | ||
| 2140 | the unused data via the <computeroutput>unused</computeroutput> / | ||
| 2141 | <computeroutput>nUnused</computeroutput> parameters. Keep doing | ||
| 2142 | this until <computeroutput>BZ_STREAM_END</computeroutput> return | ||
| 2143 | coincides with the physical end of file | ||
| 2144 | (<computeroutput>feof(f)</computeroutput>). In this situation | ||
| 2145 | <computeroutput>BZ2_bzReadGetUnused</computeroutput> will of | ||
| 2146 | course return no data.</para> | ||
| 2147 | |||
| 2148 | <para>This should give some feel for how the high-level interface | ||
| 2149 | can be used. If you require extra flexibility, you'll have to | ||
| 2150 | bite the bullet and get to grips with the low-level | ||
| 2151 | interface.</para> | ||
| 2152 | |||
| 2153 | </sect2> | ||
| 2154 | |||
| 2155 | |||
| 2156 | <sect2 id="std-rdwr" xreflabel="Standard file-reading/writing code"> | ||
| 2157 | <title>Standard file-reading/writing code</title> | ||
| 2158 | |||
| 2159 | <para>Here's how you'd write data to a compressed file:</para> | ||
| 2160 | |||
| 2161 | <programlisting> | ||
| 2162 | FILE* f; | ||
| 2163 | BZFILE* b; | ||
| 2164 | int nBuf; | ||
| 2165 | char buf[ /* whatever size you like */ ]; | ||
| 2166 | int bzerror; | ||
| 2167 | int nWritten; | ||
| 2168 | |||
| 2169 | f = fopen ( "myfile.bz2", "w" ); | ||
| 2170 | if ( !f ) { | ||
| 2171 | /* handle error */ | ||
| 2172 | } | ||
| 2173 | b = BZ2_bzWriteOpen( &bzerror, f, 9, 0, 0 ); | ||
| 2174 | if (bzerror != BZ_OK) { | ||
| 2175 | BZ2_bzWriteClose ( &bzerror, b, 1, NULL, NULL ); | ||
| 2176 | /* handle error */ | ||
| 2177 | } | ||
| 2178 | |||
| 2179 | while ( /* condition */ ) { | ||
| 2180 | /* get data to write into buf, and set nBuf appropriately */ | ||
| 2181 | BZ2_bzWrite ( &bzerror, b, buf, nBuf ); | ||
| 2182 | if (bzerror == BZ_IO_ERROR) { | ||
| 2183 | BZ2_bzWriteClose ( &bzerror, b, 1, NULL, NULL ); | ||
| 2184 | /* handle error */ | ||
| 2185 | } | ||
| 2186 | } | ||
| 2187 | |||
| 2188 | BZ2_bzWriteClose( &bzerror, b, 0, NULL, NULL ); | ||
| 2189 | if (bzerror == BZ_IO_ERROR) { | ||
| 2190 | /* handle error */ | ||
| 2191 | } | ||
| 2192 | </programlisting> | ||
| 2193 | |||
| 2194 | <para>And to read from a compressed file:</para> | ||
| 2195 | |||
| 2196 | <programlisting> | ||
| 2197 | FILE* f; | ||
| 2198 | BZFILE* b; | ||
| 2199 | int nBuf; | ||
| 2200 | char buf[ /* whatever size you like */ ]; | ||
| 2201 | int bzerror; | ||
| 2202 | int nWritten; | ||
| 2203 | |||
| 2204 | f = fopen ( "myfile.bz2", "r" ); | ||
| 2205 | if ( !f ) { | ||
| 2206 | /* handle error */ | ||
| 2207 | } | ||
| 2208 | b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 ); | ||
| 2209 | if ( bzerror != BZ_OK ) { | ||
| 2210 | BZ2_bzReadClose ( &bzerror, b ); | ||
| 2211 | /* handle error */ | ||
| 2212 | } | ||
| 2213 | |||
| 2214 | bzerror = BZ_OK; | ||
| 2215 | while ( bzerror == BZ_OK && /* arbitrary other conditions */) { | ||
| 2216 | nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ ); | ||
| 2217 | if ( bzerror == BZ_OK ) { | ||
| 2218 | /* do something with buf[0 .. nBuf-1] */ | ||
| 2219 | } | ||
| 2220 | } | ||
| 2221 | if ( bzerror != BZ_STREAM_END ) { | ||
| 2222 | BZ2_bzReadClose ( &bzerror, b ); | ||
| 2223 | /* handle error */ | ||
| 2224 | } else { | ||
| 2225 | BZ2_bzReadClose ( &bzerror, b ); | ||
| 2226 | } | ||
| 2227 | </programlisting> | ||
| 2228 | |||
| 2229 | </sect2> | ||
| 2230 | |||
| 2231 | </sect1> | ||
| 2232 | |||
| 2233 | |||
| 2234 | <sect1 id="util-fns" xreflabel="Utility functions"> | ||
| 2235 | <title>Utility functions</title> | ||
| 2236 | |||
| 2237 | |||
| 2238 | <sect2 id="bzbufftobuffcompress" xreflabel="BZ2_bzBuffToBuffCompress"> | ||
| 2239 | <title><computeroutput>BZ2_bzBuffToBuffCompress</computeroutput></title> | ||
| 2240 | |||
| 2241 | <programlisting> | ||
| 2242 | int BZ2_bzBuffToBuffCompress( char* dest, | ||
| 2243 | unsigned int* destLen, | ||
| 2244 | char* source, | ||
| 2245 | unsigned int sourceLen, | ||
| 2246 | int blockSize100k, | ||
| 2247 | int verbosity, | ||
| 2248 | int workFactor ); | ||
| 2249 | </programlisting> | ||
| 2250 | |||
| 2251 | <para>Attempts to compress the data in <computeroutput>source[0 | ||
| 2252 | .. sourceLen-1]</computeroutput> into the destination buffer, | ||
| 2253 | <computeroutput>dest[0 .. *destLen-1]</computeroutput>. If the | ||
| 2254 | destination buffer is big enough, | ||
| 2255 | <computeroutput>*destLen</computeroutput> is set to the size of | ||
| 2256 | the compressed data, and <computeroutput>BZ_OK</computeroutput> | ||
| 2257 | is returned. If the compressed data won't fit, | ||
| 2258 | <computeroutput>*destLen</computeroutput> is unchanged, and | ||
| 2259 | <computeroutput>BZ_OUTBUFF_FULL</computeroutput> is | ||
| 2260 | returned.</para> | ||
| 2261 | |||
| 2262 | <para>Compression in this manner is a one-shot event, done with a | ||
| 2263 | single call to this function. The resulting compressed data is a | ||
| 2264 | complete <computeroutput>bzip2</computeroutput> format data | ||
| 2265 | stream. There is no mechanism for making additional calls to | ||
| 2266 | provide extra input data. If you want that kind of mechanism, | ||
| 2267 | use the low-level interface.</para> | ||
| 2268 | |||
| 2269 | <para>For the meaning of parameters | ||
| 2270 | <computeroutput>blockSize100k</computeroutput>, | ||
| 2271 | <computeroutput>verbosity</computeroutput> and | ||
| 2272 | <computeroutput>workFactor</computeroutput>, see | ||
| 2273 | <computeroutput>BZ2_bzCompressInit</computeroutput>.</para> | ||
| 2274 | |||
| 2275 | <para>To guarantee that the compressed data will fit in its | ||
| 2276 | buffer, allocate an output buffer of size 1% larger than the | ||
| 2277 | uncompressed data, plus six hundred extra bytes.</para> | ||
| 2278 | |||
| 2279 | <para><computeroutput>BZ2_bzBuffToBuffCompress</computeroutput> | ||
| 2280 | will not write data at or beyond | ||
| 2281 | <computeroutput>dest[*destLen]</computeroutput>, even in case of | ||
| 2282 | buffer overflow.</para> | ||
| 2283 | |||
| 2284 | <para>Possible return values:</para> | ||
| 2285 | |||
| 2286 | <programlisting> | ||
| 2287 | BZ_CONFIG_ERROR | ||
| 2288 | if the library has been mis-compiled | ||
| 2289 | BZ_PARAM_ERROR | ||
| 2290 | if dest is NULL or destLen is NULL | ||
| 2291 | or blockSize100k < 1 or blockSize100k > 9 | ||
| 2292 | or verbosity < 0 or verbosity > 4 | ||
| 2293 | or workFactor < 0 or workFactor > 250 | ||
| 2294 | BZ_MEM_ERROR | ||
| 2295 | if insufficient memory is available | ||
| 2296 | BZ_OUTBUFF_FULL | ||
| 2297 | if the size of the compressed data exceeds *destLen | ||
| 2298 | BZ_OK | ||
| 2299 | otherwise | ||
| 2300 | </programlisting> | ||
| 2301 | |||
| 2302 | </sect2> | ||
| 2303 | |||
| 2304 | |||
| 2305 | <sect2 id="bzbufftobuffdecompress" xreflabel="BZ2_bzBuffToBuffDecompress"> | ||
| 2306 | <title><computeroutput>BZ2_bzBuffToBuffDecompress</computeroutput></title> | ||
| 2307 | |||
| 2308 | <programlisting> | ||
| 2309 | int BZ2_bzBuffToBuffDecompress( char* dest, | ||
| 2310 | unsigned int* destLen, | ||
| 2311 | char* source, | ||
| 2312 | unsigned int sourceLen, | ||
| 2313 | int small, | ||
| 2314 | int verbosity ); | ||
| 2315 | </programlisting> | ||
| 2316 | |||
| 2317 | <para>Attempts to decompress the data in <computeroutput>source[0 | ||
| 2318 | .. sourceLen-1]</computeroutput> into the destination buffer, | ||
| 2319 | <computeroutput>dest[0 .. *destLen-1]</computeroutput>. If the | ||
| 2320 | destination buffer is big enough, | ||
| 2321 | <computeroutput>*destLen</computeroutput> is set to the size of | ||
| 2322 | the uncompressed data, and <computeroutput>BZ_OK</computeroutput> | ||
| 2323 | is returned. If the uncompressed data won't fit, | ||
| 2324 | <computeroutput>*destLen</computeroutput> is unchanged, and | ||
| 2325 | <computeroutput>BZ_OUTBUFF_FULL</computeroutput> is | ||
| 2326 | returned.</para> | ||
| 2327 | |||
| 2328 | <para><computeroutput>source</computeroutput> is assumed to hold | ||
| 2329 | a complete <computeroutput>bzip2</computeroutput> format data | ||
| 2330 | stream. | ||
| 2331 | <computeroutput>BZ2_bzBuffToBuffDecompress</computeroutput> tries | ||
| 2332 | to decompress the entirety of the stream into the output | ||
| 2333 | buffer.</para> | ||
| 2334 | |||
| 2335 | <para>For the meaning of parameters | ||
| 2336 | <computeroutput>small</computeroutput> and | ||
| 2337 | <computeroutput>verbosity</computeroutput>, see | ||
| 2338 | <computeroutput>BZ2_bzDecompressInit</computeroutput>.</para> | ||
| 2339 | |||
| 2340 | <para>Because the compression ratio of the compressed data cannot | ||
| 2341 | be known in advance, there is no easy way to guarantee that the | ||
| 2342 | output buffer will be big enough. You may of course make | ||
| 2343 | arrangements in your code to record the size of the uncompressed | ||
| 2344 | data, but such a mechanism is beyond the scope of this | ||
| 2345 | library.</para> | ||
| 2346 | |||
| 2347 | <para><computeroutput>BZ2_bzBuffToBuffDecompress</computeroutput> | ||
| 2348 | will not write data at or beyond | ||
| 2349 | <computeroutput>dest[*destLen]</computeroutput>, even in case of | ||
| 2350 | buffer overflow.</para> | ||
| 2351 | |||
| 2352 | <para>Possible return values:</para> | ||
| 2353 | |||
| 2354 | <programlisting> | ||
| 2355 | BZ_CONFIG_ERROR | ||
| 2356 | if the library has been mis-compiled | ||
| 2357 | BZ_PARAM_ERROR | ||
| 2358 | if dest is NULL or destLen is NULL | ||
| 2359 | or small != 0 && small != 1 | ||
| 2360 | or verbosity < 0 or verbosity > 4 | ||
| 2361 | BZ_MEM_ERROR | ||
| 2362 | if insufficient memory is available | ||
| 2363 | BZ_OUTBUFF_FULL | ||
| 2364 | if the size of the uncompressed data exceeds *destLen | ||
| 2365 | BZ_DATA_ERROR | ||
| 2366 | if a data integrity error was detected in the compressed data | ||
| 2367 | BZ_DATA_ERROR_MAGIC | ||
| 2368 | if the compressed data doesn't begin with the right magic bytes | ||
| 2369 | BZ_UNEXPECTED_EOF | ||
| 2370 | if the compressed data ends unexpectedly | ||
| 2371 | BZ_OK | ||
| 2372 | otherwise | ||
| 2373 | </programlisting> | ||
| 2374 | |||
| 2375 | </sect2> | ||
| 2376 | |||
| 2377 | </sect1> | ||
| 2378 | |||
| 2379 | |||
| 2380 | <sect1 id="zlib-compat" xreflabel="zlib compatibility functions"> | ||
| 2381 | <title><computeroutput>zlib</computeroutput> compatibility functions</title> | ||
| 2382 | |||
| 2383 | <para>Yoshioka Tsuneo has contributed some functions to give | ||
| 2384 | better <computeroutput>zlib</computeroutput> compatibility. | ||
| 2385 | These functions are <computeroutput>BZ2_bzopen</computeroutput>, | ||
| 2386 | <computeroutput>BZ2_bzread</computeroutput>, | ||
| 2387 | <computeroutput>BZ2_bzwrite</computeroutput>, | ||
| 2388 | <computeroutput>BZ2_bzflush</computeroutput>, | ||
| 2389 | <computeroutput>BZ2_bzclose</computeroutput>, | ||
| 2390 | <computeroutput>BZ2_bzerror</computeroutput> and | ||
| 2391 | <computeroutput>BZ2_bzlibVersion</computeroutput>. These | ||
| 2392 | functions are not (yet) officially part of the library. If they | ||
| 2393 | break, you get to keep all the pieces. Nevertheless, I think | ||
| 2394 | they work ok.</para> | ||
| 2395 | |||
| 2396 | <programlisting> | ||
| 2397 | typedef void BZFILE; | ||
| 2398 | |||
| 2399 | const char * BZ2_bzlibVersion ( void ); | ||
| 2400 | </programlisting> | ||
| 2401 | |||
| 2402 | <para>Returns a string indicating the library version.</para> | ||
| 2403 | |||
| 2404 | <programlisting> | ||
| 2405 | BZFILE * BZ2_bzopen ( const char *path, const char *mode ); | ||
| 2406 | BZFILE * BZ2_bzdopen ( int fd, const char *mode ); | ||
| 2407 | </programlisting> | ||
| 2408 | |||
| 2409 | <para>Opens a <computeroutput>.bz2</computeroutput> file for | ||
| 2410 | reading or writing, using either its name or a pre-existing file | ||
| 2411 | descriptor. Analogous to <computeroutput>fopen</computeroutput> | ||
| 2412 | and <computeroutput>fdopen</computeroutput>.</para> | ||
| 2413 | |||
| 2414 | <programlisting> | ||
| 2415 | int BZ2_bzread ( BZFILE* b, void* buf, int len ); | ||
| 2416 | int BZ2_bzwrite ( BZFILE* b, void* buf, int len ); | ||
| 2417 | </programlisting> | ||
| 2418 | |||
| 2419 | <para>Reads/writes data from/to a previously opened | ||
| 2420 | <computeroutput>BZFILE</computeroutput>. Analogous to | ||
| 2421 | <computeroutput>fread</computeroutput> and | ||
| 2422 | <computeroutput>fwrite</computeroutput>.</para> | ||
| 2423 | |||
| 2424 | <programlisting> | ||
| 2425 | int BZ2_bzflush ( BZFILE* b ); | ||
| 2426 | void BZ2_bzclose ( BZFILE* b ); | ||
| 2427 | </programlisting> | ||
| 2428 | |||
| 2429 | <para>Flushes/closes a <computeroutput>BZFILE</computeroutput>. | ||
| 2430 | <computeroutput>BZ2_bzflush</computeroutput> doesn't actually do | ||
| 2431 | anything. Analogous to <computeroutput>fflush</computeroutput> | ||
| 2432 | and <computeroutput>fclose</computeroutput>.</para> | ||
| 2433 | |||
| 2434 | <programlisting> | ||
| 2435 | const char * BZ2_bzerror ( BZFILE *b, int *errnum ); | ||
| 2436 | </programlisting> | ||
| 2437 | |||
| 2438 | <para>Returns a string describing the most recent error status of | ||
| 2439 | <computeroutput>b</computeroutput>, and also sets | ||
| 2440 | <computeroutput>*errnum</computeroutput> to its numerical | ||
| 2441 | value.</para> | ||
| 2442 | |||
| 2443 | </sect1> | ||
| 2444 | |||
| 2445 | |||
| 2446 | <sect1 id="stdio-free" | ||
| 2447 | xreflabel="Using the library in a stdio-free environment"> | ||
| 2448 | <title>Using the library in a <computeroutput>stdio</computeroutput>-free environment</title> | ||
| 2449 | |||
| 2450 | |||
| 2451 | <sect2 id="stdio-bye" xreflabel="Getting rid of stdio"> | ||
| 2452 | <title>Getting rid of <computeroutput>stdio</computeroutput></title> | ||
| 2453 | |||
| 2454 | <para>In a deeply embedded application, you might want to use | ||
| 2455 | just the memory-to-memory functions. You can do this | ||
| 2456 | conveniently by compiling the library with preprocessor symbol | ||
| 2457 | <computeroutput>BZ_NO_STDIO</computeroutput> defined. Doing this | ||
| 2458 | gives you a library containing only the following eight | ||
| 2459 | functions:</para> | ||
| 2460 | |||
| 2461 | <para><computeroutput>BZ2_bzCompressInit</computeroutput>, | ||
| 2462 | <computeroutput>BZ2_bzCompress</computeroutput>, | ||
| 2463 | <computeroutput>BZ2_bzCompressEnd</computeroutput>, | ||
| 2464 | <computeroutput>BZ2_bzDecompressInit</computeroutput>, | ||
| 2465 | <computeroutput>BZ2_bzDecompress</computeroutput>, | ||
| 2466 | <computeroutput>BZ2_bzDecompressEnd</computeroutput>, | ||
| 2467 | <computeroutput>BZ2_bzBuffToBuffCompress</computeroutput>, | ||
| 2468 | <computeroutput>BZ2_bzBuffToBuffDecompress</computeroutput></para> | ||
| 2469 | |||
| 2470 | <para>When compiled like this, all functions will ignore | ||
| 2471 | <computeroutput>verbosity</computeroutput> settings.</para> | ||
| 2472 | |||
| 2473 | </sect2> | ||
| 2474 | |||
| 2475 | |||
| 2476 | <sect2 id="critical-error" xreflabel="Critical error handling"> | ||
| 2477 | <title>Critical error handling</title> | ||
| 2478 | |||
| 2479 | <para><computeroutput>libbzip2</computeroutput> contains a number | ||
| 2480 | of internal assertion checks which should, needless to say, never | ||
| 2481 | be activated. Nevertheless, if an assertion should fail, | ||
| 2482 | behaviour depends on whether or not the library was compiled with | ||
| 2483 | <computeroutput>BZ_NO_STDIO</computeroutput> set.</para> | ||
| 2484 | |||
| 2485 | <para>For a normal compile, an assertion failure yields the | ||
| 2486 | message:</para> | ||
| 2487 | |||
| 2488 | <blockquote> | ||
| 2489 | <para>bzip2/libbzip2: internal error number N.</para> | ||
| 2490 | <para>This is a bug in bzip2/libbzip2, &bz-version; of &bz-date;. | ||
| 2491 | Please report it to me at: &bz-email;. If this happened | ||
| 2492 | when you were using some program which uses libbzip2 as a | ||
| 2493 | component, you should also report this bug to the author(s) | ||
| 2494 | of that program. Please make an effort to report this bug; | ||
| 2495 | timely and accurate bug reports eventually lead to higher | ||
| 2496 | quality software. Thanks. Julian Seward, &bz-date;. | ||
| 2497 | </para></blockquote> | ||
| 2498 | |||
| 2499 | <para>where <computeroutput>N</computeroutput> is some error code | ||
| 2500 | number. If <computeroutput>N == 1007</computeroutput>, it also | ||
| 2501 | prints some extra text advising the reader that unreliable memory | ||
| 2502 | is often associated with internal error 1007. (This is a | ||
| 2503 | frequently observed phenomenon with versions 1.0.0/1.0.1).</para> | ||
| 2504 | |||
| 2505 | <para><computeroutput>exit(3)</computeroutput> is then | ||
| 2506 | called.</para> | ||
| 2507 | |||
| 2508 | <para>For a <computeroutput>stdio</computeroutput>-free library, | ||
| 2509 | assertion failures result in a call to a function declared | ||
| 2510 | as:</para> | ||
| 2511 | |||
| 2512 | <programlisting> | ||
| 2513 | extern void bz_internal_error ( int errcode ); | ||
| 2514 | </programlisting> | ||
| 2515 | |||
| 2516 | <para>The relevant code is passed as a parameter. You should | ||
| 2517 | supply such a function.</para> | ||
| 2518 | |||
| 2519 | <para>In either case, once an assertion failure has occurred, any | ||
| 2520 | <computeroutput>bz_stream</computeroutput> records involved can | ||
| 2521 | be regarded as invalid. You should not attempt to resume normal | ||
| 2522 | operation with them.</para> | ||
| 2523 | |||
| 2524 | <para>You may, of course, change critical error handling to suit | ||
| 2525 | your needs. As I said above, critical errors indicate bugs in | ||
| 2526 | the library and should not occur. All "normal" error situations | ||
| 2527 | are indicated via error return codes from functions, and can be | ||
| 2528 | recovered from.</para> | ||
| 2529 | |||
| 2530 | </sect2> | ||
| 2531 | |||
| 2532 | </sect1> | ||
| 2533 | |||
| 2534 | |||
| 2535 | <sect1 id="win-dll" xreflabel="Making a Windows DLL"> | ||
| 2536 | <title>Making a Windows DLL</title> | ||
| 2537 | |||
| 2538 | <para>Everything related to Windows has been contributed by | ||
| 2539 | Yoshioka Tsuneo | ||
| 2540 | (<computeroutput>QWF00133@niftyserve.or.jp</computeroutput> / | ||
| 2541 | <computeroutput>tsuneo-y@is.aist-nara.ac.jp</computeroutput>), so | ||
| 2542 | you should send your queries to him (but perhaps Cc: me, | ||
| 2543 | <computeroutput>&bz-email;</computeroutput>).</para> | ||
| 2544 | |||
| 2545 | <para>My vague understanding of what to do is: using Visual C++ | ||
| 2546 | 5.0, open the project file | ||
| 2547 | <computeroutput>libbz2.dsp</computeroutput>, and build. That's | ||
| 2548 | all.</para> | ||
| 2549 | |||
| 2550 | <para>If you can't open the project file for some reason, make a | ||
| 2551 | new one, naming these files: | ||
| 2552 | <computeroutput>blocksort.c</computeroutput>, | ||
| 2553 | <computeroutput>bzlib.c</computeroutput>, | ||
| 2554 | <computeroutput>compress.c</computeroutput>, | ||
| 2555 | <computeroutput>crctable.c</computeroutput>, | ||
| 2556 | <computeroutput>decompress.c</computeroutput>, | ||
| 2557 | <computeroutput>huffman.c</computeroutput>, | ||
| 2558 | <computeroutput>randtable.c</computeroutput> and | ||
| 2559 | <computeroutput>libbz2.def</computeroutput>. You will also need | ||
| 2560 | to name the header files <computeroutput>bzlib.h</computeroutput> | ||
| 2561 | and <computeroutput>bzlib_private.h</computeroutput>.</para> | ||
| 2562 | |||
| 2563 | <para>If you don't use VC++, you may need to define the | ||
| 2564 | preprocessor symbol | ||
| 2565 | <computeroutput>_WIN32</computeroutput>.</para> | ||
| 2566 | |||
| 2567 | <para>Finally, <computeroutput>dlltest.c</computeroutput> is a | ||
| 2568 | sample program using the DLL. It has a project file, | ||
| 2569 | <computeroutput>dlltest.dsp</computeroutput>.</para> | ||
| 2570 | |||
| 2571 | <para>If you just want a makefile for Visual C, have a look at | ||
| 2572 | <computeroutput>makefile.msc</computeroutput>.</para> | ||
| 2573 | |||
| 2574 | <para>Be aware that if you compile | ||
| 2575 | <computeroutput>bzip2</computeroutput> itself on Win32, you must | ||
| 2576 | set <computeroutput>BZ_UNIX</computeroutput> to 0 and | ||
| 2577 | <computeroutput>BZ_LCCWIN32</computeroutput> to 1, in the file | ||
| 2578 | <computeroutput>bzip2.c</computeroutput>, before compiling. | ||
| 2579 | Otherwise the resulting binary won't work correctly.</para> | ||
| 2580 | |||
| 2581 | <para>I haven't tried any of this stuff myself, but it all looks | ||
| 2582 | plausible.</para> | ||
| 2583 | |||
| 2584 | </sect1> | ||
| 2585 | |||
| 2586 | </chapter> | ||
| 2587 | |||
| 2588 | |||
| 2589 | |||
| 2590 | <chapter id="misc" xreflabel="Miscellanea"> | ||
| 2591 | <title>Miscellanea</title> | ||
| 2592 | |||
| 2593 | <para>These are just some random thoughts of mine. Your mileage | ||
| 2594 | may vary.</para> | ||
| 2595 | |||
| 2596 | |||
| 2597 | <sect1 id="limits" xreflabel="Limitations of the compressed file format"> | ||
| 2598 | <title>Limitations of the compressed file format</title> | ||
| 2599 | |||
| 2600 | <para><computeroutput>bzip2-1.0.X</computeroutput>, | ||
| 2601 | <computeroutput>0.9.5</computeroutput> and | ||
| 2602 | <computeroutput>0.9.0</computeroutput> use exactly the same file | ||
| 2603 | format as the original version, | ||
| 2604 | <computeroutput>bzip2-0.1</computeroutput>. This decision was | ||
| 2605 | made in the interests of stability. Creating yet another | ||
| 2606 | incompatible compressed file format would create further | ||
| 2607 | confusion and disruption for users.</para> | ||
| 2608 | |||
| 2609 | <para>Nevertheless, this is not a painless decision. Development | ||
| 2610 | work since the release of | ||
| 2611 | <computeroutput>bzip2-0.1</computeroutput> in August 1997 has | ||
| 2612 | shown complexities in the file format which slow down | ||
| 2613 | decompression and, in retrospect, are unnecessary. These | ||
| 2614 | are:</para> | ||
| 2615 | |||
| 2616 | <itemizedlist mark='bullet'> | ||
| 2617 | |||
| 2618 | <listitem><para>The run-length encoder, which is the first of the | ||
| 2619 | compression transformations, is entirely irrelevant. The | ||
| 2620 | original purpose was to protect the sorting algorithm from the | ||
| 2621 | very worst case input: a string of repeated symbols. But | ||
| 2622 | algorithm steps Q6a and Q6b in the original Burrows-Wheeler | ||
| 2623 | technical report (SRC-124) show how repeats can be handled | ||
| 2624 | without difficulty in block sorting.</para></listitem> | ||
| 2625 | |||
| 2626 | <listitem><para>The randomisation mechanism doesn't really need to be | ||
| 2627 | there. Udi Manber and Gene Myers published a suffix array | ||
| 2628 | construction algorithm a few years back, which can be employed | ||
| 2629 | to sort any block, no matter how repetitive, in O(N log N) | ||
| 2630 | time. Subsequent work by Kunihiko Sadakane has produced a | ||
| 2631 | derivative O(N (log N)^2) algorithm which usually outperforms | ||
| 2632 | the Manber-Myers algorithm.</para> | ||
| 2633 | |||
| 2634 | <para>I could have changed to Sadakane's algorithm, but I find | ||
| 2635 | it to be slower than <computeroutput>bzip2</computeroutput>'s | ||
| 2636 | existing algorithm for most inputs, and the randomisation | ||
| 2637 | mechanism protects adequately against bad cases. I didn't | ||
| 2638 | think it was a good tradeoff to make. Partly this is due to | ||
| 2639 | the fact that I was not flooded with email complaints about | ||
| 2640 | <computeroutput>bzip2-0.1</computeroutput>'s performance on | ||
| 2641 | repetitive data, so perhaps it isn't a problem for real | ||
| 2642 | inputs.</para> | ||
| 2643 | |||
| 2644 | <para>Probably the best long-term solution, and the one I have | ||
| 2645 | incorporated into 0.9.5 and above, is to use the existing | ||
| 2646 | sorting algorithm initially, and fall back to an O(N (log N)^2) | ||
| 2647 | algorithm if the standard algorithm gets into | ||
| 2648 | difficulties.</para></listitem> | ||
| 2649 | |||
| 2650 | <listitem><para>The compressed file format was never designed to be | ||
| 2651 | handled by a library, and I have had to jump through some hoops | ||
| 2652 | to produce an efficient implementation of decompression. It's | ||
| 2653 | a bit hairy. Try passing | ||
| 2654 | <computeroutput>decompress.c</computeroutput> through the C | ||
| 2655 | preprocessor and you'll see what I mean. Much of this | ||
| 2656 | complexity could have been avoided if the compressed size of | ||
| 2657 | each block of data was recorded in the data stream.</para></listitem> | ||
| 2658 | |||
| 2659 | <listitem><para>An Adler-32 checksum, rather than a CRC32 checksum, | ||
| 2660 | would be faster to compute.</para></listitem> | ||
| 2661 | |||
| 2662 | </itemizedlist> | ||
| 2663 | |||
| 2664 | <para>It would be fair to say that the | ||
| 2665 | <computeroutput>bzip2</computeroutput> format was frozen before I | ||
| 2666 | properly and fully understood the performance consequences of | ||
| 2667 | doing so.</para> | ||
| 2668 | |||
| 2669 | <para>Improvements which I was able to incorporate into 0.9.0, | ||
| 2670 | despite using the same file format, are:</para> | ||
| 2671 | |||
| 2672 | <itemizedlist mark='bullet'> | ||
| 2673 | |||
| 2674 | <listitem><para>Single array implementation of the inverse BWT. This | ||
| 2675 | significantly speeds up decompression, presumably because it | ||
| 2676 | reduces the number of cache misses.</para></listitem> | ||
| 2677 | |||
| 2678 | <listitem><para>Faster inverse MTF transform for large MTF values. | ||
| 2679 | The new implementation is based on the notion of sliding blocks | ||
| 2680 | of values.</para></listitem> | ||
| 2681 | |||
| 2682 | <listitem><para><computeroutput>bzip2-0.9.0</computeroutput> now reads | ||
| 2683 | and writes files with <computeroutput>fread</computeroutput> | ||
| 2684 | and <computeroutput>fwrite</computeroutput>; version 0.1 used | ||
| 2685 | <computeroutput>putc</computeroutput> and | ||
| 2686 | <computeroutput>getc</computeroutput>. Duh! Well, you live | ||
| 2687 | and learn.</para></listitem> | ||
| 2688 | |||
| 2689 | </itemizedlist> | ||
| 2690 | |||
| 2691 | <para>Further ahead, it would be nice to be able to do random | ||
| 2692 | access into files. This will require some careful design of | ||
| 2693 | compressed file formats.</para> | ||
| 2694 | |||
| 2695 | </sect1> | ||
| 2696 | |||
| 2697 | |||
| 2698 | <sect1 id="port-issues" xreflabel="Portability issues"> | ||
| 2699 | <title>Portability issues</title> | ||
| 2700 | |||
| 2701 | <para>After some consideration, I have decided not to use GNU | ||
| 2702 | <computeroutput>autoconf</computeroutput> to configure 0.9.5 or | ||
| 2703 | 1.0.</para> | ||
| 2704 | |||
| 2705 | <para><computeroutput>autoconf</computeroutput>, admirable and | ||
| 2706 | wonderful though it is, mainly assists with portability problems | ||
| 2707 | between Unix-like platforms. But | ||
| 2708 | <computeroutput>bzip2</computeroutput> doesn't have much in the | ||
| 2709 | way of portability problems on Unix; most of the difficulties | ||
| 2710 | appear when porting to the Mac, or to Microsoft's operating | ||
| 2711 | systems. <computeroutput>autoconf</computeroutput> doesn't help | ||
| 2712 | in those cases, and brings in a whole load of new | ||
| 2713 | complexity.</para> | ||
| 2714 | |||
| 2715 | <para>Most people should be able to compile the library and | ||
| 2716 | program under Unix straight out-of-the-box, so to speak, | ||
| 2717 | especially if you have a version of GNU C available.</para> | ||
| 2718 | |||
| 2719 | <para>There are a couple of | ||
| 2720 | <computeroutput>__inline__</computeroutput> directives in the | ||
| 2721 | code. GNU C (<computeroutput>gcc</computeroutput>) should be | ||
| 2722 | able to handle them. If you're not using GNU C, your C compiler | ||
| 2723 | shouldn't see them at all. If your compiler does, for some | ||
| 2724 | reason, see them and doesn't like them, just | ||
| 2725 | <computeroutput>#define</computeroutput> | ||
| 2726 | <computeroutput>__inline__</computeroutput> to be | ||
| 2727 | <computeroutput>/* */</computeroutput>. One easy way to do this | ||
| 2728 | is to compile with the flag | ||
| 2729 | <computeroutput>-D__inline__=</computeroutput>, which should be | ||
| 2730 | understood by most Unix compilers.</para> | ||
| 2731 | |||
| 2732 | <para>If you still have difficulties, try compiling with the | ||
| 2733 | macro <computeroutput>BZ_STRICT_ANSI</computeroutput> defined. | ||
| 2734 | This should enable you to build the library in a strictly ANSI | ||
| 2735 | compliant environment. Building the program itself like this is | ||
| 2736 | dangerous and not supported, since you remove | ||
| 2737 | <computeroutput>bzip2</computeroutput>'s checks against | ||
| 2738 | compressing directories, symbolic links, devices, and other | ||
| 2739 | not-really-a-file entities. This could cause filesystem | ||
| 2740 | corruption!</para> | ||
| 2741 | |||
| 2742 | <para>One other thing: if you create a | ||
| 2743 | <computeroutput>bzip2</computeroutput> binary for public distribution, | ||
| 2744 | please consider linking it statically (<computeroutput>gcc | ||
| 2745 | -static</computeroutput>). This avoids all sorts of library-version | ||
| 2746 | issues that others may encounter later on.</para> | ||
| 2747 | |||
| 2748 | <para>If you build <computeroutput>bzip2</computeroutput> on | ||
| 2749 | Win32, you must set <computeroutput>BZ_UNIX</computeroutput> to 0 | ||
| 2750 | and <computeroutput>BZ_LCCWIN32</computeroutput> to 1, in the | ||
| 2751 | file <computeroutput>bzip2.c</computeroutput>, before compiling. | ||
| 2752 | Otherwise the resulting binary won't work correctly.</para> | ||
| 2753 | |||
| 2754 | </sect1> | ||
| 2755 | |||
| 2756 | |||
| 2757 | <sect1 id="bugs" xreflabel="Reporting bugs"> | ||
| 2758 | <title>Reporting bugs</title> | ||
| 2759 | |||
| 2760 | <para>I tried pretty hard to make sure | ||
| 2761 | <computeroutput>bzip2</computeroutput> is bug free, both by | ||
| 2762 | design and by testing. Hopefully you'll never need to read this | ||
| 2763 | section for real.</para> | ||
| 2764 | |||
| 2765 | <para>Nevertheless, if <computeroutput>bzip2</computeroutput> dies | ||
| 2766 | with a segmentation fault, a bus error or an internal assertion | ||
| 2767 | failure, it will ask you to email me a bug report. Experience from | ||
| 2768 | years of feedback from bzip2 users indicates that almost all these | ||
| 2769 | problems can be traced to either compiler bugs or hardware | ||
| 2770 | problems.</para> | ||
| 2771 | |||
| 2772 | <itemizedlist mark='bullet'> | ||
| 2773 | |||
| 2774 | <listitem><para>Recompile the program with no optimisation, and | ||
| 2775 | see if it works. And/or try a different compiler. I heard all | ||
| 2776 | sorts of stories about various flavours of GNU C (and other | ||
| 2777 | compilers) generating bad code for | ||
| 2778 | <computeroutput>bzip2</computeroutput>, and I've run across two | ||
| 2779 | such examples myself.</para> | ||
| 2780 | |||
| 2781 | <para>2.7.X versions of GNU C are known to generate bad code | ||
| 2782 | from time to time, at high optimisation levels. If you get | ||
| 2783 | problems, try using the flags | ||
| 2784 | <computeroutput>-O2</computeroutput> | ||
| 2785 | <computeroutput>-fomit-frame-pointer</computeroutput> | ||
| 2786 | <computeroutput>-fno-strength-reduce</computeroutput>. You | ||
| 2787 | should specifically <emphasis>not</emphasis> use | ||
| 2788 | <computeroutput>-funroll-loops</computeroutput>.</para> | ||
| 2789 | |||
| 2790 | <para>You may notice that the Makefile runs six tests as part | ||
| 2791 | of the build process. If the program passes all of these, it's | ||
| 2792 | a pretty good (but not 100%) indication that the compiler has | ||
| 2793 | done its job correctly.</para></listitem> | ||
| 2794 | |||
| 2795 | <listitem><para>If <computeroutput>bzip2</computeroutput> | ||
| 2796 | crashes randomly, and the crashes are not repeatable, you may | ||
| 2797 | have a flaky memory subsystem. | ||
| 2798 | <computeroutput>bzip2</computeroutput> really hammers your | ||
| 2799 | memory hierarchy, and if it's a bit marginal, you may get these | ||
| 2800 | problems. Ditto if your disk or I/O subsystem is slowly | ||
| 2801 | failing. Yup, this really does happen.</para> | ||
| 2802 | |||
| 2803 | <para>Try using a different machine of the same type, and see | ||
| 2804 | if you can repeat the problem.</para></listitem> | ||
| 2805 | |||
| 2806 | <listitem><para>This isn't really a bug, but ... If | ||
| 2807 | <computeroutput>bzip2</computeroutput> tells you your file is | ||
| 2808 | corrupted on decompression, and you obtained the file via FTP, | ||
| 2809 | there is a possibility that you forgot to tell FTP to do a | ||
| 2810 | binary mode transfer. That absolutely will cause the file to | ||
| 2811 | be non-decompressible. You'll have to transfer it | ||
| 2812 | again.</para></listitem> | ||
| 2813 | |||
| 2814 | </itemizedlist> | ||
| 2815 | |||
| 2816 | <para>If you've incorporated | ||
| 2817 | <computeroutput>libbzip2</computeroutput> into your own program | ||
| 2818 | and are getting problems, please, please, please, check that the | ||
| 2819 | parameters you are passing in calls to the library are correct, | ||
| 2820 | and in accordance with what the documentation says is allowable. | ||
| 2821 | I have tried to make the library robust against such problems, | ||
| 2822 | but I'm sure I haven't succeeded.</para> | ||
| 2823 | |||
| 2824 | <para>Finally, if the above comments don't help, you'll have to | ||
| 2825 | send me a bug report. Now, it's just amazing how many people | ||
| 2826 | will send me a bug report saying something like:</para> | ||
| 2827 | |||
| 2828 | <programlisting> | ||
| 2829 | bzip2 crashed with segmentation fault on my machine | ||
| 2830 | </programlisting> | ||
| 2831 | |||
| 2832 | <para>and absolutely nothing else. Needless to say, such a | ||
| 2833 | report is <emphasis>totally, utterly, completely and | ||
| 2834 | comprehensively 100% useless; a waste of your time, my time, and | ||
| 2835 | net bandwidth</emphasis>. With no details at all, there's no way | ||
| 2836 | I can possibly begin to figure out what the problem is.</para> | ||
| 2837 | |||
| 2838 | <para>The rules of the game are: facts, facts, facts. Don't omit | ||
| 2839 | them because "oh, they won't be relevant". At the bare | ||
| 2840 | minimum:</para> | ||
| 2841 | |||
| 2842 | <programlisting> | ||
| 2843 | Machine type. Operating system version. | ||
| 2844 | Exact version of bzip2 (do bzip2 -V). | ||
| 2845 | Exact version of the compiler used. | ||
| 2846 | Flags passed to the compiler. | ||
| 2847 | </programlisting> | ||
| 2848 | |||
| 2849 | <para>However, the most important single thing that will help me | ||
| 2850 | is the file that you were trying to compress or decompress at the | ||
| 2851 | time the problem happened. Without that, my ability to do | ||
| 2852 | anything more than speculate about the cause is limited.</para> | ||
| 2853 | |||
| 2854 | </sect1> | ||
| 2855 | |||
| 2856 | |||
| 2857 | <sect1 id="package" xreflabel="Did you get the right package?"> | ||
| 2858 | <title>Did you get the right package?</title> | ||
| 2859 | |||
| 2860 | <para><computeroutput>bzip2</computeroutput> is a resource hog. | ||
| 2861 | It soaks up large amounts of CPU cycles and memory. Also, it | ||
| 2862 | gives very large latencies. In the worst case, you can feed many | ||
| 2863 | megabytes of uncompressed data into the library before getting | ||
| 2864 | any compressed output, so this probably rules out applications | ||
| 2865 | requiring interactive behaviour.</para> | ||
| 2866 | |||
| 2867 | <para>These aren't faults of my implementation, I hope, but more | ||
| 2868 | an intrinsic property of the Burrows-Wheeler transform | ||
| 2869 | (unfortunately). Maybe this isn't what you want.</para> | ||
| 2870 | |||
| 2871 | <para>If you want a compressor and/or library which is faster, | ||
| 2872 | uses less memory but gets pretty good compression, and has | ||
| 2873 | minimal latency, consider Jean-loup Gailly's and Mark Adler's | ||
| 2874 | work, <computeroutput>zlib-1.2.1</computeroutput> and | ||
| 2875 | <computeroutput>gzip-1.2.4</computeroutput>. Look for them at | ||
| 2876 | <ulink url="http://www.zlib.org">http://www.zlib.org</ulink> and | ||
| 2877 | <ulink url="http://www.gzip.org">http://www.gzip.org</ulink> | ||
| 2878 | respectively.</para> | ||
| 2879 | |||
| 2880 | <para>For something faster and lighter still, you might try Markus F | ||
| 2881 | X J Oberhumer's <computeroutput>LZO</computeroutput> real-time | ||
| 2882 | compression/decompression library, at | ||
| 2883 | <ulink url="http://www.oberhumer.com/opensource">http://www.oberhumer.com/opensource</ulink>.</para> | ||
| 2884 | |||
| 2885 | </sect1> | ||
| 2886 | |||
| 2887 | |||
| 2888 | |||
| 2889 | <sect1 id="reading" xreflabel="Further Reading"> | ||
| 2890 | <title>Further Reading</title> | ||
| 2891 | |||
| 2892 | <para><computeroutput>bzip2</computeroutput> is not research | ||
| 2893 | work, in the sense that it doesn't present any new ideas. | ||
| 2894 | Rather, it's an engineering exercise based on existing | ||
| 2895 | ideas.</para> | ||
| 2896 | |||
| 2897 | <para>Four documents describe essentially all the ideas behind | ||
| 2898 | <computeroutput>bzip2</computeroutput>:</para> | ||
| 2899 | |||
| 2900 | <literallayout>Michael Burrows and D. J. Wheeler: | ||
| 2901 | "A block-sorting lossless data compression algorithm" | ||
| 2902 | 10th May 1994. | ||
| 2903 | Digital SRC Research Report 124. | ||
| 2904 | ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz | ||
| 2905 | If you have trouble finding it, try searching at the | ||
| 2906 | New Zealand Digital Library, http://www.nzdl.org. | ||
| 2907 | |||
| 2908 | Daniel S. Hirschberg and Debra A. LeLewer | ||
| 2909 | "Efficient Decoding of Prefix Codes" | ||
| 2910 | Communications of the ACM, April 1990, Vol 33, Number 4. | ||
| 2911 | You might be able to get an electronic copy of this | ||
| 2912 | from the ACM Digital Library. | ||
| 2913 | |||
| 2914 | David J. Wheeler | ||
| 2915 | Program bred3.c and accompanying document bred3.ps. | ||
| 2916 | This contains the idea behind the multi-table Huffman coding scheme. | ||
| 2917 | ftp://ftp.cl.cam.ac.uk/users/djw3/ | ||
| 2918 | |||
| 2919 | Jon L. Bentley and Robert Sedgewick | ||
| 2920 | "Fast Algorithms for Sorting and Searching Strings" | ||
| 2921 | Available from Sedgewick's web page, | ||
| 2922 | www.cs.princeton.edu/~rs | ||
| 2923 | </literallayout> | ||
| 2924 | |||
| 2925 | <para>The following paper gives valuable additional insights into | ||
| 2926 | the algorithm, but is not immediately the basis of any code used | ||
| 2927 | in bzip2.</para> | ||
| 2928 | |||
| 2929 | <literallayout>Peter Fenwick: | ||
| 2930 | Block Sorting Text Compression | ||
| 2931 | Proceedings of the 19th Australasian Computer Science Conference, | ||
| 2932 | Melbourne, Australia. Jan 31 - Feb 2, 1996. | ||
| 2933 | ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps</literallayout> | ||
| 2934 | |||
| 2935 | <para>Kunihiko Sadakane's sorting algorithm, mentioned above, is | ||
| 2936 | available from:</para> | ||
| 2937 | |||
| 2938 | <literallayout>http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz | ||
| 2939 | </literallayout> | ||
| 2940 | |||
| 2941 | <para>The Manber-Myers suffix array construction algorithm is | ||
| 2942 | described in a paper available from:</para> | ||
| 2943 | |||
| 2944 | <literallayout>http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps | ||
| 2945 | </literallayout> | ||
| 2946 | |||
| 2947 | <para>Finally, the following papers document some | ||
| 2948 | investigations I made into the performance of sorting | ||
| 2949 | and decompression algorithms:</para> | ||
| 2950 | |||
| 2951 | <literallayout>Julian Seward | ||
| 2952 | On the Performance of BWT Sorting Algorithms | ||
| 2953 | Proceedings of the IEEE Data Compression Conference 2000 | ||
| 2954 | Snowbird, Utah. 28-30 March 2000. | ||
| 2955 | |||
| 2956 | Julian Seward | ||
| 2957 | Space-time Tradeoffs in the Inverse B-W Transform | ||
| 2958 | Proceedings of the IEEE Data Compression Conference 2001 | ||
| 2959 | Snowbird, Utah. 27-29 March 2001. | ||
| 2960 | </literallayout> | ||
| 2961 | |||
| 2962 | </sect1> | ||
| 2963 | |||
| 2964 | </chapter> | ||
| 2965 | |||
| 2966 | </book> | ||
diff --git a/randtable.c b/randtable.c index 5c922e9..940462d 100644 --- a/randtable.c +++ b/randtable.c | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
| 9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
| 10 | 10 | ||
| 11 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2005 Julian R Seward. All rights reserved. |
| 12 | 12 | ||
| 13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
| @@ -42,7 +42,7 @@ | |||
| 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 43 | 43 | ||
| 44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
| 45 | jseward@acm.org | 45 | jseward@bzip.org |
| 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
| 47 | 47 | ||
| 48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
diff --git a/xmlproc.sh b/xmlproc.sh new file mode 100755 index 0000000..6fe4d57 --- /dev/null +++ b/xmlproc.sh | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | #!/bin/bash | ||
| 2 | # see the README in this directory for usage etc. | ||
| 3 | |||
| 4 | usage() { | ||
| 5 | echo ''; | ||
| 6 | echo 'Usage: xmlproc.sh -[option] <filename.xml>'; | ||
| 7 | echo 'Specify a target from:'; | ||
| 8 | echo '-v verify xml file conforms to dtd'; | ||
| 9 | echo '-html output in html format (single file)'; | ||
| 10 | echo '-ps output in postscript format'; | ||
| 11 | echo '-pdf output in pdf format'; | ||
| 12 | exit; | ||
| 13 | } | ||
| 14 | |||
| 15 | if test $# -ne 2; then | ||
| 16 | usage | ||
| 17 | fi | ||
| 18 | # assign the variable for the output type | ||
| 19 | action=$1; shift | ||
| 20 | # assign the input filename | ||
| 21 | xmlfile=$1; shift | ||
| 22 | # and check user input is correct | ||
| 23 | if !(test -f $xmlfile); then | ||
| 24 | echo "No such file: $xmlfile"; | ||
| 25 | exit; | ||
| 26 | fi | ||
| 27 | # some other stuff we will use | ||
| 28 | OUT=output | ||
| 29 | xsl_fo=bz-fo.xsl | ||
| 30 | xsl_html=bz-html.xsl | ||
| 31 | |||
| 32 | basename=$xmlfile | ||
| 33 | basename=${basename//'.xml'/''} | ||
| 34 | |||
| 35 | fofile="${basename}.fo" | ||
| 36 | htmlfile="${basename}.html" | ||
| 37 | pdffile="${basename}.pdf" | ||
| 38 | psfile="${basename}.ps" | ||
| 39 | xmlfmtfile="${basename}.fmt" | ||
| 40 | |||
| 41 | # first process the xmlfile with CDATA tags | ||
| 42 | ./format.pl $xmlfile $xmlfmtfile | ||
| 43 | # so the shell knows where the catalogs live | ||
| 44 | export XML_CATALOG_FILES=/etc/xml/catalog | ||
| 45 | |||
| 46 | # post-processing tidy up | ||
| 47 | cleanup() { | ||
| 48 | echo "Cleaning up: # $@" | ||
| 49 | while [ $# != 0 ] | ||
| 50 | do | ||
| 51 | arg=$1; shift; | ||
| 52 | echo " deleting $arg"; | ||
| 53 | rm $arg | ||
| 54 | done | ||
| 55 | } | ||
| 56 | |||
| 57 | case $action in | ||
| 58 | -v) | ||
| 59 | flags='--noout --xinclude --noblanks --postvalid' | ||
| 60 | dtd='--dtdvalid http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd' | ||
| 61 | xmllint $flags $dtd $xmlfmtfile 2> $OUT | ||
| 62 | egrep 'error' $OUT | ||
| 63 | rm $OUT | ||
| 64 | ;; | ||
| 65 | |||
| 66 | -html) | ||
| 67 | echo "Creating $htmlfile ..." | ||
| 68 | xsltproc --nonet --xinclude -o $htmlfile $xsl_html $xmlfmtfile | ||
| 69 | cleanup $xmlfmtfile | ||
| 70 | ;; | ||
| 71 | |||
| 72 | -pdf) | ||
| 73 | echo "Creating $pdffile ..." | ||
| 74 | xsltproc --nonet --xinclude -o $fofile $xsl_fo $xmlfmtfile | ||
| 75 | pdfxmltex $fofile >$OUT </dev/null | ||
| 76 | pdfxmltex $fofile >$OUT </dev/null | ||
| 77 | pdfxmltex $fofile >$OUT </dev/null | ||
| 78 | cleanup $OUT $xmlfmtfile *.aux *.fo *.log *.out | ||
| 79 | ;; | ||
| 80 | |||
| 81 | -ps) | ||
| 82 | echo "Creating $psfile ..." | ||
| 83 | xsltproc --nonet --xinclude -o $fofile $xsl_fo $xmlfmtfile | ||
| 84 | pdfxmltex $fofile >$OUT </dev/null | ||
| 85 | pdfxmltex $fofile >$OUT </dev/null | ||
| 86 | pdfxmltex $fofile >$OUT </dev/null | ||
| 87 | pdftops $pdffile $psfile | ||
| 88 | cleanup $OUT $xmlfmtfile $pdffile *.aux *.fo *.log *.out | ||
| 89 | # passivetex is broken, so we can't go this route yet. | ||
| 90 | # xmltex $fofile >$OUT </dev/null | ||
| 91 | # xmltex $fofile >$OUT </dev/null | ||
| 92 | # xmltex $fofile >$OUT </dev/null | ||
| 93 | # dvips -R -q -o bzip-manual.ps *.dvi | ||
| 94 | ;; | ||
| 95 | |||
| 96 | *) | ||
| 97 | usage | ||
| 98 | ;; | ||
| 99 | esac | ||
