diff options
Diffstat (limited to 'doc/rfc1952.txt')
-rw-r--r-- | doc/rfc1952.txt | 675 |
1 files changed, 675 insertions, 0 deletions
diff --git a/doc/rfc1952.txt b/doc/rfc1952.txt new file mode 100644 index 0000000..a8e51b4 --- /dev/null +++ b/doc/rfc1952.txt | |||
@@ -0,0 +1,675 @@ | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | Network Working Group P. Deutsch | ||
8 | Request for Comments: 1952 Aladdin Enterprises | ||
9 | Category: Informational May 1996 | ||
10 | |||
11 | |||
12 | GZIP file format specification version 4.3 | ||
13 | |||
14 | Status of This Memo | ||
15 | |||
16 | This memo provides information for the Internet community. This memo | ||
17 | does not specify an Internet standard of any kind. Distribution of | ||
18 | this memo is unlimited. | ||
19 | |||
20 | IESG Note: | ||
21 | |||
22 | The IESG takes no position on the validity of any Intellectual | ||
23 | Property Rights statements contained in this document. | ||
24 | |||
25 | Notices | ||
26 | |||
27 | Copyright (c) 1996 L. Peter Deutsch | ||
28 | |||
29 | Permission is granted to copy and distribute this document for any | ||
30 | purpose and without charge, including translations into other | ||
31 | languages and incorporation into compilations, provided that the | ||
32 | copyright notice and this notice are preserved, and that any | ||
33 | substantive changes or deletions from the original are clearly | ||
34 | marked. | ||
35 | |||
36 | A pointer to the latest version of this and related documentation in | ||
37 | HTML format can be found at the URL | ||
38 | <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>. | ||
39 | |||
40 | Abstract | ||
41 | |||
42 | This specification defines a lossless compressed data format that is | ||
43 | compatible with the widely used GZIP utility. The format includes a | ||
44 | cyclic redundancy check value for detecting data corruption. The | ||
45 | format presently uses the DEFLATE method of compression but can be | ||
46 | easily extended to use other compression methods. The format can be | ||
47 | implemented readily in a manner not covered by patents. | ||
48 | |||
49 | |||
50 | |||
51 | |||
52 | |||
53 | |||
54 | |||
55 | |||
56 | |||
57 | |||
58 | Deutsch Informational [Page 1] | ||
59 | |||
60 | RFC 1952 GZIP File Format Specification May 1996 | ||
61 | |||
62 | |||
63 | Table of Contents | ||
64 | |||
65 | 1. Introduction ................................................... 2 | ||
66 | 1.1. Purpose ................................................... 2 | ||
67 | 1.2. Intended audience ......................................... 3 | ||
68 | 1.3. Scope ..................................................... 3 | ||
69 | 1.4. Compliance ................................................ 3 | ||
70 | 1.5. Definitions of terms and conventions used ................. 3 | ||
71 | 1.6. Changes from previous versions ............................ 3 | ||
72 | 2. Detailed specification ......................................... 4 | ||
73 | 2.1. Overall conventions ....................................... 4 | ||
74 | 2.2. File format ............................................... 5 | ||
75 | 2.3. Member format ............................................. 5 | ||
76 | 2.3.1. Member header and trailer ........................... 6 | ||
77 | 2.3.1.1. Extra field ................................... 8 | ||
78 | 2.3.1.2. Compliance .................................... 9 | ||
79 | 3. References .................................................. 9 | ||
80 | 4. Security Considerations .................................... 10 | ||
81 | 5. Acknowledgements ........................................... 10 | ||
82 | 6. Author's Address ........................................... 10 | ||
83 | 7. Appendix: Jean-Loup Gailly's gzip utility .................. 11 | ||
84 | 8. Appendix: Sample CRC Code .................................. 11 | ||
85 | |||
86 | 1. Introduction | ||
87 | |||
88 | 1.1. Purpose | ||
89 | |||
90 | The purpose of this specification is to define a lossless | ||
91 | compressed data format that: | ||
92 | |||
93 | * Is independent of CPU type, operating system, file system, | ||
94 | and character set, and hence can be used for interchange; | ||
95 | * Can compress or decompress a data stream (as opposed to a | ||
96 | randomly accessible file) to produce another data stream, | ||
97 | using only an a priori bounded amount of intermediate | ||
98 | storage, and hence can be used in data communications or | ||
99 | similar structures such as Unix filters; | ||
100 | * Compresses data with efficiency comparable to the best | ||
101 | currently available general-purpose compression methods, | ||
102 | and in particular considerably better than the "compress" | ||
103 | program; | ||
104 | * Can be implemented readily in a manner not covered by | ||
105 | patents, and hence can be practiced freely; | ||
106 | * Is compatible with the file format produced by the current | ||
107 | widely used gzip utility, in that conforming decompressors | ||
108 | will be able to read data produced by the existing gzip | ||
109 | compressor. | ||
110 | |||
111 | |||
112 | |||
113 | |||
114 | Deutsch Informational [Page 2] | ||
115 | |||
116 | RFC 1952 GZIP File Format Specification May 1996 | ||
117 | |||
118 | |||
119 | The data format defined by this specification does not attempt to: | ||
120 | |||
121 | * Provide random access to compressed data; | ||
122 | * Compress specialized data (e.g., raster graphics) as well as | ||
123 | the best currently available specialized algorithms. | ||
124 | |||
125 | 1.2. Intended audience | ||
126 | |||
127 | This specification is intended for use by implementors of software | ||
128 | to compress data into gzip format and/or decompress data from gzip | ||
129 | format. | ||
130 | |||
131 | The text of the specification assumes a basic background in | ||
132 | programming at the level of bits and other primitive data | ||
133 | representations. | ||
134 | |||
135 | 1.3. Scope | ||
136 | |||
137 | The specification specifies a compression method and a file format | ||
138 | (the latter assuming only that a file can store a sequence of | ||
139 | arbitrary bytes). It does not specify any particular interface to | ||
140 | a file system or anything about character sets or encodings | ||
141 | (except for file names and comments, which are optional). | ||
142 | |||
143 | 1.4. Compliance | ||
144 | |||
145 | Unless otherwise indicated below, a compliant decompressor must be | ||
146 | able to accept and decompress any file that conforms to all the | ||
147 | specifications presented here; a compliant compressor must produce | ||
148 | files that conform to all the specifications presented here. The | ||
149 | material in the appendices is not part of the specification per se | ||
150 | and is not relevant to compliance. | ||
151 | |||
152 | 1.5. Definitions of terms and conventions used | ||
153 | |||
154 | byte: 8 bits stored or transmitted as a unit (same as an octet). | ||
155 | (For this specification, a byte is exactly 8 bits, even on | ||
156 | machines which store a character on a number of bits different | ||
157 | from 8.) See below for the numbering of bits within a byte. | ||
158 | |||
159 | 1.6. Changes from previous versions | ||
160 | |||
161 | There have been no technical changes to the gzip format since | ||
162 | version 4.1 of this specification. In version 4.2, some | ||
163 | terminology was changed, and the sample CRC code was rewritten for | ||
164 | clarity and to eliminate the requirement for the caller to do pre- | ||
165 | and post-conditioning. Version 4.3 is a conversion of the | ||
166 | specification to RFC style. | ||
167 | |||
168 | |||
169 | |||
170 | Deutsch Informational [Page 3] | ||
171 | |||
172 | RFC 1952 GZIP File Format Specification May 1996 | ||
173 | |||
174 | |||
175 | 2. Detailed specification | ||
176 | |||
177 | 2.1. Overall conventions | ||
178 | |||
179 | In the diagrams below, a box like this: | ||
180 | |||
181 | +---+ | ||
182 | | | <-- the vertical bars might be missing | ||
183 | +---+ | ||
184 | |||
185 | represents one byte; a box like this: | ||
186 | |||
187 | +==============+ | ||
188 | | | | ||
189 | +==============+ | ||
190 | |||
191 | represents a variable number of bytes. | ||
192 | |||
193 | Bytes stored within a computer do not have a "bit order", since | ||
194 | they are always treated as a unit. However, a byte considered as | ||
195 | an integer between 0 and 255 does have a most- and least- | ||
196 | significant bit, and since we write numbers with the most- | ||
197 | significant digit on the left, we also write bytes with the most- | ||
198 | significant bit on the left. In the diagrams below, we number the | ||
199 | bits of a byte so that bit 0 is the least-significant bit, i.e., | ||
200 | the bits are numbered: | ||
201 | |||
202 | +--------+ | ||
203 | |76543210| | ||
204 | +--------+ | ||
205 | |||
206 | This document does not address the issue of the order in which | ||
207 | bits of a byte are transmitted on a bit-sequential medium, since | ||
208 | the data format described here is byte- rather than bit-oriented. | ||
209 | |||
210 | Within a computer, a number may occupy multiple bytes. All | ||
211 | multi-byte numbers in the format described here are stored with | ||
212 | the least-significant byte first (at the lower memory address). | ||
213 | For example, the decimal number 520 is stored as: | ||
214 | |||
215 | 0 1 | ||
216 | +--------+--------+ | ||
217 | |00001000|00000010| | ||
218 | +--------+--------+ | ||
219 | ^ ^ | ||
220 | | | | ||
221 | | + more significant byte = 2 x 256 | ||
222 | + less significant byte = 8 | ||
223 | |||
224 | |||
225 | |||
226 | Deutsch Informational [Page 4] | ||
227 | |||
228 | RFC 1952 GZIP File Format Specification May 1996 | ||
229 | |||
230 | |||
231 | 2.2. File format | ||
232 | |||
233 | A gzip file consists of a series of "members" (compressed data | ||
234 | sets). The format of each member is specified in the following | ||
235 | section. The members simply appear one after another in the file, | ||
236 | with no additional information before, between, or after them. | ||
237 | |||
238 | 2.3. Member format | ||
239 | |||
240 | Each member has the following structure: | ||
241 | |||
242 | +---+---+---+---+---+---+---+---+---+---+ | ||
243 | |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->) | ||
244 | +---+---+---+---+---+---+---+---+---+---+ | ||
245 | |||
246 | (if FLG.FEXTRA set) | ||
247 | |||
248 | +---+---+=================================+ | ||
249 | | XLEN |...XLEN bytes of "extra field"...| (more-->) | ||
250 | +---+---+=================================+ | ||
251 | |||
252 | (if FLG.FNAME set) | ||
253 | |||
254 | +=========================================+ | ||
255 | |...original file name, zero-terminated...| (more-->) | ||
256 | +=========================================+ | ||
257 | |||
258 | (if FLG.FCOMMENT set) | ||
259 | |||
260 | +===================================+ | ||
261 | |...file comment, zero-terminated...| (more-->) | ||
262 | +===================================+ | ||
263 | |||
264 | (if FLG.FHCRC set) | ||
265 | |||
266 | +---+---+ | ||
267 | | CRC16 | | ||
268 | +---+---+ | ||
269 | |||
270 | +=======================+ | ||
271 | |...compressed blocks...| (more-->) | ||
272 | +=======================+ | ||
273 | |||
274 | 0 1 2 3 4 5 6 7 | ||
275 | +---+---+---+---+---+---+---+---+ | ||
276 | | CRC32 | ISIZE | | ||
277 | +---+---+---+---+---+---+---+---+ | ||
278 | |||
279 | |||
280 | |||
281 | |||
282 | Deutsch Informational [Page 5] | ||
283 | |||
284 | RFC 1952 GZIP File Format Specification May 1996 | ||
285 | |||
286 | |||
287 | 2.3.1. Member header and trailer | ||
288 | |||
289 | ID1 (IDentification 1) | ||
290 | ID2 (IDentification 2) | ||
291 | These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139 | ||
292 | (0x8b, \213), to identify the file as being in gzip format. | ||
293 | |||
294 | CM (Compression Method) | ||
295 | This identifies the compression method used in the file. CM | ||
296 | = 0-7 are reserved. CM = 8 denotes the "deflate" | ||
297 | compression method, which is the one customarily used by | ||
298 | gzip and which is documented elsewhere. | ||
299 | |||
300 | FLG (FLaGs) | ||
301 | This flag byte is divided into individual bits as follows: | ||
302 | |||
303 | bit 0 FTEXT | ||
304 | bit 1 FHCRC | ||
305 | bit 2 FEXTRA | ||
306 | bit 3 FNAME | ||
307 | bit 4 FCOMMENT | ||
308 | bit 5 reserved | ||
309 | bit 6 reserved | ||
310 | bit 7 reserved | ||
311 | |||
312 | If FTEXT is set, the file is probably ASCII text. This is | ||
313 | an optional indication, which the compressor may set by | ||
314 | checking a small amount of the input data to see whether any | ||
315 | non-ASCII characters are present. In case of doubt, FTEXT | ||
316 | is cleared, indicating binary data. For systems which have | ||
317 | different file formats for ASCII text and binary data, the | ||
318 | decompressor can use FTEXT to choose the appropriate format. | ||
319 | We deliberately do not specify the algorithm used to set | ||
320 | this bit, since a compressor always has the option of | ||
321 | leaving it cleared and a decompressor always has the option | ||
322 | of ignoring it and letting some other program handle issues | ||
323 | of data conversion. | ||
324 | |||
325 | If FHCRC is set, a CRC16 for the gzip header is present, | ||
326 | immediately before the compressed data. The CRC16 consists | ||
327 | of the two least significant bytes of the CRC32 for all | ||
328 | bytes of the gzip header up to and not including the CRC16. | ||
329 | [The FHCRC bit was never set by versions of gzip up to | ||
330 | 1.2.4, even though it was documented with a different | ||
331 | meaning in gzip 1.2.4.] | ||
332 | |||
333 | If FEXTRA is set, optional extra fields are present, as | ||
334 | described in a following section. | ||
335 | |||
336 | |||
337 | |||
338 | Deutsch Informational [Page 6] | ||
339 | |||
340 | RFC 1952 GZIP File Format Specification May 1996 | ||
341 | |||
342 | |||
343 | If FNAME is set, an original file name is present, | ||
344 | terminated by a zero byte. The name must consist of ISO | ||
345 | 8859-1 (LATIN-1) characters; on operating systems using | ||
346 | EBCDIC or any other character set for file names, the name | ||
347 | must be translated to the ISO LATIN-1 character set. This | ||
348 | is the original name of the file being compressed, with any | ||
349 | directory components removed, and, if the file being | ||
350 | compressed is on a file system with case insensitive names, | ||
351 | forced to lower case. There is no original file name if the | ||
352 | data was compressed from a source other than a named file; | ||
353 | for example, if the source was stdin on a Unix system, there | ||
354 | is no file name. | ||
355 | |||
356 | If FCOMMENT is set, a zero-terminated file comment is | ||
357 | present. This comment is not interpreted; it is only | ||
358 | intended for human consumption. The comment must consist of | ||
359 | ISO 8859-1 (LATIN-1) characters. Line breaks should be | ||
360 | denoted by a single line feed character (10 decimal). | ||
361 | |||
362 | Reserved FLG bits must be zero. | ||
363 | |||
364 | MTIME (Modification TIME) | ||
365 | This gives the most recent modification time of the original | ||
366 | file being compressed. The time is in Unix format, i.e., | ||
367 | seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this | ||
368 | may cause problems for MS-DOS and other systems that use | ||
369 | local rather than Universal time.) If the compressed data | ||
370 | did not come from a file, MTIME is set to the time at which | ||
371 | compression started. MTIME = 0 means no time stamp is | ||
372 | available. | ||
373 | |||
374 | XFL (eXtra FLags) | ||
375 | These flags are available for use by specific compression | ||
376 | methods. The "deflate" method (CM = 8) sets these flags as | ||
377 | follows: | ||
378 | |||
379 | XFL = 2 - compressor used maximum compression, | ||
380 | slowest algorithm | ||
381 | XFL = 4 - compressor used fastest algorithm | ||
382 | |||
383 | OS (Operating System) | ||
384 | This identifies the type of file system on which compression | ||
385 | took place. This may be useful in determining end-of-line | ||
386 | convention for text files. The currently defined values are | ||
387 | as follows: | ||
388 | |||
389 | |||
390 | |||
391 | |||
392 | |||
393 | |||
394 | Deutsch Informational [Page 7] | ||
395 | |||
396 | RFC 1952 GZIP File Format Specification May 1996 | ||
397 | |||
398 | |||
399 | 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32) | ||
400 | 1 - Amiga | ||
401 | 2 - VMS (or OpenVMS) | ||
402 | 3 - Unix | ||
403 | 4 - VM/CMS | ||
404 | 5 - Atari TOS | ||
405 | 6 - HPFS filesystem (OS/2, NT) | ||
406 | 7 - Macintosh | ||
407 | 8 - Z-System | ||
408 | 9 - CP/M | ||
409 | 10 - TOPS-20 | ||
410 | 11 - NTFS filesystem (NT) | ||
411 | 12 - QDOS | ||
412 | 13 - Acorn RISCOS | ||
413 | 255 - unknown | ||
414 | |||
415 | XLEN (eXtra LENgth) | ||
416 | If FLG.FEXTRA is set, this gives the length of the optional | ||
417 | extra field. See below for details. | ||
418 | |||
419 | CRC32 (CRC-32) | ||
420 | This contains a Cyclic Redundancy Check value of the | ||
421 | uncompressed data computed according to the CRC-32 algorithm | ||
422 | used in the ISO 3309 standard and in section 8.1.1.6.2 of | ||
423 | ITU-T recommendation V.42. (See http://www.iso.ch for | ||
424 | ordering ISO documents. See gopher://info.itu.ch for an | ||
425 | online version of ITU-T V.42.) | ||
426 | |||
427 | ISIZE (Input SIZE) | ||
428 | This contains the size of the original (uncompressed) input | ||
429 | data modulo 2^32. | ||
430 | |||
431 | 2.3.1.1. Extra field | ||
432 | |||
433 | If the FLG.FEXTRA bit is set, an "extra field" is present in | ||
434 | the header, with total length XLEN bytes. It consists of a | ||
435 | series of subfields, each of the form: | ||
436 | |||
437 | +---+---+---+---+==================================+ | ||
438 | |SI1|SI2| LEN |... LEN bytes of subfield data ...| | ||
439 | +---+---+---+---+==================================+ | ||
440 | |||
441 | SI1 and SI2 provide a subfield ID, typically two ASCII letters | ||
442 | with some mnemonic value. Jean-Loup Gailly | ||
443 | <gzip@prep.ai.mit.edu> is maintaining a registry of subfield | ||
444 | IDs; please send him any subfield ID you wish to use. Subfield | ||
445 | IDs with SI2 = 0 are reserved for future use. The following | ||
446 | IDs are currently defined: | ||
447 | |||
448 | |||
449 | |||
450 | Deutsch Informational [Page 8] | ||
451 | |||
452 | RFC 1952 GZIP File Format Specification May 1996 | ||
453 | |||
454 | |||
455 | SI1 SI2 Data | ||
456 | ---------- ---------- ---- | ||
457 | 0x41 ('A') 0x70 ('p') Apollo file type information | ||
458 | |||
459 | LEN gives the length of the subfield data, excluding the 4 | ||
460 | initial bytes. | ||
461 | |||
462 | 2.3.1.2. Compliance | ||
463 | |||
464 | A compliant compressor must produce files with correct ID1, | ||
465 | ID2, CM, CRC32, and ISIZE, but may set all the other fields in | ||
466 | the fixed-length part of the header to default values (255 for | ||
467 | OS, 0 for all others). The compressor must set all reserved | ||
468 | bits to zero. | ||
469 | |||
470 | A compliant decompressor must check ID1, ID2, and CM, and | ||
471 | provide an error indication if any of these have incorrect | ||
472 | values. It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC | ||
473 | at least so it can skip over the optional fields if they are | ||
474 | present. It need not examine any other part of the header or | ||
475 | trailer; in particular, a decompressor may ignore FTEXT and OS | ||
476 | and always produce binary output, and still be compliant. A | ||
477 | compliant decompressor must give an error indication if any | ||
478 | reserved bit is non-zero, since such a bit could indicate the | ||
479 | presence of a new field that would cause subsequent data to be | ||
480 | interpreted incorrectly. | ||
481 | |||
482 | 3. References | ||
483 | |||
484 | [1] "Information Processing - 8-bit single-byte coded graphic | ||
485 | character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987). | ||
486 | The ISO 8859-1 (Latin-1) character set is a superset of 7-bit | ||
487 | ASCII. Files defining this character set are available as | ||
488 | iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/ | ||
489 | |||
490 | [2] ISO 3309 | ||
491 | |||
492 | [3] ITU-T recommendation V.42 | ||
493 | |||
494 | [4] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification", | ||
495 | available in ftp://ftp.uu.net/pub/archiving/zip/doc/ | ||
496 | |||
497 | [5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in | ||
498 | ftp://prep.ai.mit.edu/pub/gnu/ | ||
499 | |||
500 | [6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table | ||
501 | Look-Up", Communications of the ACM, 31(8), pp.1008-1013. | ||
502 | |||
503 | |||
504 | |||
505 | |||
506 | Deutsch Informational [Page 9] | ||
507 | |||
508 | RFC 1952 GZIP File Format Specification May 1996 | ||
509 | |||
510 | |||
511 | [7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal, | ||
512 | pp.118-133. | ||
513 | |||
514 | [8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt, | ||
515 | describing the CRC concept. | ||
516 | |||
517 | 4. Security Considerations | ||
518 | |||
519 | Any data compression method involves the reduction of redundancy in | ||
520 | the data. Consequently, any corruption of the data is likely to have | ||
521 | severe effects and be difficult to correct. Uncompressed text, on | ||
522 | the other hand, will probably still be readable despite the presence | ||
523 | of some corrupted bytes. | ||
524 | |||
525 | It is recommended that systems using this data format provide some | ||
526 | means of validating the integrity of the compressed data, such as by | ||
527 | setting and checking the CRC-32 check value. | ||
528 | |||
529 | 5. Acknowledgements | ||
530 | |||
531 | Trademarks cited in this document are the property of their | ||
532 | respective owners. | ||
533 | |||
534 | Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler, | ||
535 | the related software described in this specification. Glenn | ||
536 | Randers-Pehrson converted this document to RFC and HTML format. | ||
537 | |||
538 | 6. Author's Address | ||
539 | |||
540 | L. Peter Deutsch | ||
541 | Aladdin Enterprises | ||
542 | 203 Santa Margarita Ave. | ||
543 | Menlo Park, CA 94025 | ||
544 | |||
545 | Phone: (415) 322-0103 (AM only) | ||
546 | FAX: (415) 322-1734 | ||
547 | EMail: <ghost@aladdin.com> | ||
548 | |||
549 | Questions about the technical content of this specification can be | ||
550 | sent by email to: | ||
551 | |||
552 | Jean-Loup Gailly <gzip@prep.ai.mit.edu> and | ||
553 | Mark Adler <madler@alumni.caltech.edu> | ||
554 | |||
555 | Editorial comments on this specification can be sent by email to: | ||
556 | |||
557 | L. Peter Deutsch <ghost@aladdin.com> and | ||
558 | Glenn Randers-Pehrson <randeg@alumni.rpi.edu> | ||
559 | |||
560 | |||
561 | |||
562 | Deutsch Informational [Page 10] | ||
563 | |||
564 | RFC 1952 GZIP File Format Specification May 1996 | ||
565 | |||
566 | |||
567 | 7. Appendix: Jean-Loup Gailly's gzip utility | ||
568 | |||
569 | The most widely used implementation of gzip compression, and the | ||
570 | original documentation on which this specification is based, were | ||
571 | created by Jean-Loup Gailly <gzip@prep.ai.mit.edu>. Since this | ||
572 | implementation is a de facto standard, we mention some more of its | ||
573 | features here. Again, the material in this section is not part of | ||
574 | the specification per se, and implementations need not follow it to | ||
575 | be compliant. | ||
576 | |||
577 | When compressing or decompressing a file, gzip preserves the | ||
578 | protection, ownership, and modification time attributes on the local | ||
579 | file system, since there is no provision for representing protection | ||
580 | attributes in the gzip file format itself. Since the file format | ||
581 | includes a modification time, the gzip decompressor provides a | ||
582 | command line switch that assigns the modification time from the file, | ||
583 | rather than the local modification time of the compressed input, to | ||
584 | the decompressed output. | ||
585 | |||
586 | 8. Appendix: Sample CRC Code | ||
587 | |||
588 | The following sample code represents a practical implementation of | ||
589 | the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42 | ||
590 | for a formal specification.) | ||
591 | |||
592 | The sample code is in the ANSI C programming language. Non-C users | ||
593 | may find it easier to read with these hints: | ||
594 | |||
595 | & Bitwise AND operator. | ||
596 | ^ Bitwise exclusive-OR operator. | ||
597 | >> Bitwise right shift operator. When applied to an | ||
598 | unsigned quantity, as here, right shift inserts zero | ||
599 | bit(s) at the left. | ||
600 | ! Logical NOT operator. | ||
601 | ++ "n++" increments the variable n. | ||
602 | 0xNNN 0x introduces a hexadecimal (base 16) constant. | ||
603 | Suffix L indicates a long value (at least 32 bits). | ||
604 | |||
605 | /* Table of CRCs of all 8-bit messages. */ | ||
606 | unsigned long crc_table[256]; | ||
607 | |||
608 | /* Flag: has the table been computed? Initially false. */ | ||
609 | int crc_table_computed = 0; | ||
610 | |||
611 | /* Make the table for a fast CRC. */ | ||
612 | void make_crc_table(void) | ||
613 | { | ||
614 | unsigned long c; | ||
615 | |||
616 | |||
617 | |||
618 | Deutsch Informational [Page 11] | ||
619 | |||
620 | RFC 1952 GZIP File Format Specification May 1996 | ||
621 | |||
622 | |||
623 | int n, k; | ||
624 | for (n = 0; n < 256; n++) { | ||
625 | c = (unsigned long) n; | ||
626 | for (k = 0; k < 8; k++) { | ||
627 | if (c & 1) { | ||
628 | c = 0xedb88320L ^ (c >> 1); | ||
629 | } else { | ||
630 | c = c >> 1; | ||
631 | } | ||
632 | } | ||
633 | crc_table[n] = c; | ||
634 | } | ||
635 | crc_table_computed = 1; | ||
636 | } | ||
637 | |||
638 | /* | ||
639 | Update a running crc with the bytes buf[0..len-1] and return | ||
640 | the updated crc. The crc should be initialized to zero. Pre- and | ||
641 | post-conditioning (one's complement) is performed within this | ||
642 | function so it shouldn't be done by the caller. Usage example: | ||
643 | |||
644 | unsigned long crc = 0L; | ||
645 | |||
646 | while (read_buffer(buffer, length) != EOF) { | ||
647 | crc = update_crc(crc, buffer, length); | ||
648 | } | ||
649 | if (crc != original_crc) error(); | ||
650 | */ | ||
651 | unsigned long update_crc(unsigned long crc, | ||
652 | unsigned char *buf, int len) | ||
653 | { | ||
654 | unsigned long c = crc ^ 0xffffffffL; | ||
655 | int n; | ||
656 | |||
657 | if (!crc_table_computed) | ||
658 | make_crc_table(); | ||
659 | for (n = 0; n < len; n++) { | ||
660 | c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8); | ||
661 | } | ||
662 | return c ^ 0xffffffffL; | ||
663 | } | ||
664 | |||
665 | /* Return the CRC of the bytes buf[0..len-1]. */ | ||
666 | unsigned long crc(unsigned char *buf, int len) | ||
667 | { | ||
668 | return update_crc(0L, buf, len); | ||
669 | } | ||
670 | |||
671 | |||
672 | |||
673 | |||
674 | Deutsch Informational [Page 12] | ||
675 | |||