diff options
Diffstat (limited to 'archival')
-rw-r--r-- | archival/gzip.c | 694 |
1 files changed, 341 insertions, 353 deletions
diff --git a/archival/gzip.c b/archival/gzip.c index c882a4a2a..034bda005 100644 --- a/archival/gzip.c +++ b/archival/gzip.c | |||
@@ -201,7 +201,8 @@ typedef unsigned IPos; | |||
201 | array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); \ | 201 | array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); \ |
202 | } | 202 | } |
203 | 203 | ||
204 | #define FREE(array) { \ | 204 | #define FREE(array) \ |
205 | { \ | ||
205 | free(array); \ | 206 | free(array); \ |
206 | array = NULL; \ | 207 | array = NULL; \ |
207 | } | 208 | } |
@@ -272,21 +273,6 @@ enum { | |||
272 | }; | 273 | }; |
273 | 274 | ||
274 | 275 | ||
275 | /* =========================================================================== | ||
276 | */ | ||
277 | static void fill_window(void); | ||
278 | |||
279 | static int longest_match(IPos cur_match); | ||
280 | |||
281 | #ifdef DEBUG | ||
282 | static void check_match(IPos start, IPos match, int length); | ||
283 | #endif | ||
284 | |||
285 | |||
286 | /* from trees.c */ | ||
287 | static int ct_tally(int dist, int lc); | ||
288 | static ulg flush_block(char *buf, ulg stored_len, int eof); | ||
289 | |||
290 | /* global buffers */ | 276 | /* global buffers */ |
291 | 277 | ||
292 | /* To save memory for 16 bit systems, some arrays are overlaid between | 278 | /* To save memory for 16 bit systems, some arrays are overlaid between |
@@ -586,6 +572,65 @@ static void copy_block(char *buf, unsigned len, int header) | |||
586 | 572 | ||
587 | 573 | ||
588 | /* =========================================================================== | 574 | /* =========================================================================== |
575 | * Fill the window when the lookahead becomes insufficient. | ||
576 | * Updates strstart and lookahead, and sets eofile if end of input file. | ||
577 | * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0 | ||
578 | * OUT assertions: at least one byte has been read, or eofile is set; | ||
579 | * file reads are performed for at least two bytes (required for the | ||
580 | * translate_eol option). | ||
581 | */ | ||
582 | static void fill_window(void) | ||
583 | { | ||
584 | unsigned n, m; | ||
585 | unsigned more = WINDOW_SIZE - lookahead - strstart; | ||
586 | /* Amount of free space at the end of the window. */ | ||
587 | |||
588 | /* If the window is almost full and there is insufficient lookahead, | ||
589 | * move the upper half to the lower one to make room in the upper half. | ||
590 | */ | ||
591 | if (more == (unsigned) -1) { | ||
592 | /* Very unlikely, but possible on 16 bit machine if strstart == 0 | ||
593 | * and lookahead == 1 (input done one byte at time) | ||
594 | */ | ||
595 | more--; | ||
596 | } else if (strstart >= WSIZE + MAX_DIST) { | ||
597 | /* By the IN assertion, the window is not empty so we can't confuse | ||
598 | * more == 0 with more == 64K on a 16 bit machine. | ||
599 | */ | ||
600 | Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM"); | ||
601 | |||
602 | memcpy(window, window + WSIZE, WSIZE); | ||
603 | match_start -= WSIZE; | ||
604 | strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ | ||
605 | |||
606 | block_start -= WSIZE; | ||
607 | |||
608 | for (n = 0; n < HASH_SIZE; n++) { | ||
609 | m = head[n]; | ||
610 | head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0); | ||
611 | } | ||
612 | for (n = 0; n < WSIZE; n++) { | ||
613 | m = prev[n]; | ||
614 | prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0); | ||
615 | /* If n is not on any hash chain, prev[n] is garbage but | ||
616 | * its value will never be used. | ||
617 | */ | ||
618 | } | ||
619 | more += WSIZE; | ||
620 | } | ||
621 | /* At this point, more >= 2 */ | ||
622 | if (!eofile) { | ||
623 | n = file_read(window + strstart + lookahead, more); | ||
624 | if (n == 0 || n == (unsigned) -1) { | ||
625 | eofile = 1; | ||
626 | } else { | ||
627 | lookahead += n; | ||
628 | } | ||
629 | } | ||
630 | } | ||
631 | |||
632 | |||
633 | /* =========================================================================== | ||
589 | * Update a hash value with the given input byte | 634 | * Update a hash value with the given input byte |
590 | * IN assertion: all calls to to UPDATE_HASH are made with consecutive | 635 | * IN assertion: all calls to to UPDATE_HASH are made with consecutive |
591 | * input characters, so that a running hash key can be computed from the | 636 | * input characters, so that a running hash key can be computed from the |
@@ -746,202 +791,6 @@ static void check_match(IPos start, IPos match, int length) | |||
746 | #endif | 791 | #endif |
747 | 792 | ||
748 | 793 | ||
749 | /* =========================================================================== | ||
750 | * Fill the window when the lookahead becomes insufficient. | ||
751 | * Updates strstart and lookahead, and sets eofile if end of input file. | ||
752 | * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0 | ||
753 | * OUT assertions: at least one byte has been read, or eofile is set; | ||
754 | * file reads are performed for at least two bytes (required for the | ||
755 | * translate_eol option). | ||
756 | */ | ||
757 | static void fill_window(void) | ||
758 | { | ||
759 | unsigned n, m; | ||
760 | unsigned more = WINDOW_SIZE - lookahead - strstart; | ||
761 | /* Amount of free space at the end of the window. */ | ||
762 | |||
763 | /* If the window is almost full and there is insufficient lookahead, | ||
764 | * move the upper half to the lower one to make room in the upper half. | ||
765 | */ | ||
766 | if (more == (unsigned) -1) { | ||
767 | /* Very unlikely, but possible on 16 bit machine if strstart == 0 | ||
768 | * and lookahead == 1 (input done one byte at time) | ||
769 | */ | ||
770 | more--; | ||
771 | } else if (strstart >= WSIZE + MAX_DIST) { | ||
772 | /* By the IN assertion, the window is not empty so we can't confuse | ||
773 | * more == 0 with more == 64K on a 16 bit machine. | ||
774 | */ | ||
775 | Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM"); | ||
776 | |||
777 | memcpy(window, window + WSIZE, WSIZE); | ||
778 | match_start -= WSIZE; | ||
779 | strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ | ||
780 | |||
781 | block_start -= WSIZE; | ||
782 | |||
783 | for (n = 0; n < HASH_SIZE; n++) { | ||
784 | m = head[n]; | ||
785 | head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0); | ||
786 | } | ||
787 | for (n = 0; n < WSIZE; n++) { | ||
788 | m = prev[n]; | ||
789 | prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0); | ||
790 | /* If n is not on any hash chain, prev[n] is garbage but | ||
791 | * its value will never be used. | ||
792 | */ | ||
793 | } | ||
794 | more += WSIZE; | ||
795 | } | ||
796 | /* At this point, more >= 2 */ | ||
797 | if (!eofile) { | ||
798 | n = file_read(window + strstart + lookahead, more); | ||
799 | if (n == 0 || n == (unsigned) -1) { | ||
800 | eofile = 1; | ||
801 | } else { | ||
802 | lookahead += n; | ||
803 | } | ||
804 | } | ||
805 | } | ||
806 | |||
807 | |||
808 | /* =========================================================================== | ||
809 | * Same as above, but achieves better compression. We use a lazy | ||
810 | * evaluation for matches: a match is finally adopted only if there is | ||
811 | * no better match at the next window position. | ||
812 | * | ||
813 | * Processes a new input file and return its compressed length. Sets | ||
814 | * the compressed length, crc, deflate flags and internal file | ||
815 | * attributes. | ||
816 | */ | ||
817 | |||
818 | /* Flush the current block, with given end-of-file flag. | ||
819 | * IN assertion: strstart is set to the end of the current match. */ | ||
820 | #define FLUSH_BLOCK(eof) \ | ||
821 | flush_block( \ | ||
822 | block_start >= 0L \ | ||
823 | ? (char*)&window[(unsigned)block_start] \ | ||
824 | : (char*)NULL, \ | ||
825 | (long)strstart - block_start, \ | ||
826 | (eof) \ | ||
827 | ) | ||
828 | |||
829 | /* Insert string s in the dictionary and set match_head to the previous head | ||
830 | * of the hash chain (the most recent string with same hash key). Return | ||
831 | * the previous length of the hash chain. | ||
832 | * IN assertion: all calls to to INSERT_STRING are made with consecutive | ||
833 | * input characters and the first MIN_MATCH bytes of s are valid | ||
834 | * (except for the last MIN_MATCH-1 bytes of the input file). */ | ||
835 | #define INSERT_STRING(s, match_head) \ | ||
836 | { \ | ||
837 | UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]); \ | ||
838 | prev[(s) & WMASK] = match_head = head[ins_h]; \ | ||
839 | head[ins_h] = (s); \ | ||
840 | } | ||
841 | |||
842 | static ulg deflate(void) | ||
843 | { | ||
844 | IPos hash_head; /* head of hash chain */ | ||
845 | IPos prev_match; /* previous match */ | ||
846 | int flush; /* set if current block must be flushed */ | ||
847 | int match_available = 0; /* set if previous match exists */ | ||
848 | unsigned match_length = MIN_MATCH - 1; /* length of best match */ | ||
849 | |||
850 | /* Process the input block. */ | ||
851 | while (lookahead != 0) { | ||
852 | /* Insert the string window[strstart .. strstart+2] in the | ||
853 | * dictionary, and set hash_head to the head of the hash chain: | ||
854 | */ | ||
855 | INSERT_STRING(strstart, hash_head); | ||
856 | |||
857 | /* Find the longest match, discarding those <= prev_length. | ||
858 | */ | ||
859 | prev_length = match_length, prev_match = match_start; | ||
860 | match_length = MIN_MATCH - 1; | ||
861 | |||
862 | if (hash_head != 0 && prev_length < max_lazy_match | ||
863 | && strstart - hash_head <= MAX_DIST | ||
864 | ) { | ||
865 | /* To simplify the code, we prevent matches with the string | ||
866 | * of window index 0 (in particular we have to avoid a match | ||
867 | * of the string with itself at the start of the input file). | ||
868 | */ | ||
869 | match_length = longest_match(hash_head); | ||
870 | /* longest_match() sets match_start */ | ||
871 | if (match_length > lookahead) | ||
872 | match_length = lookahead; | ||
873 | |||
874 | /* Ignore a length 3 match if it is too distant: */ | ||
875 | if (match_length == MIN_MATCH && strstart - match_start > TOO_FAR) { | ||
876 | /* If prev_match is also MIN_MATCH, match_start is garbage | ||
877 | * but we will ignore the current match anyway. | ||
878 | */ | ||
879 | match_length--; | ||
880 | } | ||
881 | } | ||
882 | /* If there was a match at the previous step and the current | ||
883 | * match is not better, output the previous match: | ||
884 | */ | ||
885 | if (prev_length >= MIN_MATCH && match_length <= prev_length) { | ||
886 | check_match(strstart - 1, prev_match, prev_length); | ||
887 | flush = ct_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH); | ||
888 | |||
889 | /* Insert in hash table all strings up to the end of the match. | ||
890 | * strstart-1 and strstart are already inserted. | ||
891 | */ | ||
892 | lookahead -= prev_length - 1; | ||
893 | prev_length -= 2; | ||
894 | do { | ||
895 | strstart++; | ||
896 | INSERT_STRING(strstart, hash_head); | ||
897 | /* strstart never exceeds WSIZE-MAX_MATCH, so there are | ||
898 | * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH | ||
899 | * these bytes are garbage, but it does not matter since the | ||
900 | * next lookahead bytes will always be emitted as literals. | ||
901 | */ | ||
902 | } while (--prev_length != 0); | ||
903 | match_available = 0; | ||
904 | match_length = MIN_MATCH - 1; | ||
905 | strstart++; | ||
906 | if (flush) { | ||
907 | FLUSH_BLOCK(0); | ||
908 | block_start = strstart; | ||
909 | } | ||
910 | } else if (match_available) { | ||
911 | /* If there was no match at the previous position, output a | ||
912 | * single literal. If there was a match but the current match | ||
913 | * is longer, truncate the previous match to a single literal. | ||
914 | */ | ||
915 | Tracevv((stderr, "%c", window[strstart - 1])); | ||
916 | if (ct_tally(0, window[strstart - 1])) { | ||
917 | FLUSH_BLOCK(0); | ||
918 | block_start = strstart; | ||
919 | } | ||
920 | strstart++; | ||
921 | lookahead--; | ||
922 | } else { | ||
923 | /* There is no previous match to compare with, wait for | ||
924 | * the next step to decide. | ||
925 | */ | ||
926 | match_available = 1; | ||
927 | strstart++; | ||
928 | lookahead--; | ||
929 | } | ||
930 | Assert(strstart <= isize && lookahead <= isize, "a bit too far"); | ||
931 | |||
932 | /* Make sure that we always have enough lookahead, except | ||
933 | * at the end of the input file. We need MAX_MATCH bytes | ||
934 | * for the next match, plus MIN_MATCH bytes to insert the | ||
935 | * string following the next match. | ||
936 | */ | ||
937 | while (lookahead < MIN_LOOKAHEAD && !eofile) | ||
938 | fill_window(); | ||
939 | } | ||
940 | if (match_available) | ||
941 | ct_tally(0, window[strstart - 1]); | ||
942 | |||
943 | return FLUSH_BLOCK(1); /* eof */ | ||
944 | } | ||
945 | /* trees.c -- output deflated data using Huffman coding | 794 | /* trees.c -- output deflated data using Huffman coding |
946 | * Copyright (C) 1992-1993 Jean-loup Gailly | 795 | * Copyright (C) 1992-1993 Jean-loup Gailly |
947 | * This is free software; you can redistribute it and/or modify it under the | 796 | * This is free software; you can redistribute it and/or modify it under the |
@@ -991,10 +840,6 @@ static ulg deflate(void) | |||
991 | * file. Returns the total compressed length for the file so far. | 840 | * file. Returns the total compressed length for the file so far. |
992 | */ | 841 | */ |
993 | 842 | ||
994 | /* =========================================================================== | ||
995 | * Constants | ||
996 | */ | ||
997 | |||
998 | #define MAX_BITS 15 | 843 | #define MAX_BITS 15 |
999 | /* All codes must not exceed MAX_BITS bits */ | 844 | /* All codes must not exceed MAX_BITS bits */ |
1000 | 845 | ||
@@ -1087,9 +932,7 @@ static const extra_bits_t extra_blbits[BL_CODES] = { | |||
1087 | /* repeat a zero length 11-138 times (7 bits of repeat count) */ | 932 | /* repeat a zero length 11-138 times (7 bits of repeat count) */ |
1088 | 933 | ||
1089 | /* =========================================================================== | 934 | /* =========================================================================== |
1090 | * Local data | 935 | */ |
1091 | */ | ||
1092 | |||
1093 | /* Data structure describing a single value and its code string. */ | 936 | /* Data structure describing a single value and its code string. */ |
1094 | typedef struct ct_data { | 937 | typedef struct ct_data { |
1095 | union { | 938 | union { |
@@ -1231,8 +1074,6 @@ static int *file_method; /* pointer to DEFLATE or STORE */ | |||
1231 | 1074 | ||
1232 | /* =========================================================================== | 1075 | /* =========================================================================== |
1233 | */ | 1076 | */ |
1234 | static void init_block(void); | ||
1235 | static void gen_bitlen(tree_desc * desc); | ||
1236 | static void gen_codes(ct_data * tree, int max_code); | 1077 | static void gen_codes(ct_data * tree, int max_code); |
1237 | static void build_tree(tree_desc * desc); | 1078 | static void build_tree(tree_desc * desc); |
1238 | static void scan_tree(ct_data * tree, int max_code); | 1079 | static void scan_tree(ct_data * tree, int max_code); |
@@ -1240,7 +1081,6 @@ static void send_tree(ct_data * tree, int max_code); | |||
1240 | static int build_bl_tree(void); | 1081 | static int build_bl_tree(void); |
1241 | static void send_all_trees(int lcodes, int dcodes, int blcodes); | 1082 | static void send_all_trees(int lcodes, int dcodes, int blcodes); |
1242 | static void compress_block(ct_data * ltree, ct_data * dtree); | 1083 | static void compress_block(ct_data * ltree, ct_data * dtree); |
1243 | static void set_file_type(void); | ||
1244 | 1084 | ||
1245 | 1085 | ||
1246 | #ifndef DEBUG | 1086 | #ifndef DEBUG |
@@ -1259,9 +1099,31 @@ static void set_file_type(void); | |||
1259 | /* Mapping from a distance to a distance code. dist is the distance - 1 and | 1099 | /* Mapping from a distance to a distance code. dist is the distance - 1 and |
1260 | * must not have side effects. dist_code[256] and dist_code[257] are never | 1100 | * must not have side effects. dist_code[256] and dist_code[257] are never |
1261 | * used. | 1101 | * used. |
1102 | * The arguments must not have side effects. | ||
1103 | */ | ||
1104 | |||
1105 | |||
1106 | /* =========================================================================== | ||
1107 | * Initialize a new block. | ||
1262 | */ | 1108 | */ |
1109 | static void init_block(void) | ||
1110 | { | ||
1111 | int n; /* iterates over tree elements */ | ||
1263 | 1112 | ||
1264 | /* the arguments must not have side effects */ | 1113 | /* Initialize the trees. */ |
1114 | for (n = 0; n < L_CODES; n++) | ||
1115 | dyn_ltree[n].Freq = 0; | ||
1116 | for (n = 0; n < D_CODES; n++) | ||
1117 | dyn_dtree[n].Freq = 0; | ||
1118 | for (n = 0; n < BL_CODES; n++) | ||
1119 | bl_tree[n].Freq = 0; | ||
1120 | |||
1121 | dyn_ltree[END_BLOCK].Freq = 1; | ||
1122 | opt_len = static_len = 0L; | ||
1123 | last_lit = last_dist = last_flags = 0; | ||
1124 | flags = 0; | ||
1125 | flag_bit = 1; | ||
1126 | } | ||
1265 | 1127 | ||
1266 | 1128 | ||
1267 | /* =========================================================================== | 1129 | /* =========================================================================== |
@@ -1289,7 +1151,7 @@ static void ct_init(ush * attr, int *methodp) | |||
1289 | for (code = 0; code < LENGTH_CODES - 1; code++) { | 1151 | for (code = 0; code < LENGTH_CODES - 1; code++) { |
1290 | base_length[code] = length; | 1152 | base_length[code] = length; |
1291 | for (n = 0; n < (1 << extra_lbits[code]); n++) { | 1153 | for (n = 0; n < (1 << extra_lbits[code]); n++) { |
1292 | length_code[length++] = (uch) code; | 1154 | length_code[length++] = code; |
1293 | } | 1155 | } |
1294 | } | 1156 | } |
1295 | Assert(length == 256, "ct_init: length != 256"); | 1157 | Assert(length == 256, "ct_init: length != 256"); |
@@ -1297,7 +1159,7 @@ static void ct_init(ush * attr, int *methodp) | |||
1297 | * in two different ways: code 284 + 5 bits or code 285, so we | 1159 | * in two different ways: code 284 + 5 bits or code 285, so we |
1298 | * overwrite length_code[255] to use the best encoding: | 1160 | * overwrite length_code[255] to use the best encoding: |
1299 | */ | 1161 | */ |
1300 | length_code[length - 1] = (uch) code; | 1162 | length_code[length - 1] = code; |
1301 | 1163 | ||
1302 | /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ | 1164 | /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ |
1303 | dist = 0; | 1165 | dist = 0; |
@@ -1356,29 +1218,6 @@ static void ct_init(ush * attr, int *methodp) | |||
1356 | 1218 | ||
1357 | 1219 | ||
1358 | /* =========================================================================== | 1220 | /* =========================================================================== |
1359 | * Initialize a new block. | ||
1360 | */ | ||
1361 | static void init_block(void) | ||
1362 | { | ||
1363 | int n; /* iterates over tree elements */ | ||
1364 | |||
1365 | /* Initialize the trees. */ | ||
1366 | for (n = 0; n < L_CODES; n++) | ||
1367 | dyn_ltree[n].Freq = 0; | ||
1368 | for (n = 0; n < D_CODES; n++) | ||
1369 | dyn_dtree[n].Freq = 0; | ||
1370 | for (n = 0; n < BL_CODES; n++) | ||
1371 | bl_tree[n].Freq = 0; | ||
1372 | |||
1373 | dyn_ltree[END_BLOCK].Freq = 1; | ||
1374 | opt_len = static_len = 0L; | ||
1375 | last_lit = last_dist = last_flags = 0; | ||
1376 | flags = 0; | ||
1377 | flag_bit = 1; | ||
1378 | } | ||
1379 | |||
1380 | |||
1381 | /* =========================================================================== | ||
1382 | * Restore the heap property by moving down the tree starting at node k, | 1221 | * Restore the heap property by moving down the tree starting at node k, |
1383 | * exchanging a node with the smallest of its two sons if necessary, stopping | 1222 | * exchanging a node with the smallest of its two sons if necessary, stopping |
1384 | * when the heap property is re-established (each father smaller than its | 1223 | * when the heap property is re-established (each father smaller than its |
@@ -1470,7 +1309,7 @@ static void gen_bitlen(tree_desc * desc) | |||
1470 | opt_len += (ulg) f *(bits + xbits); | 1309 | opt_len += (ulg) f *(bits + xbits); |
1471 | 1310 | ||
1472 | if (stree) | 1311 | if (stree) |
1473 | static_len += (ulg) f *(stree[n].Len + xbits); | 1312 | static_len += (ulg) f * (stree[n].Len + xbits); |
1474 | } | 1313 | } |
1475 | if (overflow == 0) | 1314 | if (overflow == 0) |
1476 | return; | 1315 | return; |
@@ -1513,6 +1352,7 @@ static void gen_bitlen(tree_desc * desc) | |||
1513 | } | 1352 | } |
1514 | } | 1353 | } |
1515 | 1354 | ||
1355 | |||
1516 | /* =========================================================================== | 1356 | /* =========================================================================== |
1517 | * Generate the codes for a given tree and bit counts (which need not be | 1357 | * Generate the codes for a given tree and bit counts (which need not be |
1518 | * optimal). | 1358 | * optimal). |
@@ -1556,6 +1396,7 @@ static void gen_codes(ct_data * tree, int max_code) | |||
1556 | } | 1396 | } |
1557 | } | 1397 | } |
1558 | 1398 | ||
1399 | |||
1559 | /* =========================================================================== | 1400 | /* =========================================================================== |
1560 | * Construct one Huffman tree and assigns the code bit strings and lengths. | 1401 | * Construct one Huffman tree and assigns the code bit strings and lengths. |
1561 | * Update the total bit length for the current block. | 1402 | * Update the total bit length for the current block. |
@@ -1662,6 +1503,7 @@ static void build_tree(tree_desc * desc) | |||
1662 | gen_codes((ct_data *) tree, max_code); | 1503 | gen_codes((ct_data *) tree, max_code); |
1663 | } | 1504 | } |
1664 | 1505 | ||
1506 | |||
1665 | /* =========================================================================== | 1507 | /* =========================================================================== |
1666 | * Scan a literal or distance tree to determine the frequencies of the codes | 1508 | * Scan a literal or distance tree to determine the frequencies of the codes |
1667 | * in the bit length tree. Updates opt_len to take into account the repeat | 1509 | * in the bit length tree. Updates opt_len to take into account the repeat |
@@ -1715,6 +1557,7 @@ static void scan_tree(ct_data * tree, int max_code) | |||
1715 | } | 1557 | } |
1716 | } | 1558 | } |
1717 | 1559 | ||
1560 | |||
1718 | /* =========================================================================== | 1561 | /* =========================================================================== |
1719 | * Send a literal or distance tree in compressed form, using the codes in | 1562 | * Send a literal or distance tree in compressed form, using the codes in |
1720 | * bl_tree. | 1563 | * bl_tree. |
@@ -1772,6 +1615,7 @@ static void send_tree(ct_data * tree, int max_code) | |||
1772 | } | 1615 | } |
1773 | } | 1616 | } |
1774 | 1617 | ||
1618 | |||
1775 | /* =========================================================================== | 1619 | /* =========================================================================== |
1776 | * Construct the Huffman tree for the bit lengths and return the index in | 1620 | * Construct the Huffman tree for the bit lengths and return the index in |
1777 | * bl_order of the last bit length code to send. | 1621 | * bl_order of the last bit length code to send. |
@@ -1805,6 +1649,7 @@ static int build_bl_tree(void) | |||
1805 | return max_blindex; | 1649 | return max_blindex; |
1806 | } | 1650 | } |
1807 | 1651 | ||
1652 | |||
1808 | /* =========================================================================== | 1653 | /* =========================================================================== |
1809 | * Send the header for a block using dynamic Huffman trees: the counts, the | 1654 | * Send the header for a block using dynamic Huffman trees: the counts, the |
1810 | * lengths of the bit length codes, the literal tree and the distance tree. | 1655 | * lengths of the bit length codes, the literal tree and the distance tree. |
@@ -1834,100 +1679,32 @@ static void send_all_trees(int lcodes, int dcodes, int blcodes) | |||
1834 | Tracev((stderr, "\ndist tree: sent %ld", bits_sent)); | 1679 | Tracev((stderr, "\ndist tree: sent %ld", bits_sent)); |
1835 | } | 1680 | } |
1836 | 1681 | ||
1682 | |||
1837 | /* =========================================================================== | 1683 | /* =========================================================================== |
1838 | * Determine the best encoding for the current block: dynamic trees, static | 1684 | * Set the file type to ASCII or BINARY, using a crude approximation: |
1839 | * trees or store, and output the encoded block to the zip file. This function | 1685 | * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. |
1840 | * returns the total compressed length for the file so far. | 1686 | * IN assertion: the fields freq of dyn_ltree are set and the total of all |
1687 | * frequencies does not exceed 64K (to fit in an int on 16 bit machines). | ||
1841 | */ | 1688 | */ |
1842 | static ulg flush_block(char *buf, ulg stored_len, int eof) | 1689 | static void set_file_type(void) |
1843 | { | 1690 | { |
1844 | ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ | 1691 | int n = 0; |
1845 | int max_blindex; /* index of last bit length code of non zero freq */ | 1692 | unsigned ascii_freq = 0; |
1846 | 1693 | unsigned bin_freq = 0; | |
1847 | flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */ | ||
1848 | |||
1849 | /* Check if the file is ascii or binary */ | ||
1850 | if (*file_type == (ush) UNKNOWN) | ||
1851 | set_file_type(); | ||
1852 | |||
1853 | /* Construct the literal and distance trees */ | ||
1854 | build_tree((tree_desc *) (&l_desc)); | ||
1855 | Tracev((stderr, "\nlit data: dyn %ld, stat %ld", opt_len, static_len)); | ||
1856 | |||
1857 | build_tree((tree_desc *) (&d_desc)); | ||
1858 | Tracev((stderr, "\ndist data: dyn %ld, stat %ld", opt_len, static_len)); | ||
1859 | /* At this point, opt_len and static_len are the total bit lengths of | ||
1860 | * the compressed block data, excluding the tree representations. | ||
1861 | */ | ||
1862 | |||
1863 | /* Build the bit length tree for the above two trees, and get the index | ||
1864 | * in bl_order of the last bit length code to send. | ||
1865 | */ | ||
1866 | max_blindex = build_bl_tree(); | ||
1867 | |||
1868 | /* Determine the best encoding. Compute first the block length in bytes */ | ||
1869 | opt_lenb = (opt_len + 3 + 7) >> 3; | ||
1870 | static_lenb = (static_len + 3 + 7) >> 3; | ||
1871 | |||
1872 | Trace((stderr, | ||
1873 | "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ", | ||
1874 | opt_lenb, opt_len, static_lenb, static_len, stored_len, | ||
1875 | last_lit, last_dist)); | ||
1876 | |||
1877 | if (static_lenb <= opt_lenb) | ||
1878 | opt_lenb = static_lenb; | ||
1879 | |||
1880 | /* If compression failed and this is the first and last block, | ||
1881 | * and if the zip file can be seeked (to rewrite the local header), | ||
1882 | * the whole file is transformed into a stored file: | ||
1883 | */ | ||
1884 | if (stored_len <= opt_lenb && eof && compressed_len == 0L && seekable()) { | ||
1885 | /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ | ||
1886 | if (buf == NULL) | ||
1887 | bb_error_msg("block vanished"); | ||
1888 | |||
1889 | copy_block(buf, (unsigned) stored_len, 0); /* without header */ | ||
1890 | compressed_len = stored_len << 3; | ||
1891 | *file_method = STORED; | ||
1892 | |||
1893 | } else if (stored_len + 4 <= opt_lenb && buf != (char *) 0) { | ||
1894 | /* 4: two words for the lengths */ | ||
1895 | /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. | ||
1896 | * Otherwise we can't have processed more than WSIZE input bytes since | ||
1897 | * the last block flush, because compression would have been | ||
1898 | * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to | ||
1899 | * transform a block into a stored block. | ||
1900 | */ | ||
1901 | send_bits((STORED_BLOCK << 1) + eof, 3); /* send block type */ | ||
1902 | compressed_len = (compressed_len + 3 + 7) & ~7L; | ||
1903 | compressed_len += (stored_len + 4) << 3; | ||
1904 | |||
1905 | copy_block(buf, (unsigned) stored_len, 1); /* with header */ | ||
1906 | |||
1907 | } else if (static_lenb == opt_lenb) { | ||
1908 | send_bits((STATIC_TREES << 1) + eof, 3); | ||
1909 | compress_block((ct_data *) static_ltree, (ct_data *) static_dtree); | ||
1910 | compressed_len += 3 + static_len; | ||
1911 | } else { | ||
1912 | send_bits((DYN_TREES << 1) + eof, 3); | ||
1913 | send_all_trees(l_desc.max_code + 1, d_desc.max_code + 1, | ||
1914 | max_blindex + 1); | ||
1915 | compress_block((ct_data *) dyn_ltree, (ct_data *) dyn_dtree); | ||
1916 | compressed_len += 3 + opt_len; | ||
1917 | } | ||
1918 | Assert(compressed_len == bits_sent, "bad compressed size"); | ||
1919 | init_block(); | ||
1920 | 1694 | ||
1921 | if (eof) { | 1695 | while (n < 7) |
1922 | bi_windup(); | 1696 | bin_freq += dyn_ltree[n++].Freq; |
1923 | compressed_len += 7; /* align on byte boundary */ | 1697 | while (n < 128) |
1698 | ascii_freq += dyn_ltree[n++].Freq; | ||
1699 | while (n < LITERALS) | ||
1700 | bin_freq += dyn_ltree[n++].Freq; | ||
1701 | *file_type = (bin_freq > (ascii_freq >> 2)) ? BINARY : ASCII; | ||
1702 | if (*file_type == BINARY && translate_eol) { | ||
1703 | bb_error_msg("-l used on binary file"); | ||
1924 | } | 1704 | } |
1925 | Tracev((stderr, "\ncomprlen %lu(%lu) ", compressed_len >> 3, | ||
1926 | compressed_len - 7 * eof)); | ||
1927 | |||
1928 | return compressed_len >> 3; | ||
1929 | } | 1705 | } |
1930 | 1706 | ||
1707 | |||
1931 | /* =========================================================================== | 1708 | /* =========================================================================== |
1932 | * Save the match info and tally the frequency counts. Return true if | 1709 | * Save the match info and tally the frequency counts. Return true if |
1933 | * the current block must be flushed. | 1710 | * the current block must be flushed. |
@@ -2034,30 +1811,241 @@ static void compress_block(ct_data * ltree, ct_data * dtree) | |||
2034 | SEND_CODE(END_BLOCK, ltree); | 1811 | SEND_CODE(END_BLOCK, ltree); |
2035 | } | 1812 | } |
2036 | 1813 | ||
1814 | |||
2037 | /* =========================================================================== | 1815 | /* =========================================================================== |
2038 | * Set the file type to ASCII or BINARY, using a crude approximation: | 1816 | * Determine the best encoding for the current block: dynamic trees, static |
2039 | * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. | 1817 | * trees or store, and output the encoded block to the zip file. This function |
2040 | * IN assertion: the fields freq of dyn_ltree are set and the total of all | 1818 | * returns the total compressed length for the file so far. |
2041 | * frequencies does not exceed 64K (to fit in an int on 16 bit machines). | ||
2042 | */ | 1819 | */ |
2043 | static void set_file_type(void) | 1820 | static ulg flush_block(char *buf, ulg stored_len, int eof) |
2044 | { | 1821 | { |
2045 | int n = 0; | 1822 | ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ |
2046 | unsigned ascii_freq = 0; | 1823 | int max_blindex; /* index of last bit length code of non zero freq */ |
2047 | unsigned bin_freq = 0; | ||
2048 | 1824 | ||
2049 | while (n < 7) | 1825 | flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */ |
2050 | bin_freq += dyn_ltree[n++].Freq; | 1826 | |
2051 | while (n < 128) | 1827 | /* Check if the file is ascii or binary */ |
2052 | ascii_freq += dyn_ltree[n++].Freq; | 1828 | if (*file_type == (ush) UNKNOWN) |
2053 | while (n < LITERALS) | 1829 | set_file_type(); |
2054 | bin_freq += dyn_ltree[n++].Freq; | 1830 | |
2055 | *file_type = (bin_freq > (ascii_freq >> 2)) ? BINARY : ASCII; | 1831 | /* Construct the literal and distance trees */ |
2056 | if (*file_type == BINARY && translate_eol) { | 1832 | build_tree((tree_desc *) (&l_desc)); |
2057 | bb_error_msg("-l used on binary file"); | 1833 | Tracev((stderr, "\nlit data: dyn %ld, stat %ld", opt_len, static_len)); |
1834 | |||
1835 | build_tree((tree_desc *) (&d_desc)); | ||
1836 | Tracev((stderr, "\ndist data: dyn %ld, stat %ld", opt_len, static_len)); | ||
1837 | /* At this point, opt_len and static_len are the total bit lengths of | ||
1838 | * the compressed block data, excluding the tree representations. | ||
1839 | */ | ||
1840 | |||
1841 | /* Build the bit length tree for the above two trees, and get the index | ||
1842 | * in bl_order of the last bit length code to send. | ||
1843 | */ | ||
1844 | max_blindex = build_bl_tree(); | ||
1845 | |||
1846 | /* Determine the best encoding. Compute first the block length in bytes */ | ||
1847 | opt_lenb = (opt_len + 3 + 7) >> 3; | ||
1848 | static_lenb = (static_len + 3 + 7) >> 3; | ||
1849 | |||
1850 | Trace((stderr, | ||
1851 | "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ", | ||
1852 | opt_lenb, opt_len, static_lenb, static_len, stored_len, | ||
1853 | last_lit, last_dist)); | ||
1854 | |||
1855 | if (static_lenb <= opt_lenb) | ||
1856 | opt_lenb = static_lenb; | ||
1857 | |||
1858 | /* If compression failed and this is the first and last block, | ||
1859 | * and if the zip file can be seeked (to rewrite the local header), | ||
1860 | * the whole file is transformed into a stored file: | ||
1861 | */ | ||
1862 | if (stored_len <= opt_lenb && eof && compressed_len == 0L && seekable()) { | ||
1863 | /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ | ||
1864 | if (buf == NULL) | ||
1865 | bb_error_msg("block vanished"); | ||
1866 | |||
1867 | copy_block(buf, (unsigned) stored_len, 0); /* without header */ | ||
1868 | compressed_len = stored_len << 3; | ||
1869 | *file_method = STORED; | ||
1870 | |||
1871 | } else if (stored_len + 4 <= opt_lenb && buf != (char *) 0) { | ||
1872 | /* 4: two words for the lengths */ | ||
1873 | /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. | ||
1874 | * Otherwise we can't have processed more than WSIZE input bytes since | ||
1875 | * the last block flush, because compression would have been | ||
1876 | * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to | ||
1877 | * transform a block into a stored block. | ||
1878 | */ | ||
1879 | send_bits((STORED_BLOCK << 1) + eof, 3); /* send block type */ | ||
1880 | compressed_len = (compressed_len + 3 + 7) & ~7L; | ||
1881 | compressed_len += (stored_len + 4) << 3; | ||
1882 | |||
1883 | copy_block(buf, (unsigned) stored_len, 1); /* with header */ | ||
1884 | |||
1885 | } else if (static_lenb == opt_lenb) { | ||
1886 | send_bits((STATIC_TREES << 1) + eof, 3); | ||
1887 | compress_block((ct_data *) static_ltree, (ct_data *) static_dtree); | ||
1888 | compressed_len += 3 + static_len; | ||
1889 | } else { | ||
1890 | send_bits((DYN_TREES << 1) + eof, 3); | ||
1891 | send_all_trees(l_desc.max_code + 1, d_desc.max_code + 1, | ||
1892 | max_blindex + 1); | ||
1893 | compress_block((ct_data *) dyn_ltree, (ct_data *) dyn_dtree); | ||
1894 | compressed_len += 3 + opt_len; | ||
1895 | } | ||
1896 | Assert(compressed_len == bits_sent, "bad compressed size"); | ||
1897 | init_block(); | ||
1898 | |||
1899 | if (eof) { | ||
1900 | bi_windup(); | ||
1901 | compressed_len += 7; /* align on byte boundary */ | ||
1902 | } | ||
1903 | Tracev((stderr, "\ncomprlen %lu(%lu) ", compressed_len >> 3, | ||
1904 | compressed_len - 7 * eof)); | ||
1905 | |||
1906 | return compressed_len >> 3; | ||
1907 | } | ||
1908 | |||
1909 | |||
1910 | /* =========================================================================== | ||
1911 | * Same as above, but achieves better compression. We use a lazy | ||
1912 | * evaluation for matches: a match is finally adopted only if there is | ||
1913 | * no better match at the next window position. | ||
1914 | * | ||
1915 | * Processes a new input file and return its compressed length. Sets | ||
1916 | * the compressed length, crc, deflate flags and internal file | ||
1917 | * attributes. | ||
1918 | */ | ||
1919 | |||
1920 | /* Flush the current block, with given end-of-file flag. | ||
1921 | * IN assertion: strstart is set to the end of the current match. */ | ||
1922 | #define FLUSH_BLOCK(eof) \ | ||
1923 | flush_block( \ | ||
1924 | block_start >= 0L \ | ||
1925 | ? (char*)&window[(unsigned)block_start] \ | ||
1926 | : (char*)NULL, \ | ||
1927 | (long)strstart - block_start, \ | ||
1928 | (eof) \ | ||
1929 | ) | ||
1930 | |||
1931 | /* Insert string s in the dictionary and set match_head to the previous head | ||
1932 | * of the hash chain (the most recent string with same hash key). Return | ||
1933 | * the previous length of the hash chain. | ||
1934 | * IN assertion: all calls to to INSERT_STRING are made with consecutive | ||
1935 | * input characters and the first MIN_MATCH bytes of s are valid | ||
1936 | * (except for the last MIN_MATCH-1 bytes of the input file). */ | ||
1937 | #define INSERT_STRING(s, match_head) \ | ||
1938 | { \ | ||
1939 | UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]); \ | ||
1940 | prev[(s) & WMASK] = match_head = head[ins_h]; \ | ||
1941 | head[ins_h] = (s); \ | ||
1942 | } | ||
1943 | |||
1944 | static ulg deflate(void) | ||
1945 | { | ||
1946 | IPos hash_head; /* head of hash chain */ | ||
1947 | IPos prev_match; /* previous match */ | ||
1948 | int flush; /* set if current block must be flushed */ | ||
1949 | int match_available = 0; /* set if previous match exists */ | ||
1950 | unsigned match_length = MIN_MATCH - 1; /* length of best match */ | ||
1951 | |||
1952 | /* Process the input block. */ | ||
1953 | while (lookahead != 0) { | ||
1954 | /* Insert the string window[strstart .. strstart+2] in the | ||
1955 | * dictionary, and set hash_head to the head of the hash chain: | ||
1956 | */ | ||
1957 | INSERT_STRING(strstart, hash_head); | ||
1958 | |||
1959 | /* Find the longest match, discarding those <= prev_length. | ||
1960 | */ | ||
1961 | prev_length = match_length, prev_match = match_start; | ||
1962 | match_length = MIN_MATCH - 1; | ||
1963 | |||
1964 | if (hash_head != 0 && prev_length < max_lazy_match | ||
1965 | && strstart - hash_head <= MAX_DIST | ||
1966 | ) { | ||
1967 | /* To simplify the code, we prevent matches with the string | ||
1968 | * of window index 0 (in particular we have to avoid a match | ||
1969 | * of the string with itself at the start of the input file). | ||
1970 | */ | ||
1971 | match_length = longest_match(hash_head); | ||
1972 | /* longest_match() sets match_start */ | ||
1973 | if (match_length > lookahead) | ||
1974 | match_length = lookahead; | ||
1975 | |||
1976 | /* Ignore a length 3 match if it is too distant: */ | ||
1977 | if (match_length == MIN_MATCH && strstart - match_start > TOO_FAR) { | ||
1978 | /* If prev_match is also MIN_MATCH, match_start is garbage | ||
1979 | * but we will ignore the current match anyway. | ||
1980 | */ | ||
1981 | match_length--; | ||
1982 | } | ||
1983 | } | ||
1984 | /* If there was a match at the previous step and the current | ||
1985 | * match is not better, output the previous match: | ||
1986 | */ | ||
1987 | if (prev_length >= MIN_MATCH && match_length <= prev_length) { | ||
1988 | check_match(strstart - 1, prev_match, prev_length); | ||
1989 | flush = ct_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH); | ||
1990 | |||
1991 | /* Insert in hash table all strings up to the end of the match. | ||
1992 | * strstart-1 and strstart are already inserted. | ||
1993 | */ | ||
1994 | lookahead -= prev_length - 1; | ||
1995 | prev_length -= 2; | ||
1996 | do { | ||
1997 | strstart++; | ||
1998 | INSERT_STRING(strstart, hash_head); | ||
1999 | /* strstart never exceeds WSIZE-MAX_MATCH, so there are | ||
2000 | * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH | ||
2001 | * these bytes are garbage, but it does not matter since the | ||
2002 | * next lookahead bytes will always be emitted as literals. | ||
2003 | */ | ||
2004 | } while (--prev_length != 0); | ||
2005 | match_available = 0; | ||
2006 | match_length = MIN_MATCH - 1; | ||
2007 | strstart++; | ||
2008 | if (flush) { | ||
2009 | FLUSH_BLOCK(0); | ||
2010 | block_start = strstart; | ||
2011 | } | ||
2012 | } else if (match_available) { | ||
2013 | /* If there was no match at the previous position, output a | ||
2014 | * single literal. If there was a match but the current match | ||
2015 | * is longer, truncate the previous match to a single literal. | ||
2016 | */ | ||
2017 | Tracevv((stderr, "%c", window[strstart - 1])); | ||
2018 | if (ct_tally(0, window[strstart - 1])) { | ||
2019 | FLUSH_BLOCK(0); | ||
2020 | block_start = strstart; | ||
2021 | } | ||
2022 | strstart++; | ||
2023 | lookahead--; | ||
2024 | } else { | ||
2025 | /* There is no previous match to compare with, wait for | ||
2026 | * the next step to decide. | ||
2027 | */ | ||
2028 | match_available = 1; | ||
2029 | strstart++; | ||
2030 | lookahead--; | ||
2031 | } | ||
2032 | Assert(strstart <= isize && lookahead <= isize, "a bit too far"); | ||
2033 | |||
2034 | /* Make sure that we always have enough lookahead, except | ||
2035 | * at the end of the input file. We need MAX_MATCH bytes | ||
2036 | * for the next match, plus MIN_MATCH bytes to insert the | ||
2037 | * string following the next match. | ||
2038 | */ | ||
2039 | while (lookahead < MIN_LOOKAHEAD && !eofile) | ||
2040 | fill_window(); | ||
2058 | } | 2041 | } |
2042 | if (match_available) | ||
2043 | ct_tally(0, window[strstart - 1]); | ||
2044 | |||
2045 | return FLUSH_BLOCK(1); /* eof */ | ||
2059 | } | 2046 | } |
2060 | 2047 | ||
2048 | |||
2061 | /* =========================================================================== | 2049 | /* =========================================================================== |
2062 | * Deflate in to out. | 2050 | * Deflate in to out. |
2063 | * IN assertions: the input and output buffers are cleared. | 2051 | * IN assertions: the input and output buffers are cleared. |