summaryrefslogtreecommitdiff
path: root/archival
diff options
context:
space:
mode:
Diffstat (limited to 'archival')
-rw-r--r--archival/gzip.c694
1 files changed, 341 insertions, 353 deletions
diff --git a/archival/gzip.c b/archival/gzip.c
index c882a4a2a..034bda005 100644
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -201,7 +201,8 @@ typedef unsigned IPos;
201 array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); \ 201 array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); \
202} 202}
203 203
204#define FREE(array) { \ 204#define FREE(array) \
205{ \
205 free(array); \ 206 free(array); \
206 array = NULL; \ 207 array = NULL; \
207} 208}
@@ -272,21 +273,6 @@ enum {
272}; 273};
273 274
274 275
275/* ===========================================================================
276 */
277static void fill_window(void);
278
279static int longest_match(IPos cur_match);
280
281#ifdef DEBUG
282static void check_match(IPos start, IPos match, int length);
283#endif
284
285
286/* from trees.c */
287static int ct_tally(int dist, int lc);
288static ulg flush_block(char *buf, ulg stored_len, int eof);
289
290/* global buffers */ 276/* global buffers */
291 277
292/* To save memory for 16 bit systems, some arrays are overlaid between 278/* To save memory for 16 bit systems, some arrays are overlaid between
@@ -586,6 +572,65 @@ static void copy_block(char *buf, unsigned len, int header)
586 572
587 573
588/* =========================================================================== 574/* ===========================================================================
575 * Fill the window when the lookahead becomes insufficient.
576 * Updates strstart and lookahead, and sets eofile if end of input file.
577 * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
578 * OUT assertions: at least one byte has been read, or eofile is set;
579 * file reads are performed for at least two bytes (required for the
580 * translate_eol option).
581 */
582static void fill_window(void)
583{
584 unsigned n, m;
585 unsigned more = WINDOW_SIZE - lookahead - strstart;
586 /* Amount of free space at the end of the window. */
587
588 /* If the window is almost full and there is insufficient lookahead,
589 * move the upper half to the lower one to make room in the upper half.
590 */
591 if (more == (unsigned) -1) {
592 /* Very unlikely, but possible on 16 bit machine if strstart == 0
593 * and lookahead == 1 (input done one byte at time)
594 */
595 more--;
596 } else if (strstart >= WSIZE + MAX_DIST) {
597 /* By the IN assertion, the window is not empty so we can't confuse
598 * more == 0 with more == 64K on a 16 bit machine.
599 */
600 Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM");
601
602 memcpy(window, window + WSIZE, WSIZE);
603 match_start -= WSIZE;
604 strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
605
606 block_start -= WSIZE;
607
608 for (n = 0; n < HASH_SIZE; n++) {
609 m = head[n];
610 head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
611 }
612 for (n = 0; n < WSIZE; n++) {
613 m = prev[n];
614 prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
615 /* If n is not on any hash chain, prev[n] is garbage but
616 * its value will never be used.
617 */
618 }
619 more += WSIZE;
620 }
621 /* At this point, more >= 2 */
622 if (!eofile) {
623 n = file_read(window + strstart + lookahead, more);
624 if (n == 0 || n == (unsigned) -1) {
625 eofile = 1;
626 } else {
627 lookahead += n;
628 }
629 }
630}
631
632
633/* ===========================================================================
589 * Update a hash value with the given input byte 634 * Update a hash value with the given input byte
590 * IN assertion: all calls to to UPDATE_HASH are made with consecutive 635 * IN assertion: all calls to to UPDATE_HASH are made with consecutive
591 * input characters, so that a running hash key can be computed from the 636 * input characters, so that a running hash key can be computed from the
@@ -746,202 +791,6 @@ static void check_match(IPos start, IPos match, int length)
746#endif 791#endif
747 792
748 793
749/* ===========================================================================
750 * Fill the window when the lookahead becomes insufficient.
751 * Updates strstart and lookahead, and sets eofile if end of input file.
752 * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
753 * OUT assertions: at least one byte has been read, or eofile is set;
754 * file reads are performed for at least two bytes (required for the
755 * translate_eol option).
756 */
757static void fill_window(void)
758{
759 unsigned n, m;
760 unsigned more = WINDOW_SIZE - lookahead - strstart;
761 /* Amount of free space at the end of the window. */
762
763 /* If the window is almost full and there is insufficient lookahead,
764 * move the upper half to the lower one to make room in the upper half.
765 */
766 if (more == (unsigned) -1) {
767 /* Very unlikely, but possible on 16 bit machine if strstart == 0
768 * and lookahead == 1 (input done one byte at time)
769 */
770 more--;
771 } else if (strstart >= WSIZE + MAX_DIST) {
772 /* By the IN assertion, the window is not empty so we can't confuse
773 * more == 0 with more == 64K on a 16 bit machine.
774 */
775 Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM");
776
777 memcpy(window, window + WSIZE, WSIZE);
778 match_start -= WSIZE;
779 strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
780
781 block_start -= WSIZE;
782
783 for (n = 0; n < HASH_SIZE; n++) {
784 m = head[n];
785 head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
786 }
787 for (n = 0; n < WSIZE; n++) {
788 m = prev[n];
789 prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
790 /* If n is not on any hash chain, prev[n] is garbage but
791 * its value will never be used.
792 */
793 }
794 more += WSIZE;
795 }
796 /* At this point, more >= 2 */
797 if (!eofile) {
798 n = file_read(window + strstart + lookahead, more);
799 if (n == 0 || n == (unsigned) -1) {
800 eofile = 1;
801 } else {
802 lookahead += n;
803 }
804 }
805}
806
807
808/* ===========================================================================
809 * Same as above, but achieves better compression. We use a lazy
810 * evaluation for matches: a match is finally adopted only if there is
811 * no better match at the next window position.
812 *
813 * Processes a new input file and return its compressed length. Sets
814 * the compressed length, crc, deflate flags and internal file
815 * attributes.
816 */
817
818/* Flush the current block, with given end-of-file flag.
819 * IN assertion: strstart is set to the end of the current match. */
820#define FLUSH_BLOCK(eof) \
821 flush_block( \
822 block_start >= 0L \
823 ? (char*)&window[(unsigned)block_start] \
824 : (char*)NULL, \
825 (long)strstart - block_start, \
826 (eof) \
827 )
828
829/* Insert string s in the dictionary and set match_head to the previous head
830 * of the hash chain (the most recent string with same hash key). Return
831 * the previous length of the hash chain.
832 * IN assertion: all calls to to INSERT_STRING are made with consecutive
833 * input characters and the first MIN_MATCH bytes of s are valid
834 * (except for the last MIN_MATCH-1 bytes of the input file). */
835#define INSERT_STRING(s, match_head) \
836{ \
837 UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]); \
838 prev[(s) & WMASK] = match_head = head[ins_h]; \
839 head[ins_h] = (s); \
840}
841
842static ulg deflate(void)
843{
844 IPos hash_head; /* head of hash chain */
845 IPos prev_match; /* previous match */
846 int flush; /* set if current block must be flushed */
847 int match_available = 0; /* set if previous match exists */
848 unsigned match_length = MIN_MATCH - 1; /* length of best match */
849
850 /* Process the input block. */
851 while (lookahead != 0) {
852 /* Insert the string window[strstart .. strstart+2] in the
853 * dictionary, and set hash_head to the head of the hash chain:
854 */
855 INSERT_STRING(strstart, hash_head);
856
857 /* Find the longest match, discarding those <= prev_length.
858 */
859 prev_length = match_length, prev_match = match_start;
860 match_length = MIN_MATCH - 1;
861
862 if (hash_head != 0 && prev_length < max_lazy_match
863 && strstart - hash_head <= MAX_DIST
864 ) {
865 /* To simplify the code, we prevent matches with the string
866 * of window index 0 (in particular we have to avoid a match
867 * of the string with itself at the start of the input file).
868 */
869 match_length = longest_match(hash_head);
870 /* longest_match() sets match_start */
871 if (match_length > lookahead)
872 match_length = lookahead;
873
874 /* Ignore a length 3 match if it is too distant: */
875 if (match_length == MIN_MATCH && strstart - match_start > TOO_FAR) {
876 /* If prev_match is also MIN_MATCH, match_start is garbage
877 * but we will ignore the current match anyway.
878 */
879 match_length--;
880 }
881 }
882 /* If there was a match at the previous step and the current
883 * match is not better, output the previous match:
884 */
885 if (prev_length >= MIN_MATCH && match_length <= prev_length) {
886 check_match(strstart - 1, prev_match, prev_length);
887 flush = ct_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH);
888
889 /* Insert in hash table all strings up to the end of the match.
890 * strstart-1 and strstart are already inserted.
891 */
892 lookahead -= prev_length - 1;
893 prev_length -= 2;
894 do {
895 strstart++;
896 INSERT_STRING(strstart, hash_head);
897 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
898 * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
899 * these bytes are garbage, but it does not matter since the
900 * next lookahead bytes will always be emitted as literals.
901 */
902 } while (--prev_length != 0);
903 match_available = 0;
904 match_length = MIN_MATCH - 1;
905 strstart++;
906 if (flush) {
907 FLUSH_BLOCK(0);
908 block_start = strstart;
909 }
910 } else if (match_available) {
911 /* If there was no match at the previous position, output a
912 * single literal. If there was a match but the current match
913 * is longer, truncate the previous match to a single literal.
914 */
915 Tracevv((stderr, "%c", window[strstart - 1]));
916 if (ct_tally(0, window[strstart - 1])) {
917 FLUSH_BLOCK(0);
918 block_start = strstart;
919 }
920 strstart++;
921 lookahead--;
922 } else {
923 /* There is no previous match to compare with, wait for
924 * the next step to decide.
925 */
926 match_available = 1;
927 strstart++;
928 lookahead--;
929 }
930 Assert(strstart <= isize && lookahead <= isize, "a bit too far");
931
932 /* Make sure that we always have enough lookahead, except
933 * at the end of the input file. We need MAX_MATCH bytes
934 * for the next match, plus MIN_MATCH bytes to insert the
935 * string following the next match.
936 */
937 while (lookahead < MIN_LOOKAHEAD && !eofile)
938 fill_window();
939 }
940 if (match_available)
941 ct_tally(0, window[strstart - 1]);
942
943 return FLUSH_BLOCK(1); /* eof */
944}
945/* trees.c -- output deflated data using Huffman coding 794/* trees.c -- output deflated data using Huffman coding
946 * Copyright (C) 1992-1993 Jean-loup Gailly 795 * Copyright (C) 1992-1993 Jean-loup Gailly
947 * This is free software; you can redistribute it and/or modify it under the 796 * This is free software; you can redistribute it and/or modify it under the
@@ -991,10 +840,6 @@ static ulg deflate(void)
991 * file. Returns the total compressed length for the file so far. 840 * file. Returns the total compressed length for the file so far.
992 */ 841 */
993 842
994/* ===========================================================================
995 * Constants
996 */
997
998#define MAX_BITS 15 843#define MAX_BITS 15
999/* All codes must not exceed MAX_BITS bits */ 844/* All codes must not exceed MAX_BITS bits */
1000 845
@@ -1087,9 +932,7 @@ static const extra_bits_t extra_blbits[BL_CODES] = {
1087/* repeat a zero length 11-138 times (7 bits of repeat count) */ 932/* repeat a zero length 11-138 times (7 bits of repeat count) */
1088 933
1089/* =========================================================================== 934/* ===========================================================================
1090 * Local data 935*/
1091 */
1092
1093/* Data structure describing a single value and its code string. */ 936/* Data structure describing a single value and its code string. */
1094typedef struct ct_data { 937typedef struct ct_data {
1095 union { 938 union {
@@ -1231,8 +1074,6 @@ static int *file_method; /* pointer to DEFLATE or STORE */
1231 1074
1232/* =========================================================================== 1075/* ===========================================================================
1233 */ 1076 */
1234static void init_block(void);
1235static void gen_bitlen(tree_desc * desc);
1236static void gen_codes(ct_data * tree, int max_code); 1077static void gen_codes(ct_data * tree, int max_code);
1237static void build_tree(tree_desc * desc); 1078static void build_tree(tree_desc * desc);
1238static void scan_tree(ct_data * tree, int max_code); 1079static void scan_tree(ct_data * tree, int max_code);
@@ -1240,7 +1081,6 @@ static void send_tree(ct_data * tree, int max_code);
1240static int build_bl_tree(void); 1081static int build_bl_tree(void);
1241static void send_all_trees(int lcodes, int dcodes, int blcodes); 1082static void send_all_trees(int lcodes, int dcodes, int blcodes);
1242static void compress_block(ct_data * ltree, ct_data * dtree); 1083static void compress_block(ct_data * ltree, ct_data * dtree);
1243static void set_file_type(void);
1244 1084
1245 1085
1246#ifndef DEBUG 1086#ifndef DEBUG
@@ -1259,9 +1099,31 @@ static void set_file_type(void);
1259/* Mapping from a distance to a distance code. dist is the distance - 1 and 1099/* Mapping from a distance to a distance code. dist is the distance - 1 and
1260 * must not have side effects. dist_code[256] and dist_code[257] are never 1100 * must not have side effects. dist_code[256] and dist_code[257] are never
1261 * used. 1101 * used.
1102 * The arguments must not have side effects.
1103 */
1104
1105
1106/* ===========================================================================
1107 * Initialize a new block.
1262 */ 1108 */
1109static void init_block(void)
1110{
1111 int n; /* iterates over tree elements */
1263 1112
1264/* the arguments must not have side effects */ 1113 /* Initialize the trees. */
1114 for (n = 0; n < L_CODES; n++)
1115 dyn_ltree[n].Freq = 0;
1116 for (n = 0; n < D_CODES; n++)
1117 dyn_dtree[n].Freq = 0;
1118 for (n = 0; n < BL_CODES; n++)
1119 bl_tree[n].Freq = 0;
1120
1121 dyn_ltree[END_BLOCK].Freq = 1;
1122 opt_len = static_len = 0L;
1123 last_lit = last_dist = last_flags = 0;
1124 flags = 0;
1125 flag_bit = 1;
1126}
1265 1127
1266 1128
1267/* =========================================================================== 1129/* ===========================================================================
@@ -1289,7 +1151,7 @@ static void ct_init(ush * attr, int *methodp)
1289 for (code = 0; code < LENGTH_CODES - 1; code++) { 1151 for (code = 0; code < LENGTH_CODES - 1; code++) {
1290 base_length[code] = length; 1152 base_length[code] = length;
1291 for (n = 0; n < (1 << extra_lbits[code]); n++) { 1153 for (n = 0; n < (1 << extra_lbits[code]); n++) {
1292 length_code[length++] = (uch) code; 1154 length_code[length++] = code;
1293 } 1155 }
1294 } 1156 }
1295 Assert(length == 256, "ct_init: length != 256"); 1157 Assert(length == 256, "ct_init: length != 256");
@@ -1297,7 +1159,7 @@ static void ct_init(ush * attr, int *methodp)
1297 * in two different ways: code 284 + 5 bits or code 285, so we 1159 * in two different ways: code 284 + 5 bits or code 285, so we
1298 * overwrite length_code[255] to use the best encoding: 1160 * overwrite length_code[255] to use the best encoding:
1299 */ 1161 */
1300 length_code[length - 1] = (uch) code; 1162 length_code[length - 1] = code;
1301 1163
1302 /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ 1164 /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
1303 dist = 0; 1165 dist = 0;
@@ -1356,29 +1218,6 @@ static void ct_init(ush * attr, int *methodp)
1356 1218
1357 1219
1358/* =========================================================================== 1220/* ===========================================================================
1359 * Initialize a new block.
1360 */
1361static void init_block(void)
1362{
1363 int n; /* iterates over tree elements */
1364
1365 /* Initialize the trees. */
1366 for (n = 0; n < L_CODES; n++)
1367 dyn_ltree[n].Freq = 0;
1368 for (n = 0; n < D_CODES; n++)
1369 dyn_dtree[n].Freq = 0;
1370 for (n = 0; n < BL_CODES; n++)
1371 bl_tree[n].Freq = 0;
1372
1373 dyn_ltree[END_BLOCK].Freq = 1;
1374 opt_len = static_len = 0L;
1375 last_lit = last_dist = last_flags = 0;
1376 flags = 0;
1377 flag_bit = 1;
1378}
1379
1380
1381/* ===========================================================================
1382 * Restore the heap property by moving down the tree starting at node k, 1221 * Restore the heap property by moving down the tree starting at node k,
1383 * exchanging a node with the smallest of its two sons if necessary, stopping 1222 * exchanging a node with the smallest of its two sons if necessary, stopping
1384 * when the heap property is re-established (each father smaller than its 1223 * when the heap property is re-established (each father smaller than its
@@ -1470,7 +1309,7 @@ static void gen_bitlen(tree_desc * desc)
1470 opt_len += (ulg) f *(bits + xbits); 1309 opt_len += (ulg) f *(bits + xbits);
1471 1310
1472 if (stree) 1311 if (stree)
1473 static_len += (ulg) f *(stree[n].Len + xbits); 1312 static_len += (ulg) f * (stree[n].Len + xbits);
1474 } 1313 }
1475 if (overflow == 0) 1314 if (overflow == 0)
1476 return; 1315 return;
@@ -1513,6 +1352,7 @@ static void gen_bitlen(tree_desc * desc)
1513 } 1352 }
1514} 1353}
1515 1354
1355
1516/* =========================================================================== 1356/* ===========================================================================
1517 * Generate the codes for a given tree and bit counts (which need not be 1357 * Generate the codes for a given tree and bit counts (which need not be
1518 * optimal). 1358 * optimal).
@@ -1556,6 +1396,7 @@ static void gen_codes(ct_data * tree, int max_code)
1556 } 1396 }
1557} 1397}
1558 1398
1399
1559/* =========================================================================== 1400/* ===========================================================================
1560 * Construct one Huffman tree and assigns the code bit strings and lengths. 1401 * Construct one Huffman tree and assigns the code bit strings and lengths.
1561 * Update the total bit length for the current block. 1402 * Update the total bit length for the current block.
@@ -1662,6 +1503,7 @@ static void build_tree(tree_desc * desc)
1662 gen_codes((ct_data *) tree, max_code); 1503 gen_codes((ct_data *) tree, max_code);
1663} 1504}
1664 1505
1506
1665/* =========================================================================== 1507/* ===========================================================================
1666 * Scan a literal or distance tree to determine the frequencies of the codes 1508 * Scan a literal or distance tree to determine the frequencies of the codes
1667 * in the bit length tree. Updates opt_len to take into account the repeat 1509 * in the bit length tree. Updates opt_len to take into account the repeat
@@ -1715,6 +1557,7 @@ static void scan_tree(ct_data * tree, int max_code)
1715 } 1557 }
1716} 1558}
1717 1559
1560
1718/* =========================================================================== 1561/* ===========================================================================
1719 * Send a literal or distance tree in compressed form, using the codes in 1562 * Send a literal or distance tree in compressed form, using the codes in
1720 * bl_tree. 1563 * bl_tree.
@@ -1772,6 +1615,7 @@ static void send_tree(ct_data * tree, int max_code)
1772 } 1615 }
1773} 1616}
1774 1617
1618
1775/* =========================================================================== 1619/* ===========================================================================
1776 * Construct the Huffman tree for the bit lengths and return the index in 1620 * Construct the Huffman tree for the bit lengths and return the index in
1777 * bl_order of the last bit length code to send. 1621 * bl_order of the last bit length code to send.
@@ -1805,6 +1649,7 @@ static int build_bl_tree(void)
1805 return max_blindex; 1649 return max_blindex;
1806} 1650}
1807 1651
1652
1808/* =========================================================================== 1653/* ===========================================================================
1809 * Send the header for a block using dynamic Huffman trees: the counts, the 1654 * Send the header for a block using dynamic Huffman trees: the counts, the
1810 * lengths of the bit length codes, the literal tree and the distance tree. 1655 * lengths of the bit length codes, the literal tree and the distance tree.
@@ -1834,100 +1679,32 @@ static void send_all_trees(int lcodes, int dcodes, int blcodes)
1834 Tracev((stderr, "\ndist tree: sent %ld", bits_sent)); 1679 Tracev((stderr, "\ndist tree: sent %ld", bits_sent));
1835} 1680}
1836 1681
1682
1837/* =========================================================================== 1683/* ===========================================================================
1838 * Determine the best encoding for the current block: dynamic trees, static 1684 * Set the file type to ASCII or BINARY, using a crude approximation:
1839 * trees or store, and output the encoded block to the zip file. This function 1685 * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
1840 * returns the total compressed length for the file so far. 1686 * IN assertion: the fields freq of dyn_ltree are set and the total of all
1687 * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
1841 */ 1688 */
1842static ulg flush_block(char *buf, ulg stored_len, int eof) 1689static void set_file_type(void)
1843{ 1690{
1844 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ 1691 int n = 0;
1845 int max_blindex; /* index of last bit length code of non zero freq */ 1692 unsigned ascii_freq = 0;
1846 1693 unsigned bin_freq = 0;
1847 flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */
1848
1849 /* Check if the file is ascii or binary */
1850 if (*file_type == (ush) UNKNOWN)
1851 set_file_type();
1852
1853 /* Construct the literal and distance trees */
1854 build_tree((tree_desc *) (&l_desc));
1855 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", opt_len, static_len));
1856
1857 build_tree((tree_desc *) (&d_desc));
1858 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", opt_len, static_len));
1859 /* At this point, opt_len and static_len are the total bit lengths of
1860 * the compressed block data, excluding the tree representations.
1861 */
1862
1863 /* Build the bit length tree for the above two trees, and get the index
1864 * in bl_order of the last bit length code to send.
1865 */
1866 max_blindex = build_bl_tree();
1867
1868 /* Determine the best encoding. Compute first the block length in bytes */
1869 opt_lenb = (opt_len + 3 + 7) >> 3;
1870 static_lenb = (static_len + 3 + 7) >> 3;
1871
1872 Trace((stderr,
1873 "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ",
1874 opt_lenb, opt_len, static_lenb, static_len, stored_len,
1875 last_lit, last_dist));
1876
1877 if (static_lenb <= opt_lenb)
1878 opt_lenb = static_lenb;
1879
1880 /* If compression failed and this is the first and last block,
1881 * and if the zip file can be seeked (to rewrite the local header),
1882 * the whole file is transformed into a stored file:
1883 */
1884 if (stored_len <= opt_lenb && eof && compressed_len == 0L && seekable()) {
1885 /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
1886 if (buf == NULL)
1887 bb_error_msg("block vanished");
1888
1889 copy_block(buf, (unsigned) stored_len, 0); /* without header */
1890 compressed_len = stored_len << 3;
1891 *file_method = STORED;
1892
1893 } else if (stored_len + 4 <= opt_lenb && buf != (char *) 0) {
1894 /* 4: two words for the lengths */
1895 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
1896 * Otherwise we can't have processed more than WSIZE input bytes since
1897 * the last block flush, because compression would have been
1898 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
1899 * transform a block into a stored block.
1900 */
1901 send_bits((STORED_BLOCK << 1) + eof, 3); /* send block type */
1902 compressed_len = (compressed_len + 3 + 7) & ~7L;
1903 compressed_len += (stored_len + 4) << 3;
1904
1905 copy_block(buf, (unsigned) stored_len, 1); /* with header */
1906
1907 } else if (static_lenb == opt_lenb) {
1908 send_bits((STATIC_TREES << 1) + eof, 3);
1909 compress_block((ct_data *) static_ltree, (ct_data *) static_dtree);
1910 compressed_len += 3 + static_len;
1911 } else {
1912 send_bits((DYN_TREES << 1) + eof, 3);
1913 send_all_trees(l_desc.max_code + 1, d_desc.max_code + 1,
1914 max_blindex + 1);
1915 compress_block((ct_data *) dyn_ltree, (ct_data *) dyn_dtree);
1916 compressed_len += 3 + opt_len;
1917 }
1918 Assert(compressed_len == bits_sent, "bad compressed size");
1919 init_block();
1920 1694
1921 if (eof) { 1695 while (n < 7)
1922 bi_windup(); 1696 bin_freq += dyn_ltree[n++].Freq;
1923 compressed_len += 7; /* align on byte boundary */ 1697 while (n < 128)
1698 ascii_freq += dyn_ltree[n++].Freq;
1699 while (n < LITERALS)
1700 bin_freq += dyn_ltree[n++].Freq;
1701 *file_type = (bin_freq > (ascii_freq >> 2)) ? BINARY : ASCII;
1702 if (*file_type == BINARY && translate_eol) {
1703 bb_error_msg("-l used on binary file");
1924 } 1704 }
1925 Tracev((stderr, "\ncomprlen %lu(%lu) ", compressed_len >> 3,
1926 compressed_len - 7 * eof));
1927
1928 return compressed_len >> 3;
1929} 1705}
1930 1706
1707
1931/* =========================================================================== 1708/* ===========================================================================
1932 * Save the match info and tally the frequency counts. Return true if 1709 * Save the match info and tally the frequency counts. Return true if
1933 * the current block must be flushed. 1710 * the current block must be flushed.
@@ -2034,30 +1811,241 @@ static void compress_block(ct_data * ltree, ct_data * dtree)
2034 SEND_CODE(END_BLOCK, ltree); 1811 SEND_CODE(END_BLOCK, ltree);
2035} 1812}
2036 1813
1814
2037/* =========================================================================== 1815/* ===========================================================================
2038 * Set the file type to ASCII or BINARY, using a crude approximation: 1816 * Determine the best encoding for the current block: dynamic trees, static
2039 * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. 1817 * trees or store, and output the encoded block to the zip file. This function
2040 * IN assertion: the fields freq of dyn_ltree are set and the total of all 1818 * returns the total compressed length for the file so far.
2041 * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
2042 */ 1819 */
2043static void set_file_type(void) 1820static ulg flush_block(char *buf, ulg stored_len, int eof)
2044{ 1821{
2045 int n = 0; 1822 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
2046 unsigned ascii_freq = 0; 1823 int max_blindex; /* index of last bit length code of non zero freq */
2047 unsigned bin_freq = 0;
2048 1824
2049 while (n < 7) 1825 flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */
2050 bin_freq += dyn_ltree[n++].Freq; 1826
2051 while (n < 128) 1827 /* Check if the file is ascii or binary */
2052 ascii_freq += dyn_ltree[n++].Freq; 1828 if (*file_type == (ush) UNKNOWN)
2053 while (n < LITERALS) 1829 set_file_type();
2054 bin_freq += dyn_ltree[n++].Freq; 1830
2055 *file_type = (bin_freq > (ascii_freq >> 2)) ? BINARY : ASCII; 1831 /* Construct the literal and distance trees */
2056 if (*file_type == BINARY && translate_eol) { 1832 build_tree((tree_desc *) (&l_desc));
2057 bb_error_msg("-l used on binary file"); 1833 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", opt_len, static_len));
1834
1835 build_tree((tree_desc *) (&d_desc));
1836 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", opt_len, static_len));
1837 /* At this point, opt_len and static_len are the total bit lengths of
1838 * the compressed block data, excluding the tree representations.
1839 */
1840
1841 /* Build the bit length tree for the above two trees, and get the index
1842 * in bl_order of the last bit length code to send.
1843 */
1844 max_blindex = build_bl_tree();
1845
1846 /* Determine the best encoding. Compute first the block length in bytes */
1847 opt_lenb = (opt_len + 3 + 7) >> 3;
1848 static_lenb = (static_len + 3 + 7) >> 3;
1849
1850 Trace((stderr,
1851 "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ",
1852 opt_lenb, opt_len, static_lenb, static_len, stored_len,
1853 last_lit, last_dist));
1854
1855 if (static_lenb <= opt_lenb)
1856 opt_lenb = static_lenb;
1857
1858 /* If compression failed and this is the first and last block,
1859 * and if the zip file can be seeked (to rewrite the local header),
1860 * the whole file is transformed into a stored file:
1861 */
1862 if (stored_len <= opt_lenb && eof && compressed_len == 0L && seekable()) {
1863 /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
1864 if (buf == NULL)
1865 bb_error_msg("block vanished");
1866
1867 copy_block(buf, (unsigned) stored_len, 0); /* without header */
1868 compressed_len = stored_len << 3;
1869 *file_method = STORED;
1870
1871 } else if (stored_len + 4 <= opt_lenb && buf != (char *) 0) {
1872 /* 4: two words for the lengths */
1873 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
1874 * Otherwise we can't have processed more than WSIZE input bytes since
1875 * the last block flush, because compression would have been
1876 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
1877 * transform a block into a stored block.
1878 */
1879 send_bits((STORED_BLOCK << 1) + eof, 3); /* send block type */
1880 compressed_len = (compressed_len + 3 + 7) & ~7L;
1881 compressed_len += (stored_len + 4) << 3;
1882
1883 copy_block(buf, (unsigned) stored_len, 1); /* with header */
1884
1885 } else if (static_lenb == opt_lenb) {
1886 send_bits((STATIC_TREES << 1) + eof, 3);
1887 compress_block((ct_data *) static_ltree, (ct_data *) static_dtree);
1888 compressed_len += 3 + static_len;
1889 } else {
1890 send_bits((DYN_TREES << 1) + eof, 3);
1891 send_all_trees(l_desc.max_code + 1, d_desc.max_code + 1,
1892 max_blindex + 1);
1893 compress_block((ct_data *) dyn_ltree, (ct_data *) dyn_dtree);
1894 compressed_len += 3 + opt_len;
1895 }
1896 Assert(compressed_len == bits_sent, "bad compressed size");
1897 init_block();
1898
1899 if (eof) {
1900 bi_windup();
1901 compressed_len += 7; /* align on byte boundary */
1902 }
1903 Tracev((stderr, "\ncomprlen %lu(%lu) ", compressed_len >> 3,
1904 compressed_len - 7 * eof));
1905
1906 return compressed_len >> 3;
1907}
1908
1909
1910/* ===========================================================================
1911 * Same as above, but achieves better compression. We use a lazy
1912 * evaluation for matches: a match is finally adopted only if there is
1913 * no better match at the next window position.
1914 *
1915 * Processes a new input file and return its compressed length. Sets
1916 * the compressed length, crc, deflate flags and internal file
1917 * attributes.
1918 */
1919
1920/* Flush the current block, with given end-of-file flag.
1921 * IN assertion: strstart is set to the end of the current match. */
1922#define FLUSH_BLOCK(eof) \
1923 flush_block( \
1924 block_start >= 0L \
1925 ? (char*)&window[(unsigned)block_start] \
1926 : (char*)NULL, \
1927 (long)strstart - block_start, \
1928 (eof) \
1929 )
1930
1931/* Insert string s in the dictionary and set match_head to the previous head
1932 * of the hash chain (the most recent string with same hash key). Return
1933 * the previous length of the hash chain.
1934 * IN assertion: all calls to to INSERT_STRING are made with consecutive
1935 * input characters and the first MIN_MATCH bytes of s are valid
1936 * (except for the last MIN_MATCH-1 bytes of the input file). */
1937#define INSERT_STRING(s, match_head) \
1938{ \
1939 UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]); \
1940 prev[(s) & WMASK] = match_head = head[ins_h]; \
1941 head[ins_h] = (s); \
1942}
1943
1944static ulg deflate(void)
1945{
1946 IPos hash_head; /* head of hash chain */
1947 IPos prev_match; /* previous match */
1948 int flush; /* set if current block must be flushed */
1949 int match_available = 0; /* set if previous match exists */
1950 unsigned match_length = MIN_MATCH - 1; /* length of best match */
1951
1952 /* Process the input block. */
1953 while (lookahead != 0) {
1954 /* Insert the string window[strstart .. strstart+2] in the
1955 * dictionary, and set hash_head to the head of the hash chain:
1956 */
1957 INSERT_STRING(strstart, hash_head);
1958
1959 /* Find the longest match, discarding those <= prev_length.
1960 */
1961 prev_length = match_length, prev_match = match_start;
1962 match_length = MIN_MATCH - 1;
1963
1964 if (hash_head != 0 && prev_length < max_lazy_match
1965 && strstart - hash_head <= MAX_DIST
1966 ) {
1967 /* To simplify the code, we prevent matches with the string
1968 * of window index 0 (in particular we have to avoid a match
1969 * of the string with itself at the start of the input file).
1970 */
1971 match_length = longest_match(hash_head);
1972 /* longest_match() sets match_start */
1973 if (match_length > lookahead)
1974 match_length = lookahead;
1975
1976 /* Ignore a length 3 match if it is too distant: */
1977 if (match_length == MIN_MATCH && strstart - match_start > TOO_FAR) {
1978 /* If prev_match is also MIN_MATCH, match_start is garbage
1979 * but we will ignore the current match anyway.
1980 */
1981 match_length--;
1982 }
1983 }
1984 /* If there was a match at the previous step and the current
1985 * match is not better, output the previous match:
1986 */
1987 if (prev_length >= MIN_MATCH && match_length <= prev_length) {
1988 check_match(strstart - 1, prev_match, prev_length);
1989 flush = ct_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH);
1990
1991 /* Insert in hash table all strings up to the end of the match.
1992 * strstart-1 and strstart are already inserted.
1993 */
1994 lookahead -= prev_length - 1;
1995 prev_length -= 2;
1996 do {
1997 strstart++;
1998 INSERT_STRING(strstart, hash_head);
1999 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
2000 * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
2001 * these bytes are garbage, but it does not matter since the
2002 * next lookahead bytes will always be emitted as literals.
2003 */
2004 } while (--prev_length != 0);
2005 match_available = 0;
2006 match_length = MIN_MATCH - 1;
2007 strstart++;
2008 if (flush) {
2009 FLUSH_BLOCK(0);
2010 block_start = strstart;
2011 }
2012 } else if (match_available) {
2013 /* If there was no match at the previous position, output a
2014 * single literal. If there was a match but the current match
2015 * is longer, truncate the previous match to a single literal.
2016 */
2017 Tracevv((stderr, "%c", window[strstart - 1]));
2018 if (ct_tally(0, window[strstart - 1])) {
2019 FLUSH_BLOCK(0);
2020 block_start = strstart;
2021 }
2022 strstart++;
2023 lookahead--;
2024 } else {
2025 /* There is no previous match to compare with, wait for
2026 * the next step to decide.
2027 */
2028 match_available = 1;
2029 strstart++;
2030 lookahead--;
2031 }
2032 Assert(strstart <= isize && lookahead <= isize, "a bit too far");
2033
2034 /* Make sure that we always have enough lookahead, except
2035 * at the end of the input file. We need MAX_MATCH bytes
2036 * for the next match, plus MIN_MATCH bytes to insert the
2037 * string following the next match.
2038 */
2039 while (lookahead < MIN_LOOKAHEAD && !eofile)
2040 fill_window();
2058 } 2041 }
2042 if (match_available)
2043 ct_tally(0, window[strstart - 1]);
2044
2045 return FLUSH_BLOCK(1); /* eof */
2059} 2046}
2060 2047
2048
2061/* =========================================================================== 2049/* ===========================================================================
2062 * Deflate in to out. 2050 * Deflate in to out.
2063 * IN assertions: the input and output buffers are cleared. 2051 * IN assertions: the input and output buffers are cleared.