aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mark Adler <madler@alumni.caltech.edu> 2025-04-09 23:35:16 -0700
committer: Mark Adler <git@madler.net> 2025-12-06 17:39:44 -0800
commit: 95278e4ef9de87294dea0c94184bd8fe4316c3a9 (patch)
tree: cea18c4601b460ef7ed5e63910e9cd2ae7c518ce
parent: 1ab1026a20282383d9cd2282f81461655bea4028 (diff)
download: zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.tar.gz
zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.tar.bz2
zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.zip
Improve the discrimination between trailing garbage and bad gzip.
This proceeds to try to decode whatever follows the last gzip member, and concludes that it is acceptable trailing garbage only if it results in a data error without decompressing any data. This commit also reduces the probability of a false-positive gzip header detection.
-rw-r--r-- gzguts.h 1
-rw-r--r-- gzlib.c 1
-rw-r--r-- gzread.c 65
3 files changed, 37 insertions, 30 deletions
diff --git a/gzguts.h b/gzguts.h
index 69c77eb..687f2ff 100644
--- a/gzguts.h
+++ b/gzguts.h
@@ -183,6 +183,7 @@ typedef struct {
183 unsigned char *out; /* output buffer (double-sized when reading) */ 183 unsigned char *out; /* output buffer (double-sized when reading) */
184 int direct; /* 0 if processing gzip, 1 if transparent */ 184 int direct; /* 0 if processing gzip, 1 if transparent */
185 /* just for reading */ 185 /* just for reading */
186 int junk; /* -1 = start, 1 = junk candidate, 0 = in gzip */
186 int how; /* 0: get header, 1: copy, 2: decompress */ 187 int how; /* 0: get header, 1: copy, 2: decompress */
187 z_off64_t start; /* where the gzip data started, for rewinding */ 188 z_off64_t start; /* where the gzip data started, for rewinding */
188 int eof; /* true if end of input file reached */ 189 int eof; /* true if end of input file reached */
diff --git a/gzlib.c b/gzlib.c
index 4c1aa83..79a7e97 100644
--- a/gzlib.c
+++ b/gzlib.c
@@ -72,6 +72,7 @@ local void gz_reset(gz_statep state) {
72 state->eof = 0; /* not at end of file */ 72 state->eof = 0; /* not at end of file */
73 state->past = 0; /* have not read past end yet */ 73 state->past = 0; /* have not read past end yet */
74 state->how = LOOK; /* look for gzip header */ 74 state->how = LOOK; /* look for gzip header */
75 state->junk = -1; /* mark first member */
75 } 76 }
76 else /* for writing ... */ 77 else /* for writing ... */
77 state->reset = 0; /* no deflateReset pending */ 78 state->reset = 0; /* no deflateReset pending */
diff --git a/gzread.c b/gzread.c
index 6fefe89..ac8be77 100644
--- a/gzread.c
+++ b/gzread.c
@@ -106,47 +106,42 @@ local int gz_look(gz_statep state) {
106 } 106 }
107 } 107 }
108 108
109 /* if transparent reading is disabled, simply read as gzip */ 109 /* if transparent reading is disabled, which would only be at the start, or
110 if (state->direct == -1) { 110 if we're looking for a gzip member after the first one, which is not at
111 the start, then proceed directly to look for a gzip member next */
112 if (state->direct == -1 || state->junk == 0) {
111 inflateReset(strm); 113 inflateReset(strm);
112 state->how = GZIP; 114 state->how = GZIP;
115 state->junk = state->junk != -1;
113 state->direct = 0; 116 state->direct = 0;
114 return 0; 117 return 0;
115 } 118 }
116 119
117 /* get at least the magic bytes in the input buffer */ 120 /* otherwise we're at the start with auto-detect -- we check to see if the
118 if (strm->avail_in < 2) { 121 first four bytes could be gzip header in order to decide whether or not
119 if (gz_avail(state) == -1) 122 this will be a transparent read */
120 return -1; 123
121 if (strm->avail_in == 0) 124 /* load any header bytes into the input buffer -- if the input is empty,
122 return 0; 125 then it's not an error as this is a transparent read of zero bytes */
123 } 126 if (gz_avail(state) == -1)
127 return -1;
128 if (strm->avail_in == 0)
129 return 0;
124 130
125 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is 131 /* see if this is (likely) gzip input -- if the first four bytes are
126 a logical dilemma here when considering the case of a partially written 132 consistent with a gzip header, then go look for the first gzip member,
127 gzip file, to wit, if a single 31 byte is written, then we cannot tell 133 otherwise proceed to copy the input transparently */
128 whether this is a single-byte file, or just a partially written gzip 134 if (strm->avail_in > 3 &&
129 file -- for here we assume that if a gzip file is being written, then 135 strm->next_in[0] == 31 && strm->next_in[1] == 139 &&
130 the header will be written in a single operation, so that reading a 136 strm->next_in[2] == 8 && strm->next_in[3] < 32) {
131 single byte is sufficient indication that it is not a gzip file) */
132 if (strm->avail_in > 1 &&
133 strm->next_in[0] == 31 && strm->next_in[1] == 139) {
134 inflateReset(strm); 137 inflateReset(strm);
135 state->how = GZIP; 138 state->how = GZIP;
139 state->junk = 1;
136 state->direct = 0; 140 state->direct = 0;
137 return 0; 141 return 0;
138 } 142 }
139 143
140 /* no gzip header -- if we were decoding gzip before, then this is trailing 144 /* doing raw i/o: copy any leftover input to output -- this assumes that
141 garbage. Ignore the trailing garbage and finish. */
142 if (state->direct == 0) {
143 strm->avail_in = 0;
144 state->eof = 1;
145 state->x.have = 0;
146 return 0;
147 }
148
149 /* doing raw i/o, copy any leftover input to output -- this assumes that
150 the output buffer is larger than the input buffer, which also assures 145 the output buffer is larger than the input buffer, which also assures
151 space for gzungetc() */ 146 space for gzungetc() */
152 state->x.next = state->out; 147 state->x.next = state->out;
@@ -154,7 +149,6 @@ local int gz_look(gz_statep state) {
154 state->x.have = strm->avail_in; 149 state->x.have = strm->avail_in;
155 strm->avail_in = 0; 150 strm->avail_in = 0;
156 state->how = COPY; 151 state->how = COPY;
157 state->direct = 1;
158 return 0; 152 return 0;
159} 153}
160 154
@@ -181,6 +175,9 @@ local int gz_decomp(gz_statep state) {
181 175
182 /* decompress and handle errors */ 176 /* decompress and handle errors */
183 ret = inflate(strm, Z_NO_FLUSH); 177 ret = inflate(strm, Z_NO_FLUSH);
178 if (strm->avail_out < had)
179 /* any decompressed data marks this as a real gzip stream */
180 state->junk = 0;
184 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { 181 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
185 gz_error(state, Z_STREAM_ERROR, 182 gz_error(state, Z_STREAM_ERROR,
186 "internal error: inflate stream corrupt"); 183 "internal error: inflate stream corrupt");
@@ -191,6 +188,12 @@ local int gz_decomp(gz_statep state) {
191 return -1; 188 return -1;
192 } 189 }
193 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ 190 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
191 if (state->junk == 1) { /* trailing garbage is ok */
192 strm->avail_in = 0;
193 state->eof = 1;
194 state->how = LOOK;
195 break;
196 }
194 gz_error(state, Z_DATA_ERROR, 197 gz_error(state, Z_DATA_ERROR,
195 strm->msg == NULL ? "compressed data error" : strm->msg); 198 strm->msg == NULL ? "compressed data error" : strm->msg);
196 return -1; 199 return -1;
@@ -202,8 +205,10 @@ local int gz_decomp(gz_statep state) {
202 state->x.next = strm->next_out - state->x.have; 205 state->x.next = strm->next_out - state->x.have;
203 206
204 /* if the gzip stream completed successfully, look for another */ 207 /* if the gzip stream completed successfully, look for another */
205 if (ret == Z_STREAM_END) 208 if (ret == Z_STREAM_END) {
209 state->junk = 0;
206 state->how = LOOK; 210 state->how = LOOK;
211 }
207 212
208 /* good decompression */ 213 /* good decompression */
209 return 0; 214 return 0;