Improve the discrimination between trailing garbage and bad gzip.

This proceeds to try to decode whatever follows the last gzip
member, and concludes that it is acceptable trailing garbage only
if it results in a data error without decompressing any data. This
commit also reduces the probability of a false-positive gzip header
detection.
This commit is contained in:
Mark Adler 2025-04-09 23:35:16 -07:00 committed by Mark Adler
parent 1ab1026a20
commit 95278e4ef9
3 changed files with 37 additions and 30 deletions

View File

@ -183,6 +183,7 @@ typedef struct {
unsigned char *out; /* output buffer (double-sized when reading) */
int direct; /* 0 if processing gzip, 1 if transparent */
/* just for reading */
int junk; /* -1 = start, 1 = junk candidate, 0 = in gzip */
int how; /* 0: get header, 1: copy, 2: decompress */
z_off64_t start; /* where the gzip data started, for rewinding */
int eof; /* true if end of input file reached */

View File

@ -72,6 +72,7 @@ local void gz_reset(gz_statep state) {
state->eof = 0; /* not at end of file */
state->past = 0; /* have not read past end yet */
state->how = LOOK; /* look for gzip header */
state->junk = -1; /* mark first member */
}
else /* for writing ... */
state->reset = 0; /* no deflateReset pending */

View File

@ -106,47 +106,42 @@ local int gz_look(gz_statep state) {
}
}
/* if transparent reading is disabled, simply read as gzip */
if (state->direct == -1) {
/* if transparent reading is disabled, which would only be at the start, or
if we're looking for a gzip member after the first one, which is not at
the start, then proceed directly to look for a gzip member next */
if (state->direct == -1 || state->junk == 0) {
inflateReset(strm);
state->how = GZIP;
state->junk = state->junk != -1;
state->direct = 0;
return 0;
}
/* get at least the magic bytes in the input buffer */
if (strm->avail_in < 2) {
if (gz_avail(state) == -1)
return -1;
if (strm->avail_in == 0)
return 0;
}
/* otherwise we're at the start with auto-detect -- we check to see if the
first four bytes could be a gzip header in order to decide whether or not
this will be a transparent read */
/* look for gzip magic bytes -- if there, do gzip decoding (note: there is
a logical dilemma here when considering the case of a partially written
gzip file, to wit, if a single 31 byte is written, then we cannot tell
whether this is a single-byte file, or just a partially written gzip
file -- for here we assume that if a gzip file is being written, then
the header will be written in a single operation, so that reading a
single byte is sufficient indication that it is not a gzip file) */
if (strm->avail_in > 1 &&
strm->next_in[0] == 31 && strm->next_in[1] == 139) {
/* load any header bytes into the input buffer -- if the input is empty,
then it's not an error as this is a transparent read of zero bytes */
if (gz_avail(state) == -1)
return -1;
if (strm->avail_in == 0)
return 0;
/* see if this is (likely) gzip input -- if the first four bytes are
consistent with a gzip header, then go look for the first gzip member,
otherwise proceed to copy the input transparently */
if (strm->avail_in > 3 &&
strm->next_in[0] == 31 && strm->next_in[1] == 139 &&
strm->next_in[2] == 8 && strm->next_in[3] < 32) {
inflateReset(strm);
state->how = GZIP;
state->junk = 1;
state->direct = 0;
return 0;
}
/* no gzip header -- if we were decoding gzip before, then this is trailing
garbage. Ignore the trailing garbage and finish. */
if (state->direct == 0) {
strm->avail_in = 0;
state->eof = 1;
state->x.have = 0;
return 0;
}
/* doing raw i/o, copy any leftover input to output -- this assumes that
/* doing raw i/o: copy any leftover input to output -- this assumes that
the output buffer is larger than the input buffer, which also assures
space for gzungetc() */
state->x.next = state->out;
@ -154,7 +149,6 @@ local int gz_look(gz_statep state) {
state->x.have = strm->avail_in;
strm->avail_in = 0;
state->how = COPY;
state->direct = 1;
return 0;
}
@ -181,6 +175,9 @@ local int gz_decomp(gz_statep state) {
/* decompress and handle errors */
ret = inflate(strm, Z_NO_FLUSH);
if (strm->avail_out < had)
/* any decompressed data marks this as a real gzip stream */
state->junk = 0;
if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
gz_error(state, Z_STREAM_ERROR,
"internal error: inflate stream corrupt");
@ -191,6 +188,12 @@ local int gz_decomp(gz_statep state) {
return -1;
}
if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
if (state->junk == 1) { /* trailing garbage is ok */
strm->avail_in = 0;
state->eof = 1;
state->how = LOOK;
break;
}
gz_error(state, Z_DATA_ERROR,
strm->msg == NULL ? "compressed data error" : strm->msg);
return -1;
@ -202,8 +205,10 @@ local int gz_decomp(gz_statep state) {
state->x.next = strm->next_out - state->x.have;
/* if the gzip stream completed successfully, look for another */
if (ret == Z_STREAM_END)
if (ret == Z_STREAM_END) {
state->junk = 0;
state->how = LOOK;
}
/* good decompression */
return 0;