On Mon, 15 Jul 2024 10:06:20 GMT, Jaikiran Pai <j...@openjdk.org> wrote:
>> Archie Cobbs has updated the pull request with a new target base due to a >> merge or a rebase. The pull request now contains 12 commits: >> >> - Bump @since from 23 to 24. >> - Merge branch 'master' into JDK-8322256 >> - Relabel "trailing garbage" as "extra bytes" to sound less accusatory. >> - Simplify code by eliminating an impossible case. >> - Field name change and Javadoc wording tweaks. >> - Merge branch 'master' into JDK-8322256 >> - Javadoc wording tweaks. >> - Merge branch 'master' into JDK-8322256 >> - Clarify exceptions: sometimes ZipException, sometimes EOFException. >> - Merge branch 'master' into JDK-8322256 >> - ... and 2 more: https://git.openjdk.org/jdk/compare/75dc2f85...f845a75b > > src/java.base/share/classes/java/util/zip/GZIPInputStream.java line 153: > >> 151: */ >> 152: public GZIPInputStream(InputStream in, int size, >> 153: boolean allowConcatenation, boolean ignoreExtraBytes) >> throws IOException { > > I haven't reviewed the javadoc changes. I will do that separately once we > settle down on the API and implementation changes. > As for this new constructor, I think the only new parameter we should > introduce is whether or not the underlying input stream `in` is > expected/allowed to have concatenated GZIP stream(s). So I think we should > remove the "ignoreExtraBytes" new parameters from this constructor and. > Additionally, I think the `allowConcatenation` should instead be named > `allowConcatenatedGZIPStream`: > > > public GZIPInputStream(InputStream in, int size, boolean > allowConcatenatedGZIPStream) throws IOException { Just to provide a more concrete input, here's a very minimal tested version of what I had in mind: --- a/src/java.base/share/classes/java/util/zip/GZIPInputStream.java +++ b/src/java.base/share/classes/java/util/zip/GZIPInputStream.java @@ -55,6 +55,8 @@ public class GZIPInputStream extends InflaterInputStream { private boolean closed = false; + private final boolean allowConcatenatedGZIPStream; + /** * Check to make sure that this stream has not been closed */ @@ -76,14 +78,7 @@ private void ensureOpen() throws IOException { * @throws IllegalArgumentException if {@code size <= 0} */ public GZIPInputStream(InputStream in, int size) throws IOException { - super(in, createInflater(in, size), size); - usesDefaultInflater = true; - try { - readHeader(in); - } catch (IOException ioe) { - this.inf.end(); - throw ioe; - } + this(in, size, true); } /* @@ -111,7 +106,28 @@ private static Inflater createInflater(InputStream in, int size) { * @throws IOException if an I/O error has occurred */ public GZIPInputStream(InputStream in) throws IOException { - this(in, 512); + this(in, 512, true); + } + + /** + * WIP + * @param in the input stream + * @param size the input buffer size + * @param allowConcatenatedGZIPStream true if the input stream is allowed to contain + * concatenated GZIP streams. false otherwise + * @throws IOException if an I/O error has occurred + */ + public GZIPInputStream(InputStream in, int size, boolean allowConcatenatedGZIPStream) + throws IOException { + super(in, createInflater(in, size), size); + this.allowConcatenatedGZIPStream = allowConcatenatedGZIPStream; + usesDefaultInflater = true; + try { + readHeader(in); + } catch (IOException ioe) { + this.inf.end(); + throw ioe; + } } /** @@ -150,10 +166,45 @@ public int read(byte[] buf, int off, int len) throws IOException { } int n = super.read(buf, off, len); if (n == -1) { - if (readTrailer()) + // reading of deflated data is now complete, we now read the GZIP stream trailer + readTrailer(); + if (!allowConcatenatedGZIPStream) { eos = true; - else + } else { + // This GZIPInputStream instance was created to allow potential + // concatenated GZIP stream, so we now try and read the next + // GZIP stream header, if any. + + // use any un-inflated remaining bytes from the stream + InputStream headerIS = in; + int remainingUnInflated = inf.getRemaining(); + if (remainingUnInflated > 0) { + headerIS = new SequenceInputStream( + new ByteArrayInputStream(this.buf, this.len - remainingUnInflated, + remainingUnInflated), + new FilterInputStream(in) { + public void close() throws IOException {} + }); + } + int numHeaderBytes; + try { + numHeaderBytes = readHeader(headerIS); // next GZIP stream header, if any + } catch (EOFException _) { + // TODO: somehow verify that this failed when reading the + // first byte of the header? + eos = true; + return n; // -1 + } + // reset the inflater to read the deflated content of the next GZIP stream + inf.reset(); + // adjust the input to the inflater correctly past the GZIP stream header + if (remainingUnInflated > numHeaderBytes) { + inf.setInput(this.buf, this.len - remainingUnInflated + numHeaderBytes, + remainingUnInflated - numHeaderBytes); + } + // read the next GZIP stream's deflated data return this.read(buf, off, len); + } } else { crc.update(buf, off, n); } @@ -242,12 +293,12 @@ private int readHeader(InputStream this_in) throws IOException { * reached, false if there are more (concatenated gzip * data set) */ - private boolean readTrailer() throws IOException { + private void readTrailer() throws IOException { InputStream in = this.in; - int n = inf.getRemaining(); - if (n > 0) { + int remainingUnInflated = inf.getRemaining(); + if (remainingUnInflated > 0) { in = new SequenceInputStream( - new ByteArrayInputStream(buf, len - n, n), + new ByteArrayInputStream(buf, len - remainingUnInflated, remainingUnInflated), new FilterInputStream(in) { public void close() throws IOException {} }); @@ -255,20 +306,15 @@ public void close() throws IOException {} // Uses left-to-right evaluation order if ((readUInt(in) != crc.getValue()) || // rfc1952; ISIZE is the input size modulo 2^32 - (readUInt(in) != (inf.getBytesWritten() & 0xffffffffL))) + (readUInt(in) != (inf.getBytesWritten() & 0xffffffffL))) { throw new ZipException("Corrupt GZIP trailer"); - - // try concatenated case - int m = 8; // this.trailer - try { - m += readHeader(in); // next.header - } catch (IOException ze) { - return true; // ignore any malformed, do nothing } + int numTrailerBytes = 8; inf.reset(); - if (n > m) - inf.setInput(buf, len - n + m, n - m); - return false; + if (remainingUnInflated > numTrailerBytes) { + inf.setInput(buf, len - remainingUnInflated + numTrailerBytes, + remainingUnInflated - numTrailerBytes); + } } /* It still has one or two unanswered questions that I haven't fully thought of, but I thought this diff might be useful while discussing these changes. ------------- PR Review Comment: https://git.openjdk.org/jdk/pull/18385#discussion_r1677807848