Hi, at least our ZipFile and SevenZFile use RandomAccessFile to read an actual file, which means they cannot operate on file contents held in a byte array, in java.nio channels and buffers, or in anything other than an actual file, even if that source is randomly accessible. RandomAccessFile also cannot benefit from memory-mapped I/O.
RandomAccessFile is neither an interface nor an abstract class, and none of its constructors can be used by a subclass to make it access something other than a file. I have therefore used a design in which ZipFile and SevenZFile read from a general SeekableInputStream, and subclasses of it implement the various possibilities: SeekableFileInputStream is implemented on top of RandomAccessFile, and SeekableByteArrayInputStream on top of byte[]. It would of course also be possible to add a SeekableChannelInputStream, etc. The API is the subset of RandomAccessFile methods that we currently use. The patch is attached. Is everyone happy with this design? Can I commit it? Thank you, Damjan
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableInputStream.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableInputStream.java index 482bd7a..edd6dbd 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableInputStream.java @@ -19,13 +19,14 @@ package org.apache.commons.compress.archivers.sevenz; import java.io.IOException; import java.io.InputStream; -import java.io.RandomAccessFile; -class BoundedRandomAccessFileInputStream extends InputStream { - private final RandomAccessFile file; +import org.apache.commons.compress.utils.SeekableInputStream; + +class BoundedSeekableInputStream extends InputStream { + private final SeekableInputStream file; private long bytesRemaining; - public BoundedRandomAccessFileInputStream(final RandomAccessFile file, + public BoundedSeekableInputStream(final SeekableInputStream file, final long size) { this.file = file; this.bytesRemaining = size; diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java index 398783f..8c847bf 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -34,6 +34,8 @@ import org.apache.commons.compress.utils.BoundedInputStream; import org.apache.commons.compress.utils.CRC32VerifyingInputStream; import org.apache.commons.compress.utils.CharsetNames; import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.SeekableFileInputStream; +import org.apache.commons.compress.utils.SeekableInputStream; /** * Reads a 7z file, using RandomAccessFile under @@ -68,7 +70,7 @@ public class SevenZFile implements Closeable { static final int SIGNATURE_HEADER_SIZE = 32; private 
final String fileName; - private RandomAccessFile file; + private SeekableInputStream file; private final Archive archive; private int currentEntryIndex = -1; private int currentFolderIndex = -1; @@ -90,9 +92,44 @@ public class SevenZFile implements Closeable { * @throws IOException if reading the archive fails */ public SevenZFile(final File filename, final byte[] password) throws IOException { + this(new SeekableFileInputStream(new RandomAccessFile(filename, "r")), + filename.getAbsolutePath(), password); + } + + /** + * Reads a file as unecrypted 7z archive + * + * @param filename the file to read + * @throws IOException if reading the archive fails + */ + public SevenZFile(final File filename) throws IOException { + this(filename, null); + } + + /** + * Reads a SeekableInputStream as an unencrypted 7z archive. + * + * @param seekableInputStream the stream to read + * @throws IOException if reading the archive fails + */ + public SevenZFile(final SeekableInputStream seekableInputStream) throws IOException { + this(seekableInputStream, seekableInputStream.toString(), null); + } + + /** + * Reads a SeekableInputStream as 7z archive + * + * @param seekableInputStream the stream to read + * @param password optional password if the archive is encrypted - + * the byte array is supposed to be the UTF16-LE encoded + * representation of the password. 
+ * @throws IOException if reading the archive fails + */ + public SevenZFile(final SeekableInputStream seekableInputStream, + final String fileName, final byte[] password) throws IOException { boolean succeeded = false; - this.file = new RandomAccessFile(filename, "r"); - this.fileName = filename.getAbsolutePath(); + this.file = seekableInputStream; + this.fileName = fileName; try { archive = readHeaders(password); if (password != null) { @@ -108,16 +145,6 @@ public class SevenZFile implements Closeable { } } } - - /** - * Reads a file as unecrypted 7z archive - * - * @param filename the file to read - * @throws IOException if reading the archive fails - */ - public SevenZFile(final File filename) throws IOException { - this(filename, null); - } /** * Closes the archive. @@ -210,7 +237,7 @@ public class SevenZFile implements Closeable { DataInputStream dataInputStream = null; try { dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( - new BoundedRandomAccessFileInputStream(file, 20), 20, startHeaderCrc)); + new BoundedSeekableInputStream(file, 20), 20, startHeaderCrc)); startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); @@ -272,7 +299,7 @@ public class SevenZFile implements Closeable { 0; file.seek(folderOffset); - InputStream inputStreamStack = new BoundedRandomAccessFileInputStream(file, + InputStream inputStreamStack = new BoundedSeekableInputStream(file, archive.packSizes[firstPackStreamIndex]); for (final Coder coder : folder.getOrderedCoders()) { if (coder.numInStreams != 1 || coder.numOutStreams != 1) { @@ -853,7 +880,7 @@ public class SevenZFile implements Closeable { private InputStream buildDecoderStack(final Folder folder, final long folderOffset, final int firstPackStreamIndex, SevenZArchiveEntry entry) throws IOException { 
file.seek(folderOffset); - InputStream inputStreamStack = new BoundedRandomAccessFileInputStream(file, + InputStream inputStreamStack = new BoundedSeekableInputStream(file, archive.packSizes[firstPackStreamIndex]); LinkedList<SevenZMethodConfiguration> methods = new LinkedList<SevenZMethodConfiguration>(); for (final Coder coder : folder.getOrderedCoders()) { diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index e8bcc03..26ba6e5 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -17,6 +17,12 @@ */ package org.apache.commons.compress.archivers.zip; +import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; +import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; +import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; + import java.io.BufferedInputStream; import java.io.Closeable; import java.io.EOFException; @@ -37,12 +43,8 @@ import java.util.zip.InflaterInputStream; import java.util.zip.ZipException; import org.apache.commons.compress.utils.IOUtils; - -import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; -import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; -import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; -import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; -import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; +import org.apache.commons.compress.utils.SeekableFileInputStream; +import org.apache.commons.compress.utils.SeekableInputStream; /** * Replacement for <code>java.util.ZipFile</code>. 
@@ -124,7 +126,7 @@ public class ZipFile implements Closeable { /** * The actual data source. */ - private final RandomAccessFile archive; + private final SeekableInputStream archive; /** * Whether to look for and use Unicode extra fields. @@ -142,6 +144,7 @@ public class ZipFile implements Closeable { private final byte[] CFH_BUF = new byte[CFH_LEN]; private final byte[] SHORT_BUF = new byte[SHORT]; + /** * Opens the given file for reading, assuming "UTF8" for file names. * @@ -165,6 +168,16 @@ public class ZipFile implements Closeable { } /** + * Takes ownership of the given stream for reading, assuming "UTF8". + * + * @param seekableInputStream the stream to read from + * @throws IOException if an error occurs while reading the file. + */ + public ZipFile(SeekableInputStream seekableInputStream) throws IOException { + this(seekableInputStream, ZipEncodingHelper.UTF8); + } + + /** * Opens the given file for reading, assuming the specified * encoding for file names, scanning unicode extra fields. * @@ -193,6 +206,18 @@ public class ZipFile implements Closeable { } /** + * Takes ownership of the given stream for reading, assuming the specified + * encoding for file names and scanning for unicode extra fields. + * + * @param seekableInputStream the stream to read from + * @param encoding the encoding to use for file names, use null + * @throws IOException if an error occurs while reading the file. + */ + public ZipFile(SeekableInputStream seekableInputStream, String encoding) throws IOException { + this(seekableInputStream, encoding, true); + } + + /** * Opens the given file for reading, assuming the specified * encoding for file names. 
* @@ -206,11 +231,33 @@ public class ZipFile implements Closeable { */ public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) throws IOException { - this.archiveName = f.getAbsolutePath(); + this(new SeekableFileInputStream(new RandomAccessFile(f, "r")), f.getAbsolutePath(), + encoding, useUnicodeExtraFields); + } + + /** + * Takes ownership of the given stream for reading, assuming the specified + * encoding for file names and optionally scanning for unicode extra fields. + * + * @param seekableInputStream the stream to read from + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * @throws IOException + */ + public ZipFile(SeekableInputStream seekableInputStream, String encoding, + boolean useUnicodeExtraFields) throws IOException { + this(seekableInputStream, seekableInputStream.toString(), encoding, useUnicodeExtraFields); + } + + private ZipFile(SeekableInputStream seekableInputStream, + String archiveName, String encoding, boolean useUnicodeExtraFields) + throws IOException { + this.archiveName = archiveName; this.encoding = encoding; this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); this.useUnicodeExtraFields = useUnicodeExtraFields; - archive = new RandomAccessFile(f, "r"); + archive = seekableInputStream; boolean success = false; try { Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = @@ -225,6 +272,7 @@ public class ZipFile implements Closeable { } } + /** * The encoding to use for filenames and the file comment. *
--------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org For additional commands, e-mail: dev-h...@commons.apache.org