Uwe Voigt created COMPRESS-722:
----------------------------------
Summary: ZStandard compression does not work correctly in
ZipArchiveOutputStream
Key: COMPRESS-722
URL: https://issues.apache.org/jira/browse/COMPRESS-722
Project: Commons Compress
Issue Type: Bug
Components: Archivers, Compressors
Affects Versions: 1.28.0
Reporter: Uwe Voigt
Hi,
When writing a using ZSD compression this leads to corrupt archive.
The expectation is (correct me if I am wrong) that the following code should
produce functional archive:
{code:java}
@Test
public void doSave() throws Exception {
try (ZipArchiveOutputStream out = new ZipArchiveOutputStream(new
File("test-zstd.zip"))) {
ZipArchiveEntry e1 = new ZipArchiveEntry("one.txt");
e1.setMethod(ZipMethod.ZSTD.getCode());
out.putArchiveEntry(e1);
out.write("""
text file
with multiple line
""".getBytes());
out.closeArchiveEntry();
ZipArchiveEntry e2 = new ZipArchiveEntry("two.txt");
e2.setMethod(ZipMethod.ZSTD.getCode());
out.putArchiveEntry(e2);
out.write("""
another text file
some more line
""".getBytes());
out.closeArchiveEntry();
}
org.apache.commons.compress.archivers.zip.ZipFile zipFile =
org.apache.commons.compress.archivers.zip.ZipFile.builder().setFile("test-zstd.zip").get();
Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
ZipArchiveEntry lastEntry = null;
while (entries.hasMoreElements()) {
ZipArchiveEntry entry = entries.nextElement();
System.out.println(entry.getName() + " / " + entry.getSize() + " / " +
entry.getCompressedSize());
if (!entries.hasMoreElements())
lastEntry = entry;
}
try (InputStream in = new
ZstdCompressorInputStream(zipFile.getInputStream(lastEntry))) {
System.out.println(new String(in.readAllBytes()));
}
}
{code}
Instead, an exception is thrown:
{{}}
{code:java}
com.github.luben.zstd.ZstdIOException: Truncated source at
com.github.luben.zstd.ZstdInputStreamNoFinalizer.readInternal(ZstdInputStreamNoFinalizer.java:174)
at
com.github.luben.zstd.ZstdInputStreamNoFinalizer.read(ZstdInputStreamNoFinalizer.java:136)
at com.github.luben.zstd.ZstdInputStream.read(ZstdInputStream.java:103) at
org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream.read(ZstdCompressorInputStream.java:119)
at java.base/java.io.InputStream.readNBytes(InputStream.java:412) at
java.base/java.io.InputStream.readAllBytes(InputStream.java:349) at
zipeditor.Zip64Test.doSave(Zip64Test.java:123) ...{code}
The only working code would be this
{code:java}
publicstatic OutputStream noClose(OutputStream out) {
return new OutputStream() {
@Override
public void write(int b) throws IOException {
out.write(b);
}
@Override
public void write(byte[] b, int off, int len) throws IOException {
out.write(b, off, len);
}
@Override
public void flush() throws IOException {
out.flush();
}
@Override
public void close() throws IOException {
// do nothing
}
};
}
@Test
public void doSave() throws Exception {
try (ZipArchiveOutputStream out = new ZipArchiveOutputStream(new
File("test-zstd.zip"))) {
ZipArchiveEntry e1 = new ZipArchiveEntry("one.txt");
e1.setMethod(ZipMethod.ZSTD.getCode());
byte[] bytes = """
text file
with multiple line
""".getBytes();
e1.setSize(bytes.length);
out.putArchiveEntry(e1);
try (OutputStream zstdOutput =
ZstdCompressorOutputStream.builder().setOutputStream(noClose(out)).get()) {
zstdOutput.write(bytes);
zstdOutput.flush();
}
out.closeArchiveEntry();
ZipArchiveEntry e2 = new ZipArchiveEntry("two.txt");
e2.setMethod(ZipMethod.ZSTD.getCode());
String second = """
another text file
some more line
""";
e2.setSize(second.getBytes().length);
out.putArchiveEntry(e2);
try (OutputStream o2 =
ZstdCompressorOutputStream.builder().setOutputStream(noClose(out)).get()) {
o2.write(second.getBytes());
o2.flush();
}
out.closeArchiveEntry();
}
boolean jni = true;
org.apache.commons.compress.archivers.zip.ZipFile zipFile =
org.apache.commons.compress.archivers.zip.ZipFile
.builder().setZstdInputStreamFactory((inpStream) -> jni ? new
ZstdInputStreamNoFinalizer(inpStream) : new
io.airlift.compress.zstd.ZstdInputStream(inpStream))
.setFile("test-zstd.zip").get();
Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
ZipArchiveEntry lastEntry = null;
while (entries.hasMoreElements()) {
ZipArchiveEntry entry = entries.nextElement();
System.out.println(entry.getName() + " / " + entry.getSize() + " / " +
entry.getCompressedSize());
if (!entries.hasMoreElements())
lastEntry = entry;
}
InputStream in = zipFile.getInputStream(lastEntry);
System.out.println(new String(in.readAllBytes()));
zipFile.close();
}
{code}
{color:#000000}I think it should not be left to the caller of the API to create
such a lot of code just to workaround something that should be done by the
library.{color}
{color:#000000}Can you please investigate and hopefully fix that
behavior.{color}
{color:#000000}Best regards,{color}
{color:#000000}Uwe{color}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)