Stefan Bodewig schrieb:
> I started to take some baby steps implementing it, in particular
>
> On 2009-02-13, Stefan Bodewig <[email protected]> wrote:
>
>> Currently I think the best default approach would be to use UTF-8 as
>> the default encoding and set the EFS bit since this will create
>> archives compatible with java.util.zip but has the additional benefit
>> of clearly stating it is using UTF-8.
>
> UTF-8 is now the default for ZipArchiveOutputStream and ZipFile, EFS
> support is not yet in.
>
> The InfoZIP extra fields are supported, but one has to write them
> manually right now. They should be read transparently by ZipFile but
> don't affect the file name or comment ATM.
>
> Wolfgang, you may notice a few minor tweaks to your original code. Do
> you happen to have stand-alone tests for the Unicode extra fields
> anywhere?
A rudimentary test is in my original patch as attached to SANDBOX-176.
I have refactored this test to the current SVN revision and attached it to this
mail. The test either needs to be refactored to use ZipFile, or
ZipArchiveInputStream has to be implemented ;-)
I also had to expose the ZipEncodingHelper functionality to the public in
order to compile the new test.
You also need the two zip files attached to SANDBOX-176 in src/test/resources
in order to run the interoperability test.
> I took the liberty to apply the same patches to Ant trunk as well.
Nice to see ;-)
Best regards,
Wolfgang
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.commons.compress.archivers;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import org.apache.commons.compress.AbstractTestCase;
import org.apache.commons.compress.archivers.zip.UnicodePathExtraField;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.archivers.zip.ZipExtraField;
/**
 * Round-trip tests for ZIP archives whose entry names contain non-ASCII
 * characters.
 *
 * <p>Each test writes an archive with three entries using a given file name
 * encoding, attaching a {@link UnicodePathExtraField} to every entry whose
 * name {@link ZipEncodingHelper#canEncodeName} reports as not encodable, and
 * then reads the archive back to check the extra fields.</p>
 */
public class TestUtf8ZipFiles extends AbstractTestCase
{
    private static final String UTF_8 = "utf-8";
    private static final String CP437 = "cp437";
    private static final String US_ASCII = "US-ASCII";

    /** Entry name consisting of ASCII characters only. */
    private static final String ASCII_TXT = "ascii.txt";
    /** Entry name starting with the euro sign (U+20AC). */
    private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
    /** Entry name containing U+00D6 and U+00E4. */
    private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";

    /**
     * Writes a ZIP archive with the three test entries to {@code file}.
     *
     * @param file     archive to create
     * @param encoding file name encoding to set on the output stream
     * @throws UnsupportedEncodingException if a charset name is not supported
     * @throws IOException if writing the archive fails
     */
    private void createTestFile(File file, String encoding) throws UnsupportedEncodingException, IOException {
        ZipArchiveOutputStream zos = new ZipArchiveOutputStream(file);
        try {
            zos.setEncoding(encoding);
            addEntry(zos, OIL_BARREL_TXT, "Hello, world!");
            addEntry(zos, EURO_FOR_DOLLAR_TXT, "Give me your money!");
            addEntry(zos, ASCII_TXT, "ascii");
        } finally {
            // Close even when writing fails so the file handle is not leaked.
            zos.close();
        }
    }

    /**
     * Adds one entry to the archive; a Unicode path extra field is attached
     * first when the stream's encoding cannot represent the entry name.
     *
     * @param zos     archive stream to write to
     * @param name    entry name
     * @param content entry content, written as US-ASCII bytes
     * @throws IOException if writing the entry fails
     */
    private void addEntry(ZipArchiveOutputStream zos, String name, String content) throws IOException {
        ZipArchiveEntry ze = new ZipArchiveEntry(name);
        if (!ZipEncodingHelper.canEncodeName(ze.getName(), zos.getEncoding())) {
            ze.addExtraField(new UnicodePathExtraField(ze.getName(), zos.getEncoding()));
        }
        zos.putNextEntry(ze);
        zos.write(content.getBytes(US_ASCII));
        zos.closeEntry();
    }

    /**
     * Looks up the Unicode path extra field of an entry.
     *
     * @param ze entry to inspect
     * @return the first extra field whose header id equals
     *         {@link UnicodePathExtraField#UPATH_ID}, or {@code null} if the
     *         entry has none
     */
    private UnicodePathExtraField findUniCodePath(ZipArchiveEntry ze) {
        ZipExtraField[] efs = ze.getExtraFields();
        for (int i = 0; i < efs.length; ++i) {
            if (efs[i].getHeaderId().equals(UnicodePathExtraField.UPATH_ID)) {
                return (UnicodePathExtraField) efs[i];
            }
        }
        return null;
    }

    /**
     * Asserts that the entry carries a Unicode path extra field matching the
     * one that would be built for {@code expectedName} with {@code encoding}.
     *
     * @param ze           entry read from the archive
     * @param expectedName the original (Unicode) entry name
     * @param encoding     file name encoding the archive was written with
     * @throws IOException if decoding the stored Unicode name fails
     */
    private void assertUnicodePath(ZipArchiveEntry ze, String expectedName, String encoding) throws IOException {
        UnicodePathExtraField actual = findUniCodePath(ze);
        UnicodePathExtraField expected = new UnicodePathExtraField(expectedName, encoding);
        assertNotNull(actual);
        assertEquals(expected.getNameCRC32(), actual.getNameCRC32());
        assertEquals(expectedName, new String(actual.getUnicodeName(), UTF_8));
    }

    /**
     * Reads every entry of {@code file} and verifies the Unicode path extra
     * field of each entry whose decoded name differs from the expected name.
     * Entries are recognised by their ASCII name suffix.
     *
     * @param file     archive to read
     * @param encoding file name encoding the archive was written with
     * @throws IOException if reading the archive fails
     */
    private void testFile(File file, String encoding) throws IOException {
        ZipArchiveInputStream zis = new ZipArchiveInputStream(new FileInputStream(file));
        try {
            for (ZipArchiveEntry ze = (ZipArchiveEntry) zis.getNextEntry();
                 ze != null;
                 ze = (ZipArchiveEntry) zis.getNextEntry()) {
                String name = ze.getName();
                if (name.endsWith("sser.txt")) {
                    // Only check the extra field when the plain name was not decoded correctly.
                    if (!OIL_BARREL_TXT.equals(name)) {
                        assertUnicodePath(ze, OIL_BARREL_TXT, encoding);
                    }
                } else if (name.endsWith("_for_Dollar.txt")) {
                    if (!EURO_FOR_DOLLAR_TXT.equals(name)) {
                        assertUnicodePath(ze, EURO_FOR_DOLLAR_TXT, encoding);
                    }
                } else if (!name.equals(ASCII_TXT)) {
                    throw new AssertionError("Urecognized ZIP entry with name ["+ze.getName()+"] found.");
                }
            }
        } finally {
            // Release the archive stream on success and on assertion failure alike.
            zis.close();
        }
    }

    /** Round trip with UTF-8 as the file name encoding. */
    public void testUtf8FileRoundtrip() throws IOException {
        File file = new File(/*dir,*/"target/utf8-test.zip");
        createTestFile(file, UTF_8);
        testFile(file, UTF_8);
    }

    /** Round trip with CP437 as the file name encoding. */
    public void testCP437FileRoundtrip() throws IOException {
        File file = new File(/*dir,*/"target/cp437-utf8x.zip");
        createTestFile(file, CP437);
        testFile(file, CP437);
    }

    /** Round trip with US-ASCII as the file name encoding. */
    public void testASCIIFileRoundtrip() throws IOException {
        // Uses its own archive path so it does not overwrite the CP437 test's file.
        File file = new File(/*dir,*/"target/ascii-utf8x.zip");
        createTestFile(file, US_ASCII);
        testFile(file, US_ASCII);
    }

    /*
     * Disabled: requires the two sample archives to be present as test
     * resources.
     *
    public void testUtf8Interoperability() throws IOException {
        File file1 = super.getFile("utf8-7zip-test.zip");
        File file2 = super.getFile("utf8-winzip-test.zip");
        testFile(file1,CP437);
        testFile(file2,CP437);
    }
    */
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]