Stefan Bodewig schrieb: > I started to take some baby steps implementing it, in particular > > On 2009-02-13, Stefan Bodewig <bode...@apache.org> wrote: > >> Currently I think the best default approach would be to use UTF-8 as >> the default encoding and set the EFS bit since this will create >> archives compatible with java.util.zip but has the additional benefit >> of clearly stating it is using UTF-8. > > UTF-8 is now the default for ZipArchiveOutputStream and ZipFile, EFS > support is not yet in. > > The InfoZIP extra fields are supported, but one has to write them > manually right now. They should be read transparently by ZipFile but > don't affect the file name or comment ATM. > > Wolfgang, you may notice a few minor tweaks to your original code. Do > you happen to have stand-alone tests for the Unicode extra fields > anywhere?
A rudimentary test is in my original patch as attached to SANDBOX-176. I have refactored this test to the current SVN revision and attached it to this mail. The test needs to either be refactored to use ZipFile, or ZipArchiveInputStream has to be implemented ;-) I also had to expose the ZipEncodingHelper functionality to the public in order to compile the new test. You also need the two zip files attached to SANDBOX-176 in src/test/resources in order to run the interoperability test. > I took the liberty to apply the same patches to Ant trunk as well. Nice to see ;-) Best regards, Wolfgang
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.commons.compress.archivers;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

import org.apache.commons.compress.AbstractTestCase;
import org.apache.commons.compress.archivers.zip.UnicodePathExtraField;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.archivers.zip.ZipExtraField;

/**
 * Round-trip tests for ZIP entries whose names contain non-ASCII characters.
 *
 * <p>Each test writes an archive with three entries (two with non-ASCII
 * names, one plain ASCII) using a given archive encoding, reads it back, and
 * checks that every name is either read back unchanged or accompanied by a
 * Unicode path extra field carrying the expected name.</p>
 */
public class TestUtf8ZipFiles extends AbstractTestCase {

    private static final String UTF_8 = "utf-8";
    private static final String CP437 = "cp437";
    private static final String US_ASCII = "US-ASCII";

    private static final String ASCII_TXT = "ascii.txt";
    private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
    private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";

    /**
     * Writes the three-entry test archive to {@code file}.
     *
     * @param file     archive to create
     * @param encoding encoding set on the output stream
     * @throws IOException if the archive cannot be written
     */
    private void createTestFile(File file, String encoding) throws IOException {
        ZipArchiveOutputStream zos = new ZipArchiveOutputStream(file);
        try {
            zos.setEncoding(encoding);
            writeEntry(zos, OIL_BARREL_TXT, "Hello, world!");
            writeEntry(zos, EURO_FOR_DOLLAR_TXT, "Give me your money!");
            writeEntry(zos, ASCII_TXT, "ascii");
        } finally {
            // Close even when writing fails so the file handle is not leaked.
            zos.close();
        }
    }

    /**
     * Adds one entry to the archive, attaching a Unicode path extra field
     * when the stream's encoding cannot encode the entry name.
     *
     * @param zos     stream to append to
     * @param name    entry name
     * @param content ASCII-only entry content
     * @throws IOException if the entry cannot be written
     */
    private void writeEntry(ZipArchiveOutputStream zos, String name, String content)
        throws IOException {
        ZipArchiveEntry ze = new ZipArchiveEntry(name);
        if (!ZipEncodingHelper.canEncodeName(ze.getName(), zos.getEncoding())) {
            ze.addExtraField(new UnicodePathExtraField(ze.getName(), zos.getEncoding()));
        }
        zos.putNextEntry(ze);
        zos.write(content.getBytes(US_ASCII));
        zos.closeEntry();
    }

    /**
     * Looks up the Unicode path extra field of an entry.
     *
     * @param ze entry to inspect
     * @return the first extra field whose header id equals
     *         {@code UnicodePathExtraField.UPATH_ID}, or {@code null} if the
     *         entry has none
     */
    private UnicodePathExtraField findUniCodePath(ZipArchiveEntry ze) {
        ZipExtraField[] efs = ze.getExtraFields();
        for (int i = 0; i < efs.length; ++i) {
            if (efs[i].getHeaderId().equals(UnicodePathExtraField.UPATH_ID)) {
                return (UnicodePathExtraField) efs[i];
            }
        }
        return null;
    }

    /**
     * Reads the archive and verifies each entry name.
     *
     * @param file     archive to read
     * @param encoding encoding the archive was written with; used to build
     *                 the expected Unicode path extra field
     * @throws IOException if the archive cannot be read
     */
    private void testFile(File file, String encoding) throws IOException {
        ZipArchiveInputStream zis = new ZipArchiveInputStream(new FileInputStream(file));
        try {
            ZipArchiveEntry ze = (ZipArchiveEntry) zis.getNextEntry();
            while (ze != null) {
                String name = ze.getName();
                // Match on the ASCII-only tail: the non-ASCII prefix may have
                // been altered by the archive encoding.
                if (name.endsWith("sser.txt")) {
                    assertNameOrUnicodePath(ze, OIL_BARREL_TXT, encoding);
                } else if (name.endsWith("_for_Dollar.txt")) {
                    assertNameOrUnicodePath(ze, EURO_FOR_DOLLAR_TXT, encoding);
                } else if (!name.equals(ASCII_TXT)) {
                    throw new AssertionError(
                        "Unrecognized ZIP entry with name [" + name + "] found.");
                }
                ze = (ZipArchiveEntry) zis.getNextEntry();
            }
        } finally {
            zis.close();
        }
    }

    /**
     * Accepts an entry whose name equals {@code expectedName}; otherwise
     * requires a Unicode path extra field whose CRC and UTF-8 name match the
     * field that would be generated for {@code expectedName}.
     *
     * @param ze           entry read from the archive
     * @param expectedName original entry name
     * @param encoding     encoding used to build the expected extra field
     * @throws IOException if the expected field cannot be built or the
     *                     stored name cannot be decoded
     */
    private void assertNameOrUnicodePath(ZipArchiveEntry ze, String expectedName,
                                         String encoding) throws IOException {
        if (expectedName.equals(ze.getName())) {
            return;
        }
        UnicodePathExtraField actual = findUniCodePath(ze);
        assertNotNull(actual);
        UnicodePathExtraField expected = new UnicodePathExtraField(expectedName, encoding);
        assertEquals(expected.getNameCRC32(), actual.getNameCRC32());
        assertEquals(expectedName, new String(actual.getUnicodeName(), UTF_8));
    }

    public void testUtf8FileRoundtrip() throws IOException {
        File file = new File("target/utf8-test.zip");
        createTestFile(file, UTF_8);
        testFile(file, UTF_8);
    }

    public void testCP437FileRoundtrip() throws IOException {
        File file = new File("target/cp437-utf8x.zip");
        createTestFile(file, CP437);
        testFile(file, CP437);
    }

    public void testASCIIFileRoundtrip() throws IOException {
        // Uses its own file name; it previously shared the CP437 test's path.
        File file = new File("target/ascii-utf8x.zip");
        createTestFile(file, US_ASCII);
        testFile(file, US_ASCII);
    }

    /*
     * Disabled: needs the two sample archives attached to SANDBOX-176 to be
     * present in the test resources.
     *
    public void testUtf8Interoperability() throws IOException {
        File file1 = super.getFile("utf8-7zip-test.zip");
        File file2 = super.getFile("utf8-winzip-test.zip");
        testFile(file1,CP437);
        testFile(file2,CP437);
    }
    */
}
--------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org For additional commands, e-mail: dev-h...@commons.apache.org