Stefan Bodewig schrieb:
> I started to take some baby steps implementing it, in particular
> 
> On 2009-02-13, Stefan Bodewig <bode...@apache.org> wrote:
> 
>> Currently I think the best default approach would be to use UTF-8 as
>> the default encoding and set the EFS bit since this will create
>> archives compatible with java.util.zip but has the additional benefit
>> of clearly stating it is using UTF-8.
> 
> UTF-8 is now the default for ZipArchiveOutputStream and ZipFile, EFS
> support is not yet in.
> 
> The InfoZIP extra fields are supported, but one has to write them
> manually right now.  They should be read transparently by ZipFile but
> don't affect the file name or comment ATM.
> 
> Wolfgang, you may notice a few minor tweaks to your original code.  Do
> you happen to have stand-alone tests for the Unicode extra fields
> anywhere?

A rudimentary test is in my original patch as attached to SANDBOX-176.
I have refactored this test to the current SVN revision and attached it to this
mail. Either the test needs to be refactored to use ZipFile, or
ZipArchiveInputStream has to be implemented ;-)

  I also had to expose the ZipEncodingHelper functionality to the public in
order to compile the new test.

  You also need the two zip files attached to SANDBOX-176 in src/test/resources
in order to run the interoperability test.

> I took the liberty to apply the same patches to Ant trunk as well.

Nice to see ;-)

  Best regards,

     Wolfgang
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

package org.apache.commons.compress.archivers;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

import org.apache.commons.compress.AbstractTestCase;
import org.apache.commons.compress.archivers.zip.UnicodePathExtraField;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.archivers.zip.ZipExtraField;

/**
 * Round-trip tests for ZIP archives whose entry names contain non-ASCII
 * characters.
 *
 * <p>Each test writes an archive with three entries (one name with umlauts,
 * one with the euro sign, one plain ASCII) using a given archive encoding,
 * reads it back and checks that every entry either reports the expected name
 * directly or carries a {@link UnicodePathExtraField} describing it.</p>
 */
public class TestUtf8ZipFiles extends AbstractTestCase
{

    private static final String UTF_8 = "utf-8";
    private static final String CP437 = "cp437";
    private static final String US_ASCII = "US-ASCII";
    private static final String ASCII_TXT = "ascii.txt";
    private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
    private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";

    /**
     * Writes the three-entry test archive to {@code file}.
     *
     * @param file     archive to create
     * @param encoding archive encoding handed to the output stream
     * @throws UnsupportedEncodingException if an encoding name is not supported
     * @throws IOException if writing the archive fails
     */
    private void createTestFile(File file, String encoding) throws UnsupportedEncodingException, IOException {

        ZipArchiveOutputStream zos = new ZipArchiveOutputStream(file);

        // Close in finally so a failing write does not leak the file handle.
        try {
            zos.setEncoding(encoding);

            writeEntry(zos, OIL_BARREL_TXT, "Hello, world!");
            writeEntry(zos, EURO_FOR_DOLLAR_TXT, "Give me your money!");
            writeEntry(zos, ASCII_TXT, "ascii");
        } finally {
            zos.close();
        }
    }

    /**
     * Adds a single entry with the given name and ASCII content to the archive.
     * A Unicode path extra field is attached when the stream's encoding cannot
     * encode the entry name.
     *
     * @param zos     archive stream to write to
     * @param name    entry name
     * @param content entry content, written as US-ASCII bytes
     * @throws IOException if writing the entry fails
     */
    private void writeEntry(ZipArchiveOutputStream zos, String name, String content) throws IOException {

        ZipArchiveEntry ze = new ZipArchiveEntry(name);

        if (!ZipEncodingHelper.canEncodeName(ze.getName(), zos.getEncoding())) {
            ze.addExtraField(new UnicodePathExtraField(ze.getName(), zos.getEncoding()));
        }

        zos.putNextEntry(ze);
        zos.write(content.getBytes(US_ASCII));
        zos.closeEntry();
    }

    /**
     * Looks up the Unicode path extra field of an entry.
     *
     * @param ze entry to inspect
     * @return the first extra field whose header id equals
     *         {@link UnicodePathExtraField#UPATH_ID}, or {@code null} if the
     *         entry has none
     */
    private UnicodePathExtraField findUniCodePath(ZipArchiveEntry ze) {

        ZipExtraField[] efs = ze.getExtraFields();

        for (int i = 0; i < efs.length; ++i) {
            if (efs[i].getHeaderId().equals(UnicodePathExtraField.UPATH_ID)) {
                return (UnicodePathExtraField) efs[i];
            }
        }
        return null;
    }

    /**
     * Reads the archive back and verifies every entry.
     *
     * <p>Entries are recognised by the ASCII-only tail of their name, because
     * the non-ASCII part may not come back unchanged. Any entry that is none
     * of the three expected ones fails the test.</p>
     *
     * @param file     archive to read
     * @param encoding encoding the archive was written with; used to build the
     *                 expected Unicode path extra field
     * @throws IOException if reading the archive fails
     */
    private void testFile(File file, String encoding) throws IOException {

        ZipArchiveInputStream zis = new ZipArchiveInputStream(new FileInputStream(file));

        // Close in finally so a failed assertion does not leave the file open.
        try {
            ZipArchiveEntry ze = (ZipArchiveEntry) zis.getNextEntry();

            while (ze != null) {

                String name = ze.getName();

                if (name.endsWith("sser.txt")) {
                    assertUnicodeName(OIL_BARREL_TXT, ze, encoding);
                }
                else if (name.endsWith("_for_Dollar.txt")) {
                    assertUnicodeName(EURO_FOR_DOLLAR_TXT, ze, encoding);
                }
                else if (!name.equals(ASCII_TXT)) {
                    throw new AssertionError("Urecognized ZIP entry with name ["+ze.getName()+"] found.");
                }

                ze = (ZipArchiveEntry) zis.getNextEntry();
            }
        } finally {
            zis.close();
        }
    }

    /**
     * Verifies that an entry represents {@code expectedName}.
     *
     * <p>If the entry already reports the expected name nothing more is
     * checked. Otherwise the entry must carry a Unicode path extra field whose
     * name CRC matches one built from the expected name and whose UTF-8 bytes
     * decode to the expected name.</p>
     *
     * @param expectedName name the entry was created with
     * @param ze           entry read back from the archive
     * @param encoding     archive encoding used to build the reference field
     * @throws IOException if an encoding name is not supported
     */
    private void assertUnicodeName(String expectedName, ZipArchiveEntry ze, String encoding) throws IOException {

        if (expectedName.equals(ze.getName())) {
            return;
        }

        UnicodePathExtraField ucpf = findUniCodePath(ze);
        UnicodePathExtraField ucpe = new UnicodePathExtraField(expectedName, encoding);

        assertNotNull(ucpf);
        assertEquals(ucpe.getNameCRC32(), ucpf.getNameCRC32());
        assertEquals(expectedName, new String(ucpf.getUnicodeName(), UTF_8));
    }

    /** Round trip with UTF-8 as the archive encoding. */
    public void testUtf8FileRoundtrip() throws IOException {

        File file = new File(/*dir,*/"target/utf8-test.zip");

        createTestFile(file, UTF_8);

        testFile(file, UTF_8);
    }

    /** Round trip with CP437 as the archive encoding. */
    public void testCP437FileRoundtrip() throws IOException {

        File file = new File(/*dir,*/"target/cp437-utf8x.zip");

        createTestFile(file, CP437);

        testFile(file, CP437);
    }

    /** Round trip with US-ASCII as the archive encoding. */
    public void testASCIIFileRoundtrip() throws IOException {

        // Own output file; previously this reused the CP437 test's path.
        File file = new File(/*dir,*/"target/ascii-utf8x.zip");

        createTestFile(file, US_ASCII);

        testFile(file, US_ASCII);
    }

    /*
     * Disabled: reads the archives utf8-7zip-test.zip and
     * utf8-winzip-test.zip via getFile(), so it can only run once those
     * files are available to the test.
     *
    public void testUtf8Interoperability() throws IOException {
        File file1 = super.getFile("utf8-7zip-test.zip");
        File file2 = super.getFile("utf8-winzip-test.zip");

        testFile(file1,CP437);
        testFile(file2,CP437);
        
    }
    */
}

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org
For additional commands, e-mail: dev-h...@commons.apache.org

Reply via email to