Hello everyone. I have found the following bug in the current org.apache.tools.tar
package. A tar archive was created on one system (for example Windows XP,
default charset CP-1251). This tar archive contains TarEntries whose names use
national characters such as German umlauts. Then this archive file was
copied to a Linux system (default charset UTF-8). After unpacking the archive
file there, information was lost (the TarEntry names were lost), because entry
names are not written and read with one fixed charset. Below is a possible
solution for this problem: always encode and decode entry names as UTF-8.
Index: ant-core/src/main/org/apache/tools/tar/TarInputStream.java
===================================================================
--- ant-core/src/main/org/apache/tools/tar/TarInputStream.java (revision 776302)
+++ ant-core/src/main/org/apache/tools/tar/TarInputStream.java (working copy)
@@ -264,10 +264,10 @@
if (currEntry != null && currEntry.isGNULongNameEntry()) {
// read in the name
StringBuffer longName = new StringBuffer();
- byte[] buf = new byte[SMALL_BUFFER_SIZE];
+ byte[] buf = new byte[(int)currEntry.getSize()];
int length = 0;
while ((length = read(buf)) >= 0) {
- longName.append(new String(buf, 0, length));
+ longName.append(new String(buf, 0, length, "UTF-8"));
}
getNextEntry();
if (currEntry == null) {
Index: ant-core/src/main/org/apache/tools/tar/TarOutputStream.java
===================================================================
--- ant-core/src/main/org/apache/tools/tar/TarOutputStream.java (revision 776302)
+++ ant-core/src/main/org/apache/tools/tar/TarOutputStream.java (working copy)
@@ -179,9 +179,10 @@
TarEntry longLinkEntry = new
TarEntry(TarConstants.GNU_LONGLINK,
TarConstants.LF_GNUTYPE_LONGNAME);
- longLinkEntry.setSize(entry.getName().length() + 1);
+ byte[] nameBytes = entry.getName().getBytes("UTF-8");
+ longLinkEntry.setSize(nameBytes.length + 1);
putNextEntry(longLinkEntry);
- write(entry.getName().getBytes());
+ write(nameBytes);
write(0);
closeEntry();
} else if (longFileMode != LONGFILE_TRUNCATE) {
Index: ant-core/src/main/org/apache/tools/tar/TarUtils.java
===================================================================
--- ant-core/src/main/org/apache/tools/tar/TarUtils.java (revision 776302)
+++ ant-core/src/main/org/apache/tools/tar/TarUtils.java (working copy)
@@ -23,6 +23,8 @@
package org.apache.tools.tar;
+import java.io.UnsupportedEncodingException;
+
/**
* This class provides static utility methods to work with byte streams.
*
@@ -79,15 +81,21 @@
* @return The header's entry name.
*/
public static StringBuffer parseName(byte[] header, int offset, int
length) {
- StringBuffer result = new StringBuffer(length);
+ StringBuffer result = null;
+ int nameLen = length;
+
int end = offset + length;
-
for (int i = offset; i < end; ++i) {
- if (header[i] == 0) {
+ if(header[i] == 0) {
+ nameLen = i - offset;
break;
}
+ }
- result.append((char) header[i]);
+ try {
+ result = new StringBuffer(new String(header, offset, nameLen,
"UTF-8"));
+ } catch(UnsupportedEncodingException e) {
+ e.printStackTrace();
}
return result;
@@ -103,18 +111,23 @@
* @return The number of bytes in a header's entry name.
*/
public static int getNameBytes(StringBuffer name, byte[] buf, int offset,
int length) {
- int i;
+ int nameLength = -1;
+ try
+ {
+ byte nameBytes[] = name.toString().getBytes("UTF-8");
+ nameLength = nameBytes.length ;
+ System.arraycopy(nameBytes, 0, buf, offset, nameLength);
+ } catch(UnsupportedEncodingException e) {
+ e.printStackTrace();
+ }
- for (i = 0; i < length && i < name.length(); ++i) {
- buf[offset + i] = (byte) name.charAt(i);
- }
- for (; i < length; ++i) {
- buf[offset + i] = 0;
- }
+ for (; nameLength < length; ++nameLength) {
+ buf[offset + nameLength] = 0;
+ }
- return offset + length;
- }
+ return offset + length;
+ }
/**
* Parse an octal integer from a header buffer.
Best Regards.
Alexander Borisevich
BelDTS Minsk Belarus