Hi guys,
With test case [1] below, you can see that URLConnection.guessContentTypeFromStream
does not recognize XML content that starts with a UTF-8 or UTF-32 BOM. This problem can be solved with patch [2].
The patch is straightforward:
1. read more bytes (16 instead of 12), since a UTF-32 BOM followed by "<?x" takes 16 bytes
2. add XML type detection for content that starts with a UTF-8 or UTF-32 BOM.
[1] test case:
/**
 * Test driver: for each listed encoding, builds the bytes of "&lt;?xml"
 * prefixed with that encoding's byte order mark and prints the MIME type
 * that {@link URLConnection#guessContentTypeFromStream} reports for them.
 *
 * @param args unused
 * @throws IOException if building the bytes, probing or closing the stream fails
 */
public static void main(String[] args) throws IOException {
    final String xmlProlog = "<?xml";
    final String[] charsetNames = {
        "UTF-8", "UTF-16BE", "UTF-16LE", "UTF-32BE", "UTF-32LE"
    };
    for (int i = 0; i < charsetNames.length; i++) {
        final String charsetName = charsetNames[i];
        // Label each result with the encoding it belongs to.
        System.out.println(charsetName + ":");
        InputStream in = new ByteArrayInputStream(toBOMBytes(xmlProlog, charsetName));
        System.out.println(URLConnection.guessContentTypeFromStream(in));
        in.close();
    }
}
/**
 * Encodes {@code text} with the charset named {@code enc} and prefixes the
 * result with the byte order mark of that encoding.
 *
 * <p>A BOM is emitted only for "UTF-8", "UTF-16BE", "UTF-16LE", "UTF-32BE"
 * and "UTF-32LE"; for any other charset name the encoded text is returned
 * without a prefix.
 *
 * @param text the text to encode
 * @param enc  the charset name used both to pick the BOM and to encode the text
 * @return the BOM (if any) followed by the encoded text
 * @throws IOException if {@code enc} is not a supported charset name
 */
private static byte[] toBOMBytes(String text, String enc) throws IOException {
    final byte[] bom;
    if (enc.equals("UTF-8")) {
        bom = new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
    } else if (enc.equals("UTF-16BE")) {
        bom = new byte[] { (byte) 0xFE, (byte) 0xFF };
    } else if (enc.equals("UTF-16LE")) {
        bom = new byte[] { (byte) 0xFF, (byte) 0xFE };
    } else if (enc.equals("UTF-32BE")) {
        bom = new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF };
    } else if (enc.equals("UTF-32LE")) {
        bom = new byte[] { (byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00 };
    } else {
        // Unrecognized name: no BOM, the text is encoded as-is.
        bom = new byte[0];
    }

    final byte[] encoded = text.getBytes(enc);

    // Concatenate BOM and encoded text into a single array.
    final byte[] result = new byte[bom.length + encoded.length];
    System.arraycopy(bom, 0, result, 0, bom.length);
    System.arraycopy(encoded, 0, result, bom.length, encoded.length);
    return result;
}
[2] patch:
diff --git src/share/classes/java/net/URLConnection.java
src/share/classes/java/net/URLConnection.java
--- src/share/classes/java/net/URLConnection.java
+++ src/share/classes/java/net/URLConnection.java
@@ -1422,7 +1422,7 @@
if (!is.markSupported())
return null;
- is.mark(12);
+ is.mark(16);
int c1 = is.read();
int c2 = is.read();
int c3 = is.read();
@@ -1434,6 +1434,11 @@
int c9 = is.read();
int c10 = is.read();
int c11 = is.read();
+ int c12 = is.read();
+ int c13 = is.read();
+ int c14 = is.read();
+ int c15 = is.read();
+ int c16 = is.read();
is.reset();
if (c1 == 0xCA&& c2 == 0xFE&& c3 == 0xBA&& c4 == 0xBE) {
@@ -1461,6 +1466,13 @@
}
}
+ // UTF-8 encoding, with BOM
+ if (c1 == 0xef&& c2 == 0xbb&& c3 == 0xbf) {
+ if (c4 == '<'&& c5 == '?'&& c6 == 'x') {
+ return "application/xml";
+ }
+ }
+
// big and little endian UTF-16 encodings, with byte order mark
if (c1 == 0xfe&& c2 == 0xff) {
if (c3 == 0&& c4 == '<'&& c5 == 0&& c6 == '?'&&
@@ -1476,6 +1488,19 @@
}
}
+ // big and little endian UTF-32 encodings, with BOM
+ if (c1 == 0xff&& c2 == 0xfe&& c3 == 0x0&& c4 == 0x0) {
+ if (c5 == '<'&& c9 == '?'&& c13 == 'x') {
+ return "application/xml";
+ }
+ }
+
+ if (c1 == 0x0&& c2 == 0x0&& c3 == 0xfe&& c4 == 0xff) {
+ if (c8 == '<'&& c12 == '?'&& c16 == 'x') {
+ return "application/xml";
+ }
+ }
+
if (c1 == 'G'&& c2 == 'I'&& c3 == 'F'&& c4 == '8') {
return "image/gif";
}