Hi guys,

The test case [1] below shows that URLConnection.guessContentTypeFromStream does not recognize XML content that starts with a UTF-8 or UTF-32 BOM. The patch [2] fixes this problem.
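The patch is straightforward:
1. Read more bytes (16 instead of 11), because the UTF-32 check has to look past a 4-byte BOM at three 4-byte characters.
2. Add XML type detection for streams that start with a UTF-8 or UTF-32 BOM.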

[1] test case:
/**
 * Prints, for each BOM-prefixed encoding of an XML prolog, the encoding name
 * followed by the content type that
 * {@link URLConnection#guessContentTypeFromStream} reports for it.
 */
public static void main(String[] args) throws IOException {
        final String xmlProlog = "<?xml";
        final String[] charsetNames = {
            "UTF-8", "UTF-16BE", "UTF-16LE", "UTF-32BE", "UTF-32LE"
        };

        for (int i = 0; i < charsetNames.length; i++) {
            final String charsetName = charsetNames[i];
            System.out.println(charsetName + ":");

            // Feed the BOM-prefixed bytes to the content-type guesser.
            InputStream in =
                new ByteArrayInputStream(toBOMBytes(xmlProlog, charsetName));
            System.out.println(URLConnection.guessContentTypeFromStream(in));

            in.close();
        }
}

/**
 * Encodes {@code text} in the charset named {@code enc} and prepends the
 * byte order mark for that encoding.
 *
 * <p>A BOM is prepended only when {@code enc} is exactly one of
 * {@code "UTF-8"}, {@code "UTF-16BE"}, {@code "UTF-16LE"},
 * {@code "UTF-32BE"} or {@code "UTF-32LE"}; for any other name the result
 * is just the encoded text.
 *
 * @param text the text to encode
 * @param enc  the charset name passed to {@link String#getBytes(String)}
 * @return the BOM bytes (if any) followed by the encoded text
 * @throws IOException if {@code enc} is not a supported charset name
 */
private static byte[] toBOMBytes(String text, String enc) throws IOException {
        // Pick the byte order mark that matches the requested encoding.
        final byte[] bom;
        if (enc.equals("UTF-8")) {
            bom = new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
        } else if (enc.equals("UTF-16BE")) {
            bom = new byte[] { (byte) 0xFE, (byte) 0xFF };
        } else if (enc.equals("UTF-16LE")) {
            bom = new byte[] { (byte) 0xFF, (byte) 0xFE };
        } else if (enc.equals("UTF-32BE")) {
            bom = new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF };
        } else if (enc.equals("UTF-32LE")) {
            bom = new byte[] { (byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00 };
        } else {
            bom = new byte[0];
        }

        // Concatenate BOM and encoded text into a single array.
        final byte[] encoded = text.getBytes(enc);
        final byte[] result = new byte[bom.length + encoded.length];
        System.arraycopy(bom, 0, result, 0, bom.length);
        System.arraycopy(encoded, 0, result, bom.length, encoded.length);
        return result;
}

[2] patch:
diff --git src/share/classes/java/net/URLConnection.java 
src/share/classes/java/net/URLConnection.java
--- src/share/classes/java/net/URLConnection.java
+++ src/share/classes/java/net/URLConnection.java
@@ -1422,7 +1422,7 @@
         if (!is.markSupported())
             return null;

-        is.mark(12);
+        is.mark(16);
         int c1 = is.read();
         int c2 = is.read();
         int c3 = is.read();
@@ -1434,6 +1434,11 @@
         int c9 = is.read();
         int c10 = is.read();
         int c11 = is.read();
+       int c12 = is.read();
+       int c13 = is.read();
+       int c14 = is.read();
+       int c15 = is.read();
+       int c16 = is.read();
         is.reset();

         if (c1 == 0xCA&&  c2 == 0xFE&&  c3 == 0xBA&&  c4 == 0xBE) {
@@ -1461,6 +1466,13 @@
             }
         }

+       // UTF-8 encoding, with BOM (UTF-8 has no byte-order variants)
+       if (c1 == 0xef&&  c2 == 0xbb&&  c3 == 0xbf) {
+           if (c4 == '<'&&  c5 == '?'&&  c6 == 'x') {
+               return "application/xml";
+           }
+       }
+
         // big and little endian UTF-16 encodings, with byte order mark
         if (c1 == 0xfe&&  c2 == 0xff) {
             if (c3 == 0&&  c4 == '<'&&  c5 == 0&&  c6 == '?'&&
@@ -1476,6 +1488,19 @@
             }
         }

+       // big and little endian UTF-32 encodings, with BOM
+       if (c1 == 0xff&&  c2 == 0xfe&&  c3 == 0x0&&  c4 == 0x0) {
+           if (c5 == '<'&&  c9 == '?'&&  c13 == 'x') {
+               return "application/xml";
+           }
+       }
+
+       if (c1 == 0x0&&  c2 == 0x0&&  c3 == 0xfe&&  c4 == 0xff) {
+           if (c8 == '<'&&  c12 == '?'&&  c16 == 'x') {
+               return "application/xml";
+           }
+       }
+
         if (c1 == 'G'&&  c2 == 'I'&&  c3 == 'F'&&  c4 == '8') {
             return "image/gif";
         }



Reply via email to