On Jun 5, 2012, at 20:20, sebb <seb...@gmail.com> wrote:

> On 5 June 2012 15:48, <ggreg...@apache.org> wrote:
>> Author: ggregory
>> Date: Tue Jun 5 14:48:01 2012
>> New Revision: 1346400
>>
>> URL: http://svn.apache.org/viewvc?rev=1346400&view=rev
>> Log:
>> [IO-320] Add XmlStreamReader support for UTF-32.
>> [IO-331] BOMInputStream wrongly detects UTF-32LE_BOM files as UTF-16LE_BOM files in method getBOM().
>
> Please try to keep commits to a single fix.

This *is* one fix. One JIRA issue is a different, lower-level expression of the other.

Gary

> >> >> Modified: >> commons/proper/io/trunk/src/changes/changes.xml >> >> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java >> >> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java >> >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java >> >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java >> >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java >> >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java >> >> Modified: commons/proper/io/trunk/src/changes/changes.xml >> URL: >> http://svn.apache.org/viewvc/commons/proper/io/trunk/src/changes/changes.xml?rev=1346400&r1=1346399&r2=1346400&view=diff >> ============================================================================== >> --- commons/proper/io/trunk/src/changes/changes.xml (original) >> +++ commons/proper/io/trunk/src/changes/changes.xml Tue Jun 5 14:48:01 2012 >> @@ -47,6 +47,12 @@ The <action> type attribute can be add,u >> <body> >> <!-- The release date is the date RC is cut --> >> <release version="2.4" date="2012-TDB-TDB" description=""> >> + <action issue="IO-320" dev="ggregory" type="add"> >> + Add XmlStreamReader support for UTF-32. >> + </action> >> + <action issue="IO-331" dev="ggregory" type="add"> >> + BOMInputStream wrongly detects UTF-32LE_BOM files as UTF-16LE_BOM >> files in method getBOM(). >> + </action> >> <action issue="IO-332" dev="ggregory" type="fix" due-to="liangly"> >> Improve tailer's reading performance. 
>> </action> >> >> Modified: >> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java?rev=1346400&r1=1346399&r2=1346400&view=diff >> ============================================================================== >> --- >> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java >> (original) >> +++ >> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java >> Tue Jun 5 14:48:01 2012 >> @@ -19,54 +19,66 @@ package org.apache.commons.io.input; >> import java.io.IOException; >> import java.io.InputStream; >> import java.util.Arrays; >> +import java.util.Comparator; >> import java.util.List; >> >> import org.apache.commons.io.ByteOrderMark; >> >> /** >> - * This class is used to wrap a stream that includes an encoded >> - * {@link ByteOrderMark} as its first bytes. >> - * >> - * This class detects these bytes and, if required, can automatically skip >> them >> - * and return the subsequent byte as the first byte in the stream. >> - * >> + * This class is used to wrap a stream that includes an encoded {@link >> ByteOrderMark} as its first bytes. >> + * >> + * This class detects these bytes and, if required, can automatically skip >> them and return the subsequent byte as the >> + * first byte in the stream. 
>> + * >> * The {@link ByteOrderMark} implementation has the following pre-defined >> BOMs: >> * <ul> >> - * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li> >> - * <li>UTF-16BE - {@link ByteOrderMark#UTF_16LE}</li> >> - * <li>UTF-16LE - {@link ByteOrderMark#UTF_16BE}</li> >> + * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li> >> + * <li>UTF-16BE - {@link ByteOrderMark#UTF_16LE}</li> >> + * <li>UTF-16LE - {@link ByteOrderMark#UTF_16BE}</li> >> + * <li>UTF-32BE - {@link ByteOrderMark#UTF_32LE}</li> >> + * <li>UTF-32LE - {@link ByteOrderMark#UTF_32BE}</li> >> * </ul> >> - * >> - * >> + * >> + * >> * <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3> >> + * >> * <pre> >> - * BOMInputStream bomIn = new BOMInputStream(in); >> - * if (bomIn.hasBOM()) { >> - * // has a UTF-8 BOM >> - * } >> + * BOMInputStream bomIn = new BOMInputStream(in); >> + * if (bomIn.hasBOM()) { >> + * // has a UTF-8 BOM >> + * } >> * </pre> >> - * >> + * >> * <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3> >> + * >> * <pre> >> - * boolean include = true; >> - * BOMInputStream bomIn = new BOMInputStream(in, include); >> - * if (bomIn.hasBOM()) { >> - * // has a UTF-8 BOM >> - * } >> + * boolean include = true; >> + * BOMInputStream bomIn = new BOMInputStream(in, include); >> + * if (bomIn.hasBOM()) { >> + * // has a UTF-8 BOM >> + * } >> * </pre> >> - * >> + * >> * <h3>Example 3 - Detect Multiple BOMs</h3> >> + * >> * <pre> >> - * BOMInputStream bomIn = new BOMInputStream(in, >> ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE); >> - * if (bomIn.hasBOM() == false) { >> - * // No BOM found >> - * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) { >> - * // has a UTF-16LE BOM >> - * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) { >> - * // has a UTF-16BE BOM >> - * } >> + * BOMInputStream bomIn = new BOMInputStream(in, >> + * ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, >> + * ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE >> + * ); >> + * if (bomIn.hasBOM() == false) { >> + * // No BOM 
found >> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) { >> + * // has a UTF-16LE BOM >> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) { >> + * // has a UTF-16BE BOM >> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32LE)) { >> + * // has a UTF-32LE BOM >> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32BE)) { >> + * // has a UTF-32BE BOM >> + * } >> * </pre> >> - * >> + * >> * @see org.apache.commons.io.ByteOrderMark >> * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - >> Byte Order Mark</a> >> * @version $Id$ >> @@ -74,6 +86,9 @@ import org.apache.commons.io.ByteOrderMa >> */ >> public class BOMInputStream extends ProxyInputStream { >> private final boolean include; >> + /** >> + * BOMs are sorted from longest to shortest. >> + */ >> private final List<ByteOrderMark> boms; >> private ByteOrderMark byteOrderMark; >> private int[] firstBytes; >> @@ -83,42 +98,66 @@ public class BOMInputStream extends Prox >> private boolean markedAtStart; >> >> /** >> - * Constructs a new BOM InputStream that excludes >> - * a {@link ByteOrderMark#UTF_8} BOM. >> - * @param delegate the InputStream to delegate to >> + * Constructs a new BOM InputStream that excludes a {@link >> ByteOrderMark#UTF_8} BOM. >> + * >> + * @param delegate >> + * the InputStream to delegate to >> */ >> public BOMInputStream(InputStream delegate) { >> this(delegate, false, ByteOrderMark.UTF_8); >> } >> >> /** >> - * Constructs a new BOM InputStream that detects a >> - * a {@link ByteOrderMark#UTF_8} and optionally includes it. >> - * @param delegate the InputStream to delegate to >> - * @param include true to include the UTF-8 BOM or >> - * false to exclude it >> + * Constructs a new BOM InputStream that detects a a {@link >> ByteOrderMark#UTF_8} and optionally includes it. 
>> + * >> + * @param delegate >> + * the InputStream to delegate to >> + * @param include >> + * true to include the UTF-8 BOM or false to exclude it >> */ >> public BOMInputStream(InputStream delegate, boolean include) { >> this(delegate, include, ByteOrderMark.UTF_8); >> } >> >> /** >> - * Constructs a new BOM InputStream that excludes >> - * the specified BOMs. >> - * @param delegate the InputStream to delegate to >> - * @param boms The BOMs to detect and exclude >> + * Constructs a new BOM InputStream that excludes the specified BOMs. >> + * >> + * @param delegate >> + * the InputStream to delegate to >> + * @param boms >> + * The BOMs to detect and exclude >> */ >> public BOMInputStream(InputStream delegate, ByteOrderMark... boms) { >> this(delegate, false, boms); >> } >> >> /** >> - * Constructs a new BOM InputStream that detects the >> - * specified BOMs and optionally includes them. >> - * @param delegate the InputStream to delegate to >> - * @param include true to include the specified BOMs or >> - * false to exclude them >> - * @param boms The BOMs to detect and optionally exclude >> + * Compares ByteOrderMark objects in descending length order. >> + */ >> + private static final Comparator<ByteOrderMark> >> ByteOrderMarkLengthComparator = new Comparator<ByteOrderMark>() { >> + >> + public int compare(ByteOrderMark bom1, ByteOrderMark bom2) { >> + int len1 = bom1.length(); >> + int len2 = bom2.length(); >> + if (len1 > len2) { >> + return -1; >> + } >> + if (len2 > len1) { >> + return 1; >> + } >> + return 0; >> + } >> + }; >> + >> + /** >> + * Constructs a new BOM InputStream that detects the specified BOMs and >> optionally includes them. >> + * >> + * @param delegate >> + * the InputStream to delegate to >> + * @param include >> + * true to include the specified BOMs or false to exclude >> them >> + * @param boms >> + * The BOMs to detect and optionally exclude >> */ >> public BOMInputStream(InputStream delegate, boolean include, >> ByteOrderMark... 
boms) { >> super(delegate); >> @@ -126,15 +165,18 @@ public class BOMInputStream extends Prox >> throw new IllegalArgumentException("No BOMs specified"); >> } >> this.include = include; >> + // Sort the BOMs to match the longest BOM first because some BOMs >> have the same starting two bytes. >> + Arrays.sort(boms, ByteOrderMarkLengthComparator); >> this.boms = Arrays.asList(boms); >> + >> } >> >> /** >> * Indicates whether the stream contains one of the specified BOMs. >> - * >> - * @return true if the stream has one of the specified BOMs, otherwise >> false >> - * if it does not >> - * @throws IOException if an error reading the first bytes of the >> stream occurs >> + * >> + * @return true if the stream has one of the specified BOMs, otherwise >> false if it does not >> + * @throws IOException >> + * if an error reading the first bytes of the stream occurs >> */ >> public boolean hasBOM() throws IOException { >> return getBOM() != null; >> @@ -142,13 +184,14 @@ public class BOMInputStream extends Prox >> >> /** >> * Indicates whether the stream contains the specified BOM. >> - * >> - * @param bom The BOM to check for >> - * @return true if the stream has the specified BOM, otherwise false >> - * if it does not >> - * @throws IllegalArgumentException if the BOM is not one the stream >> - * is configured to detect >> - * @throws IOException if an error reading the first bytes of the >> stream occurs >> + * >> + * @param bom >> + * The BOM to check for >> + * @return true if the stream has the specified BOM, otherwise false if >> it does not >> + * @throws IllegalArgumentException >> + * if the BOM is not one the stream is configured to detect >> + * @throws IOException >> + * if an error reading the first bytes of the stream occurs >> */ >> public boolean hasBOM(ByteOrderMark bom) throws IOException { >> if (!boms.contains(bom)) { >> @@ -159,31 +202,34 @@ public class BOMInputStream extends Prox >> >> /** >> * Return the BOM (Byte Order Mark). 
>> - * >> + * >> * @return The BOM or null if none >> - * @throws IOException if an error reading the first bytes of the >> stream occurs >> + * @throws IOException >> + * if an error reading the first bytes of the stream occurs >> */ >> public ByteOrderMark getBOM() throws IOException { >> if (firstBytes == null) { >> fbLength = 0; >> - int max = 0; >> - for (ByteOrderMark bom : boms) { >> - max = Math.max(max, bom.length()); >> - } >> - firstBytes = new int[max]; >> + // BOMs are sorted from longest to shortest >> + final int maxBomSize = boms.get(0).length(); >> + firstBytes = new int[maxBomSize]; >> + // Read first maxBomSize bytes >> for (int i = 0; i < firstBytes.length; i++) { >> firstBytes[i] = in.read(); >> fbLength++; >> if (firstBytes[i] < 0) { >> break; >> } >> - >> - byteOrderMark = find(); >> - if (byteOrderMark != null) { >> - if (!include) { >> + } >> + // match BOM in firstBytes >> + byteOrderMark = find(); >> + if (byteOrderMark != null) { >> + if (!include) { >> + if (byteOrderMark.length() < firstBytes.length) { >> + fbIndex = byteOrderMark.length(); >> + } else { >> fbLength = 0; >> } >> - break; >> } >> } >> } >> @@ -192,9 +238,10 @@ public class BOMInputStream extends Prox >> >> /** >> * Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}. >> - * >> + * >> * @return The BOM charset Name or null if no BOM found >> - * @throws IOException if an error reading the first bytes of the >> stream occurs >> + * @throws IOException >> + * if an error reading the first bytes of the stream occurs >> * >> */ >> public String getBOMCharsetName() throws IOException { >> @@ -203,12 +250,13 @@ public class BOMInputStream extends Prox >> } >> >> /** >> - * This method reads and either preserves or skips the first bytes in >> the >> - * stream. It behaves like the single-byte <code>read()</code> method, >> - * either returning a valid byte or -1 to indicate that the initial >> bytes >> - * have been processed already. 
>> + * This method reads and either preserves or skips the first bytes in >> the stream. It behaves like the single-byte >> + * <code>read()</code> method, either returning a valid byte or -1 to >> indicate that the initial bytes have been >> + * processed already. >> + * >> * @return the byte read (excluding BOM) or -1 if the end of stream >> - * @throws IOException if an I/O error occurs >> + * @throws IOException >> + * if an I/O error occurs >> */ >> private int readFirstBytes() throws IOException { >> getBOM(); >> @@ -217,7 +265,7 @@ public class BOMInputStream extends Prox >> >> /** >> * Find a BOM with the specified bytes. >> - * >> + * >> * @return The matched BOM or null if none matched >> */ >> private ByteOrderMark find() { >> @@ -231,14 +279,16 @@ public class BOMInputStream extends Prox >> >> /** >> * Check if the bytes match a BOM. >> - * >> - * @param bom The BOM >> + * >> + * @param bom >> + * The BOM >> * @return true if the bytes match the bom, otherwise false >> */ >> private boolean matches(ByteOrderMark bom) { >> - if (bom.length() != fbLength) { >> - return false; >> - } >> + // if (bom.length() != fbLength) { >> + // return false; >> + // } >> + // firstBytes may be bigger than the BOM bytes >> for (int i = 0; i < bom.length(); i++) { >> if (bom.get(i) != firstBytes[i]) { >> return false; >> @@ -247,15 +297,16 @@ public class BOMInputStream extends Prox >> return true; >> } >> >> - >> //---------------------------------------------------------------------------- >> - // Implementation of InputStream >> - >> //---------------------------------------------------------------------------- >> + // >> ---------------------------------------------------------------------------- >> + // Implementation of InputStream >> + // >> ---------------------------------------------------------------------------- >> >> /** >> - * Invokes the delegate's <code>read()</code> method, detecting and >> - * optionally skipping BOM. 
>> + * Invokes the delegate's <code>read()</code> method, detecting and >> optionally skipping BOM. >> + * >> * @return the byte read (excluding BOM) or -1 if the end of stream >> - * @throws IOException if an I/O error occurs >> + * @throws IOException >> + * if an I/O error occurs >> */ >> @Override >> public int read() throws IOException { >> @@ -264,13 +315,17 @@ public class BOMInputStream extends Prox >> } >> >> /** >> - * Invokes the delegate's <code>read(byte[], int, int)</code> method, >> detecting >> - * and optionally skipping BOM. >> - * @param buf the buffer to read the bytes into >> - * @param off The start offset >> - * @param len The number of bytes to read (excluding BOM) >> + * Invokes the delegate's <code>read(byte[], int, int)</code> method, >> detecting and optionally skipping BOM. >> + * >> + * @param buf >> + * the buffer to read the bytes into >> + * @param off >> + * The start offset >> + * @param len >> + * The number of bytes to read (excluding BOM) >> * @return the number of bytes read or -1 if the end of stream >> - * @throws IOException if an I/O error occurs >> + * @throws IOException >> + * if an I/O error occurs >> */ >> @Override >> public int read(byte[] buf, int off, int len) throws IOException { >> @@ -289,12 +344,13 @@ public class BOMInputStream extends Prox >> } >> >> /** >> - * Invokes the delegate's <code>read(byte[])</code> method, detecting >> and >> - * optionally skipping BOM. >> - * @param buf the buffer to read the bytes into >> - * @return the number of bytes read (excluding BOM) >> - * or -1 if the end of stream >> - * @throws IOException if an I/O error occurs >> + * Invokes the delegate's <code>read(byte[])</code> method, detecting >> and optionally skipping BOM. 
>> + * >> + * @param buf >> + * the buffer to read the bytes into >> + * @return the number of bytes read (excluding BOM) or -1 if the end of >> stream >> + * @throws IOException >> + * if an I/O error occurs >> */ >> @Override >> public int read(byte[] buf) throws IOException { >> @@ -303,7 +359,9 @@ public class BOMInputStream extends Prox >> >> /** >> * Invokes the delegate's <code>mark(int)</code> method. >> - * @param readlimit read ahead limit >> + * >> + * @param readlimit >> + * read ahead limit >> */ >> @Override >> public synchronized void mark(int readlimit) { >> @@ -314,7 +372,9 @@ public class BOMInputStream extends Prox >> >> /** >> * Invokes the delegate's <code>reset()</code> method. >> - * @throws IOException if an I/O error occurs >> + * >> + * @throws IOException >> + * if an I/O error occurs >> */ >> @Override >> public synchronized void reset() throws IOException { >> @@ -327,11 +387,13 @@ public class BOMInputStream extends Prox >> } >> >> /** >> - * Invokes the delegate's <code>skip(long)</code> method, detecting >> - * and optionallyskipping BOM. >> - * @param n the number of bytes to skip >> + * Invokes the delegate's <code>skip(long)</code> method, detecting and >> optionallyskipping BOM. 
>> + * >> + * @param n >> + * the number of bytes to skip >> * @return the number of bytes to skipped or -1 if the end of stream >> - * @throws IOException if an I/O error occurs >> + * @throws IOException >> + * if an I/O error occurs >> */ >> @Override >> public long skip(long n) throws IOException { >> >> Modified: >> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java?rev=1346400&r1=1346399&r2=1346400&view=diff >> ============================================================================== >> --- >> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java >> (original) >> +++ >> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java >> Tue Jun 5 14:48:01 2012 >> @@ -74,23 +74,36 @@ public class XmlStreamReader extends Rea >> >> private static final String UTF_16LE = "UTF-16LE"; >> >> + private static final String UTF_32BE = "UTF-32BE"; >> + >> + private static final String UTF_32LE = "UTF-32LE"; >> + >> private static final String UTF_16 = "UTF-16"; >> >> + private static final String UTF_32 = "UTF-32"; >> + >> private static final String EBCDIC = "CP1047"; >> >> private static final ByteOrderMark[] BOMS = new ByteOrderMark[] { >> ByteOrderMark.UTF_8, >> ByteOrderMark.UTF_16BE, >> - ByteOrderMark.UTF_16LE >> + ByteOrderMark.UTF_16LE, >> + ByteOrderMark.UTF_32BE, >> + ByteOrderMark.UTF_32LE >> }; >> + >> + // UTF_16LE and UTF_32LE have the same two starting BOM bytes. 
>> private static final ByteOrderMark[] XML_GUESS_BYTES = new >> ByteOrderMark[] { >> new ByteOrderMark(UTF_8, 0x3C, 0x3F, 0x78, 0x6D), >> new ByteOrderMark(UTF_16BE, 0x00, 0x3C, 0x00, 0x3F), >> new ByteOrderMark(UTF_16LE, 0x3C, 0x00, 0x3F, 0x00), >> + new ByteOrderMark(UTF_32BE, 0x00, 0x00, 0x00, 0x3C, >> + 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, >> 0x00, 0x6D), >> + new ByteOrderMark(UTF_32LE, 0x3C, 0x00, 0x00, 0x00, >> + 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, >> 0x00, 0x00), >> new ByteOrderMark(EBCDIC, 0x4C, 0x6F, 0xA7, 0x94) >> }; >> >> - >> private final Reader reader; >> >> private final String encoding; >> @@ -532,6 +545,19 @@ public class XmlStreamReader extends Rea >> return bomEnc; >> } >> >> + // BOM is UTF-32BE or UTF-32LE >> + if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) { >> + if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) { >> + String msg = MessageFormat.format(RAW_EX_1, new Object[] { >> bomEnc, xmlGuessEnc, xmlEnc }); >> + throw new XmlStreamReaderException(msg, bomEnc, >> xmlGuessEnc, xmlEnc); >> + } >> + if (xmlEnc != null && !xmlEnc.equals(UTF_32) && >> !xmlEnc.equals(bomEnc)) { >> + String msg = MessageFormat.format(RAW_EX_1, new Object[] { >> bomEnc, xmlGuessEnc, xmlEnc }); >> + throw new XmlStreamReaderException(msg, bomEnc, >> xmlGuessEnc, xmlEnc); >> + } >> + return bomEnc; >> + } >> + >> // BOM is something else >> String msg = MessageFormat.format(RAW_EX_2, new Object[] { bomEnc, >> xmlGuessEnc, xmlEnc }); >> throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); >> @@ -598,6 +624,24 @@ public class XmlStreamReader extends Rea >> throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, >> xmlGuessEnc, xmlEnc); >> } >> >> + // UTF-32BE or UTF-132E content type encoding >> + if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) { >> + if (bomEnc != null) { >> + String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, >> bomEnc, xmlGuessEnc, xmlEnc); >> + 
throw new XmlStreamReaderException(msg, cTMime, cTEnc, >> bomEnc, xmlGuessEnc, xmlEnc); >> + } >> + return cTEnc; >> + } >> + >> + // UTF-32 content type encoding >> + if (cTEnc.equals(UTF_32)) { >> + if (bomEnc != null && bomEnc.startsWith(UTF_32)) { >> + return bomEnc; >> + } >> + String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, >> bomEnc, xmlGuessEnc, xmlEnc); >> + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, >> xmlGuessEnc, xmlEnc); >> + } >> + >> return cTEnc; >> } >> >> >> Modified: >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff >> ============================================================================== >> --- >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java >> (original) >> +++ >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java >> Tue Jun 5 14:48:01 2012 >> @@ -31,7 +31,6 @@ import java.util.HashMap; >> import java.util.Map; >> >> import org.apache.commons.io.IOUtils; >> -import org.junit.Ignore; >> import org.junit.Test; >> >> /** >> @@ -96,13 +95,11 @@ public class XmlStreamReaderTest { >> } >> >> @Test >> - @Ignore >> public void testRawNoBomUtf32BE() throws Exception { >> _testRawNoBomValid("UTF-32BE"); >> } >> >> @Test >> - @Ignore >> public void testRawNoBomUtf32LE() throws Exception { >> _testRawNoBomValid("UTF-32LE"); >> } >> @@ -121,7 +118,7 @@ public class XmlStreamReaderTest { >> InputStream is = getXmlStream(encoding + "-bom", XML3, encoding, >> encoding); >> XmlStreamReader xmlReader = new XmlStreamReader(is, false); >> - if (!encoding.equals("UTF-16")) { >> + if (!encoding.equals("UTF-16") && !encoding.equals("UTF-32")) { >> assertEquals(xmlReader.getEncoding(), encoding); >> } else { >> 
assertEquals(xmlReader.getEncoding() >> @@ -135,7 +132,7 @@ public class XmlStreamReaderTest { >> try { >> XmlStreamReader xmlReader = new XmlStreamReader(is, false); >> String foundEnc = xmlReader.getEncoding(); >> - fail("It should have failed for BOM " + bomEnc + ", streamEnc " >> + fail("Expected IOException for BOM " + bomEnc + ", streamEnc " >> + streamEnc + " and prologEnc " + prologEnc + ": found " >> + foundEnc); >> } catch (IOException ex) { >> @@ -154,6 +151,9 @@ public class XmlStreamReaderTest { >> _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE"); >> _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE"); >> _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8"); >> + _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE"); >> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE"); >> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8"); >> } >> >> @Test >> @@ -168,114 +168,105 @@ public class XmlStreamReaderTest { >> } >> >> @Test >> - @Ignore >> public void testRawBomUtf32() throws Exception { >> _testRawBomValid("UTF-32BE"); >> _testRawBomValid("UTF-32LE"); >> _testRawBomValid("UTF-32"); >> - } >> + >> + _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE"); >> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE"); >> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8"); >> +} >> >> >> @Test >> public void testHttp() throws Exception { >> // niallp 2010-10-06 - remove following 2 tests - I reinstated >> - // checks for non-UTF-16 encodings (18 tests) and these failed >> - //_testHttpValid("application/xml", "no-bom", "US-ASCII", null); >> - //_testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null); >> + // checks for non-UTF-16 encodings (18 tests) and these failed >> + // _testHttpValid("application/xml", "no-bom", "US-ASCII", null); >> + // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null); >> _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null); >> 
_testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8"); >> - _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", >> "UTF-8", >> - null); >> - _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", >> - "UTF-8", null); >> - _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", >> "UTF-8", >> - null); >> - _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", >> "UTF-8", >> - "UTF-8"); >> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", >> - "UTF-16BE", null); >> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", >> - "UTF-16BE", "UTF-16"); >> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", >> - "UTF-16BE", "UTF-16BE"); >> - >> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", null); >> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", "UTF-16"); >> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", "UTF-16BE"); >> + _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", >> "UTF-8", null); >> + _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", >> "UTF-8", null); >> + _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", >> "UTF-8", null); >> + _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", >> "UTF-8", "UTF-8"); >> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", null); >> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16"); >> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16BE"); >> + >> + _testHttpInvalid("application/xml;charset=UTF-16BE", >> "UTF-16BE-bom", "UTF-16BE", null); >> + _testHttpInvalid("application/xml;charset=UTF-16BE", >> "UTF-16BE-bom", "UTF-16BE", "UTF-16"); >> + _testHttpInvalid("application/xml;charset=UTF-16BE", >> "UTF-16BE-bom", "UTF-16BE", "UTF-16BE"); >> + >> + 
_testHttpInvalid("application/xml;charset=UTF-32BE", >> "UTF-32BE-bom", "UTF-32BE", null); >> + _testHttpInvalid("application/xml;charset=UTF-32BE", >> "UTF-32BE-bom", "UTF-32BE", "UTF-32"); >> + _testHttpInvalid("application/xml;charset=UTF-32BE", >> "UTF-32BE-bom", "UTF-32BE", "UTF-32BE"); >> + >> _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", >> "US-ASCII"); >> - _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", >> "UTF-8", >> - "UTF-8"); >> - _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", >> - "UTF-16BE", "UTF-16BE"); >> + _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", >> "UTF-8", "UTF-8"); >> + _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", >> "UTF-16BE", "UTF-16BE"); >> + _testHttpInvalid("application/xml;charset=UTF-32", "UTF-32LE", >> "UTF-8", "UTF-8"); >> + _testHttpInvalid("application/xml;charset=UTF-32", "no-bom", >> "UTF-32BE", "UTF-32BE"); >> >> _testHttpValid("text/xml", "no-bom", "US-ASCII", null); >> _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", >> "UTF-8"); >> _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null); >> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", >> - null); >> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", >> - "UTF-16"); >> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", >> - "UTF-16BE"); >> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", null); >> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16"); >> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16BE"); >> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", >> "UTF-32BE", null); >> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", >> "UTF-32BE", "UTF-32"); >> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", >> "UTF-32BE", "UTF-32BE"); >> _testHttpValid("text/xml", 
"UTF-8-bom", "US-ASCII", null); >> >> - _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", >> "UTF-8", >> - null, null); >> - _testAlternateDefaultEncoding("application/xml", "no-bom", >> "US-ASCII", >> - null, "US-ASCII"); >> - _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", >> "UTF-8", >> - null, "UTF-8"); >> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", >> null, >> - null); >> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", >> null, >> - "US-ASCII"); >> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", >> null, >> - "UTF-8"); >> - >> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", null); >> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", "UTF-16"); >> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", "UTF-16BE"); >> - _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", >> - "UTF-16BE"); >> + _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", >> "UTF-8", null, null); >> + _testAlternateDefaultEncoding("application/xml", "no-bom", >> "US-ASCII", null, "US-ASCII"); >> + _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", >> "UTF-8", null, "UTF-8"); >> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", >> null, null); >> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", >> null, "US-ASCII"); >> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", >> null, "UTF-8"); >> + >> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> "UTF-16BE", null); >> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16"); >> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16BE"); >> + _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", >> "UTF-16BE"); >> _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", >> null); >> >> + 
_testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", >> "UTF-32BE", null); >> + _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", >> "UTF-32BE", "UTF-32"); >> + _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", >> "UTF-32BE", "UTF-32BE"); >> + _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", >> "UTF-32BE"); >> + _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", >> null); >> + >> _testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII"); >> - _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", >> - "UTF-8", "UTF-8"); >> - _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", >> null, >> - "UTF-8"); >> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", >> - null, "UTF-16BE"); >> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", >> - "UTF-16", "UTF-16"); >> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", >> - "UTF-16BE", "UTF-16BE"); >> + _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", >> "UTF-8", "UTF-8"); >> + _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", >> null, "UTF-8"); >> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", null, "UTF-16BE"); >> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16", "UTF-16"); >> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16BE", "UTF-16BE"); >> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", >> "UTF-32BE", null, "UTF-32BE"); >> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", >> "UTF-32BE", "UTF-32", "UTF-32"); >> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", >> "UTF-32BE", "UTF-32BE", "UTF-32BE"); >> _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, >> "US-ASCII"); >> >> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", null, 
"UTF-16BE"); >> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", "UTF-16", "UTF-16"); >> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> - "UTF-16BE", "UTF-16BE", "UTF-16BE"); >> - _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", >> - "UTF-16BE", "UTF-16BE"); >> - _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", >> null, >> - "UTF-16"); >> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> "UTF-16BE", null, "UTF-16BE"); >> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16", "UTF-16"); >> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", >> "UTF-16BE", "UTF-16BE", "UTF-16BE"); >> + _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", >> "UTF-16BE", "UTF-16BE"); >> + _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", >> null, "UTF-16"); >> + >> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", >> "UTF-32BE", null, "UTF-32BE"); >> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", >> "UTF-32BE", "UTF-32", "UTF-32"); >> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", >> "UTF-32BE", "UTF-32BE", "UTF-32BE"); >> + _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", >> "UTF-32BE", "UTF-32BE"); >> + _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", >> null, "UTF-32"); >> >> - _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", >> - "US-ASCII"); >> + _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", >> "US-ASCII"); >> _testHttpLenient("text/html", "no-bom", "US-ASCII", null, >> "US-ASCII"); >> - _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", >> - "UTF-8", "UTF-8"); >> - _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", >> - "UTF-8", "UTF-8"); >> + _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", >> "UTF-8", "UTF-8"); >> + 
_testHttpLenient("text/html;charset=UTF-16BE", "no-bom", >> "US-ASCII", "UTF-8", "UTF-8"); >> + _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", >> "US-ASCII", "UTF-8", "UTF-8"); >> } >> >> @Test >> >> Modified: >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff >> ============================================================================== >> --- >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java >> (original) >> +++ >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java >> Tue Jun 5 14:48:01 2012 >> @@ -24,7 +24,6 @@ import static org.junit.Assert.fail; >> import java.io.ByteArrayInputStream; >> import java.io.IOException; >> >> -import org.junit.Ignore; >> import org.junit.Test; >> >> /** >> @@ -156,12 +155,13 @@ public class XmlStreamReaderUtilitiesTes >> >> /** BOM calculateRawEncoding() Test */ >> @Test >> - @Ignore >> + //@Ignore >> public void testCalculateRawEncodingStandardUtf32() throws IOException { >> // Standard BOM Checks BOM Other Default >> + testCalculateRawEncodingStandard("UTF-8", "UTF-32BE", >> "UTF-32LE"); >> testCalculateRawEncodingStandard("UTF-32BE", "UTF-8", "UTF-32LE"); >> testCalculateRawEncodingStandard("UTF-32LE", "UTF-8", "UTF-32BE"); >> - } >> +} >> >> private void testCalculateRawEncodingStandard(String bomEnc, String >> otherEnc, String defaultEnc) throws IOException { >> // Expected BOM Guess XMLEnc Default >> @@ -178,7 +178,7 @@ public class XmlStreamReaderUtilitiesTes >> >> /** Additional UTF-16 calculateRawEncoding() Test */ >> @Test >> - public void testCalculateRawEncodingAdditonalkUTF16() throws >> IOException { >> + public void testCalculateRawEncodingAdditonalUTF16() throws 
IOException >> { >> // BOM Guess XML >> Default >> checkRawError(RAWMGS1, "UTF-16BE", "UTF-16", null, >> null); >> checkRawEncoding("UTF-16BE", "UTF-16BE", null, "UTF-16", >> null); >> @@ -192,6 +192,22 @@ public class XmlStreamReaderUtilitiesTes >> checkRawError(RAWMGS1, "UTF-16LE", "UTF-16LE", "UTF-16BE", >> null); >> } >> >> + /** Additional UTF-32 calculateRawEncoding() Test */ >> + @Test >> + public void testCalculateRawEncodingAdditonalUTF32() throws IOException >> { >> + // BOM Guess XML >> Default >> + checkRawError(RAWMGS1, "UTF-32BE", "UTF-32", null, >> null); >> + checkRawEncoding("UTF-32BE", "UTF-32BE", null, "UTF-32", >> null); >> + checkRawEncoding("UTF-32BE", "UTF-32BE", "UTF-32BE", "UTF-32", >> null); >> + checkRawError(RAWMGS1, "UTF-32BE", null, "UTF-32LE", >> null); >> + checkRawError(RAWMGS1, "UTF-32BE", "UTF-32BE", "UTF-32LE", >> null); >> + checkRawError(RAWMGS1, "UTF-32LE", "UTF-32", null, >> null); >> + checkRawEncoding("UTF-32LE", "UTF-32LE", null, "UTF-32", >> null); >> + checkRawEncoding("UTF-32LE", "UTF-32LE", "UTF-32LE", "UTF-32", >> null); >> + checkRawError(RAWMGS1, "UTF-32LE", null, "UTF-32BE", >> null); >> + checkRawError(RAWMGS1, "UTF-32LE", "UTF-32LE", "UTF-32BE", >> null); >> + } >> + >> private void checkRawEncoding(String expected, >> String bomEnc, String xmlGuessEnc, String xmlEnc, String >> defaultEncoding) throws IOException { >> StringBuilder builder = new StringBuilder(); >> @@ -207,8 +223,7 @@ public class XmlStreamReaderUtilitiesTes >> protected String calculateRawEncoding(String bomEnc, String xmlGuessEnc, >> String xmlEnc, >> String defaultEncoding) throws IOException { >> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding); >> - String encoding = mock.calculateRawEncoding(bomEnc, xmlGuessEnc, >> xmlEnc); >> - return encoding; >> + return mock.calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc); >> } >> >> private void checkRawError(String msgSuffix, >> @@ -257,7 +272,7 @@ public class 
XmlStreamReaderUtilitiesTes >> >> /** Test calculate HTTP Encoding */ >> @Test >> - @Ignore >> + //@Ignore >> public void testCalculateHttpEncodingUtf32() throws IOException { >> // No BOM Expected Lenient cType BOM >> Guess XML Default >> checkHttpEncoding("UTF-32LE", true, null, null, >> null, "UTF-32LE", null); >> @@ -277,7 +292,7 @@ public class XmlStreamReaderUtilitiesTes >> private void checkHttpEncoding(String expected, boolean lenient, String >> httpContentType, >> String bomEnc, String xmlGuessEnc, String xmlEnc, String >> defaultEncoding) throws IOException { >> StringBuilder builder = new StringBuilder(); >> - builder.append("HttpEncoding: ").append(bomEnc).append("], "); >> + builder.append("HttpEncoding=[").append(bomEnc).append("], "); >> builder.append("lenient=[").append(lenient).append("], "); >> >> builder.append("httpContentType=[").append(httpContentType).append("], "); >> builder.append("bomEnc=[").append(bomEnc).append("], "); >> @@ -291,8 +306,7 @@ public class XmlStreamReaderUtilitiesTes >> protected String calculateHttpEncoding(String httpContentType, String >> bomEnc, String xmlGuessEnc, >> String xmlEnc, boolean lenient, String defaultEncoding) throws >> IOException { >> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding); >> - String encoding = mock.calculateHttpEncoding(httpContentType, >> bomEnc, xmlGuessEnc, xmlEnc, lenient); >> - return encoding; >> + return mock.calculateHttpEncoding(httpContentType, bomEnc, >> xmlGuessEnc, xmlEnc, lenient); >> } >> >> private void checkHttpError(String msgSuffix, boolean lenient, String >> httpContentType, >> >> Modified: >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java?rev=1346400&r1=1346399&r2=1346400&view=diff >> 
============================================================================== >> --- >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java >> (original) >> +++ >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java >> Tue Jun 5 14:48:01 2012 >> @@ -74,6 +74,12 @@ public class XmlStreamReader extends Rea >> >> private static final String UTF_16 = "UTF-16"; >> >> + private static final String UTF_32BE = "UTF-32BE"; >> + >> + private static final String UTF_32LE = "UTF-32LE"; >> + >> + private static final String UTF_32 = "UTF-32"; >> + >> private static final String EBCDIC = "CP1047"; >> >> private static String staticDefaultEncoding = null; >> @@ -447,6 +453,10 @@ public class XmlStreamReader extends Rea >> && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc >> .equals(UTF_16LE))) { >> encoding = xmlGuessEnc; >> + } else if (xmlEnc.equals(UTF_32) >> + && (xmlGuessEnc.equals(UTF_32BE) || xmlGuessEnc >> + .equals(UTF_32LE))) { >> + encoding = xmlGuessEnc; >> } else { >> encoding = xmlEnc; >> } >> @@ -474,6 +484,18 @@ public class XmlStreamReader extends Rea >> bomEnc, xmlGuessEnc, xmlEnc, is); >> } >> encoding = bomEnc; >> + } else if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) { >> + if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) { >> + throw new XmlStreamReaderException(RAW_EX_1.format(new >> Object[] { bomEnc, >> + xmlGuessEnc, xmlEnc }), bomEnc, xmlGuessEnc, >> xmlEnc, is); >> + } >> + if (xmlEnc != null && !xmlEnc.equals(UTF_32) >> + && !xmlEnc.equals(bomEnc)) { >> + throw new XmlStreamReaderException(RAW_EX_1 >> + .format(new Object[] { bomEnc, xmlGuessEnc, xmlEnc >> }), >> + bomEnc, xmlGuessEnc, xmlEnc, is); >> + } >> + encoding = bomEnc; >> } else { >> throw new XmlStreamReaderException(RAW_EX_2.format(new Object[] { >> bomEnc, xmlGuessEnc, xmlEnc }), bomEnc, xmlGuessEnc, >> @@ -516,6 +538,21 @@ public class XmlStreamReader extends Rea >> 
xmlGuessEnc, xmlEnc }), cTMime, >> cTEnc, >> bomEnc, xmlGuessEnc, xmlEnc, is); >> } >> + } else if (bomEnc != null >> + && (cTEnc.equals(UTF_32BE) || >> cTEnc.equals(UTF_32LE))) { >> + throw new XmlStreamReaderException(HTTP_EX_1 >> + .format(new Object[] { cTMime, cTEnc, bomEnc, >> + xmlGuessEnc, xmlEnc }), cTMime, cTEnc, >> + bomEnc, xmlGuessEnc, xmlEnc, is); >> + } else if (cTEnc.equals(UTF_32)) { >> + if (bomEnc != null && bomEnc.startsWith(UTF_32)) { >> + encoding = bomEnc; >> + } else { >> + throw new XmlStreamReaderException(HTTP_EX_2 >> + .format(new Object[] { cTMime, cTEnc, >> bomEnc, >> + xmlGuessEnc, xmlEnc }), cTMime, >> cTEnc, >> + bomEnc, xmlGuessEnc, xmlEnc, is); >> + } >> } else { >> encoding = cTEnc; >> } >> >> Modified: >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff >> ============================================================================== >> --- >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java >> (original) >> +++ >> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java >> Tue Jun 5 14:48:01 2012 >> @@ -36,11 +36,10 @@ public class XmlStreamReaderUtilitiesCom >> protected String calculateHttpEncoding(String httpContentType, String >> bomEnc, String xmlGuessEnc, >> String xmlEnc, boolean lenient, String defaultEncoding) throws >> IOException { >> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding); >> - String encoding = mock.calculateHttpEncoding( >> + return mock.calculateHttpEncoding( >> XmlStreamReader.getContentTypeMime(httpContentType), >> 
XmlStreamReader.getContentTypeEncoding(httpContentType), >> bomEnc, xmlGuessEnc, xmlEnc, null, lenient); >> - return encoding; >> } >> >> /** Mock {@link XmlStreamReader} implementation */ >> >> > > --------------------------------------------------------------------- > To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org > For additional commands, e-mail: dev-h...@commons.apache.org > --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org For additional commands, e-mail: dev-h...@commons.apache.org