This is an automated email from the ASF dual-hosted git repository.
davsclaus pushed a commit to branch camel-2.21.x
in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/camel-2.21.x by this push:
new 504271a CAMEL-12769: Combination of File consumer with charset and
Split DSL with XPath doesn't parse XML correctly (#2505)
504271a is described below
commit 504271ae71e87db294de0bb2f4d10bbd4e0c0cda
Author: Tadayoshi Sato <[email protected]>
AuthorDate: Tue Sep 4 23:36:31 2018 +0900
CAMEL-12769: Combination of File consumer with charset and Split DSL with
XPath doesn't parse XML correctly (#2505)
---
.../org/apache/camel/converter/IOConverter.java | 87 ++++++++++++++--------
.../apache/camel/converter/jaxp/XmlConverter.java | 11 ++-
.../camel/converter/IOConverterCharsetTest.java | 18 ++---
3 files changed, 75 insertions(+), 41 deletions(-)
diff --git a/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java b/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java
index 073547e..ae02a2c 100644
--- a/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java
+++ b/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java
@@ -81,40 +81,18 @@ public final class IOConverter {
return IOHelper.buffered(new FileInputStream(file));
}
+ /**
+ * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset
+ *
+ * @param file the file to be converted
+ * @param charset the charset the file is read with
+ * @return the input stream with the JVM default charset
+ */
public static InputStream toInputStream(File file, String charset) throws IOException {
if (charset != null) {
- final BufferedReader reader = toReader(file, charset);
- final Charset defaultStreamCharset = defaultCharset.get();
- return new InputStream() {
- private ByteBuffer bufferBytes;
- private CharBuffer bufferedChars = CharBuffer.allocate(4096);
-
- @Override
- public int read() throws IOException {
- if (bufferBytes == null || bufferBytes.remaining() <= 0) {
- bufferedChars.clear();
- int len = reader.read(bufferedChars);
- bufferedChars.flip();
- if (len == -1) {
- return -1;
- }
- bufferBytes = defaultStreamCharset.encode(bufferedChars);
- }
- return bufferBytes.get();
- }
-
- @Override
- public void close() throws IOException {
- reader.close();
- }
-
- @Override
- public void reset() throws IOException {
- reader.reset();
- }
- };
+ return new EncodingInputStream(file, charset);
} else {
- return IOHelper.buffered(new FileInputStream(file));
+ return toInputStream(file);
}
}
@@ -501,6 +479,53 @@ public final class IOConverter {
}
/**
+ * Encoding-aware input stream.
+ */
+ public static class EncodingInputStream extends InputStream {
+
+ private final File file;
+ private final BufferedReader reader;
+ private final Charset defaultStreamCharset;
+
+ private ByteBuffer bufferBytes;
+ private CharBuffer bufferedChars = CharBuffer.allocate(4096);
+
+ public EncodingInputStream(File file, String charset) throws IOException {
+ this.file = file;
+ reader = toReader(file, charset);
+ defaultStreamCharset = defaultCharset.get();
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (bufferBytes == null || bufferBytes.remaining() <= 0) {
+ bufferedChars.clear();
+ int len = reader.read(bufferedChars);
+ bufferedChars.flip();
+ if (len == -1) {
+ return -1;
+ }
+ bufferBytes = defaultStreamCharset.encode(bufferedChars);
+ }
+ return bufferBytes.get();
+ }
+
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+
+ @Override
+ public void reset() throws IOException {
+ reader.reset();
+ }
+
+ public InputStream toOriginalInputStream() throws FileNotFoundException {
+ return new FileInputStream(file);
+ }
+ }
+
+ /**
* Encoding-aware file reader.
*/
private static class EncodingFileReader extends InputStreamReader {
diff --git a/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java b/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java
index 7c48ec1..8a4ecd9 100644
--- a/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java
+++ b/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java
@@ -69,8 +69,10 @@ import org.apache.camel.BytesSource;
import org.apache.camel.Converter;
import org.apache.camel.Exchange;
import org.apache.camel.StringSource;
+import org.apache.camel.converter.IOConverter;
import org.apache.camel.util.IOHelper;
import org.apache.camel.util.ObjectHelper;
+import org.apache.camel.util.StringHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -869,7 +871,14 @@ public class XmlConverter {
@Converter
public Document toDOMDocument(InputStream in, Exchange exchange) throws IOException, SAXException, ParserConfigurationException {
DocumentBuilder documentBuilder = createDocumentBuilder(getDocumentBuilderFactory(exchange));
- return documentBuilder.parse(in);
+ if (in instanceof IOConverter.EncodingInputStream) {
+ // DocumentBuilder detects encoding from XML declaration, so we need to
+ // revert the converted encoding for the input stream
+ IOConverter.EncodingInputStream encIn = (IOConverter.EncodingInputStream) in;
+ return documentBuilder.parse(encIn.toOriginalInputStream());
+ } else {
+ return documentBuilder.parse(in);
+ }
}
/**
diff --git a/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java b/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
index 2abf0e2..133071b 100644
--- a/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
+++ b/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
@@ -35,8 +35,8 @@ public class IOConverterCharsetTest extends ContextTestSupport {
switchToDefaultCharset(StandardCharsets.UTF_8);
File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
try (InputStream in = IOConverter.toInputStream(file, "UTF-8");
- BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
- BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
+ BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
+ BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
String line = reader.readLine();
String naiveLine = naiveReader.readLine();
assertEquals(naiveLine, line);
@@ -48,8 +48,8 @@ public class IOConverterCharsetTest extends ContextTestSupport {
switchToDefaultCharset(StandardCharsets.ISO_8859_1);
File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
try (InputStream in = IOConverter.toInputStream(file, "UTF-8");
- BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1));
- BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
+ BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1));
+ BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
String line = reader.readLine();
String naiveLine = naiveReader.readLine();
assertEquals(naiveLine, line);
@@ -61,8 +61,8 @@ public class IOConverterCharsetTest extends ContextTestSupport {
switchToDefaultCharset(StandardCharsets.UTF_8);
File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1");
- BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
- BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
+ BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
+ BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
String line = reader.readLine();
String naiveLine = naiveReader.readLine();
assertEquals(naiveLine, line);
@@ -74,7 +74,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
switchToDefaultCharset(StandardCharsets.UTF_8);
File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1");
- InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) {
+ InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) {
byte[] bytes = new byte[8192];
in.read(bytes);
byte[] naiveBytes = new byte[8192];
@@ -86,7 +86,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
public void testToReaderFileWithCharsetUTF8() throws Exception {
File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
try (BufferedReader reader = IOConverter.toReader(file, "UTF-8");
- BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
+ BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
String line = reader.readLine();
String naiveLine = naiveReader.readLine();
assertEquals(naiveLine, line);
@@ -97,7 +97,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
public void testToReaderFileWithCharsetLatin1() throws Exception {
File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
try (BufferedReader reader = IOConverter.toReader(file, "ISO-8859-1");
- BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
+ BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
String line = reader.readLine();
String naiveLine = naiveReader.readLine();
assertEquals(naiveLine, line);