This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch opennlp-2.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/opennlp-2.x by this push:
new d9aaa472 OPENNLP-1819: Align DictionaryEntryPersistor XML parsing with
XmlUtil helper (#1020)
d9aaa472 is described below
commit d9aaa472e0e6672d672bfc296a69ab68f8757959
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Apr 17 17:40:35 2026 +0200
OPENNLP-1819: Align DictionaryEntryPersistor XML parsing with XmlUtil
helper (#1020)
---
.../serializer/DictionaryEntryPersistor.java | 14 ++----
.../src/main/java/opennlp/tools/util/XmlUtil.java | 52 ++++++++++++++++------
2 files changed, 42 insertions(+), 24 deletions(-)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
index c8e3ad42..199e95ad 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
@@ -24,8 +24,6 @@ import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
@@ -43,6 +41,7 @@ import org.xml.sax.helpers.AttributesImpl;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.StringList;
+import opennlp.tools.util.XmlUtil;
import opennlp.tools.util.model.UncloseableInputStream;
/**
@@ -52,9 +51,6 @@ import opennlp.tools.util.model.UncloseableInputStream;
* @see Dictionary
*/
public class DictionaryEntryPersistor {
-
- private static final SAXParserFactory SAX_PARSER_FACTORY =
SAXParserFactory.newInstance();
- private static final String SAX_FEATURE_NAMESPACES =
"http://xml.org/sax/features/namespaces";
// TODO: should check for invalid format, make it save
private static class DictionaryContenthandler implements ContentHandler {
@@ -230,15 +226,11 @@ public class DictionaryEntryPersistor {
XMLReader xmlReader;
try {
- xmlReader = SAX_PARSER_FACTORY.newSAXParser().getXMLReader();
- // Note:
- // There is a compatibility problem here: JAXP default is false while
SAX 2 default is true!
- // OpenNLP requires it activated!
- xmlReader.setFeature(SAX_FEATURE_NAMESPACES, true);
+ xmlReader = XmlUtil.createSaxParser().getXMLReader();
xmlReader.setContentHandler(handler);
xmlReader.parse(new InputSource(new UncloseableInputStream(in)));
}
- catch (ParserConfigurationException | SAXException e) {
+ catch (SAXException e) {
throw new InvalidFormatException("The profile data stream has " +
"an invalid format!", e);
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
b/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
index c5adafb1..96d2cfb7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
@@ -36,20 +36,31 @@ public class XmlUtil {
* Create a new {@link DocumentBuilder} which processes XML securely.
*
* @return A valid {@link DocumentBuilder} instance.
- *
* @throws IllegalStateException Thrown if errors occurred creating the
builder.
*/
public static DocumentBuilder createDocumentBuilder() {
+ final DocumentBuilderFactory documentBuilderFactory =
DocumentBuilderFactory.newInstance();
+ try {
+
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ } catch (ParserConfigurationException e) {
+ /// {@link XMLConstants.FEATURE_SECURE_PROCESSING} is not supported on
Android.
+ /// See {@link DocumentBuilderFactory#setFeature}
+ logger.warn("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING,
it's unsupported on" +
+ " this platform.", e);
+ }
try {
- DocumentBuilderFactory documentBuilderFactory =
DocumentBuilderFactory.newInstance();
- try {
-
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
- } catch (ParserConfigurationException e) {
- /// {@link XMLConstants.FEATURE_SECURE_PROCESSING} is not supported on
Android.
- /// See {@link DocumentBuilderFactory#setFeature}
- logger.warn("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING,
it's unsupported on" +
- " this platform.", e);
- }
+ documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD,
"");
+ documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA,
"");
+ documentBuilderFactory.setFeature(
+ "http://apache.org/xml/features/disallow-doctype-decl", true);
+ documentBuilderFactory.setFeature(
+ "http://xml.org/sax/features/external-general-entities", false);
+ documentBuilderFactory.setFeature(
+ "http://xml.org/sax/features/external-parameter-entities", false);
+ documentBuilderFactory.setFeature(
+ "http://apache.org/xml/features/nonvalidating/load-external-dtd",
false);
+ documentBuilderFactory.setXIncludeAware(false);
+ documentBuilderFactory.setExpandEntityReferences(false);
return documentBuilderFactory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
throw new IllegalStateException(e);
@@ -60,14 +71,29 @@ public class XmlUtil {
* Create a new {@link SAXParser} which processes XML securely.
*
* @return A valid {@link SAXParser} instance.
- *
* @throws IllegalStateException Thrown if errors occurred creating the
parser.
*/
public static SAXParser createSaxParser() {
- SAXParserFactory spf = SAXParserFactory.newInstance();
+ final SAXParserFactory spf = SAXParserFactory.newInstance();
+ spf.setNamespaceAware(true);
+ spf.setXIncludeAware(false);
try {
spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
- return spf.newSAXParser();
+ } catch (ParserConfigurationException | SAXException e) {
+ /// {@link XMLConstants.FEATURE_SECURE_PROCESSING} is not supported on
Android.
+ /// See {@link SAXParserFactory#setFeature}
+ logger.warn("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING,
it's unsupported on" +
+ " this platform.", e);
+ }
+ try {
+ spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl",
true);
+ spf.setFeature("http://xml.org/sax/features/external-general-entities",
false);
+
spf.setFeature("http://xml.org/sax/features/external-parameter-entities",
false);
+
spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd",
false);
+ final SAXParser parser = spf.newSAXParser();
+ parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+ parser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
+ return parser;
} catch (ParserConfigurationException | SAXException e) {
throw new IllegalStateException(e);
}