This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch opennlp-1.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/opennlp-1.x by this push:
new 584438942 [1.x] OPENNLP-1819: Align DictionaryEntryPersistor XML
parsing with XmlUtil helper (#1080)
584438942 is described below
commit 584438942c87a84d1be0ffa0daf5309bbd20e3d5
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Jun 12 16:26:52 2026 +0200
[1.x] OPENNLP-1819: Align DictionaryEntryPersistor XML parsing with XmlUtil
helper (#1080)
Backport of #1020 to opennlp-1.x.
DictionaryEntryPersistor.create() built its reader via the deprecated and
insecure XMLReaderFactory.createXMLReader(), bypassing the secure parser
configuration. Route it through XmlUtil.createSaxParser().getXMLReader()
so dictionary parsing benefits from the hardened, XXE-safe configuration.
XmlUtil.createSaxParser() now enables namespace awareness on its
SAXParserFactory.
Also harden XmlUtil itself: disable external DTD/schema access and
external general/parameter entities, disallow DOCTYPE declarations, turn
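off XInclude and entity-reference expansion. Enabling
FEATURE_SECURE_PROCESSING is attempted in its own try/catch, so a platform
that rejects that feature (e.g. Android) only gets a warning on System.err
instead of an IllegalStateException; failures of the other settings remain
fatal.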
Adapted for opennlp-1.x: no slf4j on this branch, so the unsupported-
feature warning is emitted via System.err (the branch's logging idiom).
---
.../serializer/DictionaryEntryPersistor.java | 4 +-
.../src/main/java/opennlp/tools/util/XmlUtil.java | 51 +++++++++++++++++++---
2 files changed, 46 insertions(+), 9 deletions(-)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
index 603afade6..07e5aa16e 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
@@ -38,10 +38,10 @@ import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
-import org.xml.sax.helpers.XMLReaderFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.StringList;
+import opennlp.tools.util.XmlUtil;
import opennlp.tools.util.model.UncloseableInputStream;
/**
@@ -217,7 +217,7 @@ public class DictionaryEntryPersistor {
XMLReader xmlReader;
try {
- xmlReader = XMLReaderFactory.createXMLReader();
+ xmlReader = XmlUtil.createSaxParser().getXMLReader();
xmlReader.setContentHandler(profileContentHandler);
xmlReader.parse(new InputSource(new UncloseableInputStream(in)));
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
b/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
index 39cc8dbe0..39ed353a0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
@@ -29,14 +29,34 @@ import org.xml.sax.SAXException;
public class XmlUtil {
/**
- * Create a new DocumentBuilder which processes XML securely.
+ * Create a new {@link DocumentBuilder} which processes XML securely.
*
- * @return a DocumentBuilder
+ * @return A valid {@link DocumentBuilder} instance.
+ * @throws IllegalStateException Thrown if errors occurred creating the
builder.
*/
public static DocumentBuilder createDocumentBuilder() {
+ final DocumentBuilderFactory documentBuilderFactory =
DocumentBuilderFactory.newInstance();
try {
- DocumentBuilderFactory documentBuilderFactory =
DocumentBuilderFactory.newInstance();
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ } catch (ParserConfigurationException e) {
+ // XMLConstants.FEATURE_SECURE_PROCESSING is not supported on Android.
+ // See DocumentBuilderFactory#setFeature
+ System.err.println("Failed to enable
XMLConstants.FEATURE_SECURE_PROCESSING, " +
+ "it's unsupported on this platform: " + e.getMessage());
+ }
+ try {
+ documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD,
"");
+ documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA,
"");
+ documentBuilderFactory.setFeature(
+ "http://apache.org/xml/features/disallow-doctype-decl", true);
+ documentBuilderFactory.setFeature(
+ "http://xml.org/sax/features/external-general-entities", false);
+ documentBuilderFactory.setFeature(
+ "http://xml.org/sax/features/external-parameter-entities", false);
+ documentBuilderFactory.setFeature(
+ "http://apache.org/xml/features/nonvalidating/load-external-dtd",
false);
+ documentBuilderFactory.setXIncludeAware(false);
+ documentBuilderFactory.setExpandEntityReferences(false);
return documentBuilderFactory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
throw new IllegalStateException(e);
@@ -44,15 +64,32 @@ public class XmlUtil {
}
/**
- * Create a new SAXParser which processes XML securely.
+ * Create a new {@link SAXParser} which processes XML securely.
*
- * @return a SAXParser
+ * @return A valid {@link SAXParser} instance.
+ * @throws IllegalStateException Thrown if errors occurred creating the
parser.
*/
public static SAXParser createSaxParser() {
- SAXParserFactory spf = SAXParserFactory.newInstance();
+ final SAXParserFactory spf = SAXParserFactory.newInstance();
+ spf.setNamespaceAware(true);
+ spf.setXIncludeAware(false);
try {
spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
- return spf.newSAXParser();
+ } catch (ParserConfigurationException | SAXException e) {
+ // XMLConstants.FEATURE_SECURE_PROCESSING is not supported on Android.
+ // See SAXParserFactory#setFeature
+ System.err.println("Failed to enable
XMLConstants.FEATURE_SECURE_PROCESSING, " +
+ "it's unsupported on this platform: " + e.getMessage());
+ }
+ try {
+ spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl",
true);
+ spf.setFeature("http://xml.org/sax/features/external-general-entities",
false);
+
spf.setFeature("http://xml.org/sax/features/external-parameter-entities",
false);
+
spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd",
false);
+ final SAXParser parser = spf.newSAXParser();
+ parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+ parser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
+ return parser;
} catch (ParserConfigurationException | SAXException e) {
throw new IllegalStateException(e);
}