This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/main by this push:
     new b85e45f0 OPENNLP-1819: Align DictionaryEntryPersistor XML parsing with 
XmlUtil helper (#1019)
b85e45f0 is described below

commit b85e45f064a9b9089b65ca73c75544092fec4eae
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Apr 17 17:39:13 2026 +0200

    OPENNLP-1819: Align DictionaryEntryPersistor XML parsing with XmlUtil 
helper (#1019)
---
 .../serializer/DictionaryEntryPersistor.java       | 14 ++----
 .../src/main/java/opennlp/tools/util/XmlUtil.java  | 52 ++++++++++++++++------
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git 
a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
 
b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
index c8e3ad42..199e95ad 100644
--- 
a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
+++ 
b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
@@ -24,8 +24,6 @@ import java.nio.charset.StandardCharsets;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParserFactory;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerConfigurationException;
@@ -43,6 +41,7 @@ import org.xml.sax.helpers.AttributesImpl;
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.StringList;
+import opennlp.tools.util.XmlUtil;
 import opennlp.tools.util.model.UncloseableInputStream;
 
 /**
@@ -52,9 +51,6 @@ import opennlp.tools.util.model.UncloseableInputStream;
  * @see Dictionary
  */
 public class DictionaryEntryPersistor {
-  
-  private static final SAXParserFactory SAX_PARSER_FACTORY = 
SAXParserFactory.newInstance();
-  private static final String SAX_FEATURE_NAMESPACES = "http://xml.org/sax/features/namespaces";
 
   // TODO: should check for invalid format, make it save
   private static class DictionaryContenthandler implements ContentHandler {
@@ -230,15 +226,11 @@ public class DictionaryEntryPersistor {
 
     XMLReader xmlReader;
     try {
-      xmlReader = SAX_PARSER_FACTORY.newSAXParser().getXMLReader();
-      // Note:
-      // There is a compatibility problem here: JAXP default is false while 
SAX 2 default is true!
-      // OpenNLP requires it activated!
-      xmlReader.setFeature(SAX_FEATURE_NAMESPACES, true);
+      xmlReader = XmlUtil.createSaxParser().getXMLReader();
       xmlReader.setContentHandler(handler);
       xmlReader.parse(new InputSource(new UncloseableInputStream(in)));
     }
-    catch (ParserConfigurationException | SAXException e) {
+    catch (SAXException e) {
       throw new InvalidFormatException("The profile data stream has " +
           "an invalid format!", e);
     }
diff --git 
a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/XmlUtil.java 
b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/XmlUtil.java
index c5adafb1..96d2cfb7 100644
--- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/XmlUtil.java
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/XmlUtil.java
@@ -36,20 +36,31 @@ public class XmlUtil {
    * Create a new {@link DocumentBuilder} which processes XML securely.
    *
    * @return A valid {@link DocumentBuilder} instance.
-   *
    * @throws IllegalStateException Thrown if errors occurred creating the 
builder.
    */
   public static DocumentBuilder createDocumentBuilder() {
+    final DocumentBuilderFactory documentBuilderFactory = 
DocumentBuilderFactory.newInstance();
+    try {
+      
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    } catch (ParserConfigurationException e) {
+      /// {@link XMLConstants.FEATURE_SECURE_PROCESSING} is not supported on 
Android.
+      /// See {@link DocumentBuilderFactory#setFeature}
+      logger.warn("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING, 
it's unsupported on" +
+          " this platform.", e);
+    }
     try {
-      DocumentBuilderFactory documentBuilderFactory = 
DocumentBuilderFactory.newInstance();
-      try {
-        
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
-      } catch (ParserConfigurationException e) {
-        /// {@link XMLConstants.FEATURE_SECURE_PROCESSING} is not supported on 
Android.
-        /// See {@link DocumentBuilderFactory#setFeature}
-        logger.warn("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING, 
it's unsupported on" +
-                " this platform.", e);
-      }
+      documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, 
"");
+      documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, 
"");
+      documentBuilderFactory.setFeature(
+          "http://apache.org/xml/features/disallow-doctype-decl", true);
+      documentBuilderFactory.setFeature(
+          "http://xml.org/sax/features/external-general-entities", false);
+      documentBuilderFactory.setFeature(
+          "http://xml.org/sax/features/external-parameter-entities", false);
+      documentBuilderFactory.setFeature(
+          "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+      documentBuilderFactory.setXIncludeAware(false);
+      documentBuilderFactory.setExpandEntityReferences(false);
       return documentBuilderFactory.newDocumentBuilder();
     } catch (ParserConfigurationException e) {
       throw new IllegalStateException(e);
@@ -60,14 +71,29 @@ public class XmlUtil {
    * Create a new {@link SAXParser} which processes XML securely.
    *
    * @return A valid {@link SAXParser} instance.
-   *
    * @throws IllegalStateException Thrown if errors occurred creating the 
parser.
    */
   public static SAXParser createSaxParser() {
-    SAXParserFactory spf = SAXParserFactory.newInstance();
+    final SAXParserFactory spf = SAXParserFactory.newInstance();
+    spf.setNamespaceAware(true);
+    spf.setXIncludeAware(false);
     try {
       spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
-      return spf.newSAXParser();
+    } catch (ParserConfigurationException | SAXException e) {
+      /// {@link XMLConstants.FEATURE_SECURE_PROCESSING} is not supported on 
Android.
+      /// See {@link SAXParserFactory#setFeature}
+      logger.warn("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING, 
it's unsupported on" +
+          " this platform.", e);
+    }
+    try {
+      spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+      spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
+      spf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+      spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+      final SAXParser parser = spf.newSAXParser();
+      parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+      parser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
+      return parser;
     } catch (ParserConfigurationException | SAXException e) {
       throw new IllegalStateException(e);
     }

Reply via email to