This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch OPENNLP-1819 in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 4cc2628f5785498e314a5bb2ce09c384983a72e7 Author: Richard Zowalla <[email protected]> AuthorDate: Tue Apr 14 19:13:00 2026 +0200 OPENNLP-1819 - Align DictionaryEntryPersistor XML parsing with XmlUtil helper --- .../dictionary/serializer/DictionaryEntryPersistor.java | 14 +++----------- .../src/main/java/opennlp/tools/util/XmlUtil.java | 5 +++++ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java index c8e3ad42..199e95ad 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java @@ -24,8 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.LinkedList; import java.util.List; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; @@ -43,6 +41,7 @@ import org.xml.sax.helpers.AttributesImpl; import opennlp.tools.dictionary.Dictionary; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.StringList; +import opennlp.tools.util.XmlUtil; import opennlp.tools.util.model.UncloseableInputStream; /** @@ -52,9 +51,6 @@ import opennlp.tools.util.model.UncloseableInputStream; * @see Dictionary */ public class DictionaryEntryPersistor { - - private static final SAXParserFactory SAX_PARSER_FACTORY = SAXParserFactory.newInstance(); - private static final String SAX_FEATURE_NAMESPACES = "http://xml.org/sax/features/namespaces"; // TODO: should check for invalid format, make it save private static class DictionaryContenthandler implements ContentHandler { @@ -230,15 +226,11 @@ public class DictionaryEntryPersistor { XMLReader xmlReader; try { - xmlReader = SAX_PARSER_FACTORY.newSAXParser().getXMLReader(); - // Note: - // There is a compatibility problem here: JAXP default is false while SAX 2 default is true! - // OpenNLP requires it activated! - xmlReader.setFeature(SAX_FEATURE_NAMESPACES, true); + xmlReader = XmlUtil.createSaxParser().getXMLReader(); xmlReader.setContentHandler(handler); xmlReader.parse(new InputSource(new UncloseableInputStream(in))); } - catch (ParserConfigurationException | SAXException e) { + catch (SAXException e) { throw new InvalidFormatException("The profile data stream has " + "an invalid format!", e); } diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/XmlUtil.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/XmlUtil.java index c5adafb1..40859a28 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/XmlUtil.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/XmlUtil.java @@ -65,8 +65,13 @@ public class XmlUtil { */ public static SAXParser createSaxParser() { SAXParserFactory spf = SAXParserFactory.newInstance(); + spf.setNamespaceAware(true); try { spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + spf.setFeature("http://xml.org/sax/features/external-general-entities", false); + spf.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); return spf.newSAXParser(); } catch (ParserConfigurationException | SAXException e) { throw new IllegalStateException(e);
