This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 6da32a6e OPENNLP-1820: Restrict ExtensionLoader to allowlisted package
prefixes (#1021)
6da32a6e is described below
commit 6da32a6e72c0436074afc77de5db8da8b3b3f972
Author: subbudvk <[email protected]>
AuthorDate: Mon Apr 27 20:21:52 2026 +0530
OPENNLP-1820: Restrict ExtensionLoader to allowlisted package prefixes
(#1021)
OPENNLP-1820: Restrict ExtensionLoader to allowlisted package prefixes
(#1021)
* Restrict extension class loading to allowlisted packages
* Support unregister classes in ExtensionLoader allowlist
* Add docs for extension loading changes
---
.../opennlp/tools/util/ext/ExtensionLoader.java | 106 +++++++++++-
opennlp-docs/src/docbkx/extension.xml | 42 +++--
.../tools/util/ext/ExtensionLoaderTest.java | 184 ++++++++++++++++++++-
3 files changed, 316 insertions(+), 16 deletions(-)
diff --git
a/opennlp-api/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
b/opennlp-api/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
index 32aec868..4c1130fc 100644
--- a/opennlp-api/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
+++ b/opennlp-api/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
@@ -19,22 +19,109 @@ package opennlp.tools.util.ext;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArraySet;
import opennlp.tools.commons.Internal;
/**
* The {@link ExtensionLoader} is responsible to load extensions to the
OpenNLP library.
* <p>
+ * Only classes whose fully-qualified name starts with a registered package
prefix are
+ * permitted. The default allowed prefix is {@code opennlp.}, which covers all
built-in
+ * factories and serializers.
+ * <p>
+ * To allow custom extension classes from other packages, either:
+ * <ul>
+ * <li>Call {@link #registerAllowedPackage(String)} programmatically before
loading
+ * any model that uses the custom class.</li>
+ * <li>Set the system property {@code OPENNLP_EXT_ALLOWED_PACKAGES} to a
+ * comma-separated list of package prefixes at JVM startup, e.g.
+ * {@code -DOPENNLP_EXT_ALLOWED_PACKAGES=com.acme.nlp.,com.other.}.</li>
+ * </ul>
+ * <p>
* <b>Note:</b>
* Do not use this class, internal use only!
*/
@Internal
public class ExtensionLoader {
+ /**
+ * System property for supplying additional allowed package prefixes.
+ * Value is a comma-separated list, e.g. {@code com.acme.nlp.,com.other.}.
+ * <p>
+ * This property is read once at class-load time. If it cannot be set via
+ * {@code -D} at JVM startup (e.g. in embedded or test scenarios), call
+ * {@link #registerAllowedPackage(String)} before loading any model that
+ * uses a custom factory or serializer.
+ */
+ public static final String ALLOWED_PACKAGES_PROPERTY =
"OPENNLP_EXT_ALLOWED_PACKAGES";
+
+ /**
+ * Package prefixes whose classes are permitted to be instantiated as
extensions.
+ * Seeded from {@code opennlp.} plus any prefixes in {@link
#ALLOWED_PACKAGES_PROPERTY}.
+ */
+ private static final Set<String> ALLOWED_PREFIXES = initAllowedPrefixes();
+
+ private static Set<String> initAllowedPrefixes() {
+ Set<String> prefixes = new
CopyOnWriteArraySet<>(Collections.singleton("opennlp."));
+ String prop = System.getProperty(ALLOWED_PACKAGES_PROPERTY, "");
+ if (!prop.isBlank()) {
+ Arrays.stream(prop.split(","))
+ .map(String::trim)
+ .filter(s -> !s.isBlank())
+ .map(s -> s.endsWith(".") ? s : s + ".")
+ .forEach(prefixes::add);
+ }
+ return prefixes;
+ }
+
private ExtensionLoader() {
}
- // Pass in the type (interface) of the class to load
+ /**
+ * Registers an additional package prefix whose classes are permitted to be
+ * loaded as OpenNLP extensions. Call this once at application startup,
before
+ * loading any model that uses a custom factory or serializer from that
package.
+ * <p>
+ * The prefix is normalized to end with {@code '.'} to prevent collision
attacks
+ * (e.g. registering {@code "com.acme"} cannot be exploited via {@code
"com.acmeevil.*"}).
+ *
+ * @param packagePrefix The package prefix to allow, e.g. {@code
"com.example.nlp"}.
+ * Must not be {@code null} or blank.
+ * @throws NullPointerException if {@code packagePrefix} is {@code null}.
+ * @throws IllegalArgumentException if {@code packagePrefix} is blank.
+ */
+ public static void registerAllowedPackage(String packagePrefix) {
+ Objects.requireNonNull(packagePrefix, "packagePrefix must not be null");
+ if (packagePrefix.isBlank()) {
+ throw new IllegalArgumentException("packagePrefix must not be blank");
+ }
+ String normalized = packagePrefix.endsWith(".") ? packagePrefix :
packagePrefix + ".";
+ ALLOWED_PREFIXES.add(normalized);
+ }
+
+ /**
+ * Removes a previously registered package prefix. Has no effect if the
prefix
+ * was not registered. The default {@code opennlp.} prefix can also be
removed,
+ * though this is not recommended.
+ * <p>
+ * The prefix is normalized to end with {@code '.'} before removal, matching
the
+ * normalization applied in {@link #registerAllowedPackage(String)}.
+ *
+ * @param packagePrefix The package prefix to remove, e.g. {@code
"com.example.nlp"}.
+ * Must not be {@code null}.
+ * @throws NullPointerException if {@code packagePrefix} is {@code null}.
+ */
+ public static void unregisterAllowedPackage(String packagePrefix) {
+ Objects.requireNonNull(packagePrefix, "packagePrefix must not be null");
+ String normalized = packagePrefix.endsWith(".") ? packagePrefix :
packagePrefix + ".";
+ ALLOWED_PREFIXES.remove(normalized);
+ }
+
/**
* Instantiates a user provided extension to OpenNLP.
* <p>
@@ -51,11 +138,26 @@ public class ExtensionLoader {
*
* @return the instance of the extension class
*
- * @throws ExtensionNotLoadedException Thrown if the load operation failed.
+ * @throws ExtensionNotLoadedException Thrown if the load operation failed or
+ * the class is not in an allowed package.
*/
@SuppressWarnings("unchecked")
public static <T> T instantiateExtension(Class<T> clazz, String
extensionClassName) {
+ if (extensionClassName == null) {
+ throw new ExtensionNotLoadedException("extensionClassName must not be
null");
+ }
+
+ // Validate BEFORE Class.forName() — Class.forName() executes static
initializers
+ // (CWE-470), which must not run for untrusted class names.
+ boolean allowed =
ALLOWED_PREFIXES.stream().anyMatch(extensionClassName::startsWith);
+ if (!allowed) {
+ throw new ExtensionNotLoadedException(
+ "Class '" + extensionClassName + "' is not in an allowed package. " +
+ "Register the package via ExtensionLoader.registerAllowedPackage()
or set " +
+ "the system property " + ALLOWED_PACKAGES_PROPERTY + " at JVM
startup.");
+ }
+
// First try to load extension and instantiate extension from class path
try {
Class<?> extClazz = Class.forName(extensionClassName);
diff --git a/opennlp-docs/src/docbkx/extension.xml
b/opennlp-docs/src/docbkx/extension.xml
index 76d1f35e..36d08ba3 100644
--- a/opennlp-docs/src/docbkx/extension.xml
+++ b/opennlp-docs/src/docbkx/extension.xml
@@ -21,21 +21,37 @@ specific language governing permissions and limitations
under the License.
-->
-<chapter xml:id="tools.extension">
+<chapter xml:id="tools.extension" xmlns:xlink="http://www.w3.org/1999/xlink">
<title>Extending OpenNLP</title>
<para>
-In OpenNLP extension can be used to add new functionality and to
-heavily customize an existing component. Most components define
-a factory class which can be implemented to customize the creation
-of it. And some components allow to add new feature generators.
+OpenNLP extensions allow customization of existing components. Most components
define a factory
+class that can be implemented to control their creation. The implementation
class must implement
+the required interface and have a public no-argument constructor.
</para>
-<section xml:id="tools.extension.writing">
- <title>Writing an extension</title>
- <para>
- In many places it is possible to pass in an extension class name to
customize
- some aspect of OpenNLP. The implementation class needs to implement the
specified
- interface and should have a public no-argument constructor.
- </para>
+<section xml:id="tools.extension.allowlist">
+ <title>Extension Class Allowlist</title>
+ <para>
+ Extension classes are loaded by <code>ExtensionLoader</code> via
reflection. Only classes
+ whose fully-qualified name starts with a registered package prefix are
permitted.
+ The default prefix is <code>opennlp.</code>, covering all built-in
factories and serializers.
+ </para>
+ <para>
+ To use a custom class outside <code>opennlp.</code>, register its package
before loading
+ any model that references it:
+ </para>
+ <programlisting language="java">
+<![CDATA[ExtensionLoader.registerAllowedPackage("com.example.nlp");]]>
+ </programlisting>
+ <para>
+ Alternatively, set the system property at JVM startup:
+ </para>
+ <screen>
+<![CDATA[-DOPENNLP_EXT_ALLOWED_PACKAGES=com.example.nlp.,com.other.factories.]]>
+ </screen>
+ <para>
+ A registered prefix can be removed with
<code>ExtensionLoader.unregisterAllowedPackage(String)</code>.
+ </para>
</section>
-</chapter>
\ No newline at end of file
+
+</chapter>
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
b/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
index 611391b6..41d2c3e8 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
@@ -17,12 +17,12 @@
package opennlp.tools.util.ext;
+import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
public class ExtensionLoaderTest {
- // define an interface here
interface TestStringGenerator {
String generateTestString();
}
@@ -33,6 +33,17 @@ public class ExtensionLoaderTest {
}
}
+ @AfterEach
+ void reset() {
+ System.clearProperty(ExtensionLoader.ALLOWED_PACKAGES_PROPERTY);
+ ExtensionLoader.unregisterAllowedPackage("opennlp.tools.util.ext");
+ ExtensionLoader.unregisterAllowedPackage("com.acme");
+ ExtensionLoader.unregisterAllowedPackage("com.example.nlp");
+ ExtensionLoader.unregisterAllowedPackage("java.lang");
+ }
+
+ // --- existing test ---
+
@Test
void testLoadingStringGenerator() {
TestStringGenerator g =
ExtensionLoader.instantiateExtension(TestStringGenerator.class,
@@ -40,4 +51,175 @@ public class ExtensionLoaderTest {
Assertions.assertEquals("test", g.generateTestString());
}
+ // --- allowlist tests ---
+
+ /**
+ * Classes in opennlp.* are allowed by default — no registration needed.
+ */
+ @Test
+ void testBuiltinOpennlpPackageAllowedByDefault() {
+ Assertions.assertDoesNotThrow(() ->
+ ExtensionLoader.instantiateExtension(TestStringGenerator.class,
+ TestStringGeneratorImpl.class.getName()));
+ }
+
+ /**
+ * A class outside opennlp.* is rejected without registration.
+ * This is the core security invariant — untrusted class names from model
+ * manifests must not reach Class.forName() without an explicit allowlist
entry.
+ */
+ @Test
+ void testUnregisteredPackageIsRejected() {
+ ExtensionNotLoadedException ex = Assertions.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(
+ TestStringGenerator.class,
+ "com.example.exploit.MaliciousFactory"));
+
+ Assertions.assertTrue(ex.getMessage().contains("not in an allowed
package"),
+ "exception message should mention allowed package");
+ }
+
+ /**
+ * Allowlist check runs before Class.forName() — even for non-existent
classes
+ * the error must be "not in an allowed package", never "could not be
located".
+ */
+ @Test
+ void testAllowlistGateRunsBeforeClassForName() {
+ ExtensionNotLoadedException ex = Assertions.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(
+ TestStringGenerator.class,
+ "com.example.DoesNotExistOnClasspath$$Probe"));
+
+ Assertions.assertTrue(ex.getMessage().contains("not in an allowed
package"),
+ "allowlist must reject before Class.forName(); got: " +
ex.getMessage());
+ Assertions.assertFalse(ex.getMessage().contains("could not be located"),
+ "Class.forName() must not have been reached; got: " + ex.getMessage());
+ }
+
+ /**
+ * After registerAllowedPackage(), a class from that package passes the
allowlist gate.
+ * Uses java.lang.String — outside opennlp.* so registration is required.
+ * The call fails on assignability (String is not a TestStringGenerator),
not on
+ * the allowlist check, proving the gate let it through.
+ */
+ @Test
+ void testRegisteredPackageIsAllowed() {
+ ExtensionLoader.registerAllowedPackage("java.lang");
+
+ ExtensionNotLoadedException ex = Assertions.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
"java.lang.String"));
+
+ Assertions.assertFalse(ex.getMessage().contains("not in an allowed
package"),
+ "gate should have passed; got: " + ex.getMessage());
+ }
+
+ /**
+ * unregisterAllowedPackage() removes a previously registered prefix.
+ * A class from that package is rejected after removal.
+ */
+ @Test
+ void testUnregisterAllowedPackage() {
+ ExtensionLoader.registerAllowedPackage("java.lang");
+ ExtensionLoader.unregisterAllowedPackage("java.lang");
+
+ ExtensionNotLoadedException ex = Assertions.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
"java.lang.String"));
+
+ Assertions.assertTrue(ex.getMessage().contains("not in an allowed
package"));
+ }
+
+ /**
+ * Prefix collision: registering "com.acme" must not permit "com.acmeevil.*".
+ */
+ @Test
+ void testPrefixCollisionPrevented() {
+ ExtensionLoader.registerAllowedPackage("com.acme");
+
+ ExtensionNotLoadedException ex = Assertions.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(
+ TestStringGenerator.class,
+ "com.acmeevil.Exploit"));
+
+ Assertions.assertTrue(ex.getMessage().contains("not in an allowed
package"));
+ }
+
+ /**
+ * registerAllowedPackage() throws NullPointerException for null and
+ * IllegalArgumentException for blank inputs.
+ */
+ @Test
+ void testRegisterAllowedPackageRejectsNullAndBlank() {
+ Assertions.assertThrows(NullPointerException.class,
+ () -> ExtensionLoader.registerAllowedPackage(null));
+
+ Assertions.assertThrows(IllegalArgumentException.class,
+ () -> ExtensionLoader.registerAllowedPackage(""));
+
+ Assertions.assertThrows(IllegalArgumentException.class,
+ () -> ExtensionLoader.registerAllowedPackage(" "));
+ }
+
+ /**
+ * A null class name is rejected with ExtensionNotLoadedException before
+ * any allowlist or class-loading logic runs.
+ */
+ @Test
+ void testNullClassNameIsRejected() {
+ Assertions.assertThrows(ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
null));
+ }
+
+ // --- system property escape hatch tests ---
+
+ /**
+ * OPENNLP_EXT_ALLOWED_PACKAGES is read at class-load time, so in-process
tests
+ * cannot re-trigger that path. The equivalent is verified programmatically:
+ * a package outside opennlp.* registered via registerAllowedPackage()
passes the gate.
+ * Uses java.lang.String — fails on assignability, not on the allowlist
check.
+ */
+ @Test
+ void testSystemPropertyAddsAllowedPackage() {
+ ExtensionLoader.registerAllowedPackage("java.lang");
+
+ ExtensionNotLoadedException ex = Assertions.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
"java.lang.String"));
+
+ Assertions.assertFalse(ex.getMessage().contains("not in an allowed
package"),
+ "registered package should pass the gate; got: " + ex.getMessage());
+ }
+
+ /**
+ * Multiple packages can be registered independently.
+ */
+ @Test
+ void testSystemPropertyMultiplePackages() {
+ ExtensionLoader.registerAllowedPackage("com.example.nlp");
+ ExtensionLoader.registerAllowedPackage("java.lang");
+
+ ExtensionNotLoadedException ex = Assertions.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
"java.lang.String"));
+
+ Assertions.assertFalse(ex.getMessage().contains("not in an allowed
package"),
+ "registered package should pass the gate; got: " + ex.getMessage());
+ }
+
+ /**
+ * System property prefix collision prevention — same dot-normalization
applies.
+ */
+ @Test
+ void testSystemPropertyPrefixCollisionPrevented() {
+ ExtensionLoader.registerAllowedPackage("com.acme");
+
+ Assertions.assertThrows(ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(
+ TestStringGenerator.class,
+ "com.acmeevil.Exploit"));
+ }
}