This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch opennlp-1.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/opennlp-1.x by this push:
new 2c6342c05 [1.x] OPENNLP-1820: Restrict ExtensionLoader to allowlisted
package prefixes (#1078)
2c6342c05 is described below
commit 2c6342c05287b30884cdf60c505b83a228ae23ff
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Jun 12 16:20:33 2026 +0200
[1.x] OPENNLP-1820: Restrict ExtensionLoader to allowlisted package
prefixes (#1078)
Backport of #1021 to opennlp-1.x.
ExtensionLoader loaded arbitrary class names via Class.forName(), which
executes static initializers of untrusted classes (CWE-470). Restrict
loading to classes whose fully-qualified name starts with a registered
package prefix; the default is "opennlp.". Additional prefixes can be
registered via ExtensionLoader.registerAllowedPackage(String) or the
OPENNLP_EXT_ALLOWED_PACKAGES system property. The allowlist gate runs
before Class.forName(). Prefixes are dot-normalized to prevent
collision attacks (com.acme vs com.acmeevil).
Adapted for opennlp-1.x: Java 8 (trim().isEmpty() instead of isBlank()),
JUnit 4 tests, and OSGi loading path preserved.
---
opennlp-docs/src/docbkx/extension.xml | 25 +++
.../opennlp/tools/util/ext/ExtensionLoader.java | 105 ++++++++++++
.../tools/util/ext/ExtensionLoaderTest.java | 186 ++++++++++++++++++++-
3 files changed, 314 insertions(+), 2 deletions(-)
diff --git a/opennlp-docs/src/docbkx/extension.xml
b/opennlp-docs/src/docbkx/extension.xml
index bdb565711..dce92d24d 100644
--- a/opennlp-docs/src/docbkx/extension.xml
+++ b/opennlp-docs/src/docbkx/extension.xml
@@ -39,6 +39,31 @@ of it. And some components allow to add new feature
generators.
</para>
</section>
+<section id="tools.extension.allowlist">
+ <title>Extension Class Allowlist</title>
+ <para>
+ Extension classes are loaded by <code>ExtensionLoader</code> via
reflection. Only classes
+ whose fully-qualified name starts with a registered package prefix are
permitted.
+ The default prefix is <code>opennlp.</code>, covering all built-in
factories and serializers.
+ </para>
+ <para>
+ To use a custom class outside <code>opennlp.</code>, register its
package before loading
+ any model that references it:
+ </para>
+ <programlisting language="java">
+<![CDATA[ExtensionLoader.registerAllowedPackage("com.example.nlp");]]>
+ </programlisting>
+ <para>
+ Alternatively, set the system property at JVM startup:
+ </para>
+ <screen>
+<![CDATA[-DOPENNLP_EXT_ALLOWED_PACKAGES=com.example.nlp.,com.other.factories.]]>
+ </screen>
+ <para>
+ A registered prefix can be removed with
<code>ExtensionLoader.unregisterAllowedPackage(String)</code>.
+ </para>
+</section>
+
<section id="tools.extension.osgi">
<title>Running in an OSGi container</title>
<para>
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
b/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
index 8839321fd..ca7829d30 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
@@ -18,19 +18,107 @@
package opennlp.tools.util.ext;
import java.lang.reflect.Field;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArraySet;
/**
* The {@link ExtensionLoader} is responsible to load extensions to the
OpenNLP library.
* <p>
+ * Only classes whose fully-qualified name starts with a registered package
prefix are
+ * permitted. The default allowed prefix is {@code opennlp.}, which covers all
built-in
+ * factories and serializers.
+ * <p>
+ * To allow custom extension classes from other packages, either:
+ * <ul>
+ * <li>Call {@link #registerAllowedPackage(String)} programmatically before
loading
+ * any model that uses the custom class.</li>
+ * <li>Set the system property {@code OPENNLP_EXT_ALLOWED_PACKAGES} to a
+ * comma-separated list of package prefixes at JVM startup, e.g.
+ * {@code -DOPENNLP_EXT_ALLOWED_PACKAGES=com.acme.nlp.,com.other.}.</li>
+ * </ul>
+ * <p>
* <b>Note:</b> Do not use this class, internal use only!
*/
public class ExtensionLoader {
+ /**
+ * System property for supplying additional allowed package prefixes.
+ * Value is a comma-separated list, e.g. {@code com.acme.nlp.,com.other.}.
+ * <p>
+ * This property is read once at class-load time. If it cannot be set via
+ * {@code -D} at JVM startup (e.g. in embedded or test scenarios), call
+ * {@link #registerAllowedPackage(String)} before loading any model that
+ * uses a custom factory or serializer.
+ */
+ public static final String ALLOWED_PACKAGES_PROPERTY =
"OPENNLP_EXT_ALLOWED_PACKAGES";
+
+ /**
+ * Package prefixes whose classes are permitted to be instantiated as
extensions.
+ * Seeded from {@code opennlp.} plus any prefixes in {@link
#ALLOWED_PACKAGES_PROPERTY}.
+ */
+ private static final Set<String> ALLOWED_PREFIXES = initAllowedPrefixes();
+
private static boolean isOsgiAvailable = false;
+ private static Set<String> initAllowedPrefixes() {
+ Set<String> prefixes = new
CopyOnWriteArraySet<String>(Collections.singleton("opennlp."));
+ String prop = System.getProperty(ALLOWED_PACKAGES_PROPERTY, "");
+ if (!prop.trim().isEmpty()) {
+ Arrays.stream(prop.split(","))
+ .map(String::trim)
+ .filter(s -> !s.isEmpty())
+ .map(s -> s.endsWith(".") ? s : s + ".")
+ .forEach(prefixes::add);
+ }
+ return prefixes;
+ }
+
private ExtensionLoader() {
}
+ /**
+ * Registers an additional package prefix whose classes are permitted to be
+ * loaded as OpenNLP extensions. Call this once at application startup,
before
+ * loading any model that uses a custom factory or serializer from that
package.
+ * <p>
+ * The prefix is normalized to end with {@code '.'} to prevent collision
attacks
+ * (e.g. registering {@code "com.acme"} cannot be exploited via {@code
"com.acmeevil.*"}).
+ *
+ * @param packagePrefix The package prefix to allow, e.g. {@code
"com.example.nlp"}.
+ * Must not be {@code null} or blank.
+ * @throws NullPointerException if {@code packagePrefix} is {@code null}.
+ * @throws IllegalArgumentException if {@code packagePrefix} is blank.
+ */
+ public static void registerAllowedPackage(String packagePrefix) {
+ Objects.requireNonNull(packagePrefix, "packagePrefix must not be null");
+ if (packagePrefix.trim().isEmpty()) {
+ throw new IllegalArgumentException("packagePrefix must not be blank");
+ }
+ String normalized = packagePrefix.endsWith(".") ? packagePrefix :
packagePrefix + ".";
+ ALLOWED_PREFIXES.add(normalized);
+ }
+
+ /**
+ * Removes a previously registered package prefix. Has no effect if the
prefix
+ * was not registered. The default {@code opennlp.} prefix can also be
removed,
+ * though this is not recommended.
+ * <p>
+ * The prefix is normalized to end with {@code '.'} before removal, matching
the
+ * normalization applied in {@link #registerAllowedPackage(String)}.
+ *
+ * @param packagePrefix The package prefix to remove, e.g. {@code
"com.example.nlp"}.
+ * Must not be {@code null}.
+ * @throws NullPointerException if {@code packagePrefix} is {@code null}.
+ */
+ public static void unregisterAllowedPackage(String packagePrefix) {
+ Objects.requireNonNull(packagePrefix, "packagePrefix must not be null");
+ String normalized = packagePrefix.endsWith(".") ? packagePrefix :
packagePrefix + ".";
+ ALLOWED_PREFIXES.remove(normalized);
+ }
+
static boolean isOSGiAvailable() {
return isOsgiAvailable;
}
@@ -55,11 +143,28 @@ public class ExtensionLoader {
* @param extensionClassName
*
* @return the instance of the extension class
+ *
+ * @throws ExtensionNotLoadedException Thrown if the load operation failed or
+ * the class is not in an allowed package.
*/
// TODO: Throw custom exception if loading fails ...
@SuppressWarnings("unchecked")
public static <T> T instantiateExtension(Class<T> clazz, String
extensionClassName) {
+ if (extensionClassName == null) {
+ throw new ExtensionNotLoadedException("extensionClassName must not be
null");
+ }
+
+ // Validate BEFORE Class.forName() — Class.forName() executes static
initializers
+ // (CWE-470), which must not run for untrusted class names.
+ boolean allowed =
ALLOWED_PREFIXES.stream().anyMatch(extensionClassName::startsWith);
+ if (!allowed) {
+ throw new ExtensionNotLoadedException(
+ "Class '" + extensionClassName + "' is not in an allowed package. " +
+ "Register the package via ExtensionLoader.registerAllowedPackage()
or set " +
+ "the system property " + ALLOWED_PACKAGES_PROPERTY + " at JVM
startup.");
+ }
+
// First try to load extension and instantiate extension from class path
try {
Class<?> extClazz = Class.forName(extensionClassName);
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
b/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
index d4ebed26e..a1c926a3f 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
@@ -17,12 +17,12 @@
package opennlp.tools.util.ext;
+import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
public class ExtensionLoaderTest {
- // define an interface here
interface TestStringGenerator {
String generateTestString();
}
@@ -33,11 +33,193 @@ public class ExtensionLoaderTest {
}
}
+ @After
+ public void reset() {
+ System.clearProperty(ExtensionLoader.ALLOWED_PACKAGES_PROPERTY);
+ ExtensionLoader.unregisterAllowedPackage("opennlp.tools.util.ext");
+ ExtensionLoader.unregisterAllowedPackage("com.acme");
+ ExtensionLoader.unregisterAllowedPackage("com.example.nlp");
+ ExtensionLoader.unregisterAllowedPackage("java.lang");
+ }
+
+ // --- existing test ---
+
+ @Test
+ public void testLoadingStringGenerator() {
+ TestStringGenerator g =
ExtensionLoader.instantiateExtension(TestStringGenerator.class,
+ TestStringGeneratorImpl.class.getName());
+ Assert.assertEquals("test", g.generateTestString());
+ }
+
+ // --- allowlist tests ---
+
+ /**
+ * Classes in opennlp.* are allowed by default — no registration needed.
+ */
@Test
- public void testLoadingStringGenerator() throws ClassNotFoundException {
+ public void testBuiltinOpennlpPackageAllowedByDefault() {
TestStringGenerator g =
ExtensionLoader.instantiateExtension(TestStringGenerator.class,
TestStringGeneratorImpl.class.getName());
Assert.assertEquals("test", g.generateTestString());
}
+ /**
+ * A class outside opennlp.* is rejected without registration.
+ * This is the core security invariant — untrusted class names from model
+ * manifests must not reach Class.forName() without an explicit allowlist
entry.
+ */
+ @Test
+ public void testUnregisteredPackageIsRejected() {
+ ExtensionNotLoadedException ex = Assert.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(
+ TestStringGenerator.class,
+ "com.example.exploit.MaliciousFactory"));
+
+ Assert.assertTrue("exception message should mention allowed package",
+ ex.getMessage().contains("not in an allowed package"));
+ }
+
+ /**
+ * Allowlist check runs before Class.forName() — even for non-existent
classes
+ * the error must be "not in an allowed package", never "could not be
located".
+ */
+ @Test
+ public void testAllowlistGateRunsBeforeClassForName() {
+ ExtensionNotLoadedException ex = Assert.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(
+ TestStringGenerator.class,
+ "com.example.DoesNotExistOnClasspath$$Probe"));
+
+ Assert.assertTrue("allowlist must reject before Class.forName(); got: " +
ex.getMessage(),
+ ex.getMessage().contains("not in an allowed package"));
+ Assert.assertFalse("Class.forName() must not have been reached; got: " +
ex.getMessage(),
+ ex.getMessage().contains("could not be located"));
+ }
+
+ /**
+ * After registerAllowedPackage(), a class from that package passes the
allowlist gate.
+ * Uses java.lang.String — outside opennlp.* so registration is required.
+ * The call fails on assignability (String is not a TestStringGenerator),
not on
+ * the allowlist check, proving the gate let it through.
+ */
+ @Test
+ public void testRegisteredPackageIsAllowed() {
+ ExtensionLoader.registerAllowedPackage("java.lang");
+
+ ExtensionNotLoadedException ex = Assert.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
"java.lang.String"));
+
+ Assert.assertFalse("gate should have passed; got: " + ex.getMessage(),
+ ex.getMessage().contains("not in an allowed package"));
+ }
+
+ /**
+ * unregisterAllowedPackage() removes a previously registered prefix.
+ * A class from that package is rejected after removal.
+ */
+ @Test
+ public void testUnregisterAllowedPackage() {
+ ExtensionLoader.registerAllowedPackage("java.lang");
+ ExtensionLoader.unregisterAllowedPackage("java.lang");
+
+ ExtensionNotLoadedException ex = Assert.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
"java.lang.String"));
+
+ Assert.assertTrue(ex.getMessage().contains("not in an allowed package"));
+ }
+
+ /**
+ * Prefix collision: registering "com.acme" must not permit "com.acmeevil.*".
+ */
+ @Test
+ public void testPrefixCollisionPrevented() {
+ ExtensionLoader.registerAllowedPackage("com.acme");
+
+ ExtensionNotLoadedException ex = Assert.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(
+ TestStringGenerator.class,
+ "com.acmeevil.Exploit"));
+
+ Assert.assertTrue(ex.getMessage().contains("not in an allowed package"));
+ }
+
+ /**
+ * registerAllowedPackage() throws NullPointerException for null and
+ * IllegalArgumentException for blank inputs.
+ */
+ @Test
+ public void testRegisterAllowedPackageRejectsNullAndBlank() {
+ Assert.assertThrows(NullPointerException.class,
+ () -> ExtensionLoader.registerAllowedPackage(null));
+
+ Assert.assertThrows(IllegalArgumentException.class,
+ () -> ExtensionLoader.registerAllowedPackage(""));
+
+ Assert.assertThrows(IllegalArgumentException.class,
+ () -> ExtensionLoader.registerAllowedPackage(" "));
+ }
+
+ /**
+ * A null class name is rejected with ExtensionNotLoadedException before
+ * any allowlist or class-loading logic runs.
+ */
+ @Test
+ public void testNullClassNameIsRejected() {
+ Assert.assertThrows(ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
null));
+ }
+
+ // --- system property escape hatch tests ---
+
+ /**
+ * OPENNLP_EXT_ALLOWED_PACKAGES is read at class-load time, so in-process
tests
+ * cannot re-trigger that path. The equivalent is verified programmatically:
+ * a package outside opennlp.* registered via registerAllowedPackage()
passes the gate.
+ * Uses java.lang.String — fails on assignability, not on the allowlist
check.
+ */
+ @Test
+ public void testSystemPropertyAddsAllowedPackage() {
+ ExtensionLoader.registerAllowedPackage("java.lang");
+
+ ExtensionNotLoadedException ex = Assert.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
"java.lang.String"));
+
+ Assert.assertFalse("registered package should pass the gate; got: " +
ex.getMessage(),
+ ex.getMessage().contains("not in an allowed package"));
+ }
+
+ /**
+ * Multiple packages can be registered independently.
+ */
+ @Test
+ public void testSystemPropertyMultiplePackages() {
+ ExtensionLoader.registerAllowedPackage("com.example.nlp");
+ ExtensionLoader.registerAllowedPackage("java.lang");
+
+ ExtensionNotLoadedException ex = Assert.assertThrows(
+ ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class,
"java.lang.String"));
+
+ Assert.assertFalse("registered package should pass the gate; got: " +
ex.getMessage(),
+ ex.getMessage().contains("not in an allowed package"));
+ }
+
+ /**
+ * System property prefix collision prevention — same dot-normalization
applies.
+ */
+ @Test
+ public void testSystemPropertyPrefixCollisionPrevented() {
+ ExtensionLoader.registerAllowedPackage("com.acme");
+
+ Assert.assertThrows(ExtensionNotLoadedException.class,
+ () -> ExtensionLoader.instantiateExtension(
+ TestStringGenerator.class,
+ "com.acmeevil.Exploit"));
+ }
}