This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch opennlp-1.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/opennlp-1.x by this push:
     new 2c6342c05 [1.x] OPENNLP-1820: Restrict ExtensionLoader to allowlisted 
package prefixes (#1078)
2c6342c05 is described below

commit 2c6342c05287b30884cdf60c505b83a228ae23ff
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Jun 12 16:20:33 2026 +0200

    [1.x] OPENNLP-1820: Restrict ExtensionLoader to allowlisted package 
prefixes (#1078)
    
    Backport of #1021 to opennlp-1.x.
    
    ExtensionLoader loaded arbitrary class names via Class.forName(), which
    executes static initializers of untrusted classes (CWE-470). Restrict
    loading to classes whose fully-qualified name starts with a registered
    package prefix; the default is "opennlp.". Additional prefixes can be
    registered via ExtensionLoader.registerAllowedPackage(String) or the
    OPENNLP_EXT_ALLOWED_PACKAGES system property. The allowlist gate runs
    before Class.forName(). Prefixes are dot-normalized to prevent
    collision attacks (com.acme vs com.acmeevil).
    
    Adapted for opennlp-1.x: Java 8 (trim().isEmpty() instead of isBlank()),
    JUnit 4 tests, and OSGi loading path preserved.
---
 opennlp-docs/src/docbkx/extension.xml              |  25 +++
 .../opennlp/tools/util/ext/ExtensionLoader.java    | 105 ++++++++++++
 .../tools/util/ext/ExtensionLoaderTest.java        | 186 ++++++++++++++++++++-
 3 files changed, 314 insertions(+), 2 deletions(-)

diff --git a/opennlp-docs/src/docbkx/extension.xml 
b/opennlp-docs/src/docbkx/extension.xml
index bdb565711..dce92d24d 100644
--- a/opennlp-docs/src/docbkx/extension.xml
+++ b/opennlp-docs/src/docbkx/extension.xml
@@ -39,6 +39,31 @@ of it. And some components allow to add new feature 
generators.
        </para>
 </section>
 
+<section id="tools.extension.allowlist">
+       <title>Extension Class Allowlist</title>
+       <para>
+       Extension classes are loaded by <code>ExtensionLoader</code> via 
reflection. Only classes
+       whose fully-qualified name starts with a registered package prefix are 
permitted.
+       The default prefix is <code>opennlp.</code>, covering all built-in 
factories and serializers.
+       </para>
+       <para>
+       To use a custom class outside <code>opennlp.</code>, register its 
package before loading
+       any model that references it:
+       </para>
+       <programlisting language="java">
+<![CDATA[ExtensionLoader.registerAllowedPackage("com.example.nlp");]]>
+       </programlisting>
+       <para>
+       Alternatively, set the system property at JVM startup:
+       </para>
+       <screen>
+<![CDATA[-DOPENNLP_EXT_ALLOWED_PACKAGES=com.example.nlp.,com.other.factories.]]>
+       </screen>
+       <para>
+       A registered prefix can be removed with 
<code>ExtensionLoader.unregisterAllowedPackage(String)</code>.
+       </para>
+</section>
+
 <section id="tools.extension.osgi">
        <title>Running in an OSGi container</title>
        <para>
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java 
b/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
index 8839321fd..ca7829d30 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
@@ -18,19 +18,107 @@
 package opennlp.tools.util.ext;
 
 import java.lang.reflect.Field;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArraySet;
 
 /**
  * The {@link ExtensionLoader} is responsible to load extensions to the 
OpenNLP library.
  * <p>
+ * Only classes whose fully-qualified name starts with a registered package 
prefix are
+ * permitted. The default allowed prefix is {@code opennlp.}, which covers all 
built-in
+ * factories and serializers.
+ * <p>
+ * To allow custom extension classes from other packages, either:
+ * <ul>
+ *   <li>Call {@link #registerAllowedPackage(String)} programmatically before 
loading
+ *       any model that uses the custom class.</li>
+ *   <li>Set the system property {@code OPENNLP_EXT_ALLOWED_PACKAGES} to a
+ *       comma-separated list of package prefixes at JVM startup, e.g.
+ *       {@code -DOPENNLP_EXT_ALLOWED_PACKAGES=com.acme.nlp.,com.other.}.</li>
+ * </ul>
+ * <p>
  * <b>Note:</b> Do not use this class, internal use only!
  */
 public class ExtensionLoader {
 
+  /**
+   * System property for supplying additional allowed package prefixes.
+   * Value is a comma-separated list, e.g. {@code com.acme.nlp.,com.other.}.
+   * <p>
+   * This property is read once at class-load time. If it cannot be set via
+   * {@code -D} at JVM startup (e.g. in embedded or test scenarios), call
+   * {@link #registerAllowedPackage(String)} before loading any model that
+   * uses a custom factory or serializer.
+   */
+  public static final String ALLOWED_PACKAGES_PROPERTY = 
"OPENNLP_EXT_ALLOWED_PACKAGES";
+
+  /**
+   * Package prefixes whose classes are permitted to be instantiated as 
extensions.
+   * Seeded from {@code opennlp.} plus any prefixes in {@link 
#ALLOWED_PACKAGES_PROPERTY}.
+   */
+  private static final Set<String> ALLOWED_PREFIXES = initAllowedPrefixes();
+
   private static boolean isOsgiAvailable = false;
 
+  private static Set<String> initAllowedPrefixes() {
+    Set<String> prefixes = new 
CopyOnWriteArraySet<String>(Collections.singleton("opennlp."));
+    String prop = System.getProperty(ALLOWED_PACKAGES_PROPERTY, "");
+    if (!prop.trim().isEmpty()) {
+      Arrays.stream(prop.split(","))
+          .map(String::trim)
+          .filter(s -> !s.isEmpty())
+          .map(s -> s.endsWith(".") ? s : s + ".")
+          .forEach(prefixes::add);
+    }
+    return prefixes;
+  }
+
   private ExtensionLoader() {
   }
 
+  /**
+   * Registers an additional package prefix whose classes are permitted to be
+   * loaded as OpenNLP extensions. Call this once at application startup, 
before
+   * loading any model that uses a custom factory or serializer from that 
package.
+   * <p>
+   * The prefix is normalized to end with {@code '.'} to prevent collision 
attacks
+   * (e.g. registering {@code "com.acme"} cannot be exploited via {@code 
"com.acmeevil.*"}).
+   *
+   * @param packagePrefix The package prefix to allow, e.g. {@code 
"com.example.nlp"}.
+   *                      Must not be {@code null} or blank.
+   * @throws NullPointerException if {@code packagePrefix} is {@code null}.
+   * @throws IllegalArgumentException if {@code packagePrefix} is blank.
+   */
+  public static void registerAllowedPackage(String packagePrefix) {
+    Objects.requireNonNull(packagePrefix, "packagePrefix must not be null");
+    if (packagePrefix.trim().isEmpty()) {
+      throw new IllegalArgumentException("packagePrefix must not be blank");
+    }
+    String normalized = packagePrefix.endsWith(".") ? packagePrefix : 
packagePrefix + ".";
+    ALLOWED_PREFIXES.add(normalized);
+  }
+
+  /**
+   * Removes a previously registered package prefix. Has no effect if the 
prefix
+   * was not registered. The default {@code opennlp.} prefix can also be 
removed,
+   * though this is not recommended.
+   * <p>
+   * The prefix is normalized to end with {@code '.'} before removal, matching 
the
+   * normalization applied in {@link #registerAllowedPackage(String)}.
+   *
+   * @param packagePrefix The package prefix to remove, e.g. {@code 
"com.example.nlp"}.
+   *                      Must not be {@code null}.
+   * @throws NullPointerException if {@code packagePrefix} is {@code null}.
+   */
+  public static void unregisterAllowedPackage(String packagePrefix) {
+    Objects.requireNonNull(packagePrefix, "packagePrefix must not be null");
+    String normalized = packagePrefix.endsWith(".") ? packagePrefix : 
packagePrefix + ".";
+    ALLOWED_PREFIXES.remove(normalized);
+  }
+
   static boolean isOSGiAvailable() {
     return isOsgiAvailable;
   }
@@ -55,11 +143,28 @@ public class ExtensionLoader {
    * @param extensionClassName
    *
    * @return the instance of the extension class
+   *
+   * @throws ExtensionNotLoadedException Thrown if the load operation failed or
+   *         the class is not in an allowed package.
    */
   // TODO: Throw custom exception if loading fails ...
   @SuppressWarnings("unchecked")
   public static <T> T instantiateExtension(Class<T> clazz, String 
extensionClassName) {
 
+    if (extensionClassName == null) {
+      throw new ExtensionNotLoadedException("extensionClassName must not be 
null");
+    }
+
+    // Validate BEFORE Class.forName() — Class.forName() executes static 
initializers
+    // (CWE-470), which must not run for untrusted class names.
+    boolean allowed = 
ALLOWED_PREFIXES.stream().anyMatch(extensionClassName::startsWith);
+    if (!allowed) {
+      throw new ExtensionNotLoadedException(
+          "Class '" + extensionClassName + "' is not in an allowed package. " +
+          "Register the package via ExtensionLoader.registerAllowedPackage() 
or set " +
+          "the system property " + ALLOWED_PACKAGES_PROPERTY + " at JVM 
startup.");
+    }
+
     // First try to load extension and instantiate extension from class path
     try {
       Class<?> extClazz = Class.forName(extensionClassName);
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
index d4ebed26e..a1c926a3f 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java
@@ -17,12 +17,12 @@
 
 package opennlp.tools.util.ext;
 
+import org.junit.After;
 import org.junit.Assert;
 import org.junit.Test;
 
 public class ExtensionLoaderTest {
 
-  // define an interface here
   interface TestStringGenerator {
     String generateTestString();
   }
@@ -33,11 +33,193 @@ public class ExtensionLoaderTest {
     }
   }
 
+  @After
+  public void reset() {
+    System.clearProperty(ExtensionLoader.ALLOWED_PACKAGES_PROPERTY);
+    ExtensionLoader.unregisterAllowedPackage("opennlp.tools.util.ext");
+    ExtensionLoader.unregisterAllowedPackage("com.acme");
+    ExtensionLoader.unregisterAllowedPackage("com.example.nlp");
+    ExtensionLoader.unregisterAllowedPackage("java.lang");
+  }
+
+  // --- existing test ---
+
+  @Test
+  public void testLoadingStringGenerator() {
+    TestStringGenerator g = 
ExtensionLoader.instantiateExtension(TestStringGenerator.class,
+        TestStringGeneratorImpl.class.getName());
+    Assert.assertEquals("test", g.generateTestString());
+  }
+
+  // --- allowlist tests ---
+
+  /**
+   * Classes in opennlp.* are allowed by default — no registration needed.
+   */
   @Test
-  public void testLoadingStringGenerator() throws ClassNotFoundException {
+  public void testBuiltinOpennlpPackageAllowedByDefault() {
     TestStringGenerator g = 
ExtensionLoader.instantiateExtension(TestStringGenerator.class,
         TestStringGeneratorImpl.class.getName());
     Assert.assertEquals("test", g.generateTestString());
   }
 
+  /**
+   * A class outside opennlp.* is rejected without registration.
+   * This is the core security invariant — untrusted class names from model
+   * manifests must not reach Class.forName() without an explicit allowlist 
entry.
+   */
+  @Test
+  public void testUnregisteredPackageIsRejected() {
+    ExtensionNotLoadedException ex = Assert.assertThrows(
+        ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(
+            TestStringGenerator.class,
+            "com.example.exploit.MaliciousFactory"));
+
+    Assert.assertTrue("exception message should mention allowed package",
+        ex.getMessage().contains("not in an allowed package"));
+  }
+
+  /**
+   * Allowlist check runs before Class.forName() — even for non-existent 
classes
+   * the error must be "not in an allowed package", never "could not be 
located".
+   */
+  @Test
+  public void testAllowlistGateRunsBeforeClassForName() {
+    ExtensionNotLoadedException ex = Assert.assertThrows(
+        ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(
+            TestStringGenerator.class,
+            "com.example.DoesNotExistOnClasspath$$Probe"));
+
+    Assert.assertTrue("allowlist must reject before Class.forName(); got: " + 
ex.getMessage(),
+        ex.getMessage().contains("not in an allowed package"));
+    Assert.assertFalse("Class.forName() must not have been reached; got: " + 
ex.getMessage(),
+        ex.getMessage().contains("could not be located"));
+  }
+
+  /**
+   * After registerAllowedPackage(), a class from that package passes the 
allowlist gate.
+   * Uses java.lang.String — outside opennlp.* so registration is required.
+   * The call fails on assignability (String is not a TestStringGenerator), 
not on
+   * the allowlist check, proving the gate let it through.
+   */
+  @Test
+  public void testRegisteredPackageIsAllowed() {
+    ExtensionLoader.registerAllowedPackage("java.lang");
+
+    ExtensionNotLoadedException ex = Assert.assertThrows(
+        ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, 
"java.lang.String"));
+
+    Assert.assertFalse("gate should have passed; got: " + ex.getMessage(),
+        ex.getMessage().contains("not in an allowed package"));
+  }
+
+  /**
+   * unregisterAllowedPackage() removes a previously registered prefix.
+   * A class from that package is rejected after removal.
+   */
+  @Test
+  public void testUnregisterAllowedPackage() {
+    ExtensionLoader.registerAllowedPackage("java.lang");
+    ExtensionLoader.unregisterAllowedPackage("java.lang");
+
+    ExtensionNotLoadedException ex = Assert.assertThrows(
+        ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, 
"java.lang.String"));
+
+    Assert.assertTrue(ex.getMessage().contains("not in an allowed package"));
+  }
+
+  /**
+   * Prefix collision: registering "com.acme" must not permit "com.acmeevil.*".
+   */
+  @Test
+  public void testPrefixCollisionPrevented() {
+    ExtensionLoader.registerAllowedPackage("com.acme");
+
+    ExtensionNotLoadedException ex = Assert.assertThrows(
+        ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(
+            TestStringGenerator.class,
+            "com.acmeevil.Exploit"));
+
+    Assert.assertTrue(ex.getMessage().contains("not in an allowed package"));
+  }
+
+  /**
+   * registerAllowedPackage() throws NullPointerException for null and
+   * IllegalArgumentException for blank inputs.
+   */
+  @Test
+  public void testRegisterAllowedPackageRejectsNullAndBlank() {
+    Assert.assertThrows(NullPointerException.class,
+        () -> ExtensionLoader.registerAllowedPackage(null));
+
+    Assert.assertThrows(IllegalArgumentException.class,
+        () -> ExtensionLoader.registerAllowedPackage(""));
+
+    Assert.assertThrows(IllegalArgumentException.class,
+        () -> ExtensionLoader.registerAllowedPackage("   "));
+  }
+
+  /**
+   * A null class name is rejected with ExtensionNotLoadedException before
+   * any allowlist or class-loading logic runs.
+   */
+  @Test
+  public void testNullClassNameIsRejected() {
+    Assert.assertThrows(ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, 
null));
+  }
+
+  // --- system property escape hatch tests ---
+
+  /**
+   * OPENNLP_EXT_ALLOWED_PACKAGES is read at class-load time, so in-process 
tests
+   * cannot re-trigger that path. The equivalent is verified programmatically:
+   * a package outside opennlp.* registered via registerAllowedPackage() 
passes the gate.
+   * Uses java.lang.String — fails on assignability, not on the allowlist 
check.
+   */
+  @Test
+  public void testSystemPropertyAddsAllowedPackage() {
+    ExtensionLoader.registerAllowedPackage("java.lang");
+
+    ExtensionNotLoadedException ex = Assert.assertThrows(
+        ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, 
"java.lang.String"));
+
+    Assert.assertFalse("registered package should pass the gate; got: " + 
ex.getMessage(),
+        ex.getMessage().contains("not in an allowed package"));
+  }
+
+  /**
+   * Multiple packages can be registered independently.
+   */
+  @Test
+  public void testSystemPropertyMultiplePackages() {
+    ExtensionLoader.registerAllowedPackage("com.example.nlp");
+    ExtensionLoader.registerAllowedPackage("java.lang");
+
+    ExtensionNotLoadedException ex = Assert.assertThrows(
+        ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, 
"java.lang.String"));
+
+    Assert.assertFalse("registered package should pass the gate; got: " + 
ex.getMessage(),
+        ex.getMessage().contains("not in an allowed package"));
+  }
+
+  /**
+   * System property prefix collision prevention — same dot-normalization 
applies.
+   */
+  @Test
+  public void testSystemPropertyPrefixCollisionPrevented() {
+    ExtensionLoader.registerAllowedPackage("com.acme");
+
+    Assert.assertThrows(ExtensionNotLoadedException.class,
+        () -> ExtensionLoader.instantiateExtension(
+            TestStringGenerator.class,
+            "com.acmeevil.Exploit"));
+  }
 }

Reply via email to