This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch opennlp-2.x in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 44a02a7eeb17fdde62a69ed79e92c5074013cadd Author: subbudvk <[email protected]> AuthorDate: Mon Apr 27 20:21:52 2026 +0530 OPENNLP-1820: Restrict ExtensionLoader to allowlisted package prefixes (#1021) OPENNLP-1820: Restrict ExtensionLoader to allowlisted package prefixes (#1021) * Restrict extension class loading to allowlisted packages * Support unregister classes in ExtensionLoader allowlist * Add docs for extension loading changes (cherry picked from commit 6da32a6e72c0436074afc77de5db8da8b3b3f972) --- opennlp-docs/src/docbkx/extension.xml | 42 +++-- .../opennlp/tools/util/ext/ExtensionLoader.java | 106 +++++++++++- .../tools/util/ext/ExtensionLoaderTest.java | 184 ++++++++++++++++++++- 3 files changed, 316 insertions(+), 16 deletions(-) diff --git a/opennlp-docs/src/docbkx/extension.xml b/opennlp-docs/src/docbkx/extension.xml index 76d1f35e..36d08ba3 100644 --- a/opennlp-docs/src/docbkx/extension.xml +++ b/opennlp-docs/src/docbkx/extension.xml @@ -21,21 +21,37 @@ specific language governing permissions and limitations under the License. --> -<chapter xml:id="tools.extension"> +<chapter xml:id="tools.extension" xmlns:xlink="http://www.w3.org/1999/xlink"> <title>Extending OpenNLP</title> <para> -In OpenNLP extension can be used to add new functionality and to -heavily customize an existing component. Most components define -a factory class which can be implemented to customize the creation -of it. And some components allow to add new feature generators. +OpenNLP extensions allow customization of existing components. Most components define a factory +class that can be implemented to control their creation. The implementation class must implement +the required interface and have a public no-argument constructor. </para> -<section xml:id="tools.extension.writing"> - <title>Writing an extension</title> - <para> - In many places it is possible to pass in an extension class name to customize - some aspect of OpenNLP. The implementation class needs to implement the specified - interface and should have a public no-argument constructor. - </para> +<section xml:id="tools.extension.allowlist"> + <title>Extension Class Allowlist</title> + <para> + Extension classes are loaded by <code>ExtensionLoader</code> via reflection. Only classes + whose fully-qualified name starts with a registered package prefix are permitted. + The default prefix is <code>opennlp.</code>, covering all built-in factories and serializers. + </para> + <para> + To use a custom class outside <code>opennlp.</code>, register its package before loading + any model that references it: + </para> + <programlisting language="java"> +<![CDATA[ExtensionLoader.registerAllowedPackage("com.example.nlp");]]> + </programlisting> + <para> + Alternatively, set the system property at JVM startup: + </para> + <screen> +<![CDATA[-DOPENNLP_EXT_ALLOWED_PACKAGES=com.example.nlp.,com.other.factories.]]> + </screen> + <para> + A registered prefix can be removed with <code>ExtensionLoader.unregisterAllowedPackage(String)</code>. + </para> </section> -</chapter> \ No newline at end of file + +</chapter> diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java b/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java index 32aec868..4c1130fc 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java @@ -19,22 +19,109 @@ package opennlp.tools.util.ext; import java.lang.reflect.Field; import java.lang.reflect.InvocationTargetException; +import java.util.Arrays; +import java.util.Collections; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.CopyOnWriteArraySet; import opennlp.tools.commons.Internal; /** * The {@link ExtensionLoader} is responsible to load extensions to the OpenNLP library. * <p> + * Only classes whose fully-qualified name starts with a registered package prefix are + * permitted. The default allowed prefix is {@code opennlp.}, which covers all built-in + * factories and serializers. + * <p> + * To allow custom extension classes from other packages, either: + * <ul> + * <li>Call {@link #registerAllowedPackage(String)} programmatically before loading + * any model that uses the custom class.</li> + * <li>Set the system property {@code OPENNLP_EXT_ALLOWED_PACKAGES} to a + * comma-separated list of package prefixes at JVM startup, e.g. + * {@code -DOPENNLP_EXT_ALLOWED_PACKAGES=com.acme.nlp.,com.other.}.</li> + * </ul> + * <p> * <b>Note:</b> * Do not use this class, internal use only! */ @Internal public class ExtensionLoader { + /** + * System property for supplying additional allowed package prefixes. + * Value is a comma-separated list, e.g. {@code com.acme.nlp.,com.other.}. + * <p> + * This property is read once at class-load time. If it cannot be set via + * {@code -D} at JVM startup (e.g. in embedded or test scenarios), call + * {@link #registerAllowedPackage(String)} before loading any model that + * uses a custom factory or serializer. + */ + public static final String ALLOWED_PACKAGES_PROPERTY = "OPENNLP_EXT_ALLOWED_PACKAGES"; + + /** + * Package prefixes whose classes are permitted to be instantiated as extensions. + * Seeded from {@code opennlp.} plus any prefixes in {@link #ALLOWED_PACKAGES_PROPERTY}. + */ + private static final Set<String> ALLOWED_PREFIXES = initAllowedPrefixes(); + + private static Set<String> initAllowedPrefixes() { + Set<String> prefixes = new CopyOnWriteArraySet<>(Collections.singleton("opennlp.")); + String prop = System.getProperty(ALLOWED_PACKAGES_PROPERTY, ""); + if (!prop.isBlank()) { + Arrays.stream(prop.split(",")) + .map(String::trim) + .filter(s -> !s.isBlank()) + .map(s -> s.endsWith(".") ? s : s + ".") + .forEach(prefixes::add); + } + return prefixes; + } + private ExtensionLoader() { } - // Pass in the type (interface) of the class to load + /** + * Registers an additional package prefix whose classes are permitted to be + * loaded as OpenNLP extensions. Call this once at application startup, before + * loading any model that uses a custom factory or serializer from that package. + * <p> + * The prefix is normalized to end with {@code '.'} to prevent collision attacks + * (e.g. registering {@code "com.acme"} cannot be exploited via {@code "com.acmeevil.*"}). + * + * @param packagePrefix The package prefix to allow, e.g. {@code "com.example.nlp"}. + * Must not be {@code null} or blank. + * @throws NullPointerException if {@code packagePrefix} is {@code null}. + * @throws IllegalArgumentException if {@code packagePrefix} is blank. + */ + public static void registerAllowedPackage(String packagePrefix) { + Objects.requireNonNull(packagePrefix, "packagePrefix must not be null"); + if (packagePrefix.isBlank()) { + throw new IllegalArgumentException("packagePrefix must not be blank"); + } + String normalized = packagePrefix.endsWith(".") ? packagePrefix : packagePrefix + "."; + ALLOWED_PREFIXES.add(normalized); + } + + /** + * Removes a previously registered package prefix. Has no effect if the prefix + * was not registered. The default {@code opennlp.} prefix can also be removed, + * though this is not recommended. + * <p> + * The prefix is normalized to end with {@code '.'} before removal, matching the + * normalization applied in {@link #registerAllowedPackage(String)}. + * + * @param packagePrefix The package prefix to remove, e.g. {@code "com.example.nlp"}. + * Must not be {@code null}. + * @throws NullPointerException if {@code packagePrefix} is {@code null}. + */ + public static void unregisterAllowedPackage(String packagePrefix) { + Objects.requireNonNull(packagePrefix, "packagePrefix must not be null"); + String normalized = packagePrefix.endsWith(".") ? packagePrefix : packagePrefix + "."; + ALLOWED_PREFIXES.remove(normalized); + } + /** * Instantiates a user provided extension to OpenNLP. * <p> @@ -51,11 +138,26 @@ public class ExtensionLoader { * * @return the instance of the extension class * - * @throws ExtensionNotLoadedException Thrown if the load operation failed. + * @throws ExtensionNotLoadedException Thrown if the load operation failed or + * the class is not in an allowed package. */ @SuppressWarnings("unchecked") public static <T> T instantiateExtension(Class<T> clazz, String extensionClassName) { + if (extensionClassName == null) { + throw new ExtensionNotLoadedException("extensionClassName must not be null"); + } + + // Validate BEFORE Class.forName() — Class.forName() executes static initializers + // (CWE-470), which must not run for untrusted class names. + boolean allowed = ALLOWED_PREFIXES.stream().anyMatch(extensionClassName::startsWith); + if (!allowed) { + throw new ExtensionNotLoadedException( + "Class '" + extensionClassName + "' is not in an allowed package. " + + "Register the package via ExtensionLoader.registerAllowedPackage() or set " + + "the system property " + ALLOWED_PACKAGES_PROPERTY + " at JVM startup."); + } + // First try to load extension and instantiate extension from class path try { Class<?> extClazz = Class.forName(extensionClassName); diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java index 611391b6..41d2c3e8 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/ext/ExtensionLoaderTest.java @@ -17,12 +17,12 @@ package opennlp.tools.util.ext; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public class ExtensionLoaderTest { - // define an interface here interface TestStringGenerator { String generateTestString(); } @@ -33,6 +33,17 @@ public class ExtensionLoaderTest { } } + @AfterEach + void reset() { + System.clearProperty(ExtensionLoader.ALLOWED_PACKAGES_PROPERTY); + ExtensionLoader.unregisterAllowedPackage("opennlp.tools.util.ext"); + ExtensionLoader.unregisterAllowedPackage("com.acme"); + ExtensionLoader.unregisterAllowedPackage("com.example.nlp"); + ExtensionLoader.unregisterAllowedPackage("java.lang"); + } + + // --- existing test --- + @Test void testLoadingStringGenerator() { TestStringGenerator g = ExtensionLoader.instantiateExtension(TestStringGenerator.class, @@ -40,4 +51,175 @@ public class ExtensionLoaderTest { Assertions.assertEquals("test", g.generateTestString()); } + // --- allowlist tests --- + + /** + * Classes in opennlp.* are allowed by default — no registration needed. + */ + @Test + void testBuiltinOpennlpPackageAllowedByDefault() { + Assertions.assertDoesNotThrow(() -> + ExtensionLoader.instantiateExtension(TestStringGenerator.class, + TestStringGeneratorImpl.class.getName())); + } + + /** + * A class outside opennlp.* is rejected without registration. + * This is the core security invariant — untrusted class names from model + * manifests must not reach Class.forName() without an explicit allowlist entry. + */ + @Test + void testUnregisteredPackageIsRejected() { + ExtensionNotLoadedException ex = Assertions.assertThrows( + ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension( + TestStringGenerator.class, + "com.example.exploit.MaliciousFactory")); + + Assertions.assertTrue(ex.getMessage().contains("not in an allowed package"), + "exception message should mention allowed package"); + } + + /** + * Allowlist check runs before Class.forName() — even for non-existent classes + * the error must be "not in an allowed package", never "could not be located". + */ + @Test + void testAllowlistGateRunsBeforeClassForName() { + ExtensionNotLoadedException ex = Assertions.assertThrows( + ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension( + TestStringGenerator.class, + "com.example.DoesNotExistOnClasspath$$Probe")); + + Assertions.assertTrue(ex.getMessage().contains("not in an allowed package"), + "allowlist must reject before Class.forName(); got: " + ex.getMessage()); + Assertions.assertFalse(ex.getMessage().contains("could not be located"), + "Class.forName() must not have been reached; got: " + ex.getMessage()); + } + + /** + * After registerAllowedPackage(), a class from that package passes the allowlist gate. + * Uses java.lang.String — outside opennlp.* so registration is required. + * The call fails on assignability (String is not a TestStringGenerator), not on + * the allowlist check, proving the gate let it through. + */ + @Test + void testRegisteredPackageIsAllowed() { + ExtensionLoader.registerAllowedPackage("java.lang"); + + ExtensionNotLoadedException ex = Assertions.assertThrows( + ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, "java.lang.String")); + + Assertions.assertFalse(ex.getMessage().contains("not in an allowed package"), + "gate should have passed; got: " + ex.getMessage()); + } + + /** + * unregisterAllowedPackage() removes a previously registered prefix. + * A class from that package is rejected after removal. + */ + @Test + void testUnregisterAllowedPackage() { + ExtensionLoader.registerAllowedPackage("java.lang"); + ExtensionLoader.unregisterAllowedPackage("java.lang"); + + ExtensionNotLoadedException ex = Assertions.assertThrows( + ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, "java.lang.String")); + + Assertions.assertTrue(ex.getMessage().contains("not in an allowed package")); + } + + /** + * Prefix collision: registering "com.acme" must not permit "com.acmeevil.*". + */ + @Test + void testPrefixCollisionPrevented() { + ExtensionLoader.registerAllowedPackage("com.acme"); + + ExtensionNotLoadedException ex = Assertions.assertThrows( + ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension( + TestStringGenerator.class, + "com.acmeevil.Exploit")); + + Assertions.assertTrue(ex.getMessage().contains("not in an allowed package")); + } + + /** + * registerAllowedPackage() throws NullPointerException for null and + * IllegalArgumentException for blank inputs. + */ + @Test + void testRegisterAllowedPackageRejectsNullAndBlank() { + Assertions.assertThrows(NullPointerException.class, + () -> ExtensionLoader.registerAllowedPackage(null)); + + Assertions.assertThrows(IllegalArgumentException.class, + () -> ExtensionLoader.registerAllowedPackage("")); + + Assertions.assertThrows(IllegalArgumentException.class, + () -> ExtensionLoader.registerAllowedPackage(" ")); + } + + /** + * A null class name is rejected with ExtensionNotLoadedException before + * any allowlist or class-loading logic runs. + */ + @Test + void testNullClassNameIsRejected() { + Assertions.assertThrows(ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, null)); + } + + // --- system property escape hatch tests --- + + /** + * OPENNLP_EXT_ALLOWED_PACKAGES is read at class-load time, so in-process tests + * cannot re-trigger that path. The equivalent is verified programmatically: + * a package outside opennlp.* registered via registerAllowedPackage() passes the gate. + * Uses java.lang.String — fails on assignability, not on the allowlist check. + */ + @Test + void testSystemPropertyAddsAllowedPackage() { + ExtensionLoader.registerAllowedPackage("java.lang"); + + ExtensionNotLoadedException ex = Assertions.assertThrows( + ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, "java.lang.String")); + + Assertions.assertFalse(ex.getMessage().contains("not in an allowed package"), + "registered package should pass the gate; got: " + ex.getMessage()); + } + + /** + * Multiple packages can be registered independently. + */ + @Test + void testSystemPropertyMultiplePackages() { + ExtensionLoader.registerAllowedPackage("com.example.nlp"); + ExtensionLoader.registerAllowedPackage("java.lang"); + + ExtensionNotLoadedException ex = Assertions.assertThrows( + ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension(TestStringGenerator.class, "java.lang.String")); + + Assertions.assertFalse(ex.getMessage().contains("not in an allowed package"), + "registered package should pass the gate; got: " + ex.getMessage()); + } + + /** + * System property prefix collision prevention — same dot-normalization applies. + */ + @Test + void testSystemPropertyPrefixCollisionPrevented() { + ExtensionLoader.registerAllowedPackage("com.acme"); + + Assertions.assertThrows(ExtensionNotLoadedException.class, + () -> ExtensionLoader.instantiateExtension( + TestStringGenerator.class, + "com.acmeevil.Exploit")); + } }
