This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new ffd712980b TIKA-4750 - improve error msg when component not on classpath (#2868)
ffd712980b is described below

commit ffd712980b6a30a8152a961ebb7ffa1c353754a7
Author: Tim Allison <[email protected]>
AuthorDate: Fri Jun 5 08:49:47 2026 -0400

    TIKA-4750 - improve error msg when component not on classpath (#2868)
---
 .../tika/serialization/ComponentNameResolver.java  | 49 ++++++++++++++++++++--
 .../serialization/ComponentNameResolverTest.java   | 45 ++++++++++++++++++++
 2 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ComponentNameResolver.java b/tika-serialization/src/main/java/org/apache/tika/serialization/ComponentNameResolver.java
index b1e1d6673a..0198815344 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/ComponentNameResolver.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/ComponentNameResolver.java
@@ -21,6 +21,7 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
+import java.util.TreeSet;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.tika.config.loader.ComponentInfo;
@@ -113,10 +114,50 @@ public final class ComponentNameResolver {
                 }
             }
         }
-        throw new ClassNotFoundException(
-                "Component '" + name + "' is not registered. " +
-                "Components must be registered via @TikaComponent annotation or .idx file. " +
-                "Arbitrary class names are not allowed for security reasons.");
+        throw new ClassNotFoundException(unregisteredMessage(name));
+    }
+
+    /**
+     * Builds a diagnostic message for an unregistered component name. It calls out the
+     * two usual causes -- the name is misspelled, or the module that provides it is not
+     * on the classpath (optional components such as the Tess4J OCR parser ship in
+     * separate jars that must be added explicitly) -- and lists the names that
+     * <em>are</em> registered so the caller can find the right one (or notice that
+     * nothing registered, which means no {@code .idx} files were on the classpath).
+     */
+    private static String unregisteredMessage(String name) {
+        TreeSet<String> known = new TreeSet<>();
+        for (ComponentRegistry registry : REGISTRIES.values()) {
+            known.addAll(registry.getAllComponents().keySet());
+        }
+        StringBuilder sb = new StringBuilder()
+                .append("Component '").append(name).append("' is not registered. ")
+                .append("Either the name is misspelled, or the module that provides it is ")
+                .append("not on the classpath -- optional components (for example the Tess4J ")
+                .append("OCR parser in tika-parser-tess4j-module) ship as separate jars that ")
+                .append("must be added explicitly. ");
+        if (known.isEmpty()) {
+            sb.append("No components are currently registered "
+                    + "(no META-INF/tika/*.idx files were found on the classpath). ");
+        } else {
+            sb.append(known.size()).append(" registered component(s): ");
+            int shown = 0;
+            int cap = 50;
+            for (String registered : known) {
+                if (shown == cap) {
+                    sb.append(", ...");
+                    break;
+                }
+                if (shown > 0) {
+                    sb.append(", ");
+                }
+                sb.append(registered);
+                shown++;
+            }
+            sb.append(". ");
+        }
+        sb.append("Arbitrary class names are not allowed for security reasons.");
+        return sb.toString();
     }
 
     /**
diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/ComponentNameResolverTest.java b/tika-serialization/src/test/java/org/apache/tika/serialization/ComponentNameResolverTest.java
new file mode 100644
index 0000000000..40dd3b9764
--- /dev/null
+++ b/tika-serialization/src/test/java/org/apache/tika/serialization/ComponentNameResolverTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.serialization;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+public class ComponentNameResolverTest {
+
+    /**
+     * Resolving an unknown component name must produce an actionable message: it should
+     * call out the two usual causes (a typo, or the providing module not being on the
+     * classpath -- the TIKA-4750 scenario for tess4j-parser) rather than just stating
+     * the registration rule.
+     */
+    @Test
+    public void unregisteredComponentGivesActionableMessage() {
+        ClassNotFoundException e = assertThrows(ClassNotFoundException.class,
+                () -> ComponentNameResolver.resolveClass(
+                        "definitely-not-a-real-parser-xyz", getClass().getClassLoader()));
+        String msg = e.getMessage();
+        assertTrue(msg.contains("definitely-not-a-real-parser-xyz"), msg);
+        assertTrue(msg.contains("misspelled"), msg);
+        assertTrue(msg.contains("not on the classpath"), msg);
+        // names the opt-in-module cause concretely so users know what to add
+        assertTrue(msg.contains("tika-parser-tess4j-module"), msg);
+        assertTrue(msg.contains("Arbitrary class names are not allowed"), msg);
+    }
+}

Reply via email to