This is an automated email from the ASF dual-hosted git repository. tballison pushed a commit to branch TIKA-4723 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 095d1cb3e4555a41460a88fa282887db61e9a116 Author: tallison <[email protected]> AuthorDate: Tue May 12 12:48:54 2026 -0400 TIKA-4723 - slim down grpc and fix several other release changes in 4.x --- pom.xml | 13 +++-- .../apache/tika/pipes/grpc/TikaGrpcServerImpl.java | 16 +++++++ .../tika-parser-scientific-package/pom.xml | 55 ++++++++++++++++++++++ .../tika-parser-sqlite3-package/pom.xml | 50 ++++++++++++++++++++ .../tika-parser-nlp-package/pom.xml | 10 ++++ tika-serialization/pom.xml | 6 --- 6 files changed, 137 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index db31dcbf72..ce97d63c90 100644 --- a/pom.xml +++ b/pom.xml @@ -118,14 +118,13 @@ <fileset dir="${basedir}"> <include name="CHANGES.txt" /> <include name="target/*-src.zip*" /> - <include name="tika-parsers/tika-parsers-extended/tika-parser-scientific-package/target/tika-parser-scientific-package-${project.version}.jar*" /> - <include name="tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package/target/tika-parser-sqlite3-package-${project.version}.jar*" /> - <include name="tika-parsers/tika-parsers-ml/tika-parser-nlp-package/target/tika-parser-nlp-package-${project.version}.jar*" /> - <include name="tika-app/target/tika-app-${project.version}.jar*" /> - <include name="tika-server/tika-server-standard/target/tika-server-standard-${project.version}.jar*" /> - <include name="tika-server/tika-server-standard/target/tika-server-standard-${project.version}-bin.tgz*" /> + <include name="tika-parsers/tika-parsers-extended/tika-parser-scientific-package/target/tika-parser-scientific-package-${project.version}-shaded.jar*" /> + <include name="tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package/target/tika-parser-sqlite3-package-${project.version}-shaded.jar*" /> + <include name="tika-parsers/tika-parsers-ml/tika-parser-nlp-package/target/tika-parser-nlp-package-${project.version}-shaded.jar*" /> + <include name="tika-app/target/tika-app-${project.version}.zip*" /> <include name="tika-server/tika-server-standard/target/tika-server-standard-${project.version}-bin.zip*" /> - <include name="tika-eval/tika-eval-app/target/tika-eval-app-${project.version}.jar*" /> + <include name="tika-eval/tika-eval-app/target/tika-eval-app-${project.version}.zip*" /> + <include name="tika-pipes/tika-pipes-plugins/*/target/tika-pipes-*-${project.version}.zip*" /> </fileset> </copy> <checksum algorithm="SHA-512" fileext=".sha512"> diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java index c0b2d6b4d6..60cdcefa41 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java @@ -130,6 +130,22 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { pluginManager = new org.pf4j.DefaultPluginManager(); } + // Fail fast if no plugins were loaded. tika-grpc requires at least one + // pf4j plugin (a fetcher and typically an emitter) to be useful; + // starting up with an empty plugins state leaves the server running + // but every RPC would fail with "fetcher type unknown". Better to + // refuse to start and tell the user exactly what to do. + if (pluginManager.getPlugins().isEmpty()) { + throw new TikaConfigException( + "tika-grpc requires at least one tika-pipes plugin to be loaded, " + + "but none were found. Place " + + "tika-pipes-<plugin>-" + getClass().getPackage().getImplementationVersion() + + ".zip files in a `plugins/` directory next to tika-grpc.jar " + + "(or configure `plugin-roots` in your tika config). " + + "Plugin zips are published at " + + "https://downloads.apache.org/tika/<version>/."); + } + this.configStore = createConfigStore(); fetcherManager = FetcherManager.load(pluginManager, tikaJsonConfig, true, this.configStore); diff --git a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/pom.xml b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/pom.xml index 1c909a1a8a..fd10604144 100644 --- a/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/pom.xml +++ b/tika-parsers/tika-parsers-extended/tika-parser-scientific-package/pom.xml @@ -60,6 +60,61 @@ </archive> </configuration> </plugin> + <!-- + Build a shaded (fat) jar alongside the slim main artifact. The shade + output is written to ${project.build.directory}/<artifact>-<v>-shaded.jar + and is NOT attached to the Maven project, so `mvn deploy` only + uploads the slim main jar to Maven Central. The shaded jar exists on + disk for Apache dist staging (picked up by the apache-release antrun + copy step in the root pom). Same Central/dist separation that + TIKA-4723 set up for tika-pipes plugin zips. + --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>${maven.shade.version}</version> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</outputFile> + <shadedArtifactAttached>false</shadedArtifactAttached> + <createDependencyReducedPom>false</createDependencyReducedPom> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>module-info.class</exclude> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + <exclude>META-INF/DEPENDENCIES</exclude> + <exclude>META-INF/MANIFEST.MF</exclude> + <exclude>META-INF/LICENSE.md</exclude> + <exclude>META-INF/NOTICE.md</exclude> + <!-- clutter not needed in jar --> + <exclude>resources/grib1/nasa/README*.pdf</exclude> + </excludes> + </filter> + </filters> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"> + <addHeader>false</addHeader> + </transformer> + <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer" /> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> + <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> + <resource>META-INF/LICENSE</resource> + <file>target/classes/META-INF/LICENSE</file> + </transformer> + </transformers> + </configuration> + </execution> + </executions> + </plugin> <plugin> <groupId>org.apache.rat</groupId> <artifactId>apache-rat-plugin</artifactId> diff --git a/tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package/pom.xml b/tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package/pom.xml index 072e9248e7..ec7d8dc82e 100644 --- a/tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package/pom.xml +++ b/tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package/pom.xml @@ -56,6 +56,56 @@ </execution> </executions> </plugin> + <!-- + Build a shaded (fat) jar alongside the slim main artifact. Output is + ${project.build.directory}/<artifact>-<v>-shaded.jar and is NOT + attached to the Maven project — `mvn deploy` only uploads the slim + main jar to Maven Central; the shaded jar is staged to Apache dist + by the apache-release antrun copy step in the root pom. Same + Central/dist separation that TIKA-4723 set up for tika-pipes + plugin zips. + --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>${maven.shade.version}</version> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</outputFile> + <shadedArtifactAttached>false</shadedArtifactAttached> + <createDependencyReducedPom>false</createDependencyReducedPom> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + <exclude>META-INF/DEPENDENCIES</exclude> + <exclude>META-INF/MANIFEST.MF</exclude> + </excludes> + </filter> + </filters> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"> + <addHeader>false</addHeader> + </transformer> + <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer" /> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> + <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer"> + <resource>META-INF/LICENSE</resource> + <file>target/classes/META-INF/LICENSE</file> + </transformer> + </transformers> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-package/pom.xml b/tika-parsers/tika-parsers-ml/tika-parser-nlp-package/pom.xml index 28201251bc..463a41a46b 100644 --- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-package/pom.xml +++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-package/pom.xml @@ -64,6 +64,16 @@ <goal>shade</goal> </goals> <configuration> + <!-- + Direct the shaded jar to a `-shaded.jar` filename so the slim + main artifact stays intact for Maven Central, and + shadedArtifactAttached=false keeps the fat jar out of the + Maven artifact set entirely. The fat jar exists on disk for + Apache dist staging only. Same Central/dist separation + TIKA-4723 set up for tika-pipes plugin zips. + --> + <outputFile>${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar</outputFile> + <shadedArtifactAttached>false</shadedArtifactAttached> <createDependencyReducedPom> false </createDependencyReducedPom> diff --git a/tika-serialization/pom.xml b/tika-serialization/pom.xml index e70491506b..d9345490c1 100644 --- a/tika-serialization/pom.xml +++ b/tika-serialization/pom.xml @@ -62,12 +62,6 @@ <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-databind</artifactId> </dependency> - <dependency> - <groupId>org.projectlombok</groupId> - <artifactId>lombok</artifactId> - <version>${lombok.version}</version> - <scope>provided</scope> - </dependency> <!-- Test dependencies --> <dependency> <groupId>org.junit.jupiter</groupId>
