This is an automated email from the ASF dual-hosted git repository.
tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 4f6ad8b0f3 TIKA-4739 (#2837)
4f6ad8b0f3 is described below
commit 4f6ad8b0f3c98c47ed87c92c103999f0d30a06ea
Author: Tim Allison <[email protected]>
AuthorDate: Wed May 27 10:01:51 2026 -0400
TIKA-4739 (#2837)
---
.../advanced/integration-testing/tika-app.adoc | 8 +-
.../migration-to-4x/migrating-tika-server-4x.adoc | 10 +-
.../pages/migration-to-4x/migrating-to-4x.adoc | 4 +-
docs/modules/ROOT/pages/pipes/configuration.adoc | 4 -
.../apache/tika/cli/XmlToJsonConfigConverter.java | 8 +-
.../tika/cli/XmlToJsonConfigConverterTest.java | 6 +-
.../test/resources/configs/config-template.json | 2 -
.../src/test/resources/configs/tika-config2.json | 2 +-
.../test/resources/tika-config-ignite-local.json | 20 +-
.../src/test/resources/tika-config-ignite.json | 20 +-
.../src/test/resources/s3/tika-config-s3.json | 2 +-
.../resources/configs/tika-config-rendering.json | 2 +-
.../test/resources/configs/tika-libpst-config.json | 2 +-
.../resources/configs/tika-libpst-eml-config.json | 2 +-
.../org/apache/tika/async/cli/PluginsWriter.java | 207 +++++++++++++++------
.../configs/config-content-only-default.json | 2 -
.../test/resources/configs/config-template.json | 2 -
tika-pipes/tika-pipes-core/pom.xml | 9 +
.../tika/pipes/core/AbstractComponentManager.java | 15 ++
.../org/apache/tika/pipes/core/PipesConfig.java | 26 +--
.../tika/pipes/core/config/ConfigMerger.java | 3 -
.../tika/pipes/core/config/ConfigOverrides.java | 14 +-
.../tika/pipes/core/config/ConfigMergerTest.java | 3 +-
.../core/testutil/AbstractConfigExamplesTest.java | 89 +++++++++
.../apache/tika/pipes/fork/PipesForkParser.java | 1 -
.../tika/pipes/fork/PipesForkParserConfig.java | 11 --
tika-pipes/tika-pipes-plugins/pom.xml | 7 +
.../pipes/atlassianjwt/ConfigExamplesTest.java | 33 +---
.../tika/pipes/azblob/ConfigExamplesTest.java | 71 ++-----
.../apache/tika/pipes/csv/ConfigExamplesTest.java | 33 +---
.../apache/tika/pipes/es/ConfigExamplesTest.java | 63 ++-----
.../apache/tika/pipes/fs/ConfigExamplesTest.java | 28 +--
.../config-examples/file-system-emitter.json | 7 +-
.../config-examples/file-system-fetcher.json | 7 +-
.../config-examples/file-system-pipeline.json | 14 +-
.../apache/tika/pipes/gcs/ConfigExamplesTest.java | 53 +-----
.../tika/pipes/googledrive/ConfigExamplesTest.java | 33 +---
.../apache/tika/pipes/http/ConfigExamplesTest.java | 34 +---
.../apache/tika/pipes/jdbc/ConfigExamplesTest.java | 52 +-----
.../apache/tika/pipes/json/ConfigExamplesTest.java | 33 +---
.../tika/pipes/kafka/ConfigExamplesTest.java | 51 +----
.../pipes/microsoftgraph/ConfigExamplesTest.java | 33 +---
.../tika/pipes/opensearch/ConfigExamplesTest.java | 50 +----
.../apache/tika/pipes/s3/ConfigExamplesTest.java | 53 +-----
.../apache/tika/pipes/solr/ConfigExamplesTest.java | 52 +-----
.../config/loader/AbstractSpiComponentLoader.java | 45 ++++-
.../tika/config/loader/ComponentInstantiator.java | 14 +-
.../apache/tika/config/loader/ParserLoader.java | 7 +
.../apache/tika/config/loader/TikaJsonConfig.java | 2 +-
.../apache/tika/config/loader/TikaLoaderTest.java | 115 +++++++++++-
50 files changed, 581 insertions(+), 783 deletions(-)
diff --git a/docs/modules/ROOT/pages/advanced/integration-testing/tika-app.adoc
b/docs/modules/ROOT/pages/advanced/integration-testing/tika-app.adoc
index 22eaad3e85..12cbbe3f7b 100644
--- a/docs/modules/ROOT/pages/advanced/integration-testing/tika-app.adoc
+++ b/docs/modules/ROOT/pages/advanced/integration-testing/tika-app.adoc
@@ -291,10 +291,14 @@ Create `/tmp/tika-app-test/my-config.json`:
"emitterId": "fse"
}
},
+ "parse-context": {
+ "timeout-limits": {
+ "progressTimeoutMillis": 60000
+ }
+ },
"pipes": {
"parseMode": "RMETA",
- "numClients": 2,
- "timeoutMillis": 60000
+ "numClients": 2
},
"plugin-roots": "/tmp/tika-app-test/plugins"
}
diff --git
a/docs/modules/ROOT/pages/migration-to-4x/migrating-tika-server-4x.adoc
b/docs/modules/ROOT/pages/migration-to-4x/migrating-tika-server-4x.adoc
index 7ac5e0914b..2837be74b5 100644
--- a/docs/modules/ROOT/pages/migration-to-4x/migrating-tika-server-4x.adoc
+++ b/docs/modules/ROOT/pages/migration-to-4x/migrating-tika-server-4x.adoc
@@ -101,7 +101,7 @@ The `/tika` endpoint no longer routes based on `Accept`
headers. Use explicit pa
The following `TikaServerConfig` options have been removed:
-* `taskTimeoutMillis` - Now configured via `pipes.timeoutMillis`
+* `taskTimeoutMillis` - Now configured via
`parse-context.timeout-limits.progressTimeoutMillis` (and optionally
`totalTaskTimeoutMillis`); see xref:pipes/timeouts.adoc[Timeouts].
* `taskPulseMillis` - No longer needed
* `minimumTimeoutMillis` - No longer needed
@@ -125,9 +125,13 @@ All tika-server configurations must now include a `pipes`
section and a `file-sy
}
}
},
+ "parse-context": {
+ "timeout-limits": {
+ "progressTimeoutMillis": 30000
+ }
+ },
"pipes": {
- "numClients": 2,
- "timeoutMillis": 30000
+ "numClients": 2
},
"plugin-roots": "path/to/plugins"
}
diff --git a/docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
b/docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
index 34ef91d778..830e63104a 100644
--- a/docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
+++ b/docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
@@ -83,7 +83,7 @@ The converter currently supports:
NOTE: When you configure a parser with specific settings in JSON, the loader
automatically
excludes it from SPI loading. The parser (e.g., `pdf-parser`) is not even
instantiated in
-`default-parser` if there's a definition for it in the tika-config.json.
Explicit `_exclude`
+`default-parser` if there's a definition for it in the tika-config.json.
Explicit `exclude`
directives are only needed when you want to disable a parser entirely without
providing
custom configuration.
@@ -103,7 +103,7 @@ custom configuration.
|Exclusions
|`<parser-exclude class="..."/>`
-|`"_exclude": ["component-name"]` (only needed to disable a parser entirely)
+|`"exclude": ["component-name"]` (only needed to disable a parser entirely)
|===
=== Limitations
diff --git a/docs/modules/ROOT/pages/pipes/configuration.adoc
b/docs/modules/ROOT/pages/pipes/configuration.adoc
index f0a004ae98..e4e3d0b1c2 100644
--- a/docs/modules/ROOT/pages/pipes/configuration.adoc
+++ b/docs/modules/ROOT/pages/pipes/configuration.adoc
@@ -77,10 +77,6 @@ See also xref:pipes/timeouts.adoc[Timeouts] for the full
timeout model.
|`1000`
|Interval (ms) between heartbeats sent from the forked process. Must be
significantly less than `socketTimeoutMs`.
-|`startupTimeoutMillis`
-|`240000`
-|Maximum time (ms) to wait for a forked process to start up.
-
|`shutdownClientAfterMillis`
|`300000`
|Shut down an idle forked process after this many milliseconds of inactivity.
diff --git
a/tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java
b/tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java
index 9be0ee12ee..fc43b553d5 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java
@@ -118,7 +118,7 @@ import org.apache.tika.utils.XMLReaderUtils;
* },
* {
* "default-parser": {
- * "_exclude": ["pdf-parser"]
+ * "exclude": ["pdf-parser"]
* }
* }
* ]
@@ -257,9 +257,9 @@ public class XmlToJsonConfigConverter {
for (Map<String, Object> parserEntry : parsersList) {
if (parserEntry.containsKey("default-parser")) {
Map<?, ?> config = (Map<?, ?>)
parserEntry.get("default-parser");
- if (config.containsKey("_exclude")) {
+ if (config.containsKey("exclude")) {
@SuppressWarnings("unchecked")
- List<String> excludes = (List<String>)
config.get("_exclude");
+ List<String> excludes = (List<String>)
config.get("exclude");
excludedParsers.addAll(excludes);
}
}
@@ -364,7 +364,7 @@ public class XmlToJsonConfigConverter {
}
if (excludes != null && !excludes.isEmpty()) {
- config.put("_exclude", excludes);
+ config.put("exclude", excludes);
}
Map<String, Object> result = new LinkedHashMap<>();
diff --git
a/tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java
b/tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java
index e1a31e51ab..ec138fb884 100644
---
a/tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java
+++
b/tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java
@@ -89,8 +89,8 @@ public class XmlToJsonConfigConverterTest {
String json = new String(Files.readAllBytes(jsonPath),
StandardCharsets.UTF_8);
- // Verify exclude is at the correct level (with underscore prefix)
- assertTrue(json.contains("\"_exclude\""), "Should have _exclude
array");
+ // Verify exclude is at the correct level (no underscore prefix; SPI
loader reads "exclude")
+ assertTrue(json.contains("\"exclude\""), "Should have exclude array");
assertFalse(json.contains("\"_decorate\""), "_decorate should not be
used for parser excludes");
assertTrue(json.contains("\"jsoup-parser\""), "Should exclude
jsoup-parser");
assertTrue(json.contains("\"pdf-parser\""), "Should exclude
pdf-parser");
@@ -218,7 +218,7 @@ public class XmlToJsonConfigConverterTest {
String json = new String(Files.readAllBytes(jsonPath),
StandardCharsets.UTF_8);
// Verify the JSON still contains the exclusions (we don't remove
them, just inform)
- assertTrue(json.contains("\"_exclude\""), "Should still have _exclude
array");
+ assertTrue(json.contains("\"exclude\""), "Should still have exclude
array");
assertTrue(json.contains("\"pdf-parser\""), "Should have pdf-parser
configured");
assertTrue(json.contains("\"jsoup-parser\""), "Should have
jsoup-parser configured");
diff --git a/tika-app/src/test/resources/configs/config-template.json
b/tika-app/src/test/resources/configs/config-template.json
index 1cce9b9de9..d5e49dad9e 100644
--- a/tika-app/src/test/resources/configs/config-template.json
+++ b/tika-app/src/test/resources/configs/config-template.json
@@ -45,8 +45,6 @@
"queueSize": 10000,
"numEmitters": 1,
"emitIntermediateResults": false,
- "startupTimeoutMillis": 240000,
- "sleepOnStartupTimeoutMillis": 240000,
"shutdownClientAfterMillis": 300000,
"numClients": 4,
"maxFilesProcessedPerProcess": 10000,
diff --git a/tika-app/src/test/resources/configs/tika-config2.json
b/tika-app/src/test/resources/configs/tika-config2.json
index d25f49d852..e2de71710b 100644
--- a/tika-app/src/test/resources/configs/tika-config2.json
+++ b/tika-app/src/test/resources/configs/tika-config2.json
@@ -2,7 +2,7 @@
"parsers": [
{
"default-parser": {
- "_exclude": ["executable-parser"],
+ "exclude": ["executable-parser"],
"_mime-exclude": ["image/jpeg", "application/pdf"]
}
},
diff --git
a/tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite-local.json
b/tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite-local.json
index a8e19f8bb0..93499c5669 100644
--- a/tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite-local.json
+++ b/tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite-local.json
@@ -30,23 +30,5 @@
"--add-opens=java.management/sun.management=ALL-UNNAMED",
"--add-opens=java.desktop/java.awt.font=ALL-UNNAMED"
]
- },
- "fetchers": [
- {
- "fs": {
- "staticFetcher": {
- "basePath": "target/govdocs1"
- }
- }
- }
- ],
- "emitters": [
- {
- "fs": {
- "defaultEmitter": {
- "basePath": "/tmp/output"
- }
- }
- }
- ]
+ }
}
diff --git
a/tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite.json
b/tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite.json
index e39b9fb2a2..3e08f8e1df 100644
--- a/tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite.json
+++ b/tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite.json
@@ -30,23 +30,5 @@
"--add-opens=java.management/sun.management=ALL-UNNAMED",
"--add-opens=java.desktop/java.awt.font=ALL-UNNAMED"
]
- },
- "fetchers": [
- {
- "fs": {
- "staticFetcher": {
- "basePath": "/tika/govdocs1"
- }
- }
- }
- ],
- "emitters": [
- {
- "fs": {
- "defaultEmitter": {
- "basePath": "/tmp/output"
- }
- }
- }
- ]
+ }
}
diff --git
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
index bca9d1a664..03b18eec91 100644
---
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
+++
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
@@ -2,7 +2,7 @@
"parsers": [
{
"default-parser": {
- "_exclude": [
+ "exclude": [
"tesseract-ocr-parser"
]
}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/resources/configs/tika-config-rendering.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/resources/configs/tika-config-rendering.json
index a77e805e53..e8376fd8de 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/resources/configs/tika-config-rendering.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/resources/configs/tika-config-rendering.json
@@ -2,7 +2,7 @@
"parsers": [
{
"default-parser": {
- "parser-excludes": ["pdf-parser"]
+ "exclude": ["pdf-parser"]
}
},
{
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-config.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-config.json
index a113eef1db..0c67173c7a 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-config.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-config.json
@@ -2,7 +2,7 @@
"parsers": [
{
"default-parser": {
- "_exclude": [
+ "exclude": [
"outlook-pst-parser",
"pst-mail-item-parser"
]
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-eml-config.json
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-eml-config.json
index 000e284592..571a9087f4 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-eml-config.json
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-eml-config.json
@@ -2,7 +2,7 @@
"parsers": [
{
"default-parser": {
- "_exclude": [
+ "exclude": [
"outlook-pst-parser",
"pst-mail-item-parser"
]
diff --git
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
index dbc3de3935..f3ddce13af 100644
---
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
+++
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
@@ -43,50 +43,27 @@ public class PluginsWriter {
}
void write(Path output) throws IOException {
- Path baseInput = StringUtils.isBlank(simpleAsyncConfig.getInputDir())
- ? Paths.get(".").toAbsolutePath()
- : Paths.get(simpleAsyncConfig.getInputDir());
- Path baseOutput = StringUtils.isBlank(simpleAsyncConfig.getOutputDir())
- ? null
- : Paths.get(simpleAsyncConfig.getOutputDir());
- if (Files.isRegularFile(baseInput)) {
+ boolean inputExplicit =
!StringUtils.isBlank(simpleAsyncConfig.getInputDir());
+ boolean outputExplicit =
!StringUtils.isBlank(simpleAsyncConfig.getOutputDir());
+
+ // -i / -o resolution. When unset they're null, in which case we don't
+ // override anything the user (or the post-merge placeholder sweep
+ // below) put in place.
+ Path baseInput = inputExplicit ?
Paths.get(simpleAsyncConfig.getInputDir()) : null;
+ if (baseInput != null && Files.isRegularFile(baseInput)) {
baseInput = baseInput.toAbsolutePath().getParent();
if (baseInput == null) {
throw new IllegalArgumentException("File must be at least one
directory below root");
}
}
+ Path baseOutput = outputExplicit
+ ? Paths.get(simpleAsyncConfig.getOutputDir())
+ : null;
try {
ObjectMapper objectMapper = TikaObjectMapperFactory.getMapper();
ObjectNode root = (ObjectNode) objectMapper.readTree(
getClass().getResourceAsStream("/config-template.json"));
- // Set fetcher basePath
- ObjectNode fetchers = (ObjectNode) root.get("fetchers");
- if (fetchers != null && fetchers.has("fsf")) {
- ObjectNode fsf = (ObjectNode) fetchers.get("fsf");
- if (fsf != null && fsf.has("file-system-fetcher")) {
- ObjectNode fsFetcher = (ObjectNode)
fsf.get("file-system-fetcher");
- fsFetcher.put("basePath",
baseInput.toAbsolutePath().toString());
- }
- }
-
- // Set emitter basePath
- ObjectNode emitters = (ObjectNode) root.get("emitters");
- if (baseOutput != null && emitters != null && emitters.has("fse"))
{
- ObjectNode fse = (ObjectNode) emitters.get("fse");
- if (fse != null && fse.has("file-system-emitter")) {
- ObjectNode fsEmitter = (ObjectNode)
fse.get("file-system-emitter");
- fsEmitter.put("basePath",
baseOutput.toAbsolutePath().toString());
- }
- }
-
- // Set pipes-iterator basePath
- ObjectNode pipesIterator = (ObjectNode) root.get("pipes-iterator");
- if (pipesIterator != null &&
pipesIterator.has("file-system-pipes-iterator")) {
- ObjectNode fsIterator = (ObjectNode)
pipesIterator.get("file-system-pipes-iterator");
- fsIterator.put("basePath",
baseInput.toAbsolutePath().toString());
- }
-
// Set plugin-roots
String pluginString;
if (!StringUtils.isBlank(simpleAsyncConfig.getPluginsDir())) {
@@ -100,18 +77,53 @@ public class PluginsWriter {
}
root.put("plugin-roots", pluginString);
- // If the user provided a -c config, merge their settings first.
- // This brings in parsers, parse-context, metadata-filters, and
- // optionally pipes config (e.g. forkedJvmArgs with log4j
settings).
+ // Merge user's --config first so the CLI overrides below land on
+ // the final merged document. Doing this in the other order means
+ // mergeUserConfig's shallow replace silently wipes any patch we
+ // applied before the merge — exactly the bug behind TIKA-4739
+ // ("-i/-o don't override basePath as documented").
if (!StringUtils.isBlank(simpleAsyncConfig.getTikaConfig())) {
Path userConfigPath =
Paths.get(simpleAsyncConfig.getTikaConfig());
JsonNode userRoot =
objectMapper.readTree(userConfigPath.toFile());
mergeUserConfig(root, (ObjectNode) userRoot);
}
- // Now apply CLI overrides on top of whatever pipes config exists.
- // This lets the user have forkedJvmArgs in their config (e.g.
log4j)
- // while still controlling numClients and Xmx from the command
line.
+ // Resolve any unfilled placeholders left over from
+ // config-template.json. These leak through when --config is
+ // supplied but the user's config does not redefine the relevant
+ // section (e.g. user overrides only `pipes` and inherits the
+ // template's `fetchers`). We replace only the literal placeholder
+ // strings, so a user-supplied real basePath is never trampled.
+ // Default to CWD; the explicit -i/-o overrides below will further
+ // refine when set.
+ String defaultBasePath =
Paths.get(".").toAbsolutePath().toString();
+ replaceFileSystemBasePathPlaceholder(root, "fetchers",
"file-system-fetcher",
+ "FETCHER_BASE_PATH", defaultBasePath);
+ replaceSingletonFileSystemBasePathPlaceholder(root,
"pipes-iterator",
+ "file-system-pipes-iterator", "FETCHER_BASE_PATH",
defaultBasePath);
+ replaceFileSystemBasePathPlaceholder(root, "emitters",
"file-system-emitter",
+ "EMITTER_BASE_PATH", defaultBasePath);
+
+ // Apply -i / -o on top of the merged document by component TYPE
+ // rather than hardcoded id ("fsf"/"fse"). This way users who
+ // renamed their filesystem fetcher/emitter still get the override,
+ // and non-filesystem fetchers/emitters (S3, GCS, etc.) are left
+ // untouched. baseInput/baseOutput are null when the user supplied
+ // --config without -i/-o, in which case their basePath values stay
+ // intact (post-merge they're either the user's real value or the
+ // CWD default just installed by the placeholder sweep above).
+ if (baseInput != null) {
+ patchFileSystemBasePath(root, "fetchers",
"file-system-fetcher",
+ baseInput.toAbsolutePath().toString());
+ patchSingletonFileSystemBasePath(root, "pipes-iterator",
+ "file-system-pipes-iterator",
baseInput.toAbsolutePath().toString());
+ }
+ if (baseOutput != null) {
+ patchFileSystemBasePath(root, "emitters",
"file-system-emitter",
+ baseOutput.toAbsolutePath().toString());
+ }
+
+ // CLI overrides on the pipes section.
ObjectNode pipesNode = root.has("pipes")
? (ObjectNode) root.get("pipes")
: objectMapper.createObjectNode();
@@ -142,23 +154,14 @@ public class PluginsWriter {
// For content-only mode, change the emitter file extension based
on handler type
if (simpleAsyncConfig.isContentOnly()) {
String ext =
getFileExtensionForHandlerType(simpleAsyncConfig.getHandlerType());
- if (emitters != null && emitters.has("fse")) {
- ObjectNode fse = (ObjectNode) emitters.get("fse");
- if (fse != null && fse.has("file-system-emitter")) {
- ObjectNode fsEmitter = (ObjectNode)
fse.get("file-system-emitter");
- fsEmitter.put("fileExtension", ext);
- }
- }
+ patchFileSystemField(root, "emitters", "file-system-emitter",
+ "fileExtension", ext);
}
// Override the emitter's onExists policy if set on the CLI
(--on-exists)
- if (!StringUtils.isBlank(simpleAsyncConfig.getOnExists())
- && emitters != null && emitters.has("fse")) {
- ObjectNode fse = (ObjectNode) emitters.get("fse");
- if (fse != null && fse.has("file-system-emitter")) {
- ObjectNode fsEmitter = (ObjectNode)
fse.get("file-system-emitter");
- fsEmitter.put("onExists", simpleAsyncConfig.getOnExists());
- }
+ if (!StringUtils.isBlank(simpleAsyncConfig.getOnExists())) {
+ patchFileSystemField(root, "emitters", "file-system-emitter",
+ "onExists", simpleAsyncConfig.getOnExists());
}
// Write timeout limits to parse-context if configured on CLI
@@ -178,6 +181,102 @@ public class PluginsWriter {
}
}
+ /**
+ * Sets {@code basePath} on every entry in an id-keyed section
+ * ({@code fetchers}, {@code emitters}) whose wrapper type matches
+ * {@code typeName}. Other component types in the section are left
+ * untouched so a config that mixes filesystem + S3 still works.
+ */
+ private static void patchFileSystemBasePath(ObjectNode root, String
section,
+ String typeName, String
basePath) {
+ patchFileSystemField(root, section, typeName, "basePath", basePath);
+ }
+
+ /**
+ * Sets a single field on every id-keyed entry in {@code section} whose
+ * wrapper type matches {@code typeName}.
+ */
+ private static void patchFileSystemField(ObjectNode root, String section,
+ String typeName, String field,
String value) {
+ JsonNode sectionNode = root.get(section);
+ if (sectionNode == null || !sectionNode.isObject()) {
+ return;
+ }
+ Iterator<Map.Entry<String, JsonNode>> ids = sectionNode.fields();
+ while (ids.hasNext()) {
+ Map.Entry<String, JsonNode> idEntry = ids.next();
+ JsonNode typed = idEntry.getValue();
+ if (typed.isObject() && typed.has(typeName)) {
+ ObjectNode target = (ObjectNode) typed.get(typeName);
+ target.put(field, value);
+ }
+ }
+ }
+
+ /**
+ * Sets {@code basePath} on a singleton section ({@code pipes-iterator})
+ * whose wrapper type matches {@code typeName}.
+ */
+ private static void patchSingletonFileSystemBasePath(ObjectNode root,
String section,
+ String typeName,
String basePath) {
+ JsonNode sectionNode = root.get(section);
+ if (sectionNode == null || !sectionNode.isObject() ||
!sectionNode.has(typeName)) {
+ return;
+ }
+ ObjectNode target = (ObjectNode) sectionNode.get(typeName);
+ target.put("basePath", basePath);
+ }
+
+ /**
+ * Replaces {@code basePath} with {@code replacement} for every id-keyed
+ * entry in {@code section} of wrapper type {@code typeName} whose
+ * current value is the literal {@code placeholder} string. Real
+ * user-supplied paths are left alone.
+ */
+ private static void replaceFileSystemBasePathPlaceholder(ObjectNode root,
String section,
+ String typeName,
String placeholder,
+ String
replacement) {
+ JsonNode sectionNode = root.get(section);
+ if (sectionNode == null || !sectionNode.isObject()) {
+ return;
+ }
+ Iterator<Map.Entry<String, JsonNode>> ids = sectionNode.fields();
+ while (ids.hasNext()) {
+ JsonNode typed = ids.next().getValue();
+ if (typed.isObject() && typed.has(typeName)) {
+ ObjectNode target = (ObjectNode) typed.get(typeName);
+ JsonNode current = target.get("basePath");
+ if (current != null && current.isTextual()
+ && placeholder.equals(current.asText())) {
+ target.put("basePath", replacement);
+ }
+ }
+ }
+ }
+
+ /**
+ * Replaces the singleton {@code basePath} placeholder under
+ * {@code section.typeName} only if its current value is the literal
+ * placeholder. Mirrors {@link #patchSingletonFileSystemBasePath} but
+ * preserves user-supplied real paths.
+ */
+ private static void
replaceSingletonFileSystemBasePathPlaceholder(ObjectNode root,
+ String
section,
+ String
typeName,
+ String
placeholder,
+ String
replacement) {
+ JsonNode sectionNode = root.get(section);
+ if (sectionNode == null || !sectionNode.isObject() ||
!sectionNode.has(typeName)) {
+ return;
+ }
+ ObjectNode target = (ObjectNode) sectionNode.get(typeName);
+ JsonNode current = target.get("basePath");
+ if (current != null && current.isTextual()
+ && placeholder.equals(current.asText())) {
+ target.put("basePath", replacement);
+ }
+ }
+
/**
* Merges user config fields into the auto-generated root.
* All user fields override the auto-generated template values.
diff --git
a/tika-pipes/tika-async-cli/src/test/resources/configs/config-content-only-default.json
b/tika-pipes/tika-async-cli/src/test/resources/configs/config-content-only-default.json
index 7b7849a7b5..7e79a48898 100644
---
a/tika-pipes/tika-async-cli/src/test/resources/configs/config-content-only-default.json
+++
b/tika-pipes/tika-async-cli/src/test/resources/configs/config-content-only-default.json
@@ -34,8 +34,6 @@
"queueSize": 10000,
"numEmitters": 1,
"emitIntermediateResults": false,
- "startupTimeoutMillis": 240000,
- "sleepOnStartupTimeoutMillis": 240000,
"shutdownClientAfterMillis": 300000,
"numClients": 2,
"maxFilesProcessedPerProcess": 10000,
diff --git
a/tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json
b/tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json
index 58c630f352..706bd3e027 100644
--- a/tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json
+++ b/tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json
@@ -34,8 +34,6 @@
"queueSize": 10000,
"numEmitters": 1,
"emitIntermediateResults": false,
- "startupTimeoutMillis": 240000,
- "sleepOnStartupTimeoutMillis": 240000,
"shutdownClientAfterMillis": 300000,
"numClients": 2,
"maxFilesProcessedPerProcess": 10000,
diff --git a/tika-pipes/tika-pipes-core/pom.xml
b/tika-pipes/tika-pipes-core/pom.xml
index a2f391f1c3..cd4d4d7653 100644
--- a/tika-pipes/tika-pipes-core/pom.xml
+++ b/tika-pipes/tika-pipes-core/pom.xml
@@ -106,6 +106,15 @@
</manifestEntries>
</archive>
</configuration>
+ <executions>
+ <execution>
+ <goals>
+ <!-- Publishes a test-jar so plugin modules can extend
+ AbstractConfigExamplesTest. -->
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/AbstractComponentManager.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/AbstractComponentManager.java
index 70de8b78bd..d6fbc04630 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/AbstractComponentManager.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/AbstractComponentManager.java
@@ -115,6 +115,21 @@ public abstract class AbstractComponentManager<T extends
TikaExtension,
Map<String, ExtensionConfig> configs = new HashMap<>();
if (configNode != null && !configNode.isNull()) {
+ // Strict shape check. The section must be a JSON object keyed by
+ // instance ID — e.g. {"my-fetcher": {"file-system-fetcher":
{...}}}.
+ // Without this, an array like
+ // "fetchers": [{"file-system-fetcher": {"id": "my-fetcher",
...}}]
+ // would be silently walked past (JsonNode.fields() on an ArrayNode
+ // returns an empty iterator), leaving the manager with no
registered
+ // components and the user with an "Available: []" error at lookup
+ // time instead of at load time.
+ if (!configNode.isObject()) {
+ throw new TikaConfigException(
+ "Invalid '" + getConfigKey() + "' configuration:
expected a JSON "
+ + "object keyed by instance ID, e.g.
{\"my-id\": {\"type-name\": "
+ + "{...config...}}}. Got " +
configNode.getNodeType() + ". "
+ + "(Array-style configurations are not
supported.)");
+ }
// Outer loop: iterate over instance IDs
Iterator<Map.Entry<String, JsonNode>> instanceFields =
configNode.fields();
while (instanceFields.hasNext()) {
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java
index 2f72cec73a..fd1d19d566 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java
@@ -27,8 +27,6 @@ import org.apache.tika.pipes.api.ParseMode;
public class PipesConfig {
- public static final long DEFAULT_STARTUP_TIMEOUT_MILLIS = 240000;
-
public static final long DEFAULT_SHUTDOWN_CLIENT_AFTER_MILLS = 300000;
public static final int DEFAULT_NUM_CLIENTS = 4;
@@ -58,8 +56,6 @@ public class PipesConfig {
private long socketTimeoutMs = DEFAULT_SOCKET_TIMEOUT_MS;
private long heartbeatIntervalMs = DEFAULT_HEARTBEAT_INTERVAL_MS;
- private long startupTimeoutMillis = DEFAULT_STARTUP_TIMEOUT_MILLIS;
- private long sleepOnStartupTimeoutMillis = DEFAULT_STARTUP_TIMEOUT_MILLIS;
private long shutdownClientAfterMillis =
DEFAULT_SHUTDOWN_CLIENT_AFTER_MILLS;
private int numClients = DEFAULT_NUM_CLIENTS;
@@ -132,7 +128,7 @@ public class PipesConfig {
* This configuration is used by both PipesServer (forking process) and
* AsyncProcessor (async processing). Some fields are specific to each:
* <ul>
- * <li>PipesServer uses: numClients, timeoutMillis,
directEmitThresholdBytes, etc.</li>
+ * <li>PipesServer uses: numClients, socketTimeoutMs,
directEmitThresholdBytes, etc.</li>
* <li>AsyncProcessor uses: emitWithinMillis, queueSize, numEmitters,
etc.</li>
* </ul>
* Unused fields in each context are simply ignored.
@@ -157,7 +153,8 @@ public class PipesConfig {
/**
* Socket timeout in milliseconds for reading from the forked process.
* If no data is received within this time, the connection is considered
timed out.
- * This is different from timeoutMillis which is the parse/processing
timeout.
+ * This is distinct from the parse/processing timeout, which lives on
+ * {@link org.apache.tika.config.TimeoutLimits} under {@code
parse-context.timeout-limits}.
* @param socketTimeoutMs
*/
public void setSocketTimeoutMs(long socketTimeoutMs) {
@@ -209,11 +206,6 @@ public class PipesConfig {
return forkedJvmArgs;
}
- public void setStartupTimeoutMillis(long startupTimeoutMillis) {
- this.startupTimeoutMillis = startupTimeoutMillis;
- }
-
-
/**
* Restart the forked PipesServer after it has processed this many files
to avoid
* slow-building memory leaks.
@@ -235,10 +227,6 @@ public class PipesConfig {
this.javaPath = javaPath;
}
- public long getStartupTimeoutMillis() {
- return startupTimeoutMillis;
- }
-
/**
* Get the emit strategy configuration.
*
@@ -257,14 +245,6 @@ public class PipesConfig {
this.emitStrategy = emitStrategy;
}
- public long getSleepOnStartupTimeoutMillis() {
- return sleepOnStartupTimeoutMillis;
- }
-
- public void setSleepOnStartupTimeoutMillis(long
sleepOnStartupTimeoutMillis) {
- this.sleepOnStartupTimeoutMillis = sleepOnStartupTimeoutMillis;
- }
-
public int getStaleFetcherTimeoutSeconds() {
return staleFetcherTimeoutSeconds;
}
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigMerger.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigMerger.java
index 33756dbd77..e4bf01846d 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigMerger.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigMerger.java
@@ -156,9 +156,6 @@ public class ConfigMerger {
if (pc.getNumClients() > 0) {
pipesNode.put("numClients", pc.getNumClients());
}
- if (pc.getStartupTimeoutMillis() > 0) {
- pipesNode.put("startupTimeoutMillis",
pc.getStartupTimeoutMillis());
- }
if (pc.getMaxFilesProcessedPerProcess() > 0) {
pipesNode.put("maxFilesProcessedPerProcess",
pc.getMaxFilesProcessedPerProcess());
}
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigOverrides.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigOverrides.java
index b3c1b23f4d..2c9e100ad5 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigOverrides.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigOverrides.java
@@ -148,15 +148,13 @@ public class ConfigOverrides {
*/
public static class PipesConfigOverride {
private final int numClients;
- private final long startupTimeoutMillis;
private final int maxFilesProcessedPerProcess;
private final List<String> forkedJvmArgs;
public PipesConfigOverride(int numClients,
- long startupTimeoutMillis, int
maxFilesProcessedPerProcess,
+ int maxFilesProcessedPerProcess,
List<String> forkedJvmArgs) {
this.numClients = numClients;
- this.startupTimeoutMillis = startupTimeoutMillis;
this.maxFilesProcessedPerProcess = maxFilesProcessedPerProcess;
this.forkedJvmArgs = forkedJvmArgs != null ?
new ArrayList<>(forkedJvmArgs) : new ArrayList<>();
@@ -166,10 +164,6 @@ public class ConfigOverrides {
return numClients;
}
- public long getStartupTimeoutMillis() {
- return startupTimeoutMillis;
- }
-
public int getMaxFilesProcessedPerProcess() {
return maxFilesProcessedPerProcess;
}
@@ -229,7 +223,6 @@ public class ConfigOverrides {
public Builder setPipesConfig(int numClients,
List<String> forkedJvmArgs) {
return setPipesConfig(numClients,
-
org.apache.tika.pipes.core.PipesConfig.DEFAULT_STARTUP_TIMEOUT_MILLIS,
org.apache.tika.pipes.core.PipesConfig.DEFAULT_MAX_FILES_PROCESSED_PER_PROCESS,
forkedJvmArgs);
}
@@ -238,16 +231,15 @@ public class ConfigOverrides {
* Set pipes configuration with all options.
*
* @param numClients number of forked JVM clients
- * @param startupTimeoutMillis startup timeout in milliseconds
* @param maxFilesProcessedPerProcess max files before process restart
* @param forkedJvmArgs JVM arguments for forked processes (may be
null)
* @return this builder
*/
public Builder setPipesConfig(int numClients,
- long startupTimeoutMillis, int
maxFilesProcessedPerProcess,
+ int maxFilesProcessedPerProcess,
List<String> forkedJvmArgs) {
this.pipesConfig = new PipesConfigOverride(numClients,
- startupTimeoutMillis, maxFilesProcessedPerProcess,
forkedJvmArgs);
+ maxFilesProcessedPerProcess, forkedJvmArgs);
return this;
}
diff --git a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/config/ConfigMergerTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/config/ConfigMergerTest.java
index a76e27ad8a..fea3fcbfae 100644
--- a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/config/ConfigMergerTest.java
+++ b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/config/ConfigMergerTest.java
@@ -209,7 +209,7 @@ public class ConfigMergerTest {
@Test
public void testFullPipesConfig() throws IOException {
ConfigOverrides overrides = ConfigOverrides.builder()
- .setPipesConfig(8, 300000, 5000, List.of("-Xmx1g"))
+ .setPipesConfig(8, 5000, List.of("-Xmx1g"))
.build();
ConfigMerger.MergeResult result = ConfigMerger.mergeOrCreate(null,
overrides);
@@ -219,7 +219,6 @@ public class ConfigMergerTest {
JsonNode pipes = root.get("pipes");
assertEquals(8, pipes.get("numClients").asInt());
- assertEquals(300000, pipes.get("startupTimeoutMillis").asLong());
assertEquals(5000, pipes.get("maxFilesProcessedPerProcess").asInt());
Files.deleteIfExists(result.configPath());
diff --git a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/testutil/AbstractConfigExamplesTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/testutil/AbstractConfigExamplesTest.java
new file mode 100644
index 0000000000..98144d71b2
--- /dev/null
+++ b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/testutil/AbstractConfigExamplesTest.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.core.testutil;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.loader.TikaLoader;
+
+/**
+ * Shared base class for plugin {@code ConfigExamplesTest}s. Loads JSON
+ * configuration examples from {@code /config-examples/} on the test
+ * classpath, validates that {@link TikaLoader} can parse them, and exposes
+ * helpers for drilling into the inner component config block.
+ */
+public abstract class AbstractConfigExamplesTest {
+
+ private static final String EXAMPLES_DIR = "/config-examples/";
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ @TempDir
+ protected Path tempDir;
+
+ /**
+ * Reads a JSON example from the {@code /config-examples/} classpath dir.
+ */
+ protected String readExample(String resourceName) throws Exception {
+ try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
+ assertNotNull(is, "Resource not found: " + EXAMPLES_DIR +
resourceName);
+ return new String(is.readAllBytes(), StandardCharsets.UTF_8);
+ }
+ }
+
+ /**
+ * Reads the named example, writes it to a temp file, and asserts that
+ * {@link TikaLoader#load(Path)} returns a non-null config.
+ */
+ protected void loadAndValidate(String resourceName) throws Exception {
+ String json = readExample(resourceName);
+ Path configFile = tempDir.resolve("tika-config.json");
+ Files.writeString(configFile, json, StandardCharsets.UTF_8);
+ assertNotNull(TikaLoader.load(configFile));
+ }
+
+ /**
+ * Returns the inner component-config node from a Tika pipes JSON document.
+ * <p>
+ * Tika pipes configs nest fetchers/emitters as
+ * {@code section -> id -> type -> {config}}, while pipes-iterators and
+ * reporters omit the id level: {@code section -> type -> {config}}. Pass
+ * {@code id == null} to skip that level.
+ */
+ protected JsonNode innerComponent(String json, String section, String id,
String type)
+ throws Exception {
+ JsonNode root = OBJECT_MAPPER.readTree(json);
+ JsonNode node = root.get(section);
+ assertNotNull(node, "Missing section '" + section + "' in JSON");
+ if (id != null) {
+ node = node.get(id);
+ assertNotNull(node, "Missing id '" + id + "' under section '" +
section + "'");
+ }
+ JsonNode inner = node.get(type);
+ assertNotNull(inner, "Missing type '" + type + "' under "
+ + (id != null ? "id '" + id + "'" : "section '" + section +
"'"));
+ return inner;
+ }
+}
diff --git a/tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParser.java b/tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParser.java
index b9e7bb6d57..8c57272827 100644
--- a/tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParser.java
+++ b/tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParser.java
@@ -385,7 +385,6 @@ public class PipesForkParser implements Closeable {
// Set pipes configuration
.setPipesConfig(
pc.getNumClients(),
- pc.getStartupTimeoutMillis(),
pc.getMaxFilesProcessedPerProcess(),
pc.getForkedJvmArgs())
// Use PASSBACK_ALL strategy - results returned through socket
diff --git a/tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParserConfig.java b/tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParserConfig.java
index 06d42b97c2..8c498d2ad1 100644
--- a/tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParserConfig.java
+++ b/tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParserConfig.java
@@ -283,17 +283,6 @@ public class PipesForkParserConfig {
return pipesConfig.getNumClients();
}
- /**
- * Set the startup timeout in milliseconds.
- *
- * @param startupTimeoutMillis the startup timeout
- * @return this config for chaining
- */
- public PipesForkParserConfig setStartupTimeoutMillis(long
startupTimeoutMillis) {
- pipesConfig.setStartupTimeoutMillis(startupTimeoutMillis);
- return this;
- }
-
/**
* Get the plugins directory.
*
diff --git a/tika-pipes/tika-pipes-plugins/pom.xml b/tika-pipes/tika-pipes-plugins/pom.xml
index 9eab9b49e7..ae19e2253c 100644
--- a/tika-pipes/tika-pipes-plugins/pom.xml
+++ b/tika-pipes/tika-pipes-plugins/pom.xml
@@ -80,6 +80,13 @@
<scope>test</scope>
<type>test-jar</type>
</dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-pipes-core</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
</dependencies>
<build>
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java
index e1802d0102..2479830b5b 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java
@@ -19,46 +19,23 @@ package org.apache.tika.pipes.atlassianjwt;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import
org.apache.tika.pipes.fetcher.atlassianjwt.config.AtlassianJwtFetcherConfig;
/**
* Validates Atlassian JWT fetcher configuration examples used in
documentation.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testAtlassianJwtFetcherConfig() throws Exception {
- String json = readExample("atlassian-jwt-fetcher.json");
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- assertNotNull(TikaLoader.load(configFile));
+ loadAndValidate("atlassian-jwt-fetcher.json");
- JsonNode inner = OBJECT_MAPPER.readTree(json)
- .get("fetchers").get("ajwt").get("atlassian-jwt-fetcher");
+ JsonNode inner =
innerComponent(readExample("atlassian-jwt-fetcher.json"),
+ "fetchers", "ajwt", "atlassian-jwt-fetcher");
AtlassianJwtFetcherConfig config =
AtlassianJwtFetcherConfig.load(inner.toString());
assertEquals("tika-pipes-app-key", config.getIssuer());
assertNotNull(config.getSharedSecret());
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java
index 0a083f608a..cf1b3959bc 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java
@@ -19,17 +19,9 @@ package org.apache.tika.pipes.azblob;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.emitter.azblob.AZBlobEmitterConfig;
import org.apache.tika.pipes.fetcher.azblob.config.AZBlobFetcherConfig;
import org.apache.tika.pipes.iterator.azblob.AZBlobPipesIteratorConfig;
@@ -40,48 +32,15 @@ import org.apache.tika.pipes.iterator.azblob.AZBlobPipesIteratorConfig;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
-
- private void loadViaTikaLoader(String resourceName) throws Exception {
- String json = readExample(resourceName);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
-
- private JsonNode innerComponent(String json, String section, String id,
String typeName)
- throws Exception {
- JsonNode root = OBJECT_MAPPER.readTree(json);
- JsonNode sectionNode = root.get(section);
- assertNotNull(sectionNode, "Missing section: " + section);
- JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
- assertNotNull(idNode, "Missing id: " + id);
- JsonNode typed = idNode.get(typeName);
- assertNotNull(typed, "Missing type: " + typeName);
- return typed;
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testAZBlobFetcherConfig() throws Exception {
- loadViaTikaLoader("az-blob-fetcher.json");
+ loadAndValidate("az-blob-fetcher.json");
- JsonNode inner = innerComponent(readExample("az-blob-fetcher.json"),
- "fetchers", "azf", "az-blob-fetcher");
- AZBlobFetcherConfig config =
AZBlobFetcherConfig.load(inner.toString());
+ AZBlobFetcherConfig config = AZBlobFetcherConfig.load(
+ innerComponent(readExample("az-blob-fetcher.json"),
+ "fetchers", "azf", "az-blob-fetcher").toString());
assertEquals("tika-input", config.getContainer());
assertEquals("https://myaccount.blob.core.windows.net",
config.getEndpoint());
assertNotNull(config.getSasToken());
@@ -89,11 +48,11 @@ public class ConfigExamplesTest {
@Test
public void testAZBlobEmitterConfig() throws Exception {
- loadViaTikaLoader("az-blob-emitter.json");
+ loadAndValidate("az-blob-emitter.json");
- JsonNode inner = innerComponent(readExample("az-blob-emitter.json"),
- "emitters", "aze", "az-blob-emitter");
- AZBlobEmitterConfig config =
AZBlobEmitterConfig.load(inner.toString());
+ AZBlobEmitterConfig config = AZBlobEmitterConfig.load(
+ innerComponent(readExample("az-blob-emitter.json"),
+ "emitters", "aze", "az-blob-emitter").toString());
assertEquals("tika-output", config.container());
assertEquals("json", config.fileExtension());
config.validate();
@@ -102,11 +61,11 @@ public class ConfigExamplesTest {
@Test
public void testAZBlobIteratorConfig() throws Exception {
- loadViaTikaLoader("az-blob-pipes-iterator.json");
+ loadAndValidate("az-blob-pipes-iterator.json");
- JsonNode inner =
innerComponent(readExample("az-blob-pipes-iterator.json"),
- "pipes-iterator", null, "az-blob-pipes-iterator");
- AZBlobPipesIteratorConfig config =
AZBlobPipesIteratorConfig.load(inner.toString());
+ AZBlobPipesIteratorConfig config = AZBlobPipesIteratorConfig.load(
+ innerComponent(readExample("az-blob-pipes-iterator.json"),
+ "pipes-iterator", null,
"az-blob-pipes-iterator").toString());
assertEquals("tika-input", config.getContainer());
assertEquals("incoming/", config.getPrefix());
assertEquals(360000L, config.getTimeoutMillis());
@@ -116,7 +75,7 @@ public class ConfigExamplesTest {
@Test
public void testAZBlobPipelineConfig() throws Exception {
- loadViaTikaLoader("az-blob-pipeline.json");
+ loadAndValidate("az-blob-pipeline.json");
String json = readExample("az-blob-pipeline.json");
AZBlobFetcherConfig fetcher = AZBlobFetcherConfig.load(
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java
index 75ca442918..e814ad68f1 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java
@@ -19,46 +19,23 @@ package org.apache.tika.pipes.csv;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.iterator.csv.CSVPipesIteratorConfig;
/**
* Validates CSV iterator configuration example used in documentation.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testCsvIteratorConfig() throws Exception {
- String json = readExample("csv-pipes-iterator.json");
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- assertNotNull(TikaLoader.load(configFile));
+ loadAndValidate("csv-pipes-iterator.json");
- JsonNode inner = OBJECT_MAPPER.readTree(json)
- .get("pipes-iterator").get("csv-pipes-iterator");
+ JsonNode inner = innerComponent(readExample("csv-pipes-iterator.json"),
+ "pipes-iterator", null, "csv-pipes-iterator");
CSVPipesIteratorConfig config =
CSVPipesIteratorConfig.load(inner.toString());
assertNotNull(config.getCsvPath());
assertEquals("doc_id", config.getIdColumn());
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java
index b1be5faa4b..6a5f8d3d14 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java
@@ -21,17 +21,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.emitter.es.ESEmitterConfig;
import org.apache.tika.pipes.reporter.es.ESReporterConfig;
@@ -41,48 +33,15 @@ import org.apache.tika.pipes.reporter.es.ESReporterConfig;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
-
- private void loadViaTikaLoader(String resourceName) throws Exception {
- String json = readExample(resourceName);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
-
- private JsonNode innerComponent(String json, String section, String id,
String typeName)
- throws Exception {
- JsonNode root = OBJECT_MAPPER.readTree(json);
- JsonNode sectionNode = root.get(section);
- assertNotNull(sectionNode, "Missing section: " + section);
- JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
- assertNotNull(idNode, "Missing id: " + id);
- JsonNode typed = idNode.get(typeName);
- assertNotNull(typed, "Missing type: " + typeName);
- return typed;
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testESEmitterConfig() throws Exception {
- loadViaTikaLoader("es-emitter.json");
+ loadAndValidate("es-emitter.json");
- JsonNode inner = innerComponent(readExample("es-emitter.json"),
- "emitters", "ese", "es-emitter");
- ESEmitterConfig config = ESEmitterConfig.load(inner.toString());
+ ESEmitterConfig config = ESEmitterConfig.load(
+ innerComponent(readExample("es-emitter.json"),
+ "emitters", "ese", "es-emitter").toString());
assertEquals("doc_id", config.idField());
assertEquals(ESEmitterConfig.AttachmentStrategy.PARENT_CHILD,
config.attachmentStrategy());
@@ -97,11 +56,11 @@ public class ConfigExamplesTest {
@Test
public void testESReporterConfig() throws Exception {
- loadViaTikaLoader("es-reporter.json");
+ loadAndValidate("es-reporter.json");
- JsonNode inner = innerComponent(readExample("es-reporter.json"),
- "pipes-reporters", null, "es-pipes-reporter");
- ESReporterConfig config = ESReporterConfig.load(inner.toString());
+ ESReporterConfig config = ESReporterConfig.load(
+ innerComponent(readExample("es-reporter.json"),
+ "pipes-reporters", null,
"es-pipes-reporter").toString());
assertTrue(config.esUrl().contains("tika-status"));
assertEquals("tika_", config.keyPrefix());
assertTrue(config.includeRouting());
@@ -112,7 +71,7 @@ public class ConfigExamplesTest {
@Test
public void testESPipelineConfig() throws Exception {
- loadViaTikaLoader("es-pipeline.json");
+ loadAndValidate("es-pipeline.json");
String json = readExample("es-pipeline.json");
ESEmitterConfig emitter = ESEmitterConfig.load(
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java
index 70fe7947bb..041e079e15 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java
@@ -16,17 +16,9 @@
*/
package org.apache.tika.pipes.fs;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
/**
* Validates file system fetcher/emitter configuration examples used in
documentation.
@@ -34,23 +26,7 @@ import org.apache.tika.config.loader.TikaLoader;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
-
- @TempDir
- Path tempDir;
-
- private void loadAndValidate(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- String json = new String(is.readAllBytes(),
StandardCharsets.UTF_8);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testFileSystemFetcherConfig() throws Exception {
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json
index 4f01761e45..8ee447892f 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json
@@ -1,13 +1,12 @@
{
- "emitters": [
- {
+ "emitters": {
+ "my-emitter": {
"file-system-emitter": {
- "id": "my-emitter",
"basePath": "/data/output",
"fileExtension": "json",
"onExists": "REPLACE",
"prettyPrint": true
}
}
- ]
+ }
}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json
index 201d4fa099..cd60dd3b2c 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json
@@ -1,11 +1,10 @@
{
- "fetchers": [
- {
+ "fetchers": {
+ "my-fetcher": {
"file-system-fetcher": {
- "id": "my-fetcher",
"basePath": "/data/documents",
"extractFileSystemMetadata": true
}
}
- ]
+ }
}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
index 3d95755eff..646c0f7b01 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
@@ -1,24 +1,22 @@
{
- "fetchers": [
- {
+ "fetchers": {
+ "input-fetcher": {
"file-system-fetcher": {
- "id": "input-fetcher",
"basePath": "/data/input",
"extractFileSystemMetadata": true
}
}
- ],
- "emitters": [
- {
+ },
+ "emitters": {
+ "output-emitter": {
"file-system-emitter": {
- "id": "output-emitter",
"basePath": "/data/output",
"fileExtension": "json",
"onExists": "SKIP",
"prettyPrint": false
}
}
- ],
+ },
"parsers": [
{
"default-parser": {}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java
index 7cfc1f3fb1..52ce0a1fe5 100644
--- a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java
@@ -17,19 +17,11 @@
package org.apache.tika.pipes.gcs;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.emitter.gcs.GCSEmitterConfig;
import org.apache.tika.pipes.fetcher.gcs.config.GCSFetcherConfig;
import org.apache.tika.pipes.iterator.gcs.GCSPipesIteratorConfig;
@@ -40,44 +32,11 @@ import
org.apache.tika.pipes.iterator.gcs.GCSPipesIteratorConfig;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
-
- private void loadViaTikaLoader(String resourceName) throws Exception {
- String json = readExample(resourceName);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
-
- private JsonNode innerComponent(String json, String section, String id,
String typeName)
- throws Exception {
- JsonNode root = OBJECT_MAPPER.readTree(json);
- JsonNode sectionNode = root.get(section);
- assertNotNull(sectionNode, "Missing section: " + section);
- JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
- assertNotNull(idNode, "Missing id: " + id);
- JsonNode typed = idNode.get(typeName);
- assertNotNull(typed, "Missing type: " + typeName);
- return typed;
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testGCSFetcherConfig() throws Exception {
- loadViaTikaLoader("gcs-fetcher.json");
+ loadAndValidate("gcs-fetcher.json");
JsonNode inner = innerComponent(readExample("gcs-fetcher.json"),
"fetchers", "gcsf", "gcs-fetcher");
@@ -88,7 +47,7 @@ public class ConfigExamplesTest {
@Test
public void testGCSEmitterConfig() throws Exception {
- loadViaTikaLoader("gcs-emitter.json");
+ loadAndValidate("gcs-emitter.json");
JsonNode inner = innerComponent(readExample("gcs-emitter.json"),
"emitters", "gcse", "gcs-emitter");
@@ -102,7 +61,7 @@ public class ConfigExamplesTest {
@Test
public void testGCSIteratorConfig() throws Exception {
- loadViaTikaLoader("gcs-pipes-iterator.json");
+ loadAndValidate("gcs-pipes-iterator.json");
JsonNode inner = innerComponent(readExample("gcs-pipes-iterator.json"),
"pipes-iterator", null, "gcs-pipes-iterator");
@@ -115,7 +74,7 @@ public class ConfigExamplesTest {
@Test
public void testGCSPipelineConfig() throws Exception {
- loadViaTikaLoader("gcs-pipeline.json");
+ loadAndValidate("gcs-pipeline.json");
String json = readExample("gcs-pipeline.json");
GCSFetcherConfig fetcher = GCSFetcherConfig.load(
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java
index 7ee99ebca2..9d8bb365d7 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java
@@ -20,46 +20,23 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import
org.apache.tika.pipes.fetcher.googledrive.config.GoogleDriveFetcherConfig;
/**
* Validates Google Drive fetcher configuration examples used in documentation.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testGoogleDriveFetcherConfig() throws Exception {
- String json = readExample("google-drive-fetcher.json");
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- assertNotNull(TikaLoader.load(configFile));
+ loadAndValidate("google-drive-fetcher.json");
- JsonNode inner = OBJECT_MAPPER.readTree(json)
- .get("fetchers").get("gdf").get("google-drive-fetcher");
+ JsonNode inner =
innerComponent(readExample("google-drive-fetcher.json"),
+ "fetchers", "gdf", "google-drive-fetcher");
GoogleDriveFetcherConfig config =
GoogleDriveFetcherConfig.load(inner.toString());
assertEquals("tika-pipes", config.getApplicationName());
assertEquals("[email protected]", config.getSubjectUser());
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java
index ff447df2a3..33f737d638 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java
@@ -17,49 +17,25 @@
package org.apache.tika.pipes.http;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.fetcher.http.config.HttpFetcherConfig;
/**
* Validates HTTP fetcher configuration examples used in documentation.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testHttpFetcherConfig() throws Exception {
- String json = readExample("http-fetcher.json");
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- assertNotNull(TikaLoader.load(configFile));
+ loadAndValidate("http-fetcher.json");
- JsonNode inner = OBJECT_MAPPER.readTree(json)
- .get("fetchers").get("httpf").get("http-fetcher");
+ JsonNode inner = innerComponent(readExample("http-fetcher.json"),
+ "fetchers", "httpf", "http-fetcher");
HttpFetcherConfig config = HttpFetcherConfig.load(inner.toString());
assertEquals("tika", config.getUserName());
assertEquals("basic", config.getAuthScheme());
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java
index 05b657362c..f431d3881f 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java
@@ -20,17 +20,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.emitter.jdbc.JDBCEmitterConfig;
import org.apache.tika.pipes.iterator.jdbc.JDBCPipesIteratorConfig;
import org.apache.tika.pipes.reporter.jdbc.JDBCPipesReporterConfig;
@@ -41,44 +34,11 @@ import
org.apache.tika.pipes.reporter.jdbc.JDBCPipesReporterConfig;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
-
- private void loadViaTikaLoader(String resourceName) throws Exception {
- String json = readExample(resourceName);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
-
- private JsonNode innerComponent(String json, String section, String id,
String typeName)
- throws Exception {
- JsonNode root = OBJECT_MAPPER.readTree(json);
- JsonNode sectionNode = root.get(section);
- assertNotNull(sectionNode, "Missing section: " + section);
- JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
- assertNotNull(idNode, "Missing id: " + id);
- JsonNode typed = idNode.get(typeName);
- assertNotNull(typed, "Missing type: " + typeName);
- return typed;
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testJDBCEmitterConfig() throws Exception {
- loadViaTikaLoader("jdbc-emitter.json");
+ loadAndValidate("jdbc-emitter.json");
JsonNode inner = innerComponent(readExample("jdbc-emitter.json"),
"emitters", "jdbce", "jdbc-emitter");
@@ -98,7 +58,7 @@ public class ConfigExamplesTest {
@Test
public void testJDBCIteratorConfig() throws Exception {
- loadViaTikaLoader("jdbc-pipes-iterator.json");
+ loadAndValidate("jdbc-pipes-iterator.json");
JsonNode inner =
innerComponent(readExample("jdbc-pipes-iterator.json"),
"pipes-iterator", null, "jdbc-pipes-iterator");
@@ -116,7 +76,7 @@ public class ConfigExamplesTest {
@Test
public void testJDBCReporterConfig() throws Exception {
- loadViaTikaLoader("jdbc-reporter.json");
+ loadAndValidate("jdbc-reporter.json");
JsonNode inner = innerComponent(readExample("jdbc-reporter.json"),
"pipes-reporters", null, "jdbc-reporter");
@@ -133,7 +93,7 @@ public class ConfigExamplesTest {
@Test
public void testJDBCPipelineConfig() throws Exception {
- loadViaTikaLoader("jdbc-pipeline.json");
+ loadAndValidate("jdbc-pipeline.json");
String json = readExample("jdbc-pipeline.json");
JDBCEmitterConfig emitter = JDBCEmitterConfig.load(
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java
index d96140eae5..d7f09ca524 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java
@@ -19,46 +19,23 @@ package org.apache.tika.pipes.json;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.pipesiterator.json.JsonPipesIteratorConfig;
/**
* Validates JSON iterator configuration example used in documentation.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testJsonIteratorConfig() throws Exception {
- String json = readExample("json-pipes-iterator.json");
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- assertNotNull(TikaLoader.load(configFile));
+ loadAndValidate("json-pipes-iterator.json");
- JsonNode inner = OBJECT_MAPPER.readTree(json)
- .get("pipes-iterator").get("json-pipes-iterator");
+ JsonNode inner =
innerComponent(readExample("json-pipes-iterator.json"),
+ "pipes-iterator", null, "json-pipes-iterator");
JsonPipesIteratorConfig config =
JsonPipesIteratorConfig.load(inner.toString());
assertNotNull(config.getJsonPath());
assertEquals("fsf", config.getFetcherId());
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java
index 43c9a4daef..9951f55959 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java
@@ -17,20 +17,12 @@
package org.apache.tika.pipes.kafka;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.emitter.kafka.KafkaEmitterConfig;
import org.apache.tika.pipes.iterator.kafka.KafkaPipesIteratorConfig;
@@ -40,44 +32,11 @@ import
org.apache.tika.pipes.iterator.kafka.KafkaPipesIteratorConfig;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
-
- private void loadViaTikaLoader(String resourceName) throws Exception {
- String json = readExample(resourceName);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
-
- private JsonNode innerComponent(String json, String section, String id,
String typeName)
- throws Exception {
- JsonNode root = OBJECT_MAPPER.readTree(json);
- JsonNode sectionNode = root.get(section);
- assertNotNull(sectionNode, "Missing section: " + section);
- JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
- assertNotNull(idNode, "Missing id: " + id);
- JsonNode typed = idNode.get(typeName);
- assertNotNull(typed, "Missing type: " + typeName);
- return typed;
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testKafkaEmitterConfig() throws Exception {
- loadViaTikaLoader("kafka-emitter.json");
+ loadAndValidate("kafka-emitter.json");
JsonNode inner = innerComponent(readExample("kafka-emitter.json"),
"emitters", "kafe", "kafka-emitter");
@@ -92,7 +51,7 @@ public class ConfigExamplesTest {
@Test
public void testKafkaIteratorConfig() throws Exception {
- loadViaTikaLoader("kafka-pipes-iterator.json");
+ loadAndValidate("kafka-pipes-iterator.json");
JsonNode inner =
innerComponent(readExample("kafka-pipes-iterator.json"),
"pipes-iterator", null, "kafka-pipes-iterator");
@@ -108,7 +67,7 @@ public class ConfigExamplesTest {
@Test
public void testKafkaPipelineConfig() throws Exception {
- loadViaTikaLoader("kafka-pipeline.json");
+ loadAndValidate("kafka-pipeline.json");
String json = readExample("kafka-pipeline.json");
KafkaEmitterConfig emitter = KafkaEmitterConfig.load(
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java
index 83159ba65b..1b5b0e29eb 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java
@@ -20,46 +20,23 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import
org.apache.tika.pipes.fetchers.microsoftgraph.config.MicrosoftGraphFetcherConfig;
/**
* Validates Microsoft Graph fetcher configuration examples used in
documentation.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testMicrosoftGraphFetcherConfig() throws Exception {
- String json = readExample("microsoft-graph-fetcher.json");
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- assertNotNull(TikaLoader.load(configFile));
+ loadAndValidate("microsoft-graph-fetcher.json");
- JsonNode inner = OBJECT_MAPPER.readTree(json)
- .get("fetchers").get("msgf").get("microsoft-graph-fetcher");
+ JsonNode inner =
innerComponent(readExample("microsoft-graph-fetcher.json"),
+ "fetchers", "msgf", "microsoft-graph-fetcher");
MicrosoftGraphFetcherConfig config =
MicrosoftGraphFetcherConfig.load(inner.toString());
assertNotNull(config.getClientSecretCredentialsConfig());
assertEquals("REDACTED-TENANT-UUID",
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/java/org/apache/tika/pipes/opensearch/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/java/org/apache/tika/pipes/opensearch/ConfigExamplesTest.java
index d0c0a9eefa..e673f25b88 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/java/org/apache/tika/pipes/opensearch/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/java/org/apache/tika/pipes/opensearch/ConfigExamplesTest.java
@@ -20,17 +20,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.emitter.opensearch.OpenSearchEmitterConfig;
import org.apache.tika.pipes.reporter.opensearch.OpenSearchReporterConfig;
@@ -40,44 +33,11 @@ import
org.apache.tika.pipes.reporter.opensearch.OpenSearchReporterConfig;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
-
- private void loadViaTikaLoader(String resourceName) throws Exception {
- String json = readExample(resourceName);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
-
- private JsonNode innerComponent(String json, String section, String id,
String typeName)
- throws Exception {
- JsonNode root = OBJECT_MAPPER.readTree(json);
- JsonNode sectionNode = root.get(section);
- assertNotNull(sectionNode, "Missing section: " + section);
- JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
- assertNotNull(idNode, "Missing id: " + id);
- JsonNode typed = idNode.get(typeName);
- assertNotNull(typed, "Missing type: " + typeName);
- return typed;
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testOpenSearchEmitterConfig() throws Exception {
- loadViaTikaLoader("opensearch-emitter.json");
+ loadAndValidate("opensearch-emitter.json");
JsonNode inner = innerComponent(readExample("opensearch-emitter.json"),
"emitters", "ose", "opensearch-emitter");
@@ -94,7 +54,7 @@ public class ConfigExamplesTest {
@Test
public void testOpenSearchReporterConfig() throws Exception {
- loadViaTikaLoader("opensearch-reporter.json");
+ loadAndValidate("opensearch-reporter.json");
JsonNode inner =
innerComponent(readExample("opensearch-reporter.json"),
"pipes-reporters", null, "opensearch-pipes-reporter");
@@ -109,7 +69,7 @@ public class ConfigExamplesTest {
@Test
public void testOpenSearchPipelineConfig() throws Exception {
- loadViaTikaLoader("opensearch-pipeline.json");
+ loadAndValidate("opensearch-pipeline.json");
String json = readExample("opensearch-pipeline.json");
OpenSearchEmitterConfig emitter = OpenSearchEmitterConfig.load(
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java
index f248d8194e..36f67cb8bb 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java
@@ -17,19 +17,11 @@
package org.apache.tika.pipes.s3;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.emitter.s3.S3EmitterConfig;
import org.apache.tika.pipes.fetcher.s3.config.S3FetcherConfig;
import org.apache.tika.pipes.iterator.s3.S3PipesIteratorConfig;
@@ -40,44 +32,11 @@ import
org.apache.tika.pipes.iterator.s3.S3PipesIteratorConfig;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
-
- private void loadViaTikaLoader(String resourceName) throws Exception {
- String json = readExample(resourceName);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
-
- private JsonNode innerComponent(String json, String section, String id,
String typeName)
- throws Exception {
- JsonNode root = OBJECT_MAPPER.readTree(json);
- JsonNode sectionNode = root.get(section);
- assertNotNull(sectionNode, "Missing section: " + section);
- JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
- assertNotNull(idNode, "Missing id: " + id);
- JsonNode typed = idNode.get(typeName);
- assertNotNull(typed, "Missing type: " + typeName);
- return typed;
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testS3FetcherConfig() throws Exception {
- loadViaTikaLoader("s3-fetcher.json");
+ loadAndValidate("s3-fetcher.json");
JsonNode inner = innerComponent(readExample("s3-fetcher.json"),
"fetchers", "s3f", "s3-fetcher");
@@ -90,7 +49,7 @@ public class ConfigExamplesTest {
@Test
public void testS3EmitterConfig() throws Exception {
- loadViaTikaLoader("s3-emitter.json");
+ loadAndValidate("s3-emitter.json");
JsonNode inner = innerComponent(readExample("s3-emitter.json"),
"emitters", "s3e", "s3-emitter");
@@ -105,7 +64,7 @@ public class ConfigExamplesTest {
@Test
public void testS3IteratorConfig() throws Exception {
- loadViaTikaLoader("s3-pipes-iterator.json");
+ loadAndValidate("s3-pipes-iterator.json");
JsonNode inner = innerComponent(readExample("s3-pipes-iterator.json"),
"pipes-iterator", null, "s3-pipes-iterator");
@@ -118,7 +77,7 @@ public class ConfigExamplesTest {
@Test
public void testS3PipelineConfig() throws Exception {
- loadViaTikaLoader("s3-pipeline.json");
+ loadAndValidate("s3-pipeline.json");
String json = readExample("s3-pipeline.json");
S3FetcherConfig fetcher = S3FetcherConfig.load(
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java
index 65d06c37cc..cc07a18fc5 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java
@@ -20,17 +20,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.core.testutil.AbstractConfigExamplesTest;
import org.apache.tika.pipes.emitter.solr.SolrEmitterConfig;
import org.apache.tika.pipes.iterator.solr.SolrPipesIteratorConfig;
@@ -40,44 +33,11 @@ import
org.apache.tika.pipes.iterator.solr.SolrPipesIteratorConfig;
* The JSON configuration examples are stored in {@code
src/test/resources/config-examples/}
* and are included directly in the AsciiDoc documentation via the {@code
include::} directive.
*/
-public class ConfigExamplesTest {
-
- private static final String EXAMPLES_DIR = "/config-examples/";
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- @TempDir
- Path tempDir;
-
- private String readExample(String resourceName) throws Exception {
- try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR +
resourceName)) {
- assertNotNull(is, "Resource not found: " + resourceName);
- return new String(is.readAllBytes(), StandardCharsets.UTF_8);
- }
- }
-
- private void loadViaTikaLoader(String resourceName) throws Exception {
- String json = readExample(resourceName);
- Path configFile = tempDir.resolve("tika-config.json");
- Files.writeString(configFile, json, StandardCharsets.UTF_8);
- TikaLoader loader = TikaLoader.load(configFile);
- assertNotNull(loader, "TikaLoader should not be null for: " +
resourceName);
- }
-
- private JsonNode innerComponent(String json, String section, String id,
String typeName)
- throws Exception {
- JsonNode root = OBJECT_MAPPER.readTree(json);
- JsonNode sectionNode = root.get(section);
- assertNotNull(sectionNode, "Missing section: " + section);
- JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
- assertNotNull(idNode, "Missing id: " + id);
- JsonNode typed = idNode.get(typeName);
- assertNotNull(typed, "Missing type: " + typeName);
- return typed;
- }
+public class ConfigExamplesTest extends AbstractConfigExamplesTest {
@Test
public void testSolrEmitterUrlsConfig() throws Exception {
- loadViaTikaLoader("solr-emitter.json");
+ loadAndValidate("solr-emitter.json");
JsonNode inner = innerComponent(readExample("solr-emitter.json"),
"emitters", "solre", "solr-emitter");
@@ -94,7 +54,7 @@ public class ConfigExamplesTest {
@Test
public void testSolrEmitterZkConfig() throws Exception {
- loadViaTikaLoader("solr-emitter-zk.json");
+ loadAndValidate("solr-emitter-zk.json");
JsonNode inner = innerComponent(readExample("solr-emitter-zk.json"),
"emitters", "solre", "solr-emitter");
@@ -109,7 +69,7 @@ public class ConfigExamplesTest {
@Test
public void testSolrIteratorConfig() throws Exception {
- loadViaTikaLoader("solr-pipes-iterator.json");
+ loadAndValidate("solr-pipes-iterator.json");
JsonNode inner =
innerComponent(readExample("solr-pipes-iterator.json"),
"pipes-iterator", null, "solr-pipes-iterator");
@@ -123,7 +83,7 @@ public class ConfigExamplesTest {
@Test
public void testSolrPipelineConfig() throws Exception {
- loadViaTikaLoader("solr-pipeline.json");
+ loadAndValidate("solr-pipeline.json");
String json = readExample("solr-pipeline.json");
SolrEmitterConfig emitter = SolrEmitterConfig.load(
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/AbstractSpiComponentLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/AbstractSpiComponentLoader.java
index 7be0c37a14..191d5b164d 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/AbstractSpiComponentLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/AbstractSpiComponentLoader.java
@@ -19,6 +19,8 @@ package org.apache.tika.config.loader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -241,11 +243,24 @@ public abstract class AbstractSpiComponentLoader<T>
implements ComponentLoader<T
// ==================== Shared implementation ====================
+ /**
+  * Returns the keys that may appear inside the config object of the default marker
+  * entry (for example, inside {@code default-parser}).
+  * <p>
+  * The base implementation permits only {@code "exclude"}. A subclass that consumes
+  * further framework-level decorators on its marker overrides this method to widen
+  * the set; ParserLoader does so for the mime-filter decorators.
+  *
+  * @return the permitted marker keys
+  */
+ protected Set<String> getAllowedMarkerKeys() {
+     final Set<String> markerKeys = Set.of("exclude");
+     return markerKeys;
+ }
+
private DefaultMarkerConfig<T> findDefaultMarker(List<Map.Entry<String,
JsonNode>> entries,
- LoaderContext context) {
+ LoaderContext context)
+ throws TikaConfigException {
int index = 0;
for (Map.Entry<String, JsonNode> entry : entries) {
if (defaultMarkerName.equals(entry.getKey())) {
+ validateMarkerKeys(entry.getValue());
Set<Class<? extends T>> exclusions =
parseExclusions(entry.getValue(), context);
return new DefaultMarkerConfig<>(true, index, exclusions,
entry.getValue());
@@ -255,6 +270,34 @@ public abstract class AbstractSpiComponentLoader<T>
implements ComponentLoader<T
return new DefaultMarkerConfig<>(false, -1, Collections.emptySet(),
null);
}
+ /**
+  * Rejects any unknown key inside a marker's config. The marker schema is fixed
+  * and tiny, so silently ignoring an unrecognized key (e.g., {@code _exclude}
+  * instead of {@code exclude}) would drop the directive without telling the user.
+  * <p>
+  * A {@code null} or non-object node is accepted without inspection. Otherwise every
+  * field name is checked against {@link #getAllowedMarkerKeys()} and all offending
+  * keys are reported together, in the order the field names are iterated. The allowed
+  * keys are listed in sorted order so the message is stable: sets created by
+  * {@code Set.of} have no defined iteration order. A "did you mean" hint is added only
+  * for an unknown key that differs from an allowed key by nothing but a leading
+  * underscore.
+  *
+  * @param markerConfig the JSON value attached to the default marker entry; may be null
+  * @throws TikaConfigException if the config object contains at least one key that is
+  *         not in the allowed set
+  */
+ private void validateMarkerKeys(JsonNode markerConfig) throws TikaConfigException {
+     if (markerConfig == null || !markerConfig.isObject()) {
+         return;
+     }
+     Set<String> allowed = getAllowedMarkerKeys();
+     Set<String> unknown = new LinkedHashSet<>();
+     Set<String> hints = new LinkedHashSet<>();
+     Iterator<String> it = markerConfig.fieldNames();
+     while (it.hasNext()) {
+         String key = it.next();
+         if (allowed.contains(key)) {
+             continue;
+         }
+         unknown.add(key);
+         // Toggle the leading underscore: the usual mistake is "_exclude" for "exclude"
+         // (or "mime-include" for "_mime-include" where that decorator is allowed).
+         String candidate = key.startsWith("_") ? key.substring(1) : "_" + key;
+         if (allowed.contains(candidate)) {
+             hints.add("'" + candidate + "' instead of '" + key + "'");
+         }
+     }
+     if (unknown.isEmpty()) {
+         return;
+     }
+     // Sort a copy so the listing does not depend on the set's iteration order.
+     List<String> sortedAllowed = new ArrayList<>(allowed);
+     Collections.sort(sortedAllowed);
+     StringBuilder message = new StringBuilder("Unknown key(s) ")
+             .append(unknown)
+             .append(" inside '").append(defaultMarkerName)
+             .append("'. Allowed keys: ").append(sortedAllowed).append('.');
+     if (!hints.isEmpty()) {
+         message.append(" (Did you mean ").append(String.join(", ", hints)).append("?)");
+     }
+     throw new TikaConfigException(message.toString());
+ }
+
@SuppressWarnings("unchecked")
private Set<Class<? extends T>> parseExclusions(JsonNode configNode,
LoaderContext context) {
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
index 4b87ba78ca..4dcc930353 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
@@ -305,9 +305,17 @@ public class ComponentInstantiator {
}
/**
- * Strips decorator fields (_mime-include, _mime-exclude) from config node.
- * These fields are handled by TikaLoader for wrapping, not by the
component itself.
- * Note: _exclude is NOT stripped as it's used by DefaultParser for SPI
exclusions.
+ * Strips decorator fields ({@code _mime-include}, {@code _mime-exclude})
from a real
+ * component's config node. These directives are applied by {@link
+ * org.apache.tika.config.loader.TikaLoader} as a wrapper around the
component, not
+ * consumed by the component itself, so they must be stripped before
deserialization.
+ * <p>
+ * Convention: directives that share a JSON object with a real component's
own
+ * config properties carry a leading underscore to avoid namespace
collisions
+ * (e.g., a parser could legitimately have a config key named {@code
mime-include}).
+ * Directives on marker entries that have no component-config namespace —
+ * {@code "exclude"} on {@code default-parser}/{@code default-detector} —
need no
+ * prefix; those are read directly by {@link AbstractSpiComponentLoader}.
*/
private static JsonNode stripDecoratorFields(JsonNode configNode) {
if (configNode == null || !configNode.isObject()) {
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
index ec1f8ff42a..1c37c68fff 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
@@ -80,6 +80,13 @@ public class ParserLoader extends
AbstractSpiComponentLoader<Parser> {
exclusions);
}
+ /**
+  * Widens the permitted marker keys for {@code default-parser}: besides the standard
+  * {@code "exclude"} key, the framework mime-filter decorators {@code "_mime-include"}
+  * and {@code "_mime-exclude"} are accepted.
+  *
+  * @return the keys permitted inside the {@code default-parser} marker config
+  */
+ @Override
+ protected Set<String> getAllowedMarkerKeys() {
+     final Set<String> parserMarkerKeys = Set.of("exclude", "_mime-include", "_mime-exclude");
+     return parserMarkerKeys;
+ }
+
@Override
protected Parser decorateDefaultComposite(Parser parser, JsonNode
configNode,
LoaderContext context) throws
TikaConfigException {
diff --git
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index 344faa66c7..9989ca2b1a 100644
---
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -78,7 +78,7 @@ import org.apache.tika.exception.TikaConfigException;
* ],
* "detectors": [
* "poifs-container-detector", // String shorthand
- * { "default-detector": { "spoolTypes": ["application/zip",
"application/pdf"] } }
+ * { "default-detector": { "exclude": ["html-detector"] } }
* ],
*
* // Pipes components (validated by validateKeys())
diff --git
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
index 403464d9c4..0d157048c6 100644
---
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
@@ -26,7 +26,6 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.xml.sax.helpers.DefaultHandler;
@@ -369,11 +368,10 @@ public class TikaLoaderTest {
assertFalse(config.isThrowOnMaxCount(), "Should return defaults when
key missing");
}
- // TODO: TIKA-SERIALIZATION-FOLLOWUP - Jackson may need configuration to
fail on unknown properties
- @Disabled("TIKA-SERIALIZATION-FOLLOWUP")
@Test
- public void testInvalidBeanPropertyThrowsException() throws Exception {
- // Config with a property that doesn't exist on DefaultDetector
+ public void testUnknownKeyInDefaultDetectorThrows() throws Exception {
+ // Strict-marker-key validation: an unknown key inside default-detector
+ // must error at load time rather than being silently ignored.
(TIKA-4739)
String invalidConfig = """
{
"detectors": [
@@ -389,16 +387,113 @@ public class TikaLoaderTest {
Path tempFile = Files.createTempFile("test-invalid-property", ".json");
try {
Files.write(tempFile,
invalidConfig.getBytes(StandardCharsets.UTF_8));
+ TikaLoader loader = TikaLoader.load(tempFile);
+ try {
+ loader.loadDetectors();
+ throw new AssertionError("Expected TikaConfigException for
unknown marker key");
+ } catch (org.apache.tika.exception.TikaConfigException e) {
+ assertTrue(e.getMessage().contains("nonExistentProperty"),
+ "Error should name the offending key");
+ assertTrue(e.getMessage().contains("default-detector"),
+ "Error should name the marker");
+ }
+ } finally {
+ Files.deleteIfExists(tempFile);
+ }
+ }
+
+ /**
+  * Verifies that a {@code default-parser} marker whose config uses the key
+  * {@code _exclude} makes {@code loadParsers()} throw a TikaConfigException, and that
+  * the exception message contains both the offending key and the marker name.
+  */
+ @Test
+ public void testUnderscoreExcludeInDefaultParserThrows() throws Exception {
+ // The canonical form is "exclude" (no underscore). The historical
+ // "_exclude" was silently dropped, leading to ghost configs that did
+ // nothing. Strict validation must catch this at load time. (TIKA-4739)
+ String invalidConfig = """
+ {
+ "parsers": [
+ {
+ "default-parser": {
+ "_exclude": ["pdf-parser"]
+ }
+ }
+ ]
+ }
+ """;
+
+ // The config is written to a temp file and loaded from that path.
+ Path tempFile = Files.createTempFile("test-underscore-exclude",
".json");
+ try {
+ Files.write(tempFile,
invalidConfig.getBytes(StandardCharsets.UTF_8));
+ TikaLoader loader = TikaLoader.load(tempFile);
+ try {
+ loader.loadParsers();
+ // Reaching this line means no exception was thrown. AssertionError is not
+ // caught by the catch clause below, so it propagates and fails the test.
+ throw new AssertionError("Expected TikaConfigException for
_exclude on default-parser");
+ } catch (org.apache.tika.exception.TikaConfigException e) {
+ assertTrue(e.getMessage().contains("_exclude"),
+ "Error should name the offending key");
+ assertTrue(e.getMessage().contains("default-parser"),
+ "Error should name the marker");
+ }
+ } finally {
+ // Remove the temp file whether or not the assertions passed.
+ Files.deleteIfExists(tempFile);
+ }
+ }
+ /**
+  * Verifies that a {@code default-parser} marker config containing an empty
+  * {@code exclude} list together with {@code _mime-include} loads without error:
+  * {@code loadParsers()} must return a non-null Parser.
+  */
+ @Test
+ public void testMimeFilterDecoratorsAllowedOnDefaultParser() throws
Exception {
+ // default-parser accepts the framework-level mime-filter decorators
+ // (_mime-include / _mime-exclude). These must NOT be rejected by
+ // strict-marker-key validation. (TIKA-4739)
+ String config = """
+ {
+ "parsers": [
+ {
+ "default-parser": {
+ "exclude": [],
+ "_mime-include": ["application/pdf"]
+ }
+ }
+ ]
+ }
+ """;
+
+ // The config is written to a temp file and loaded from that path.
+ Path tempFile = Files.createTempFile("test-mime-include", ".json");
+ try {
+ Files.write(tempFile, config.getBytes(StandardCharsets.UTF_8));
+ TikaLoader loader = TikaLoader.load(tempFile);
+ // Any exception thrown here propagates out of the test method and fails it.
+ Parser parser = loader.loadParsers();
+ assertNotNull(parser, "_mime-include on default-parser must load
successfully");
+ } finally {
+ // Remove the temp file whether or not the assertion passed.
+ Files.deleteIfExists(tempFile);
+ }
+ }
+
+ @Test
+ public void testMimeFilterDecoratorRejectedOnDefaultDetector() throws
Exception {
+ // _mime-include is only meaningful on parsers (it restricts which mime
+ // types a parser handles). On detectors it has no consumer, so strict
+ // validation rejects it rather than silently ignoring it. (TIKA-4739)
+ String config = """
+ {
+ "detectors": [
+ {
+ "default-detector": {
+ "_mime-include": ["application/pdf"]
+ }
+ }
+ ]
+ }
+ """;
+
+ Path tempFile = Files.createTempFile("test-mime-include-detector",
".json");
+ try {
+ Files.write(tempFile, config.getBytes(StandardCharsets.UTF_8));
TikaLoader loader = TikaLoader.load(tempFile);
try {
loader.loadDetectors();
- throw new AssertionError("Expected TikaConfigException for
invalid property");
+ throw new AssertionError(
+ "Expected TikaConfigException for _mime-include on
default-detector");
} catch (org.apache.tika.exception.TikaConfigException e) {
- // Expected - Jackson should fail on unknown property
- assertTrue(e.getMessage().contains("nonExistentProperty") ||
-
e.getCause().getMessage().contains("nonExistentProperty"),
- "Error should mention the invalid property name");
+ assertTrue(e.getMessage().contains("_mime-include"),
+ "Error should name the offending key");
}
} finally {
Files.deleteIfExists(tempFile);