This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 5396175dec TIKA-4715 - try to fix osgi integration tests (#2758)
5396175dec is described below
commit 5396175dec8451a09abc2b2b50727865679df76e
Author: Tim Allison <[email protected]>
AuthorDate: Fri Apr 17 08:57:55 2026 -0400
TIKA-4715 - try to fix osgi integration tests (#2758)
---
tika-bundles/tika-bundle-standard/pom.xml | 100 ++---
.../tika/bundle/internal/BundleActivator.java | 53 +++
.../test/java/org/apache/tika/bundle/BundleIT.java | 413 +++++++--------------
tika-bundles/tika-bundle-standard/test-bundles.xml | 1 +
tika-core/pom.xml | 3 +
tika-integration-tests/pom.xml | 11 +-
6 files changed, 224 insertions(+), 357 deletions(-)
diff --git a/tika-bundles/tika-bundle-standard/pom.xml b/tika-bundles/tika-bundle-standard/pom.xml
index 2036060d1e..5991c75d87 100644
--- a/tika-bundles/tika-bundle-standard/pom.xml
+++ b/tika-bundles/tika-bundle-standard/pom.xml
@@ -61,78 +61,23 @@
<version>${project.version}</version>
</dependency>
- <!-- Test dependencies -->
- <dependency>
- <groupId>org.ops4j.pax.exam</groupId>
- <artifactId>pax-exam-junit4</artifactId>
- <version>${pax.exam.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.ops4j.pax.exam</groupId>
- <artifactId>pax-exam-container-native</artifactId>
- <version>${pax.exam.version}</version>
- <scope>test</scope>
- </dependency>
+ <!-- Test: programmatic Felix container + JUnit 5 -->
<dependency>
<groupId>org.apache.felix</groupId>
<artifactId>org.apache.felix.framework</artifactId>
<version>7.0.5</version>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.ops4j.pax.exam</groupId>
- <artifactId>pax-exam-link-assembly</artifactId>
- <version>${pax.exam.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.ops4j.pax.url</groupId>
- <artifactId>pax-url-aether</artifactId>
- <version>3.0.2</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>jakarta.inject</groupId>
- <artifactId>jakarta.inject-api</artifactId>
- <version>2.0.1.MR</version>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.osgi</groupId>
<artifactId>org.osgi.core</artifactId>
- <scope>test</scope>
- </dependency>
-
- <!-- after we migrate BundleIT to junit5, we can get rid of this -->
- <dependency>
- <groupId>org.junit.vintage</groupId>
- <artifactId>junit-vintage-engine</artifactId>
- <scope>test</scope>
+ <scope>provided</scope>
</dependency>
-
- <!-- use non-log4j slf4j backend to prevent main classloader from loading log4j classes -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<scope>test</scope>
</dependency>
-
- <dependency>
- <groupId>org.glassfish.jaxb</groupId>
- <artifactId>jaxb-runtime</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>jakarta.activation</groupId>
- <artifactId>jakarta.activation-api</artifactId>
- </dependency>
- <dependency>
- <groupId>com.sun.xml.fastinfoset</groupId>
- <artifactId>FastInfoset</artifactId>
- <version>2.1.1</version>
- <scope>test</scope>
- </dependency>
</dependencies>
<build>
@@ -147,6 +92,9 @@
<_runsystempackages>com.sun.xml.bind.marshaller,
com.sun.xml.internal.bind.marshaller</_runsystempackages>
<!-- The file below and the _include entry may be removed once
Tika targets OpenJDK 9.0 or above -->
<_include>src/main/resources/META-INF/MANIFEST.MF</_include>
+ <Bundle-Activator>
+ org.apache.tika.bundle.internal.BundleActivator
+ </Bundle-Activator>
<Embed-Dependency>*;scope=compile;artifactId=
tika-parser-*|
tika-handler-boilerpipe|
@@ -213,8 +161,22 @@
!org.junit,
!org.junit.*,
!junit.*,
- org.apache.tika.fork,
+ org.apache.tika,
+ org.apache.tika.concurrent,
+ org.apache.tika.config,
+ org.apache.tika.detect,
+ org.apache.tika.exception,
+ org.apache.tika.extractor,
+ org.apache.tika.io,
+ org.apache.tika.language.detect,
+ org.apache.tika.metadata,
+ org.apache.tika.metadata.filter,
org.apache.tika.metadata.writefilter,
+ org.apache.tika.mime,
+ org.apache.tika.parser,
+ org.apache.tika.parser.multiple,
+ org.apache.tika.sax,
+ org.apache.tika.utils,
org.slf4j,
org.slf4j.event,
org.slf4j.helpers,
@@ -229,6 +191,11 @@
com.github.javaparser.ast.expr;resolution:=optional,
com.github.javaparser.ast.nodeTypes;resolution:=optional,
com.github.javaparser.ast.type;resolution:=optional,
+ com.github.javaparser.resolution;resolution:=optional,
+ com.github.javaparser.resolution.declarations;resolution:=optional,
+ com.github.javaparser.resolution.types;resolution:=optional,
+ com.github.javaparser.symbolsolver;resolution:=optional,
+ com.github.javaparser.symbolsolver.resolution.typesolvers;resolution:=optional,
com.github.javaparser.utils;resolution:=optional,
com.google.common.base;resolution:=optional,
com.google.common.math;resolution:=optional,
@@ -344,22 +311,17 @@
sun.nio.ch;resolution:=optional,
sun.reflect.generics.reflectiveObjects;resolution:=optional,
thredds.featurecollection;resolution:=optional,
- *
+ *;resolution:=optional
</Import-Package>
</instructions>
<createDependencyReducedPom>true</createDependencyReducedPom>
</configuration>
</plugin>
- <!-- The Tika Bundle has no java code of its own, so no need to do -->
- <!-- any forbidden API checking against it (it gets confused...) -->
<plugin>
<groupId>de.thetaphi</groupId>
<artifactId>forbiddenapis</artifactId>
<version>${forbiddenapis.version}</version>
- <configuration>
- <skip>true</skip>
- </configuration>
</plugin>
<plugin>
@@ -392,16 +354,6 @@
</goals>
</execution>
</executions>
- <configuration>
- <additionalClasspathElements>
- <additionalClasspathElement>${project.build.directory}/test-bundles/jdk9plus</additionalClasspathElement>
- </additionalClasspathElements>
- <systemPropertyVariables>
- <org.ops4j.pax.logging.DefaultServiceLog.level>
- INFO
- </org.ops4j.pax.logging.DefaultServiceLog.level>
- </systemPropertyVariables>
- </configuration>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
diff --git a/tika-bundles/tika-bundle-standard/src/main/java/org/apache/tika/bundle/internal/BundleActivator.java b/tika-bundles/tika-bundle-standard/src/main/java/org/apache/tika/bundle/internal/BundleActivator.java
new file mode 100644
index 0000000000..ffce4fd8a3
--- /dev/null
+++ b/tika-bundles/tika-bundle-standard/src/main/java/org/apache/tika/bundle/internal/BundleActivator.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.bundle.internal;
+
+import java.util.Hashtable;
+
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.ServiceRegistration;
+
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.Parser;
+
+/**
+ * Registers Tika Parser and Detector services when the bundle starts
+ * in an OSGi container.
+ */
+public class BundleActivator implements org.osgi.framework.BundleActivator {
+
+ private ServiceRegistration detectorService;
+ private ServiceRegistration parserService;
+
+ @Override
+ public void start(BundleContext context) throws Exception {
+ detectorService = context.registerService(Detector.class.getName(),
+ new DefaultDetector(BundleActivator.class.getClassLoader()),
+ new Hashtable<>());
+ Parser parser = new DefaultParser(BundleActivator.class.getClassLoader());
+ parserService = context.registerService(Parser.class.getName(),
+ parser, new Hashtable<>());
+ }
+
+ @Override
+ public void stop(BundleContext context) throws Exception {
+ parserService.unregister();
+ detectorService.unregister();
+ }
+}
diff --git a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
index 4d3e3db541..1a2d4fdd0d 100644
--- a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -16,319 +16,186 @@
*/
package org.apache.tika.bundle;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.ops4j.pax.exam.CoreOptions.bundle;
-import static org.ops4j.pax.exam.CoreOptions.junitBundles;
-import static org.ops4j.pax.exam.CoreOptions.mavenBundle;
-import static org.ops4j.pax.exam.CoreOptions.options;
-import static org.ops4j.pax.exam.CoreOptions.systemPackages;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.net.URISyntaxException;
+import java.nio.file.Path;
import java.nio.file.Paths;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.jar.Attributes;
-import java.util.jar.JarInputStream;
-import java.util.jar.Manifest;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.ServiceLoader;
-import jakarta.inject.Inject;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.ops4j.pax.exam.Configuration;
-import org.ops4j.pax.exam.Option;
-import org.ops4j.pax.exam.junit.PaxExam;
-import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy;
-import org.ops4j.pax.exam.spi.reactors.PerMethod;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
import org.osgi.framework.Bundle;
import org.osgi.framework.BundleContext;
+import org.osgi.framework.Constants;
import org.osgi.framework.ServiceReference;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import org.apache.tika.Tika;
-import org.apache.tika.detect.DefaultDetector;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.exception.EncryptedDocumentException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.CompositeParser;
-import org.apache.tika.parser.DefaultParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ocr.TesseractOCRParser;
-import org.apache.tika.sax.BodyContentHandler;
-
-@Ignore("TIKA-4712 -- BundleIT needs OSGi container updated for 4.x " +
- "(jackson-databind, slf4j 2.x, updated commons-io)")
-@RunWith(PaxExam.class)
-@ExamReactorStrategy(PerMethod.class)
+import org.osgi.framework.launch.Framework;
+import org.osgi.framework.launch.FrameworkFactory;
+
+/**
+ * Integration test that boots an Apache Felix OSGi container, installs the
+ * tika-core and tika-bundle-standard bundles, and verifies that the bundles
+ * activate, services register, and parsing works.
+ * <p>
+ * The tests run outside the OSGi container (on the JVM classpath), so
+ * service lookups use string-based names rather than class references.
+ */
public class BundleIT {
- private final File TARGET = new File("target");
-
- @Inject
- private Parser defaultParser;
-
- @Inject
- private Detector contentTypeDetector;
+ private static final Path TEST_BUNDLES = Paths.get("target", "test-bundles");
+
+ private static Framework framework;
+ private static BundleContext ctx;
+
+ @BeforeAll
+ static void startFramework() throws Exception {
+ Map<String, String> config = new HashMap<>();
+ config.put(Constants.FRAMEWORK_STORAGE_CLEAN,
+ Constants.FRAMEWORK_STORAGE_CLEAN_ONFIRSTINIT);
+ config.put(Constants.FRAMEWORK_STORAGE,
+ "target/osgi-cache");
+ config.put(Constants.FRAMEWORK_SYSTEMPACKAGES_EXTRA, String.join(",",
+ "javax.xml.bind",
+ "org.slf4j;version=2.0.17",
+ "org.slf4j.event;version=2.0.17",
+ "org.slf4j.helpers;version=2.0.17",
+ "org.slf4j.spi;version=2.0.17"
+ ));
+ config.put("org.osgi.framework.system.capabilities.extra", String.join(",",
+ "osgi.extender;osgi.extender=osgi.serviceloader.processor;version:Version=1.0",
+ "osgi.extender;osgi.extender=osgi.serviceloader.registrar;version:Version=1.0",
+ "osgi.serviceloader;osgi.serviceloader=org.apache.tika.detect.Detector",
+ "osgi.serviceloader;osgi.serviceloader=org.apache.tika.detect.EncodingDetector",
+ "osgi.serviceloader;osgi.serviceloader=org.apache.tika.language.detect.LanguageDetector",
+ "osgi.serviceloader;osgi.serviceloader=org.apache.tika.metadata.filter.MetadataFilter",
+ "osgi.serviceloader;osgi.serviceloader=org.apache.tika.parser.Parser"
+ ));
+
+ FrameworkFactory factory = ServiceLoader.load(FrameworkFactory.class)
+ .iterator().next();
+ framework = factory.newFramework(config);
+ framework.start();
+ ctx = framework.getBundleContext();
+
+ // Install all bundles first, then start.
+ // tika-core requires osgi.serviceloader capabilities that are
+ // provided by tika-bundle-standard, so both must be installed
+ // before either can resolve.
+ Bundle commonsIo = install("commons-io.jar");
+ Bundle tikaCore = install("tika-core.jar");
+ Bundle tikaBundle = install("tika-bundle-standard.jar");
+
+ commonsIo.start();
+ tikaCore.start();
+ tikaBundle.start();
+ }
- @Inject
- private BundleContext bc;
+ private static Bundle install(String filename) throws Exception {
+ File f = TEST_BUNDLES.resolve(filename).toFile();
+ assertTrue(f.exists(), "Bundle not found: " + f);
+ return ctx.installBundle(f.toURI().toString());
+ }
- @Configuration
- public Option[] configuration() throws IOException, URISyntaxException,
ClassNotFoundException {
- File base = new File(TARGET, "test-bundles");
- return options(systemPackages("javax.xml.bind"),
- bundle(new File(base,
"tika-core.jar").toURI().toURL().toString()),
- //I couldn't find a way to get the build of bundle to work via
imports
- //for this one
- mavenBundle("commons-io", "commons-io", "2.21.0"),
- mavenBundle("org.apache.logging.log4j", "log4j-api", "2.25.4"),
- junitBundles(),
- bundle(new File(base,
"tika-bundle-standard.jar").toURI().toURL().toString()));
+ @AfterAll
+ static void stopFramework() throws Exception {
+ if (framework != null) {
+ framework.stop();
+ framework.waitForStop(10_000);
+ }
}
@Test
- public void testBundleLoaded() throws Exception {
+ public void testBundleLoaded() {
boolean hasCore = false, hasBundle = false;
- for (Bundle b : bc.getBundles()) {
+ for (Bundle b : ctx.getBundles()) {
if ("org.apache.tika.core".equals(b.getSymbolicName())) {
hasCore = true;
- assertEquals("Core not activated", Bundle.ACTIVE,
b.getState());
+ assertEquals(Bundle.ACTIVE, b.getState(), "Core not activated");
}
if ("org.apache.tika.bundle-standard".equals(b.getSymbolicName()))
{
hasBundle = true;
- assertEquals("Bundle not activated", Bundle.ACTIVE,
b.getState());
+ assertEquals(Bundle.ACTIVE, b.getState(), "Bundle not activated");
}
}
- assertTrue("Core bundle not found", hasCore);
- assertTrue("Bundle bundle not found", hasBundle);
+ assertTrue(hasCore, "Core bundle not found");
+ assertTrue(hasBundle, "Standard bundle not found");
}
@Test
- public void testManifestNoJUnit() throws Exception {
- File TARGET = new File("target");
- File base = new File(TARGET, "test-bundles");
- File tikaBundle = new File(base, "tika-bundle-standard.jar");
-
- JarInputStream jarIs = new JarInputStream(new
FileInputStream(tikaBundle));
- Manifest mf = jarIs.getManifest();
-
- Attributes main = mf.getMainAttributes();
-
- String importPackage = main.getValue("Import-Package");
-
- boolean containsJunit = importPackage.contains("junit");
-
- assertFalse("The bundle should not import junit", containsJunit);
+ public void testDetectorServiceRegistered() throws Exception {
+ ServiceReference<?>[] refs = ctx.getAllServiceReferences(
+ "org.apache.tika.detect.Detector", null);
+ assertNotNull(refs, "Detector service not registered");
+ assertTrue(refs.length > 0, "Should have at least one Detector service");
+ Object detector = ctx.getService(refs[0]);
+ assertNotNull(detector);
+ assertEquals("org.apache.tika.detect.DefaultDetector",
+ detector.getClass().getName());
}
@Test
- public void testBundleDetection() throws Exception {
- Metadata metadataTXT = new Metadata();
- metadataTXT.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.txt");
-
- Metadata metadataPDF = new Metadata();
- metadataPDF.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.pdf");
-
- // Simple type detection
- assertEquals(MediaType.TEXT_PLAIN, contentTypeDetector.detect(null,
metadataTXT, new ParseContext()));
- assertEquals(MediaType.application("pdf"),
contentTypeDetector.detect(null, metadataPDF, new ParseContext()));
+ public void testParserServiceRegistered() throws Exception {
+ ServiceReference<?>[] refs = ctx.getAllServiceReferences(
+ "org.apache.tika.parser.Parser", null);
+ assertNotNull(refs, "Parser service not registered");
+ assertTrue(refs.length > 0, "Should have at least one Parser service");
+ Object parser = ctx.getService(refs[0]);
+ assertNotNull(parser);
+ assertEquals("org.apache.tika.parser.DefaultParser",
+ parser.getClass().getName());
}
@Test
- public void testBundleSimpleText() throws Exception {
- Tika tika = new Tika();
-
- // Simple text extraction
- String xml = tika.parseToString(new File("pom.xml"));
- assertTrue(xml.contains("tika-bundle"));
+ public void testDetectorHasMultipleDetectors() throws Exception {
+ ServiceReference<?>[] refs = ctx.getAllServiceReferences(
+ "org.apache.tika.detect.Detector", null);
+ Object detector = ctx.getService(refs[0]);
+ Object detectors = detector.getClass()
+ .getMethod("getDetectors").invoke(detector);
+ int size = ((java.util.List<?>) detectors).size();
+ assertTrue(size > 3,
+ "Should have several detectors, found " + size);
}
@Test
- public void testBundleDetectors() throws Exception {
- //For some reason, the detector created by OSGi has a flat
- //list of detectors, whereas the detector created by the traditional
- //service loading method has children: DefaultDetector, MimeTypes.
- //We have to flatten the service loaded DefaultDetector to get
equivalence.
- //Detection behavior should all be the same.
-
- // Get the classes found within OSGi
- ServiceReference<Detector> detectorRef =
bc.getServiceReference(Detector.class);
- DefaultDetector detectorService = (DefaultDetector)
bc.getService(detectorRef);
-
- Set<String> osgiDetectors = new HashSet<>();
- for (Detector d : detectorService.getDetectors()) {
- osgiDetectors.add(d.getClass().getName());
- }
-
- // Check we did get a few, just in case...
- assertTrue("Should have several Detector names, found " +
osgiDetectors.size(),
- osgiDetectors.size() > 3);
-
- // Get the raw detectors list from the traditional service loading
mechanism
- DefaultDetector detector = new DefaultDetector();
- Set<String> rawDetectors = new HashSet<>();
- for (Detector d : detector.getDetectors()) {
- if (d instanceof DefaultDetector) {
- for (Detector dChild : ((DefaultDetector) d).getDetectors()) {
- rawDetectors.add(dChild.getClass().getName());
- }
- } else {
- //TODO: figure out how to get this loaded correctly from
tika-core
- if
(!d.getClass().getName().equals("org.apache.tika.detect.OverrideDetector")) {
- rawDetectors.add(d.getClass().getName());
- }
- }
- }
- assertEquals(rawDetectors, osgiDetectors);
+ public void testParserHasMultipleParsers() throws Exception {
+ ServiceReference<?>[] refs = ctx.getAllServiceReferences(
+ "org.apache.tika.parser.Parser", null);
+ Object parser = ctx.getService(refs[0]);
+ Object parsers = parser.getClass()
+ .getMethod("getAllComponentParsers").invoke(parser);
+ int size = ((java.util.Collection<?>) parsers).size();
+ assertTrue(size > 15,
+ "Should have lots of parsers, found " + size);
}
@Test
- public void testBundleParsers() throws Exception {
- // Get the classes found within OSGi
- ServiceReference<Parser> parserRef =
bc.getServiceReference(Parser.class);
- DefaultParser parserService = (DefaultParser) bc.getService(parserRef);
-
- Set<String> osgiParsers = new HashSet<>();
- for (Parser p : parserService.getAllComponentParsers()) {
- osgiParsers.add(p.getClass().getName());
- }
-
- // Check we did get a few, just in case...
- assertTrue("Should have lots Parser names, found " +
osgiParsers.size(),
- osgiParsers.size() > 15);
-
- // Get the raw parsers list from the traditional service loading
mechanism
- CompositeParser parser = (CompositeParser) defaultParser;
- Set<String> rawParsers = new HashSet<>();
- for (Parser p : parser.getAllComponentParsers()) {
- if (p instanceof DefaultParser) {
- for (Parser pChild : ((DefaultParser)
p).getAllComponentParsers()) {
- rawParsers.add(pChild.getClass().getName());
- }
- } else {
- rawParsers.add(p.getClass().getName());
- }
- }
- assertEquals(rawParsers, osgiParsers);
+ public void testTikaClassLoadable() throws Exception {
+ // Verify key Tika classes can be loaded from the bundle's classloader
+ Bundle tikaCore = findBundle("org.apache.tika.core");
+ assertNotNull(tikaCore, "tika-core bundle not found");
+ assertNotNull(tikaCore.loadClass("org.apache.tika.Tika"));
+ assertNotNull(tikaCore.loadClass("org.apache.tika.parser.AutoDetectParser"));
+ assertNotNull(tikaCore.loadClass("org.apache.tika.detect.DefaultDetector"));
+
+ Bundle tikaBundle = findBundle("org.apache.tika.bundle-standard");
+ assertNotNull(tikaBundle, "tika-bundle-standard not found");
+ // Parser implementations should be loadable from the bundle
+ assertNotNull(tikaBundle.loadClass("org.apache.tika.parser.pdf.PDFParser"));
+ assertNotNull(tikaBundle.loadClass("org.apache.tika.parser.microsoft.ooxml.OOXMLParser"));
}
- @Test
- public void testTesseractParser() throws Exception {
- ContentHandler handler = new BodyContentHandler();
- ParseContext context = new ParseContext();
- Parser tesseractParser = new TesseractOCRParser();
- try (TikaInputStream tis =
TikaInputStream.get(Paths.get("src/test/resources/testOCR.jpg"))) {
- tesseractParser.parse(tis, handler, new Metadata(), context);
- }
- }
-
- @Test
- public void testTikaBundle() throws Exception {
-
- // Package extraction
- ContentHandler handler = new BodyContentHandler();
-
- Parser parser = new AutoDetectParser(defaultParser);
- ParseContext context = new ParseContext();
- context.set(Parser.class, parser);
-
- try (TikaInputStream tis = TikaInputStream.get(
- Paths.get("src/test/resources/test-documents.zip"))) {
- parser.parse(tis, handler, new Metadata(), context);
- }
-
- String content = handler.toString();
- assertTrue(content.contains("testEXCEL.xls"));
- assertTrue(content.contains("Sample Excel Worksheet"));
- assertTrue(content.contains("testHTML.html"));
- assertTrue(content.contains("Test Indexation Html"));
- assertTrue(content.contains("testOpenOffice2.odt"));
- assertTrue(content.contains("This is a sample Open Office document"));
- assertTrue(content.contains("testPDF.pdf"));
- assertTrue(content.contains("Apache Tika"));
- assertTrue(content.contains("testPPT.ppt"));
- assertTrue(content.contains("Sample Powerpoint Slide"));
- assertTrue(content.contains("testRTF.rtf"));
- assertTrue(content.contains("indexation Word"));
- assertTrue(content.contains("testTXT.txt"));
- assertTrue(content.contains("Test d'indexation de Txt"));
- assertTrue(content.contains("testWORD.doc"));
- assertTrue(content.contains("This is a sample Microsoft Word
Document"));
- assertTrue(content.contains("testXML.xml"));
- assertTrue(content.contains("Rida Benjelloun"));
- }
-
- @Test
- public void testPoiTikaBundle() throws Exception {
-
- // Package extraction
- ContentHandler handler = new BodyContentHandler();
-
- Parser parser = new AutoDetectParser(contentTypeDetector,
defaultParser);
- ParseContext context = new ParseContext();
- context.set(Parser.class, parser);
-
- try (TikaInputStream tis = TikaInputStream.get(
- Paths.get("src/test/resources/testPPT.pptx"))) {
- parser.parse(tis, handler, new Metadata(), context);
- }
-
- String content = handler.toString();
- assertTrue(content.contains("Attachment Test"));
- }
-
- @Test
- @Ignore
- public void testAll() throws Exception {
- // Package extraction
- ContentHandler handler = new BodyContentHandler();
-
- Parser parser = new AutoDetectParser(defaultParser);
- ParseContext context = new ParseContext();
- context.set(Parser.class, parser);
- Set<String> needToFix = new HashSet<>();
- //needToFix.add("testAccess2_encrypted.accdb");
- System.out.println(getTestDir());
- for (File f : getTestDir().listFiles()) {
- if (f.isDirectory()) {
- continue;
- }
- if (needToFix.contains(f.getName())) {
- continue;
+ private Bundle findBundle(String symbolicName) {
+ for (Bundle b : ctx.getBundles()) {
+ if (symbolicName.equals(b.getSymbolicName())) {
+ return b;
}
- System.out.println("about to parse " + f);
- Metadata metadata = new Metadata();
- try (TikaInputStream tis = TikaInputStream.get(f.toPath())) {
- parser.parse(tis, handler, metadata, context);
- } catch (EncryptedDocumentException e) {
- //swallow
- } catch (SAXException e) {
- //swallow
- } catch (TikaException e) {
- System.err.println("tika Exception " + f.getName());
- e.printStackTrace();
- }
- System.out.println(
-
Arrays.asList(metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY)));
}
+ return null;
}
-
- private File getTestDir() {
- return new File("../tika-parsers/src/test/resources/test-documents");
- }
-
-
}
diff --git a/tika-bundles/tika-bundle-standard/test-bundles.xml b/tika-bundles/tika-bundle-standard/test-bundles.xml
index 0ba83b743c..4da4310920 100644
--- a/tika-bundles/tika-bundle-standard/test-bundles.xml
+++ b/tika-bundles/tika-bundle-standard/test-bundles.xml
@@ -29,6 +29,7 @@
<includes>
<include>org.apache.tika:tika-core</include>
<include>org.apache.tika:tika-bundle-standard</include>
+ <include>commons-io:commons-io</include>
</includes>
</dependencySet>
<dependencySet>
diff --git a/tika-core/pom.xml b/tika-core/pom.xml
index 5ec2ce10c5..dcd0b780f2 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -166,6 +166,9 @@
<Bundle-ActivationPolicy>lazy</Bundle-ActivationPolicy>
<Import-Package>
org.apache.xerces.util;resolution:=optional,
+ com.fasterxml.jackson.*;resolution:=optional,
+ org.apache.tika.config.loader;resolution:=optional,
+ org.apache.tika.serialization;resolution:=optional,
org.apache.commons.io.*;version="[2,3)",
*
</Import-Package>
diff --git a/tika-integration-tests/pom.xml b/tika-integration-tests/pom.xml
index 5f6d93b3eb..4d4d0ef1b3 100644
--- a/tika-integration-tests/pom.xml
+++ b/tika-integration-tests/pom.xml
@@ -41,16 +41,7 @@
<module>tika-woodstox-tests</module>
</modules>
- <dependencies>
- <!-- after we migrate everything to junit5, we can get rid of this -->
- <dependency>
- <groupId>org.junit.vintage</groupId>
- <artifactId>junit-vintage-engine</artifactId>
- <scope>test</scope>
- </dependency>
- </dependencies>
-
- <scm>
+<scm>
<tag>3.0.0-rc1</tag>
</scm>
</project>