This is an automated email from the ASF dual-hosted git repository.
tballison pushed a change to branch TIKA-4731-common-script
in repository https://gitbox.apache.org/repos/asf/tika.git
from 20ce737110 updates based on copilot feedback
add 5fb9402c2a TIKA-4327: update aws, enforcer plugin, azure
add 230d6352ac TIKA-4327: update aws, jbig2
add 52530afcce TIKA-4327: update junit6
add da1801a84c TIKA-4733 -- improve release artifact robustness and
documentation (#2825)
add 0b38268d4f TIKA-4735 -- fix content-only (#2826)
add c2b15c9ce1 TIKA-4733 -- fix docker-snapshot.yml to match new release
zip artifacts (#2827)
add 19b4c66927 TIKA-4728 - fix xhtml in widgets (#2817)
add 4b66205620 TIKA-4736 -- image extraction fails (#2828)
add 8ef279d581 TIKA-4327: update aws, netty, woodstox, plexus
add 933fb96d10 Bump com.github.luben:zstd-jni from 1.5.7-8 to 1.5.7-9
(#2829)
add 795f30c368 Bump com.microsoft.graph:microsoft-graph from 6.64.0 to
6.65.0 (#2835)
add 1aea9db9a2 Bump org.apache.kafka:kafka-clients from 4.2.0 to 4.3.0
(#2834)
add 3622658b9a Bump software.amazon.awssdk:bom from 2.44.10 to 2.44.12
(#2833)
add 29f287fe0c Bump org.apache.maven.plugins:maven-site-plugin from 3.21.0
to 3.22.0 (#2832)
add 3b1f68ec11 Bump org.ow2.asm:asm from 9.10 to 9.10.1 (#2830)
add cfddd1afc0 Bump eu.maveniverse.maven.nisse:extension from 0.9.0 to
0.9.2 (#2831)
add 4bfbdf22cf TIKA-4737 -- improve docs for tika-pipes via tika-app
(#2836)
add 0cbdb26e24 TIKA-4740 -- fix flaky windows test
add 1abcd65381 TIKA-4740 -- update docs
add d02dc13903 TIKA-4740 -- tika-server-core fix (#2841)
add 94040af31f Merge branch 'main' into TIKA-4731-common-script
No new revisions were added by this update.
Summary of changes:
.github/workflows/docker-snapshot.yml | 4 +-
.mvn/extensions.xml | 2 +-
docs/modules/ROOT/nav.adoc | 1 +
.../integration-testing/run-uat-script.adoc | 10 +-
.../advanced/integration-testing/tika-server.adoc | 18 +-
.../pages/maintainers/release-guides/docker.adoc | 4 +-
.../release-guides/release-artifacts.adoc | 10 +-
.../pages/maintainers/release-guides/tika.adoc | 2 +-
docs/modules/ROOT/pages/migration-to-4x/index.adoc | 2 +-
docs/modules/ROOT/pages/pipes/configuration.adoc | 2 +-
docs/modules/ROOT/pages/pipes/cpu-sizing.adoc | 2 +-
docs/modules/ROOT/pages/pipes/parse-modes.adoc | 6 +-
docs/modules/ROOT/pages/pipes/troubleshooting.adoc | 131 ++++++++++++
docs/modules/ROOT/pages/using-tika/cli/index.adoc | 74 +++++--
.../ROOT/pages/using-tika/server/index.adoc | 4 +-
docs/modules/ROOT/pages/using-tika/server/tls.adoc | 2 +-
pom.xml | 52 ++++-
tika-app/pom.xml | 4 +
.../main/java/org/apache/tika/cli/AsyncHelper.java | 21 +-
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 78 ++++---
.../java/org/apache/tika/cli/AsyncHelperTest.java | 30 ++-
.../ParsingEmbeddedDocumentExtractor.java | 29 ++-
.../tika/sax/BasicContentHandlerFactory.java | 30 +++
.../org/apache/tika/sax/StrictXHTMLValidator.java | 229 +++++++++++++++++++++
.../org/apache/tika/sax/XHTMLBalancingHandler.java | 123 +++++++++++
.../src/test/java/org/apache/tika/TikaTest.java | 37 +++-
.../apache/tika/sax/XHTMLBalancingHandlerTest.java | 130 ++++++++++++
tika-e2e-tests/tika-server/pom.xml | 3 +-
.../tika/server/e2e/TikaServerHttp2Test.java | 25 ++-
tika-eval/tika-eval-app/pom.xml | 4 +
tika-parent/pom.xml | 22 +-
.../tika/parser/iwork/PagesContentHandler.java | 19 +-
.../java/org/apache/tika/parser/prt/PRTParser.java | 85 ++++----
.../apache/tika/parser/code/SourceCodeParser.java | 25 +++
.../tika/parser/code/SourceCodeParserTest.java | 1 -
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 15 +-
.../microsoft/ooxml/OOXMLTikaBodyPartHandler.java | 55 +++++
.../ooxml/OOXMLWordAndPowerPointTextHandler.java | 7 +
.../ooxml/SXSLFPowerPointExtractorDecorator.java | 16 +-
.../ooxml/SXWPFWordExtractorDecorator.java | 6 +
.../ooxml/XSSFExcelExtractorDecorator.java | 36 ++++
.../org/apache/tika/parser/epub/EpubParser.java | 44 +++-
.../tika/parser/odf/OpenDocumentBodyHandler.java | 77 +++++++
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 25 ++-
.../org/apache/tika/parser/pdf/PDFParserTest.java | 14 ++
.../test-documents/testPDF_jsActionOnPage.pdf | 26 +++
.../java/org/apache/tika/parser/txt/TXTParser.java | 27 ++-
.../java/org/apache/tika/parser/tmx/TMXParser.java | 2 +
.../apache/tika/parser/xliff/XLIFF12Parser.java | 3 +-
.../org/apache/tika/async/cli/PluginsWriter.java | 10 +
.../apache/tika/async/cli/SimpleAsyncConfig.java | 13 ++
.../org/apache/tika/async/cli/TikaAsyncCLI.java | 16 +-
.../apache/tika/async/cli/AsyncCliParserTest.java | 25 +++
.../apache/tika/async/cli/AsyncProcessorTest.java | 42 ++++
...plate.json => config-content-only-default.json} | 5 +-
.../tika/pipes/core/PerClientServerManager.java | 45 +++-
.../apache/tika/pipes/core/ServerProcessIO.java | 112 ++++++++++
.../tika/pipes/core/SharedServerManager.java | 41 +++-
.../apache/tika/pipes/core/server/PipesServer.java | 54 +++++
.../apache/tika/pipes/core/server/PipesWorker.java | 3 +
.../tika/pipes/emitter/fs/FileSystemEmitter.java | 18 +-
.../fs/FileSystemEmitterRuntimeConfigTest.java | 31 +++
.../tika-pipes-microsoft-graph/pom.xml | 2 +-
tika-server/README.md | 10 +-
tika-server/docker-build/CHANGES.md | 4 +-
tika-server/docker-build/README.md | 8 +-
.../docker-build/docker-compose-tika-customocr.yml | 10 +-
.../docker-build/docker-compose-tika-grobid.yml | 10 +-
tika-server/docker-build/full/Dockerfile | 24 ++-
tika-server/docker-build/full/Dockerfile.snapshot | 2 +-
tika-server/docker-build/minimal/Dockerfile | 24 ++-
.../docker-build/minimal/Dockerfile.snapshot | 2 +-
.../tika/server/core/IntegrationTestBase.java | 31 +++
.../server/core/benchmark/TikaServerBenchmark.java | 2 +-
.../bin/install_tika_service.sh | 21 +-
tika-server/tika-server-standard/bin/tika | 10 +-
tika-server/tika-server-standard/bin/tika.in.sh | 2 +-
tika-server/tika-server-standard/pom.xml | 45 ++++
.../src/main/assembly/assembly.xml | 5 +-
tika-translate/pom.xml | 2 +-
80 files changed, 1945 insertions(+), 258 deletions(-)
create mode 100644 docs/modules/ROOT/pages/pipes/troubleshooting.adoc
create mode 100644
tika-core/src/main/java/org/apache/tika/sax/StrictXHTMLValidator.java
create mode 100644
tika-core/src/main/java/org/apache/tika/sax/XHTMLBalancingHandler.java
create mode 100644
tika-core/src/test/java/org/apache/tika/sax/XHTMLBalancingHandlerTest.java
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testPDF_jsActionOnPage.pdf
copy
tika-pipes/tika-async-cli/src/test/resources/configs/{config-template.json =>
config-content-only-default.json} (92%)
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/ServerProcessIO.java