This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4639
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 064dcbfda3638c43bfbd821f0d96c36b1c9cd9da
Merge: 6aed8956a4 589d1c25b1
Author: tallison <[email protected]>
AuthorDate: Thu Jan 29 08:05:08 2026 -0500

    Merge branch 'main' of https://github.com/apache/tika into TIKA-4639

 .../test/java/org/apache/tika/cli/TikaCLITest.java |  41 ++++---
 .../ParsingEmbeddedDocumentExtractor.java          |  20 ++--
 .../tika/extractor/RUnpackExtractorFactory.java    | 121 --------------------
 ...rFactory.java => StandardExtractorFactory.java} |  18 +--
 .../java/org/apache/tika/io/FilenameUtils.java     |   2 +-
 .../org/apache/tika/parser/AutoDetectParser.java   |   7 +-
 .../apache/tika/parser/AutoDetectParserConfig.java |  17 +--
 .../java/org/apache/tika/parser/EmptyParser.java   |   2 +-
 .../tika/parser/external/ExternalParser.java       |   2 +-
 .../tika/parser/external2/ExternalParser.java      |   2 +-
 .../java/org/apache/tika/sax/SAXOutputConfig.java  |  76 +++++++++++++
 .../org/apache/tika/sax/XHTMLContentHandler.java   |  87 +++++++++++----
 .../apache/tika/sax/XHTMLContentHandlerTest.java   | 123 +++++++++++++++++++++
 .../org/apache/tika/example/RollbackSoftware.java  |   2 +-
 .../org/apache/custom/parser/MyCustomParser.java   |   2 +-
 .../apache/tika/parser/envi/EnviHeaderParser.java  |   2 +-
 .../org/apache/tika/parser/gdal/GDALParser.java    |   8 +-
 .../geoinfo/GeographicInformationParser.java       |   2 +-
 .../org/apache/tika/parser/grib/GribParser.java    |   2 +-
 .../java/org/apache/tika/parser/hdf/HDFParser.java |   2 +-
 .../apache/tika/parser/isatab/ISArchiveParser.java |   2 +-
 .../apache/tika/parser/netcdf/NetCDFParser.java    |   2 +-
 .../apache/tika/parser/ner/NamedEntityParser.java  |   2 +-
 .../parser/transcribe/aws/AmazonTranscribe.java    |   2 +-
 .../tika/parser/apple/AppleSingleFileParser.java   |   2 +-
 .../org/apache/tika/parser/apple/PListParser.java  |   2 +-
 .../tika/parser/iwork/IWorkPackageParser.java      |   2 +-
 .../parser/iwork/iwana/IWork13PackageParser.java   |   2 +-
 .../org/apache/tika/parser/audio/AudioParser.java  |   2 +-
 .../org/apache/tika/parser/audio/MidiParser.java   |   2 +-
 .../java/org/apache/tika/parser/mp3/Mp3Parser.java |   2 +-
 .../java/org/apache/tika/parser/mp4/MP4Parser.java |   2 +-
 .../org/apache/tika/parser/ogg/FlacParser.java     |   2 +-
 .../java/org/apache/tika/parser/ogg/OggParser.java |   2 +-
 .../org/apache/tika/parser/ogg/OpusParser.java     |   2 +-
 .../org/apache/tika/parser/ogg/SpeexParser.java    |   2 +-
 .../org/apache/tika/parser/ogg/TheoraParser.java   |   2 +-
 .../org/apache/tika/parser/ogg/VorbisParser.java   |   2 +-
 .../org/apache/tika/parser/video/FLVParser.java    |   2 +-
 .../org/apache/tika/parser/dgn/DGN8Parser.java     |   2 +-
 .../java/org/apache/tika/parser/dwg/DWGParser.java |   2 +-
 .../org/apache/tika/parser/dwg/DWGReadParser.java  |   2 +-
 .../java/org/apache/tika/parser/prt/PRTParser.java |   2 +-
 .../org/apache/tika/parser/asm/ClassParser.java    |   2 +-
 .../apache/tika/parser/asm/XHTMLClassVisitor.java  |   5 +-
 .../apache/tika/parser/code/SourceCodeParser.java  |   2 +-
 .../tika/parser/executable/ExecutableParser.java   |   2 +-
 .../executable/UniversalExecutableParser.java      |   2 +-
 .../java/org/apache/tika/parser/mat/MatParser.java |   2 +-
 .../org/apache/tika/parser/sas/SAS7BDATParser.java |   2 +-
 .../org/apache/tika/parser/crypto/TSDParser.java   |   2 +-
 .../tika/parser/font/AdobeFontMetricParser.java    |   2 +-
 .../apache/tika/parser/font/TrueTypeParser.java    |   2 +-
 .../org/apache/tika/parser/html/HtmlHandler.java   |   2 +-
 .../tika/parser/image/AbstractImageParser.java     |   4 +-
 .../org/apache/tika/parser/image/ICNSParser.java   |   2 +-
 .../org/apache/tika/parser/image/PSDParser.java    |   2 +-
 .../org/apache/tika/parser/image/WebPParser.java   |   2 +-
 .../apache/tika/parser/jdbc/AbstractDBParser.java  |   2 +-
 .../org/apache/tika/parser/mail/RFC822Parser.java  |   2 +-
 .../org/apache/tika/parser/mbox/MboxParser.java    |   2 +-
 .../apache/tika/parser/microsoft/EMFParser.java    |   2 +-
 .../tika/parser/microsoft/JackcessParser.java      |   2 +-
 .../tika/parser/microsoft/MSOwnerFileParser.java   |   2 +-
 .../apache/tika/parser/microsoft/OfficeParser.java |   2 +-
 .../tika/parser/microsoft/OldExcelParser.java      |   2 +-
 .../apache/tika/parser/microsoft/TNEFParser.java   |   2 +-
 .../apache/tika/parser/microsoft/WMFParser.java    |   2 +-
 .../microsoft/activemime/ActiveMimeParser.java     |   2 +-
 .../tika/parser/microsoft/chm/ChmParser.java       |   2 +-
 .../tika/parser/microsoft/libpst/LibPstParser.java |   2 +-
 .../parser/microsoft/onenote/OneNoteParser.java    |   2 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |   2 +-
 .../ooxml/xwpf/ml2006/Word2006MLParser.java        |   2 +-
 .../parser/microsoft/pst/OutlookPSTParser.java     |   2 +-
 .../parser/microsoft/pst/PSTMailItemParser.java    |   2 +-
 .../tika/parser/microsoft/rtf/RTFParser.java       |   2 +-
 .../microsoft/xml/AbstractXML2003Parser.java       |   2 +-
 .../java/org/apache/tika/parser/dbf/DBFParser.java |   2 +-
 .../java/org/apache/tika/parser/dif/DIFParser.java |   2 +-
 .../org/apache/tika/parser/epub/EpubParser.java    |   2 +-
 .../org/apache/tika/parser/hwp/HwpV5Parser.java    |   2 +-
 .../apache/tika/parser/indesign/IDMLParser.java    |   2 +-
 .../java/org/apache/tika/parser/mif/MIFParser.java |   2 +-
 .../tika/parser/odf/FlatOpenDocumentParser.java    |   2 +-
 .../tika/parser/odf/OpenDocumentContentParser.java |   2 +-
 .../apache/tika/parser/odf/OpenDocumentParser.java |   2 +-
 .../tika/parser/wordperfect/QuattroProParser.java  |   2 +-
 .../tika/parser/wordperfect/WordPerfectParser.java |   2 +-
 .../org/apache/tika/parser/feed/FeedParser.java    |   2 +-
 .../apache/tika/parser/iptc/IptcAnpaParser.java    |   2 +-
 .../apache/tika/parser/ocr/TesseractOCRParser.java |   2 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |   2 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java |   2 +-
 .../apache/tika/parser/pkg/CompressorParser.java   |   2 +-
 .../org/apache/tika/parser/pkg/PackageParser.java  |   2 +-
 .../java/org/apache/tika/parser/pkg/RarParser.java |   2 +-
 .../org/apache/tika/parser/pkg/UnrarParser.java    |   2 +-
 .../apache/tika/parser/csv/TextAndCSVParser.java   |   8 +-
 .../tika/parser/strings/Latin1StringsParser.java   |   2 +-
 .../apache/tika/parser/strings/StringsParser.java  |   2 +-
 .../java/org/apache/tika/parser/txt/TXTParser.java |   2 +-
 .../org/apache/tika/parser/http/HttpParser.java    |   2 +-
 .../org/apache/tika/parser/wacz/WACZParser.java    |   2 +-
 .../org/apache/tika/parser/warc/WARCParser.java    |   2 +-
 .../java/org/apache/tika/parser/tmx/TMXParser.java |   2 +-
 .../apache/tika/parser/xliff/XLIFF12Parser.java    |   2 +-
 .../org/apache/tika/parser/xliff/XLZParser.java    |   2 +-
 .../java/org/apache/tika/parser/xml/XMLParser.java |   2 +-
 .../apache/tika/parser/AutoDetectParserTest.java   |  23 ----
 .../tika/parser/AutoDetectReaderParserTest.java    |   2 +-
 .../resources/configs/tika-config-no-names.json    |   7 +-
 ...a-config-upcasing-custom-handler-decorator.json |  22 +---
 .../resources/configs/tika-config-with-names.json  |   6 +-
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |  10 +-
 .../apache/tika/async/cli/AsyncProcessorTest.java  |  12 +-
 .../AbstractEmbeddedDocumentBytesHandler.java      |  49 +++-----
 .../BasicEmbeddedDocumentBytesHandler.java         |  57 ----------
 .../EmittingEmbeddedDocumentBytesHandler.java      |  14 +--
 .../pipes/core}/extractor/RUnpackExtractor.java    |  13 +--
 .../core/extractor/RUnpackExtractorFactory.java    |  24 ++--
 ...dDocumentBytesConfig.java => UnpackConfig.java} | 116 +++++++++++++++----
 .../apache/tika/pipes/core/server/EmitHandler.java |  12 +-
 .../tika/pipes/core/server/ParseHandler.java       |   8 +-
 .../apache/tika/pipes/core/server/PipesServer.java |  14 +--
 .../apache/tika/pipes/core/server/PipesWorker.java |  34 +++---
 .../core/extractor/UnpackConfigSelectorTest.java   |  33 +++---
 .../core/serialization/JsonFetchEmitTupleTest.java |   2 +-
 tika-pipes/tika-pipes-integration-tests/pom.xml    |  12 ++
 .../pipes/core/DigestingOpenContainersTest.java    |  66 +++++++++++
 .../apache/tika/pipes/core/PipesServerTest.java    |  97 +---------------
 .../src/test/resources/configs/tika-4533.json      |  19 ++++
 .../resources/configs/tika-config-truncate.json    |  12 +-
 .../resources/test-documents/testLargeOLEDoc.doc   | Bin 0 -> 2077696 bytes
 .../tika/config/loader/ComponentRegistry.java      |   6 +-
 .../org/apache/tika/config/loader/TikaLoader.java  |   4 +
 .../configs/TIKA-4207-embedded-bytes-config.json   |  14 ---
 .../tika/server/core/resource/AsyncResource.java   |  10 +-
 .../apache/tika/server/standard/TikaPipesTest.java |  10 +-
 139 files changed, 751 insertions(+), 684 deletions(-)

diff --cc tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index 70fe1ad08f,ae9a33e170..752c0c2e35
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@@ -28,9 -28,10 +28,9 @@@ import org.apache.tika.exception.TikaEx
  import org.apache.tika.exception.ZeroByteFileException;
  import org.apache.tika.extractor.EmbeddedDocumentExtractor;
  import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
- import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractorFactory;
+ import org.apache.tika.extractor.StandardExtractorFactory;
  import org.apache.tika.io.TikaInputStream;
  import org.apache.tika.metadata.Metadata;
 -import org.apache.tika.metadata.TikaCoreProperties;
  import org.apache.tika.mime.MediaType;
  import org.apache.tika.mime.MediaTypeRegistry;
  import org.apache.tika.sax.SecureContentHandler;

Reply via email to