sdext/source/pdfimport/filterdet.cxx |   28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

New commits:
commit bc680499b5fa782c7396f7ab688c8f5498b58b46
Author:     Dr. David Alan Gilbert <d...@treblig.org>
AuthorDate: Thu Apr 10 00:58:17 2025 +0100
Commit:     Tomaž Vajngerl <qui...@gmail.com>
CommitDate: Mon Apr 21 15:16:35 2025 +0200

    tdf#55425, tdf#66580: sdext,pdfimport: embeddedFile detection
    
    Check for exactly one attachment in the PDF and get its name.
    A PDF with 0 or more than 1 attachment isn't a LO hybrid.
    
    Change-Id: I9d556b1b0435b734c278b58380589c6e3bca9964
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/183950
    Tested-by: Jenkins
    Reviewed-by: Tomaž Vajngerl <qui...@gmail.com>

diff --git a/sdext/source/pdfimport/filterdet.cxx 
b/sdext/source/pdfimport/filterdet.cxx
index 8560679fd5bd..b817b15d523b 100644
--- a/sdext/source/pdfimport/filterdet.cxx
+++ b/sdext/source/pdfimport/filterdet.cxx
@@ -328,6 +328,34 @@ uno::Reference<io::XStream> getEmbeddedFile(const 
OUString& rInPDFFileURL,
 
         auto pPdfiumDoc = pPdfium->openDocument(pMemRawPdf, nFileSize, 
OString(/*TODO Pass*/));
 
+        do {
+            auto nPdfiumErr = pPdfium->getLastErrorCode();
+            if (nPdfiumErr != vcl::pdf::PDFErrorType::Success
+                && nPdfiumErr != vcl::pdf::PDFErrorType::Password)
+            {
+                SAL_WARN("sdext.pdfimport",
+                         "getEmbeddedFile pdfium err: " << 
pPdfium->getLastError());
+                break;
+            }
+            if (nPdfiumErr == vcl::pdf::PDFErrorType::Password)
+            {
+                SAL_WARN("sdext.pdfimport", "getEmbeddedFile pdfium Pass 
todo");
+                break;
+            }
+            // The new style hybrids have exactly one embedded file
+            if (pPdfiumDoc->getAttachmentCount() != 1)
+            {
+                SAL_INFO("sdext.pdfimport", "getEmbeddedFile incorrect 
attachment count");
+                break;
+            }
+            auto pAttachment = pPdfiumDoc->getAttachment(0);
+            auto aName = pAttachment->getName();
+            // pdfium currently has no way to read the MIME type (aka Subtype 
field)
+            // see https://issues.chromium.org/issues/408241034
+            // When it does we can check the filename matches the expected 
mimetype
+            SAL_INFO("sdext.pdfimport", "getEmbeddedFile attachment name: " << 
aName);
+        } while(false);
+
         osl_unmapMappedFile(fileHandle, pMemRawPdf, nFileSize);
         osl_closeFile(fileHandle);
     }

Reply via email to