vcl/inc/pdf/objectcopier.hxx                |    3 -
 vcl/qa/cppunit/pdfexport/data/tdf160051.odt |binary
 vcl/qa/cppunit/pdfexport/pdfexport.cxx      |   44 ++++++++++++++++++++
 vcl/qa/cppunit/pdfexport/pdfexport2.cxx     |    6 +-
 vcl/source/gdi/pdfobjectcopier.cxx          |   60 +++++++++++++++++++++++++++-
 vcl/source/gdi/pdfwriter_impl.cxx           |    4 +
 6 files changed, 110 insertions(+), 7 deletions(-)

New commits:
commit 05a075d23eb6003849e75582e12ef788e615a56d
Author:     Tibor Nagy <tibor.nagy.ext...@allotropia.de>
AuthorDate: Thu Jan 9 23:02:55 2025 +0100
Commit:     Nagy Tibor <tibor.nagy.ext...@allotropia.de>
CommitDate: Sun Jan 12 02:24:24 2025 +0100

    tdf#160051 PDF export: Artifact present inside tagged content
    
    If a PDF file containing artifacts is added to a document as an image,
    and the document is then exported as a tagged PDF, these artifacts are
    placed into a structure element (e.g., figure), which is not allowed.
    This fix removes unnecessary artifact tags from the content stream.
    
    Change-Id: I590ebec9a7aecdaa42520008824469bc8a9ff65b
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/180041
    Reviewed-by: Nagy Tibor <tibor.nagy.ext...@allotropia.de>
    Tested-by: Jenkins

diff --git a/vcl/inc/pdf/objectcopier.hxx b/vcl/inc/pdf/objectcopier.hxx
index 0168f69717ae..6d4d8676e418 100644
--- a/vcl/inc/pdf/objectcopier.hxx
+++ b/vcl/inc/pdf/objectcopier.hxx
@@ -57,7 +57,8 @@ public:
 
     /// Copies page one or more page streams from rContentStreams into rStream.
     static sal_Int32 copyPageStreams(std::vector<filter::PDFObjectElement*>& 
rContentStreams,
-                                     SvMemoryStream& rStream, bool& 
rCompressed);
+                                     SvMemoryStream& rStream, bool& 
rCompressed,
+                                     bool bIsTaggedNonReferenceXObject = 
false);
 };
 }
 
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf160051.odt 
b/vcl/qa/cppunit/pdfexport/data/tdf160051.odt
new file mode 100644
index 000000000000..39151e7e8d2c
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf160051.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx 
b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index c1f2ea43a6a0..c49c01aa7133 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -90,6 +90,50 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testPopupRectangleSize)
     }
 }
 
+CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf160051)
+{
+    // A tagged PDF file containing artifacts was added to the sample file as an image.
+    // When the sample file is exported as a tagged PDF, these artifacts are placed into a
+    // structure element (e.g. a figure), which is not allowed.
+
+    uno::Sequence<beans::PropertyValue> aFilterData(
+        comphelper::InitPropertySequence({ { "PDFUACompliance", uno::Any(true) 
},
+                                           { "SelectPdfVersion", 
uno::Any(sal_Int32(17)) } }));
+    aMediaDescriptor[u"FilterData"_ustr] <<= aFilterData;
+
+    vcl::filter::PDFDocument aDocument;
+    load(u"tdf160051.odt", aDocument);
+
+    std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+
+    // Directly go to the inner XObject Im7.
+    auto pInnerIm = aDocument.LookupObject(7);
+    CPPUNIT_ASSERT(pInnerIm);
+
+    vcl::filter::PDFStreamElement* pStream = pInnerIm->GetStream();
+    CPPUNIT_ASSERT(pStream);
+    SvMemoryStream& rObjectStream = pStream->GetMemory();
+
+    // Uncompress it.
+    SvMemoryStream aUncompressed;
+    ZCodec aZCodec;
+    aZCodec.BeginCompression();
+    rObjectStream.Seek(0);
+    aZCodec.Decompress(rObjectStream, aUncompressed);
+    CPPUNIT_ASSERT(aZCodec.EndCompression());
+
+    auto pStart = static_cast<const char*>(aUncompressed.GetData());
+    const char* pEnd = pStart + aUncompressed.GetSize();
+    OString aStr("/Artifact"_ostr);
+    auto pArtifact = std::search(pStart, pEnd, aStr.getStr(), aStr.getStr() + 
aStr.getLength());
+
+    // Without the fix in place, this test would have failed with
+    // Expected: The content stream does not contain "/Artifact" element
+    // Actual:   The content stream contains "/Artifact" element
+    CPPUNIT_ASSERT_EQUAL(pArtifact, pEnd);
+}
+
 CPPUNIT_TEST_FIXTURE(PdfExportTest, testCommentAnnotation)
 {
     // Enable PDF/UA and Comment as PDF annotations
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx 
b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
index f883a3c97540..25c707b5f0fc 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
@@ -830,7 +830,7 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testMultiPagePDF)
         CPPUNIT_ASSERT(aZCodec.EndCompression());
 
         // Just check that the size of the page stream is what is expected.
-        CPPUNIT_ASSERT_EQUAL(sal_uInt64(1236), aUncompressed.Tell());
+        CPPUNIT_ASSERT_EQUAL(sal_uInt64(1218), aUncompressed.Tell());
     }
 
     { // embedded PDF page 2
@@ -865,7 +865,7 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testMultiPagePDF)
         CPPUNIT_ASSERT(aZCodec.EndCompression());
 
         // Just check that the size of the page stream is what is expected.
-        CPPUNIT_ASSERT_EQUAL(sal_uInt64(3911), aUncompressed.Tell());
+        CPPUNIT_ASSERT_EQUAL(sal_uInt64(3893), aUncompressed.Tell());
     }
 
     { // embedded PDF page 3
@@ -900,7 +900,7 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testMultiPagePDF)
         CPPUNIT_ASSERT(aZCodec.EndCompression());
 
         // Just check that the size of the page stream is what is expected.
-        CPPUNIT_ASSERT_EQUAL(sal_uInt64(373), aUncompressed.Tell());
+        CPPUNIT_ASSERT_EQUAL(sal_uInt64(355), aUncompressed.Tell());
     }
 #endif
 }
diff --git a/vcl/source/gdi/pdfobjectcopier.cxx 
b/vcl/source/gdi/pdfobjectcopier.cxx
index 56c3ba6e8138..3761520e3148 100644
--- a/vcl/source/gdi/pdfobjectcopier.cxx
+++ b/vcl/source/gdi/pdfobjectcopier.cxx
@@ -19,6 +19,8 @@
 #include <pdf/objectcopier.hxx>
 #include <pdf/pdfwriter_impl.hxx>
 
+#include <o3tl/string_view.hxx>
+
 namespace vcl
 {
 PDFObjectCopier::PDFObjectCopier(PDFObjectContainer& rContainer)
@@ -304,7 +306,8 @@ void 
PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OString
 }
 
 sal_Int32 
PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& 
rContentStreams,
-                                           SvMemoryStream& rStream, bool& 
rCompressed)
+                                           SvMemoryStream& rStream, bool& 
rCompressed,
+                                           bool bIsTaggedNonReferenceXObject)
 {
     for (auto pContent : rContentStreams)
     {
@@ -344,7 +347,60 @@ sal_Int32 
PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*
                 continue;
             }
 
-            rStream.WriteBytes(aMemoryStream.GetData(), 
aMemoryStream.GetSize());
+            bool bHasArtifact = false;
+            if (bIsTaggedNonReferenceXObject)
+            {
+                auto pStart = static_cast<const 
char*>(aMemoryStream.GetData());
+                const char* const pEnd = pStart + aMemoryStream.GetSize();
+                std::string_view aStreamView(pStart, pEnd - pStart);
+
+                std::string_view sArtifact = "/Artifact";
+                std::size_t nPosArtifact = aStreamView.find(sArtifact);
+                if (nPosArtifact != std::string_view::npos)
+                {
+                    bHasArtifact = true;
+                    SvMemoryStream aTmpStream;
+                    std::string_view sBMC = "BMC";
+                    std::string_view sBDC = "BDC";
+                    std::string_view sEMC = "EMC";
+
+                    while (!aStreamView.empty())
+                    {
+                        aTmpStream.WriteOString(aStreamView.substr(0, 
nPosArtifact));
+                        aStreamView.remove_prefix(nPosArtifact + 
sArtifact.size());
+
+                        std::size_t nPosBMC = aStreamView.find(sBMC);
+                        std::size_t nPosBDC = aStreamView.find(sBDC);
+                        std::size_t nPos = std::min(nPosBMC, nPosBDC);
+
+                        if (nPos != std::string_view::npos)
+                        {
+                            if (nPos == nPosBMC)
+                                aStreamView.remove_prefix(nPos + sBMC.size() + 
1);
+                            else
+                                aStreamView.remove_prefix(nPos + sBDC.size() + 
1);
+
+                            std::size_t nPosEMC = aStreamView.find(sEMC);
+                            if (nPosEMC != std::string_view::npos)
+                            {
+                                aTmpStream.WriteOString(aStreamView.substr(0, 
nPosEMC));
+                                aStreamView.remove_prefix(nPosEMC + 
sEMC.size() + 1);
+                            }
+                        }
+
+                        nPosArtifact = aStreamView.find(sArtifact);
+                        if (nPosArtifact == std::string_view::npos)
+                        {
+                            aTmpStream.WriteOString(aStreamView);
+                            break;
+                        }
+                    }
+                    rStream.WriteBytes(aTmpStream.GetData(), 
aTmpStream.GetSize());
+                }
+            }
+
+            if (!bHasArtifact)
+                rStream.WriteBytes(aMemoryStream.GetData(), 
aMemoryStream.GetSize());
         }
         else
         {
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx 
b/vcl/source/gdi/pdfwriter_impl.cxx
index 526dc7ec97b1..99a1ca024a84 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -9305,7 +9305,9 @@ void PDFWriterImpl::writeReferenceXObject(const 
ReferenceXObjectEmit& rEmit)
 
         SvMemoryStream aStream;
         bool bCompressed = false;
-        sal_Int32 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, 
aStream, bCompressed);
+        bool bIsTaggedNonReferenceXObject = m_aContext.Tagged && 
!m_aContext.UseReferenceXObject;
+        sal_Int32 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, 
aStream, bCompressed,
+                                                             
bIsTaggedNonReferenceXObject);
         aLine.append(nLength);
 
         aLine.append(">>\nstream\n");

Reply via email to