vcl/inc/pdf/objectcopier.hxx | 3 - vcl/qa/cppunit/pdfexport/data/tdf160051.odt |binary vcl/qa/cppunit/pdfexport/pdfexport.cxx | 44 ++++++++++++++++++++ vcl/qa/cppunit/pdfexport/pdfexport2.cxx | 6 +- vcl/source/gdi/pdfobjectcopier.cxx | 60 +++++++++++++++++++++++++++- vcl/source/gdi/pdfwriter_impl.cxx | 4 + 6 files changed, 110 insertions(+), 7 deletions(-)
New commits: commit 05a075d23eb6003849e75582e12ef788e615a56d Author: Tibor Nagy <tibor.nagy.ext...@allotropia.de> AuthorDate: Thu Jan 9 23:02:55 2025 +0100 Commit: Nagy Tibor <tibor.nagy.ext...@allotropia.de> CommitDate: Sun Jan 12 02:24:24 2025 +0100 tdf#160051 PDF export: Artifact present inside tagged content If a PDF file containing artifacts is added to a document as an image, and the document is then exported as a tagged PDF, these artifacts are placed into a structure element (e.g., figure), which is not allowed. This fix removes unnecessary artifact tags from the content stream. Change-Id: I590ebec9a7aecdaa42520008824469bc8a9ff65b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/180041 Reviewed-by: Nagy Tibor <tibor.nagy.ext...@allotropia.de> Tested-by: Jenkins diff --git a/vcl/inc/pdf/objectcopier.hxx b/vcl/inc/pdf/objectcopier.hxx index 0168f69717ae..6d4d8676e418 100644 --- a/vcl/inc/pdf/objectcopier.hxx +++ b/vcl/inc/pdf/objectcopier.hxx @@ -57,7 +57,8 @@ public: /// Copies page one or more page streams from rContentStreams into rStream. static sal_Int32 copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams, - SvMemoryStream& rStream, bool& rCompressed); + SvMemoryStream& rStream, bool& rCompressed, + bool bIsTaggedNonReferenceXObject = false); }; } diff --git a/vcl/qa/cppunit/pdfexport/data/tdf160051.odt b/vcl/qa/cppunit/pdfexport/data/tdf160051.odt new file mode 100644 index 000000000000..39151e7e8d2c Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf160051.odt differ diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index c1f2ea43a6a0..c49c01aa7133 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -90,6 +90,50 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testPopupRectangleSize) } } +CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf160051) +{ + // A tagged PDF file containing artifacts was added to the sample file as an image. 
+ // When the sample file is exported as a tagged PDF, these artifacts are placed into a + // structure element (e.g. a figure), which is not allowed. + + uno::Sequence<beans::PropertyValue> aFilterData( + comphelper::InitPropertySequence({ { "PDFUACompliance", uno::Any(true) }, + { "SelectPdfVersion", uno::Any(sal_Int32(17)) } })); + aMediaDescriptor[u"FilterData"_ustr] <<= aFilterData; + + vcl::filter::PDFDocument aDocument; + load(u"tdf160051.odt", aDocument); + + std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages(); + CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size()); + + // Directly go to the inner XObject Im7. + auto pInnerIm = aDocument.LookupObject(7); + CPPUNIT_ASSERT(pInnerIm); + + vcl::filter::PDFStreamElement* pStream = pInnerIm->GetStream(); + CPPUNIT_ASSERT(pStream); + SvMemoryStream& rObjectStream = pStream->GetMemory(); + + // Uncompress it. + SvMemoryStream aUncompressed; + ZCodec aZCodec; + aZCodec.BeginCompression(); + rObjectStream.Seek(0); + aZCodec.Decompress(rObjectStream, aUncompressed); + CPPUNIT_ASSERT(aZCodec.EndCompression()); + + auto pStart = static_cast<const char*>(aUncompressed.GetData()); + const char* pEnd = pStart + aUncompressed.GetSize(); + OString aStr("/Artifact"_ostr); + auto pArtifact = std::search(pStart, pEnd, aStr.getStr(), aStr.getStr() + aStr.getLength()); + + // Without the fix in place, this test would have failed with + // Expected: The content stream does not contain "/Artifact" element + // Actual: The content stream contains "/Artifact" element + CPPUNIT_ASSERT_EQUAL(pArtifact, pEnd); +} + CPPUNIT_TEST_FIXTURE(PdfExportTest, testCommentAnnotation) { // Enable PDF/UA and Comment as PDF annotations diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx index f883a3c97540..25c707b5f0fc 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx @@ -830,7 +830,7 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, 
testMultiPagePDF) CPPUNIT_ASSERT(aZCodec.EndCompression()); // Just check that the size of the page stream is what is expected. - CPPUNIT_ASSERT_EQUAL(sal_uInt64(1236), aUncompressed.Tell()); + CPPUNIT_ASSERT_EQUAL(sal_uInt64(1218), aUncompressed.Tell()); } { // embedded PDF page 2 @@ -865,7 +865,7 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testMultiPagePDF) CPPUNIT_ASSERT(aZCodec.EndCompression()); // Just check that the size of the page stream is what is expected. - CPPUNIT_ASSERT_EQUAL(sal_uInt64(3911), aUncompressed.Tell()); + CPPUNIT_ASSERT_EQUAL(sal_uInt64(3893), aUncompressed.Tell()); } { // embedded PDF page 3 @@ -900,7 +900,7 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testMultiPagePDF) CPPUNIT_ASSERT(aZCodec.EndCompression()); // Just check that the size of the page stream is what is expected. - CPPUNIT_ASSERT_EQUAL(sal_uInt64(373), aUncompressed.Tell()); + CPPUNIT_ASSERT_EQUAL(sal_uInt64(355), aUncompressed.Tell()); } #endif } diff --git a/vcl/source/gdi/pdfobjectcopier.cxx b/vcl/source/gdi/pdfobjectcopier.cxx index 56c3ba6e8138..3761520e3148 100644 --- a/vcl/source/gdi/pdfobjectcopier.cxx +++ b/vcl/source/gdi/pdfobjectcopier.cxx @@ -19,6 +19,8 @@ #include <pdf/objectcopier.hxx> #include <pdf/pdfwriter_impl.hxx> +#include <o3tl/string_view.hxx> + namespace vcl { PDFObjectCopier::PDFObjectCopier(PDFObjectContainer& rContainer) @@ -304,7 +306,8 @@ void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OString } sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams, - SvMemoryStream& rStream, bool& rCompressed) + SvMemoryStream& rStream, bool& rCompressed, + bool bIsTaggedNonReferenceXObject) { for (auto pContent : rContentStreams) { @@ -344,7 +347,60 @@ sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement* continue; } - rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize()); + bool bHasArtifact = false; + if (bIsTaggedNonReferenceXObject) + { + auto pStart = 
static_cast<const char*>(aMemoryStream.GetData()); + const char* const pEnd = pStart + aMemoryStream.GetSize(); + std::string_view aStreamView(pStart, pEnd - pStart); + + std::string_view sArtifact = "/Artifact"; + std::size_t nPosArtifact = aStreamView.find(sArtifact); + if (nPosArtifact != std::string_view::npos) + { + bHasArtifact = true; + SvMemoryStream aTmpStream; + std::string_view sBMC = "BMC"; + std::string_view sBDC = "BDC"; + std::string_view sEMC = "EMC"; + + while (!aStreamView.empty()) + { + aTmpStream.WriteOString(aStreamView.substr(0, nPosArtifact)); + aStreamView.remove_prefix(nPosArtifact + sArtifact.size()); + + std::size_t nPosBMC = aStreamView.find(sBMC); + std::size_t nPosBDC = aStreamView.find(sBDC); + std::size_t nPos = std::min(nPosBMC, nPosBDC); + + if (nPos != std::string_view::npos) + { + if (nPos == nPosBMC) + aStreamView.remove_prefix(nPos + sBMC.size() + 1); + else + aStreamView.remove_prefix(nPos + sBDC.size() + 1); + + std::size_t nPosEMC = aStreamView.find(sEMC); + if (nPosEMC != std::string_view::npos) + { + aTmpStream.WriteOString(aStreamView.substr(0, nPosEMC)); + aStreamView.remove_prefix(nPosEMC + sEMC.size() + 1); + } + } + + nPosArtifact = aStreamView.find(sArtifact); + if (nPosArtifact == std::string_view::npos) + { + aTmpStream.WriteOString(aStreamView); + break; + } + } + rStream.WriteBytes(aTmpStream.GetData(), aTmpStream.GetSize()); + } + } + + if (!bHasArtifact) + rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize()); } else { diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx index 526dc7ec97b1..99a1ca024a84 100644 --- a/vcl/source/gdi/pdfwriter_impl.cxx +++ b/vcl/source/gdi/pdfwriter_impl.cxx @@ -9305,7 +9305,9 @@ void PDFWriterImpl::writeReferenceXObject(const ReferenceXObjectEmit& rEmit) SvMemoryStream aStream; bool bCompressed = false; - sal_Int32 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed); + bool bIsTaggedNonReferenceXObject = 
m_aContext.Tagged && !m_aContext.UseReferenceXObject; + sal_Int32 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed, + bIsTaggedNonReferenceXObject); aLine.append(nLength); aLine.append(">> stream ");