sw/source/core/text/EnhancedPDFExportHelper.cxx | 118 +++++++++++++++++++++++- vcl/qa/cppunit/pdfexport/data/tdf160705.odt |binary vcl/qa/cppunit/pdfexport/pdfexport2.cxx | 61 ++++++++++++ 3 files changed, 176 insertions(+), 3 deletions(-)
New commits: commit accbfbc4a298cb811072ff78bda9425dd1af4e81 Author: Tibor Nagy <tibor.nagy.ext...@allotropia.de> AuthorDate: Fri Feb 21 03:34:11 2025 +0100 Commit: Nagy Tibor <tibor.nagy.ext...@allotropia.de> CommitDate: Fri Feb 21 12:27:33 2025 +0100 tdf#160705 PDF export: Set the table caption element inside the table According to the PDF standard, if a table has a caption, it must be either the first or the last child within the table tag. Therefore, if both above and below captions are set for the table, they will be exported as the table's first child. Change-Id: Id4ee732d469d606b6ad49f68b34500bc1174c51e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/181981 Tested-by: Jenkins Reviewed-by: Nagy Tibor <tibor.nagy.ext...@allotropia.de> diff --git a/sw/source/core/text/EnhancedPDFExportHelper.cxx b/sw/source/core/text/EnhancedPDFExportHelper.cxx index 20ccb7bac875..f686899b6d25 100644 --- a/sw/source/core/text/EnhancedPDFExportHelper.cxx +++ b/sw/source/core/text/EnhancedPDFExportHelper.cxx @@ -135,6 +135,7 @@ typedef std::pair< SwRect, sal_Int32 > IdMapEntry; typedef std::vector< IdMapEntry > LinkIdMap; typedef std::vector< IdMapEntry > NoteIdMap; typedef std::map< const SwTable*, TableColumnsMapEntry > TableColumnsMap; +typedef std::map< const SwTable*, sal_Int32 > TableCaptionsMap; typedef std::map< const SwNumberTreeNode*, sal_Int32 > NumListIdMap; typedef std::map< const SwNumberTreeNode*, sal_Int32 > NumListBodyIdMap; typedef std::set<const void*> FrameTagSet; @@ -142,6 +143,7 @@ typedef std::set<const void*> FrameTagSet; struct SwEnhancedPDFState { TableColumnsMap m_TableColumnsMap; + TableCaptionsMap m_TableCaptionsMap; LinkIdMap m_LinkIdMap; NoteIdMap m_NoteIdMap; NumListIdMap m_NumListIdMap; @@ -257,6 +259,77 @@ bool lcl_IsHeadlineCell( const SwCellFrame& rCellFrame ) return bRet; } +// returns true if the frame is a caption +bool lcl_IsCaptionFrame(const SwFrame& rFrame) +{ + if (!rFrame.IsTextFrame()) + return false; + + SwTextFrame const& rTextFrame(*static_cast<const SwTextFrame*>(&rFrame)); + const SwTextNode* const pTextNd(rTextFrame.GetTextNodeForParaProps()); + if (!pTextNd) + return false; + + const SwFormat* pTextFormat = pTextNd->GetFormatColl(); + const SwFormat* pParentTextFormat = pTextFormat ? pTextFormat->DerivedFrom() : nullptr; + + ProgName sParentStyleName; + if (pParentTextFormat) + SwStyleNameMapper::FillProgName(pParentTextFormat->GetName(), sParentStyleName, + SwGetPoolIdFromName::TxtColl); + + return sParentStyleName == aCaption; +} + +const SwTabFrame* lcl_FindTableForCaption(const SwFrame& rFrame) +{ + const SwTabFrame* pTabFrame = nullptr; + bool bPrevFrame = false; + + // It is possible to add multiple captions to a table, + // both above and below, either simultaneously or separately. + // Start by checking the next frame, and if we don't find a table frame + // or if the next frame is not a caption, we return to the current caption + // and perform the same operation backwards using the previous frames. + const SwFrame* pRetFrame = rFrame.GetNext(); + if (!pRetFrame) + { + bPrevFrame = true; + pRetFrame = rFrame.GetPrev(); + } + + while (pRetFrame) + { + if (pRetFrame->IsTabFrame()) + { + pTabFrame = static_cast<const SwTabFrame*>(pRetFrame); + break; + } + + // Check if the next or the previous frame is a caption frame + bool bIsCaption = lcl_IsCaptionFrame(*pRetFrame); + if (bIsCaption && pRetFrame->GetNext()) + { + pRetFrame = !bPrevFrame ? pRetFrame->GetNext() : pRetFrame->GetPrev(); + } + else if (!bPrevFrame && rFrame.GetPrev()) + { + // If no table was found while checking the GetNext() frames, + // jump back to the current caption and + // start checking the GetPrev() frames. + bPrevFrame = true; + pRetFrame = rFrame.GetPrev(); + } + else + // This part handles the case + // when the table has been deleted, + // but the caption has not. + break; + } + + return pTabFrame; +} + // List all frames for which the NonStructElement tag is set: bool lcl_IsInNonStructEnv( const SwFrame& rFrame ) { @@ -573,7 +646,8 @@ void SwTaggedPDFHelper::BeginTag(vcl::pdf::StructElement eType, const OUString& ( rFrame.IsTextFrame() && rFrame.GetDrawObjs() ) || (rFrame.IsFootnoteFrame() && static_cast<SwFootnoteFrame const&>(rFrame).GetFollow()) || ( rFrame.IsRowFrame() && rFrame.IsInSplitTableRow() ) || - ( rFrame.IsCellFrame() && const_cast<SwFrame&>(rFrame).GetNextCellLeaf() ) ) + ( rFrame.IsCellFrame() && const_cast<SwFrame&>(rFrame).GetNextCellLeaf() ) || + rFrame.IsTabFrame() ) { pKey = lcl_GetKeyFromFrame(rFrame); @@ -1488,8 +1562,46 @@ void SwTaggedPDFHelper::BeginBlockStructureElements() else if (sParentStyleName == aCaption) { - nPDFType = sal_uInt16(vcl::pdf::StructElement::Caption); - aPDFType = sStyleName.toString() + aCaptionString; + OUString sTableCaption = sStyleName.toString() + aCaptionString; + + if (!pFrame->IsInFly()) // Table caption + { + TableCaptionsMap& rTableCaptionsMap( + mpPDFExtOutDevData->GetSwPDFState()->m_TableCaptionsMap); + + const SwTabFrame* pTabFrame = lcl_FindTableForCaption(*pFrame); + if (pTabFrame) + { + const SwTable* pTable = pTabFrame->GetTable(); + if (rTableCaptionsMap.find(pTable) != rTableCaptionsMap.end()) + { + // Reopen Caption tag: + // - if the table has an above and below caption + // - if the table has multiple above or below captions + m_nRestoreCurrentTag + = mpPDFExtOutDevData->GetCurrentStructureElement(); + + sal_Int32 const nCaptionId = rTableCaptionsMap[pTable]; + mpPDFExtOutDevData->SetCurrentStructureElement(nCaptionId); + } + else + { + OpenTagImpl(pTable); + + // Open Caption tag + sal_Int32 const nId = BeginTagImpl( + nullptr, vcl::pdf::StructElement::Caption, sTableCaption); + + rTableCaptionsMap[pTable] = nId; + } + } + aPDFType = "Standard"; + } + else // Figure caption + { + nPDFType = sal_uInt16(vcl::pdf::StructElement::Caption); + aPDFType = sTableCaption; + } } // Heading: H diff --git a/vcl/qa/cppunit/pdfexport/data/tdf160705.odt b/vcl/qa/cppunit/pdfexport/data/tdf160705.odt new file mode 100644 index 000000000000..144cfa286583 Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf160705.odt differ diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx index 05e3342fde1a..7b8af48bf467 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx @@ -85,6 +85,67 @@ void PdfExportTest2::load(std::u16string_view rFile, vcl::filter::PDFDocument& r CPPUNIT_ASSERT(rDocument.Read(aStream)); } +CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf160705) +{ + // Enable PDF/UA + uno::Sequence<beans::PropertyValue> aFilterData( + comphelper::InitPropertySequence({ { "PDFUACompliance", uno::Any(true) } })); + aMediaDescriptor[u"FilterData"_ustr] <<= aFilterData; + + vcl::filter::PDFDocument aDocument; + load(u"tdf160705.odt", aDocument); + + // The document has one page. + std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages(); + CPPUNIT_ASSERT_EQUAL(size_t(1), aPages.size()); + + int nTable(0); + for (const auto& rDocElement : aDocument.GetElements()) + { + auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(rDocElement.get()); + if (!pObject) + continue; + + auto pType = dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("Type"_ostr)); + if (pType && pType->GetValue() == "StructElem") + { + auto pS = dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("S"_ostr)); + if (pS && pS->GetValue() == "Table") + { + auto pKids = dynamic_cast<vcl::filter::PDFArrayElement*>(pObject->Lookup("K"_ostr)); + CPPUNIT_ASSERT(pKids); + + // In the first table, the caption element is the first child element + // In the second table, the caption element is the last child element + // In the third table, the caption element is the first child element + int nId = 0; + if (nTable == 1) // second table + nId = pKids->GetElements().size() - 1; + else if (nTable == 2) // third table + nId = 0; + + auto pTableKids = pKids->GetElements(); + auto pRefKid = dynamic_cast<vcl::filter::PDFReferenceElement*>(pTableKids[nId]); + CPPUNIT_ASSERT(pRefKid); + auto pObj = pRefKid->LookupObject(); + CPPUNIT_ASSERT(pObj); + auto pType1 = dynamic_cast<vcl::filter::PDFNameElement*>(pObj->Lookup("Type"_ostr)); + CPPUNIT_ASSERT_EQUAL("StructElem"_ostr, pType1->GetValue()); + auto pS1 = dynamic_cast<vcl::filter::PDFNameElement*>(pObj->Lookup("S"_ostr)); + CPPUNIT_ASSERT_EQUAL("TableCaption"_ostr, pS1->GetValue()); + + auto pKids2 = dynamic_cast<vcl::filter::PDFArrayElement*>(pObj->Lookup("K"_ostr)); + CPPUNIT_ASSERT(pKids2); + + // The captions of Table1, Table2 and Table3 also have two standard elements + CPPUNIT_ASSERT_EQUAL(size_t(2), pKids2->GetElements().size()); + + ++nTable; + } + } + } +} + CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf159895) { // Enable PDF/UA