sw/qa/filter/md/md.cxx | 56 ++++++++++++++++++++++ sw/source/filter/md/wrtmd.cxx | 105 ++++++++++++++++++++++++++++++++++++------ sw/source/filter/md/wrtmd.hxx | 21 ++++++++ 3 files changed, 167 insertions(+), 15 deletions(-)
New commits: commit f073d733568d3b635ac8b2c3a5081afd679b4915 Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Tue Sep 2 08:45:11 2025 +0200 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Tue Sep 2 21:38:29 2025 +0200 tdf#167564 sw markdown export: handle tables When exporting the bugdoc to markdown, the content of the table is lost. CommonMark has no table support, but there is a spec at <https://github.github.com/gfm/#tables-extension->, which seems to be what md4c imports, so export that. To make sure we export the table content exactly once, don't touch m_pCurrentPam in OutMarkdown_SwTableNode(); just record the info we need about the table, then let the iteration loop in SwMDWriter::Out_SwDoc() process the table content. This is just minimal support to make sure the table content is in the output; table alignment is not implemented yet. Change-Id: I9902fafc1ab5b8b7bc2472b026ab6df490f6407f Reviewed-on: https://gerrit.libreoffice.org/c/core/+/190514 Tested-by: Jenkins Reviewed-by: Miklos Vajna <vmik...@collabora.com> diff --git a/sw/qa/filter/md/md.cxx b/sw/qa/filter/md/md.cxx index 502fda596fa7..1bbbad212a5b 100644 --- a/sw/qa/filter/md/md.cxx +++ b/sw/qa/filter/md/md.cxx @@ -23,6 +23,7 @@ #include <IDocumentContentOperations.hxx> #include <fmtcntnt.hxx> #include <ndgrf.hxx> +#include <itabenum.hxx> namespace { @@ -311,6 +312,61 @@ CPPUNIT_TEST_FIXTURE(Test, testExportingImage) CPPUNIT_ASSERT_EQUAL(aExpected, aActual); } +CPPUNIT_TEST_FIXTURE(Test, testExportingTable) +{ + // Given a document that has a table: + createSwDoc(); + SwDocShell* pDocShell = getSwDocShell(); + SwWrtShell* pWrtShell = pDocShell->GetWrtShell(); + pWrtShell->Insert(u"before"_ustr); + SwInsertTableOptions aInsertTableOptions(SwInsertTableFlags::DefaultBorder, + /*nRowsToRepeat=*/0); + pWrtShell->InsertTable(aInsertTableOptions, /*nRows=*/3, /*nCols=*/3); + pWrtShell->Insert(u"after"_ustr); + pWrtShell->SttPara(); + pWrtShell->MoveTable(GotoPrevTable, fnTableStart); + 
pWrtShell->Insert(u"A1"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"B1"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"C1"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"A2"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"B2"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"C2"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"A3"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"B3"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"C3"_ustr); + + // When saving that to markdown: + save(mpFilter); + + // Then make sure the table content is not lost: + std::string aActual = TempFileToString(); + std::string aExpected( + // clang-format off + "before" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "| A1 | B1 | C1 |" SAL_NEWLINE_STRING + // Delimiter row consists of cells whose only content are hyphens (-). + "|-|-|-|" SAL_NEWLINE_STRING + "| A2 | B2 | C2 |" SAL_NEWLINE_STRING + "| A3 | B3 | C3 |" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "after" SAL_NEWLINE_STRING + // clang-format on + ); + // Without the accompanying fix in place, this test would have failed with: + // - Actual : before after + // i.e. the table content was lost. 
+ CPPUNIT_ASSERT_EQUAL(aExpected, aActual); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/sw/source/filter/md/wrtmd.cxx b/sw/source/filter/md/wrtmd.cxx index fa3882820e41..a1b4dc8c98ec 100644 --- a/sw/source/filter/md/wrtmd.cxx +++ b/sw/source/filter/md/wrtmd.cxx @@ -443,11 +443,36 @@ void OutEscapedChars(SwMDWriter& rWrt, std::u16string_view chars) /* Output of the nodes*/ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFirst) { + std::optional<SwMDCellInfo> oCellInfo; + std::stack<SwMDTableInfo>& rTableInfos = rWrt.GetTableInfos(); + if (!rTableInfos.empty()) + { + SwMDTableInfo& aTableInfo = rTableInfos.top(); + auto it = aTableInfo.aCellInfos.find(rNode.GetIndex()); + if (it != aTableInfo.aCellInfos.end()) + { + // This text node is at the start or end of a table cell. + oCellInfo = it->second; + } + } + + if (oCellInfo && oCellInfo->bCellStart) + { + // Cell start, separate by " | " from the previous cell, see + // <https://github.github.com/gfm/#tables-extension->. + if (!oCellInfo->bRowStart) + { + rWrt.Strm().WriteUnicodeOrByteText(u" "); + } + + rWrt.Strm().WriteUnicodeOrByteText(u"| "); + } + const OUString& rNodeText = rNode.GetText(); if (!rNodeText.isEmpty()) { // Paragraphs separate by empty lines - if (!bFirst) + if (!bFirst && !oCellInfo) rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); int nHeadingLevel = 0; @@ -609,23 +634,69 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir // Output final closing attributes OutFormattingChange(rWrt, positions, nEnd, currentStatus); } - rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); -} -void OutMarkdown_SwTableNode(SwMDWriter& /*rWrt*/, const SwTableNode& /*rNode*/) -{ - // TODO + bool bRowEnd = oCellInfo && oCellInfo->bRowEnd; + if (bRowEnd) + { + // Cell ends are implicit, but row end has its own marker. 
+ rWrt.Strm().WriteUnicodeOrByteText(u" |"); - //const SwTable& rTable = rNode.GetTable(); + if (oCellInfo->bFirstRowEnd) + { + // First row has a separator from the remaining rows, which is written even for + // single-row tables. + rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); + for (size_t nBox = 0; nBox < oCellInfo->nFirstRowBoxCount; ++nBox) + { + rWrt.Strm().WriteUnicodeOrByteText(u"|-"); + } + rWrt.Strm().WriteUnicodeOrByteText(u"|"); + } + } - //WriterRef pHtmlWrt; - //GetHTMLWriter({}, {}, pHtmlWrt); - //SvMemoryStream stream; - //SwPaM pam(*rNode.EndOfSectionNode(), rNode); - //pam.End()->Adjust(SwNodeOffset(+1)); - //pHtmlWrt->Write(pam, stream, nullptr); + if (!oCellInfo || bRowEnd) + { + rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); + } +} - //... +void OutMarkdown_SwTableNode(SwMDWriter& rWrt, const SwTableNode& rTableNode) +{ + // Scan the SwTable for the interesting start/end nodes, so we know when to write the various + // separators when the caller will actually traverse the content of the table. 
+ SwMDTableInfo aTableInfo; + const SwTable& rTable = rTableNode.GetTable(); + for (size_t nLine = 0; nLine < rTable.GetTabLines().size(); ++nLine) + { + const SwTableLine* pLine = rTable.GetTabLines()[nLine]; + for (size_t nBox = 0; nBox < pLine->GetTabBoxes().size(); ++nBox) + { + const SwTableBox* pBox = pLine->GetTabBoxes()[nBox]; + const SwStartNode* pStart = pBox->GetSttNd(); + SwMDCellInfo& rStartInfo = aTableInfo.aCellInfos[pStart->GetIndex() + 1]; + const SwEndNode* pEnd = pStart->EndOfSectionNode(); + rStartInfo.bCellStart = true; + if (nBox == 0) + { + rStartInfo.bRowStart = true; + } + if (nBox == pLine->GetTabBoxes().size() - 1) + { + SwMDCellInfo& rEndInfo = aTableInfo.aCellInfos[pEnd->GetIndex() - 1]; + rEndInfo.bRowEnd = true; + if (nLine == 0) + { + rEndInfo.bFirstRowEnd = true; + rEndInfo.nFirstRowBoxCount = pLine->GetTabBoxes().size(); + } + } + } + } + aTableInfo.pEndNode = rTableNode.EndOfSectionNode(); + rWrt.GetTableInfos().push(aTableInfo); + + // Separator between the table and the previous content. + rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); } } @@ -705,7 +776,11 @@ void SwMDWriter::Out_SwDoc(SwPaM* pPam) else if (rNd.IsTableNode()) { OutMarkdown_SwTableNode(*this, *rNd.GetTableNode()); - m_pCurrentPam->GetPoint()->Assign(*rNd.EndOfSectionNode()); + } + else if (rNd.IsEndNode() && !m_aTableInfos.empty() + && &rNd == m_aTableInfos.top().pEndNode) + { + m_aTableInfos.pop(); } else if (rNd.IsSectionNode()) { diff --git a/sw/source/filter/md/wrtmd.hxx b/sw/source/filter/md/wrtmd.hxx index 23490bb96b1b..10960de60461 100644 --- a/sw/source/filter/md/wrtmd.hxx +++ b/sw/source/filter/md/wrtmd.hxx @@ -22,12 +22,31 @@ #include <sal/config.h> #include <map> +#include <stack> #include <rtl/ustring.hxx> #include <shellio.hxx> #include <swdllapi.h> +/// Stores information about an SwNode that is at the start or end of a table cell. 
+struct SwMDCellInfo +{ + bool bCellStart = false; + bool bRowStart = false; + bool bRowEnd = false; + bool bFirstRowEnd = false; + size_t nFirstRowBoxCount = 0; +}; + +/// Tracks information about one SwTableNode, the instance is alive while the write of the table is +/// in progress. +struct SwMDTableInfo +{ + std::map<SwNodeOffset, SwMDCellInfo> aCellInfos; + const SwEndNode* pEndNode = nullptr; +}; + class SwMDWriter : public Writer { public: @@ -37,6 +56,7 @@ public: SwNodeOffset StartNodeIndex() const { return m_nStartNodeIndex; } void SetListLevelPrefixSize(int nListLevel, int nPrefixSize); const std::map<int, int>& GetListLevelPrefixSizes() const { return m_aListLevelPrefixSizes; } + std::stack<SwMDTableInfo>& GetTableInfos() { return m_aTableInfos; } protected: ErrCode WriteStream() override; @@ -48,6 +68,7 @@ private: SwNodeOffset m_nStartNodeIndex{ 0 }; /// List level -> prefix size map, e.g. "1. " size is 3. std::map<int, int> m_aListLevelPrefixSizes; + std::stack<SwMDTableInfo> m_aTableInfos; }; /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */