sw/qa/filter/md/md.cxx | 45 ++++++++++++++++++++++++++++++++++++++++++ sw/source/filter/md/wrtmd.cxx | 25 ++++++++++++++++++++++- 2 files changed, 69 insertions(+), 1 deletion(-)
New commits: commit 85aa1402c670e8c85949c5aaf01f529a0a59c05b Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Tue Sep 9 08:37:26 2025 +0200 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Tue Sep 9 12:47:31 2025 +0200 tdf#168317 sw markdown export: handle code block The bugdoc has a multi-paragraph code block, import that to Writer, save as markdown, the text is written as normal text. Note that unlike block quote or heading, there is no marker at the start of all paragraphs: only before the first and after the last one. However, in the Writer doc model, this is a per-paragraph setting for the used style. Fix the problem by checking if the para style is RES_POOLCOLL_HTML_PRE, then seeing if there is a similar paragraph before/after, so we emit the start/end marker only once, even for multi-paragraph code blocks. Note that our own markdown import currently creates single-paragraph code blocks by replacing paragraph breaks with newlines. Change-Id: I0803bc71c758f250489d11e90b7f090966c8a8cc Reviewed-on: https://gerrit.libreoffice.org/c/core/+/190693 Reviewed-by: Miklos Vajna <vmik...@collabora.com> Tested-by: Jenkins diff --git a/sw/qa/filter/md/md.cxx b/sw/qa/filter/md/md.cxx index 61f993a403d2..2f62fa80703b 100644 --- a/sw/qa/filter/md/md.cxx +++ b/sw/qa/filter/md/md.cxx @@ -418,6 +418,51 @@ CPPUNIT_TEST_FIXTURE(Test, testBlockQuoteMdExport) CPPUNIT_ASSERT_EQUAL(aExpected, aActual); } +CPPUNIT_TEST_FIXTURE(Test, testCodeBlockMdExport) +{ + // Given a document that has a multi-paragraph code block: + createSwDoc(); + SwDocShell* pDocShell = getSwDocShell(); + SwWrtShell* pWrtShell = pDocShell->GetWrtShell(); + pWrtShell->Insert(u"A"_ustr); + pWrtShell->SplitNode(); + pWrtShell->Insert(u"B"_ustr); + SwCursor* pCursor = pWrtShell->GetCursor(); + SwDoc* pDoc = pDocShell->GetDoc(); + IDocumentStylePoolAccess& rIDSPA = pDoc->getIDocumentStylePoolAccess(); + SwTextFormatColl* pColl = rIDSPA.GetTextCollFromPool(RES_POOLCOLL_HTML_PRE); + 
pDoc->SetTextFormatColl(*pCursor, pColl); + pWrtShell->SplitNode(); + pWrtShell->Insert(u"C"_ustr); + pWrtShell->SplitNode(); + pWrtShell->Insert(u"D"_ustr); + pColl = rIDSPA.GetTextCollFromPool(RES_POOLCOLL_STANDARD); + pDoc->SetTextFormatColl(*pCursor, pColl); + + // When saving that to markdown: + save(mpFilter); + + // Then make sure the code block is exported: + std::string aActual = TempFileToString(); + std::string aExpected( + // clang-format off + "A" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "```" SAL_NEWLINE_STRING + "B" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "C" SAL_NEWLINE_STRING + "```" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "D" SAL_NEWLINE_STRING + // clang-format on + ); + // Without the accompanying fix in place, this test would have failed with: + // - Actual : A B C D + // i.e. the code block formatting was lost. + CPPUNIT_ASSERT_EQUAL(aExpected, aActual); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/sw/source/filter/md/wrtmd.cxx b/sw/source/filter/md/wrtmd.cxx index ebe2aadb7966..75400719e687 100644 --- a/sw/source/filter/md/wrtmd.cxx +++ b/sw/source/filter/md/wrtmd.cxx @@ -476,7 +476,7 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); const SwFormatColl* pFormatColl = rNode.GetFormatColl(); - if (pFormatColl->GetPoolFormatId() == RES_POOLCOLL_HTML_BLOCKQUOTE) + if (pFormatColl && pFormatColl->GetPoolFormatId() == RES_POOLCOLL_HTML_BLOCKQUOTE) { // <https://spec.commonmark.org/0.31.2/#block-quotes> first block quote, then heading. 
rWrt.Strm().WriteUnicodeOrByteText(u"> "); @@ -556,6 +556,18 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir } } + if (pFormatColl && pFormatColl->GetPoolFormatId() == RES_POOLCOLL_HTML_PRE) + { + // Before the first paragraph of a code block, see + // <https://spec.commonmark.org/0.31.2/#fenced-code-blocks>. + SwTextNode* pPrevNode = rWrt.m_pDoc->GetNodes()[rNode.GetIndex() - 1]->GetTextNode(); + const SwFormatColl* pPrevColl = pPrevNode ? pPrevNode->GetFormatColl() : nullptr; + if (!pPrevColl || pPrevColl->GetPoolFormatId() != RES_POOLCOLL_HTML_PRE) + { + rWrt.Strm().WriteUnicodeOrByteText(u"```" SAL_NEWLINE_STRING); + } + } + sal_Int32 nStrPos = rWrt.m_pCurrentPam->GetPoint()->GetContentIndex(); sal_Int32 nEnd = rNodeText.getLength(); if (rWrt.m_pCurrentPam->GetPoint()->GetNode() == rWrt.m_pCurrentPam->GetMark()->GetNode()) @@ -640,6 +652,17 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir assert(positions.hintStarts.current() == nullptr); // Output final closing attributes OutFormattingChange(rWrt, positions, nEnd, currentStatus); + + if (pFormatColl && pFormatColl->GetPoolFormatId() == RES_POOLCOLL_HTML_PRE) + { + // After the last paragraph of a code block. + SwTextNode* pNextNode = rWrt.m_pDoc->GetNodes()[rNode.GetIndex() + 1]->GetTextNode(); + const SwFormatColl* pNextColl = pNextNode ? pNextNode->GetFormatColl() : nullptr; + if (!pNextColl || pNextColl->GetPoolFormatId() != RES_POOLCOLL_HTML_PRE) + { + rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING "```"); + } + } } bool bRowEnd = oCellInfo && oCellInfo->bRowEnd;