sw/qa/extras/ooxmlexport/data/custom-styles-TOC-comma.docx |binary sw/qa/extras/ooxmlexport/ooxmlexport17.cxx | 25 +++++++++++ writerfilter/source/dmapper/DomainMapper_Impl.cxx | 4 + writerfilter/source/dmapper/StyleSheetTable.cxx | 29 +++++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-)
New commits: commit ecbad22fdf81c6f072b6c9f9c16dbba47fe4748c Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Wed Jan 18 15:41:57 2023 +0100 Commit: Michael Stahl <michael.st...@allotropia.de> CommitDate: Wed Jan 18 18:41:25 2023 +0000 tdf#153082 writerfilter: import locale-dependent TOC \t style names The bugdoc contains this style: <w:style w:type="paragraph" w:styleId="IntensivesZitat"> <w:name w:val="Intense Quote"/> <w:basedOn w:val="Standard"/> <w:next w:val="Standard"/> <w:link w:val="IntensivesZitatZchn"/> ... which is referred to by: TOC \o "1-3" \h \z \t "Intensives Zitat;3;Custom1;3;_MyStyle0;3" Word in an "en" locale is unable to match the "Intensives Zitat" in the TOC field with the style "Intense Quote", which is a built-in style in Word (no equivalent in Writer). At first glance nothing in styles.xml matches the localised built-in style name in the TOC field. But it looks like the w:styleId value is somehow generated from the localised style name by omitting certain characters like SPACE and non-ASCII letters. 
Change-Id: I2050f7cf7f8d80bee1f667ee53b7f9981bbf7b49 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/145745 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> diff --git a/sw/qa/extras/ooxmlexport/data/custom-styles-TOC-comma.docx b/sw/qa/extras/ooxmlexport/data/custom-styles-TOC-comma.docx new file mode 100644 index 000000000000..1b013086dcc6 Binary files /dev/null and b/sw/qa/extras/ooxmlexport/data/custom-styles-TOC-comma.docx differ diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx index 86673055a0cd..32932999cc93 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx @@ -15,6 +15,7 @@ #include <com/sun/star/style/ParagraphAdjust.hpp> #include <com/sun/star/text/WritingMode2.hpp> #include <com/sun/star/text/XBookmarksSupplier.hpp> +#include <com/sun/star/text/XDocumentIndex.hpp> #include <com/sun/star/text/XFootnotesSupplier.hpp> #include <com/sun/star/text/XTextFieldsSupplier.hpp> #include <com/sun/star/text/XTextField.hpp> @@ -710,6 +711,30 @@ DECLARE_OOXMLEXPORT_TEST(testTdf148361, "tdf148361.docx") CPPUNIT_ASSERT_EQUAL(OUString("[Type text]"), aActual); } +DECLARE_OOXMLEXPORT_TEST(testTdf153082_comma, "custom-styles-TOC-comma.docx") +{ + uno::Reference<text::XDocumentIndexesSupplier> xIndexSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference<container::XIndexAccess> xIndexes = xIndexSupplier->getDocumentIndexes(); + uno::Reference<text::XDocumentIndex> xTOC(xIndexes->getByIndex(0), uno::UNO_QUERY); + // check styles + uno::Reference<container::XIndexAccess> xParaStyles = + getProperty<uno::Reference<container::XIndexAccess>>(xTOC, "LevelParagraphStyles"); + uno::Sequence<OUString> styles; + xParaStyles->getByIndex(0) >>= styles; + CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"_MyStyle0"}, styles); + xParaStyles->getByIndex(1) >>= styles; + CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"Custom1"}, styles); + 
xParaStyles->getByIndex(2) >>= styles; + // the first one is built-in Word style that was localised DE "Intensives Zitat" in the file + CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"Intense Quote"}, styles); + xTOC->update(); + OUString const tocContent(xTOC->getAnchor()->getString()); + CPPUNIT_ASSERT(tocContent.startsWith("Table of Contents")); + CPPUNIT_ASSERT(tocContent.indexOf("Lorem ipsum dolor sit amet, consectetuer adipiscing elit.") != -1); + CPPUNIT_ASSERT(tocContent.indexOf("Fusce posuere, magna sed pulvinar ultricies, purus lectus malesuada libero, sit amet commodo magna eros quis urna.") != -1); + CPPUNIT_ASSERT(tocContent.indexOf("Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas.") != -1); +} + DECLARE_OOXMLEXPORT_TEST(testTdf142407, "tdf142407.docx") { uno::Reference<container::XNameAccess> xPageStyles = getStyles("PageStyles"); diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index 8358596bdf8e..00691473094f 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -6303,7 +6303,9 @@ void DomainMapper_Impl::handleToc uno::Sequence< OUString> aStyles( nLevelCount ); for ( auto& rStyle : asNonConstRange(aStyles) ) { - rStyle = (aTOCStyleIter++)->second; + // tdf#153082 must map w:styleId to w:name + rStyle = GetStyleSheetTable()->ConvertStyleName(aTOCStyleIter->second, true); + ++aTOCStyleIter; } xParaStyles->replaceByIndex(nLevel - 1, uno::Any(aStyles)); } diff --git a/writerfilter/source/dmapper/StyleSheetTable.cxx b/writerfilter/source/dmapper/StyleSheetTable.cxx index 963ff9c907b6..c0a8e32845c9 100644 --- a/writerfilter/source/dmapper/StyleSheetTable.cxx +++ b/writerfilter/source/dmapper/StyleSheetTable.cxx @@ -42,6 +42,7 @@ #include <map> #include <osl/diagnose.h> #include <rtl/ustrbuf.hxx> +#include <rtl/character.hxx> #include <sal/log.hxx> #include 
<comphelper/propertyvalue.hxx> #include <comphelper/string.hxx> @@ -1412,6 +1413,30 @@ const StyleSheetEntryPtr & StyleSheetTable::GetCurrentEntry() const return m_pImpl->m_pCurrentEntry; } +/** + This is a heuristic to find Word's w:styleId value from localised style name. + It's not clear how exactly it works, but apparently Word stores into + w:styleId some filtered representation of the localised style name. + Tragically there are references to the localised style name itself in TOC + fields. + Hopefully this works and a complete map of >100 built-in style names + localised to all languages isn't needed. +*/ +static auto FilterChars(OUString const& rStyleName) -> OUString +{ + OUStringBuffer ret; + sal_Int32 index(0); + while (index < rStyleName.getLength()) + { + auto const c(rStyleName.iterateCodePoints(&index)); + if (rtl::isAsciiAlphanumeric(c)) + { + ret.appendUtf32(c); + } + } + return ret.makeStringAndClear(); +} + OUString StyleSheetTable::ConvertStyleName( const OUString& rWWName, bool bExtendedSearch) { OUString sRet( rWWName ); @@ -1419,6 +1444,10 @@ OUString StyleSheetTable::ConvertStyleName( const OUString& rWWName, bool bExten { //search for the rWWName in the IdentifierD of the existing styles and convert the sStyleName member auto findIt = m_pImpl->m_aStyleSheetEntriesMap.find(rWWName); + if (findIt == m_pImpl->m_aStyleSheetEntriesMap.end()) + { + findIt = m_pImpl->m_aStyleSheetEntriesMap.find(FilterChars(rWWName)); + } if (findIt != m_pImpl->m_aStyleSheetEntriesMap.end()) { if (!findIt->second->sConvertedStyleName.isEmpty())