sw/qa/extras/ooxmlexport/ooxmlexport17.cxx | 8 - writerfilter/source/dmapper/DomainMapper_Impl.cxx | 55 +++++++++- writerfilter/source/dmapper/DomainMapper_Impl.hxx | 2 writerfilter/source/dmapper/StyleSheetTable.cxx | 120 ++++++++++++++++------ writerfilter/source/dmapper/StyleSheetTable.hxx | 5 5 files changed, 153 insertions(+), 37 deletions(-)
New commits: commit ca71482237d31703454062b8b2f544a8bacd2831 Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Tue Jan 31 15:05:44 2023 +0100 Commit: Michael Stahl <michael.st...@allotropia.de> CommitDate: Tue Jan 31 17:02:51 2023 +0000 tdf#153083 writerfilter: import locale-dependent TOC \t style names, 2 There was a problem with commit ecbad22fdf81c6f072b6c9f9c16dbba47fe4748c: while it now worked in Writer, the roundtripped DOCX with the en-US built-in style name in the TOC field no longer worked in Word in the original locale (but started to work in the en-US locale). Let's try a slightly different approach: use the same style name as-is from the TOC field, and if it doesn't exist as a style, then clone the en-US built-in style with that name, and at the end of the import, iterate all paragraphs in the document and replace any application of the en-US style with the localised style. So both the en-US style and the localised style exist, and for references that aren't fields (e.g. basedOn in other styles) it should hopefully not matter which one is referenced. The DOCX exported from Writer now has a TOC field that works in Word in the original locale. 
Change-Id: Ibcc3c5899e31295b5704ebefb548f40b67eda9bf Reviewed-on: https://gerrit.libreoffice.org/c/core/+/146414 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx index bfc09d934343..fbfe633fd55e 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx @@ -725,8 +725,8 @@ DECLARE_OOXMLEXPORT_TEST(testTdf153082_semicolon, "custom-styles-TOC-semicolon.d xParaStyles->getByIndex(1) >>= styles; CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{}, styles); xParaStyles->getByIndex(2) >>= styles; - // the first one is built-in Word style that was localised DE "Intensives Zitat" in the file - CPPUNIT_ASSERT_EQUAL((uno::Sequence<OUString>{"Intense Quote", "Custom1", "_MyStyle0"}), styles); + // the first one is built-in Word style "Intense Quote" that was localised DE "Intensives Zitat" in the file + CPPUNIT_ASSERT_EQUAL((uno::Sequence<OUString>{"Intensives Zitat", "Custom1", "_MyStyle0"}), styles); xTOC->update(); OUString const tocContent(xTOC->getAnchor()->getString()); CPPUNIT_ASSERT(tocContent.startsWith("Table of Contents")); @@ -749,8 +749,8 @@ DECLARE_OOXMLEXPORT_TEST(testTdf153082_comma, "custom-styles-TOC-comma.docx") xParaStyles->getByIndex(1) >>= styles; CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"Custom1"}, styles); xParaStyles->getByIndex(2) >>= styles; - // the first one is built-in Word style that was localised DE "Intensives Zitat" in the file - CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"Intense Quote"}, styles); + // the first one is built-in Word style "Intense Quote" that was localised DE "Intensives Zitat" in the file + CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"Intensives Zitat"}, styles); xTOC->update(); OUString const tocContent(xTOC->getAnchor()->getString()); CPPUNIT_ASSERT(tocContent.startsWith("Table of Contents")); diff --git 
a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index 604e067aab8e..d25e3463d7f5 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -89,6 +89,7 @@ #include <oox/mathml/imexport.hxx> #include <utility> #include <xmloff/odffields.hxx> +#include <rtl/character.hxx> #include <rtl/uri.hxx> #include <unotools/ucbstreamhelper.hxx> #include <unotools/streamwrap.hxx> @@ -449,7 +450,10 @@ DomainMapper_Impl::~DomainMapper_Impl() ChainTextFrames(); // Don't remove last paragraph when pasting, sw expects that empty paragraph. if (m_bIsNewDoc) + { RemoveLastParagraph(); + GetStyleSheetTable()->ApplyClonedTOCStyles(); + } if (hasTableManager()) { getTableManager().endLevel(); @@ -6107,6 +6111,51 @@ DomainMapper_Impl::StartIndexSectionChecked(const OUString& sServiceName) return xRet; } +/** + This is a heuristic to find Word's w:styleId value from localised style name. + It's not clear how exactly it works, but apparently Word stores into + w:styleId some filtered representation of the localised style name. + Tragically there are references to the localised style name itself in TOC + fields. + Hopefully this works and a complete map of >100 built-in style names + localised to all languages isn't needed. 
+*/ +static auto FilterChars(OUString const& rStyleName) -> OUString +{ + OUStringBuffer ret; + sal_Int32 index(0); + while (index < rStyleName.getLength()) + { + auto const c(rStyleName.iterateCodePoints(&index)); + if (rtl::isAsciiAlphanumeric(c)) + { + ret.appendUtf32(c); + } + } + return ret.makeStringAndClear(); +} + +OUString DomainMapper_Impl::ConvertTOCStyleName(OUString const& rTOCStyleName) +{ + assert(!rTOCStyleName.isEmpty()); + if (auto const pStyle = GetStyleSheetTable()->FindStyleSheetByISTD(rTOCStyleName)) + { // theoretical case: what OOXML says + return pStyle->sStyleName; + } + auto const pStyle = GetStyleSheetTable()->FindStyleSheetByISTD(FilterChars(rTOCStyleName)); + if (pStyle && m_bIsNewDoc) + { // practical case: Word wrote i18n name to TOC field, but it doesn't + // exist in styles.xml; tdf#153083 clone it for best roundtrip + SAL_INFO("writerfilter.dmapper", "cloning TOC paragraph style (presumed built-in) " << rTOCStyleName << " from " << pStyle->sStyleName); + GetStyleSheetTable()->CloneTOCStyle(GetFontTable(), pStyle, rTOCStyleName); + return rTOCStyleName; + } + else + { + return GetStyleSheetTable()->ConvertStyleName(rTOCStyleName); + } +} + void DomainMapper_Impl::handleToc (const FieldContextPtr& pContext, const OUString & sTOCServiceName) @@ -6308,8 +6357,8 @@ void DomainMapper_Impl::handleToc uno::Sequence< OUString> aStyles( nLevelCount ); for ( auto& rStyle : asNonConstRange(aStyles) ) { - // tdf#153082 must map w:styleId to w:name - rStyle = GetStyleSheetTable()->ConvertStyleName(aTOCStyleIter->second, true); + // tdf#153083 must map w:styleId to w:name + rStyle = ConvertTOCStyleName(aTOCStyleIter->second); ++aTOCStyleIter; } xParaStyles->replaceByIndex(nLevel - 1, uno::Any(aStyles)); @@ -6344,7 +6393,7 @@ void DomainMapper_Impl::handleToc if (!sTemplate.isEmpty()) { - OUString const sConvertedStyleName(GetStyleSheetTable()->ConvertStyleName(sTemplate, true)); + OUString const 
sConvertedStyleName(ConvertTOCStyleName(sTemplate)); xTOC->setPropertyValue("CreateFromParagraphStyle", uno::Any(sConvertedStyleName)); } diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx index d4d25a75b4dc..27f51124d8be 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx @@ -1209,6 +1209,8 @@ public: void commentProps(const OUString& sId, const CommentProperties& rProps); + OUString ConvertTOCStyleName(OUString const&); + OUString getFontNameForTheme(const Id id); private: diff --git a/writerfilter/source/dmapper/StyleSheetTable.cxx b/writerfilter/source/dmapper/StyleSheetTable.cxx index f7dfa61e779a..52c14f2bf04d 100644 --- a/writerfilter/source/dmapper/StyleSheetTable.cxx +++ b/writerfilter/source/dmapper/StyleSheetTable.cxx @@ -30,9 +30,13 @@ #include <com/sun/star/beans/XMultiPropertySet.hpp> #include <com/sun/star/beans/XPropertyState.hpp> #include <com/sun/star/beans/PropertyValue.hpp> +#include <com/sun/star/container/XEnumerationAccess.hpp> #include <com/sun/star/container/XNameContainer.hpp> #include <com/sun/star/container/XIndexReplace.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> #include <com/sun/star/text/XTextDocument.hpp> +#include <com/sun/star/text/XTextFramesSupplier.hpp> +#include <com/sun/star/text/XTextTable.hpp> #include <com/sun/star/style/NumberingType.hpp> #include <com/sun/star/style/XStyleFamiliesSupplier.hpp> #include <com/sun/star/style/XStyle.hpp> @@ -42,7 +46,6 @@ #include <map> #include <osl/diagnose.h> #include <rtl/ustrbuf.hxx> -#include <rtl/character.hxx> #include <sal/log.hxx> #include <comphelper/propertyvalue.hxx> #include <comphelper/string.hxx> @@ -272,6 +275,7 @@ struct StyleSheetTable_Impl uno::Reference< beans::XPropertySet> m_xTextDefaults; std::vector< StyleSheetEntryPtr > m_aStyleSheetEntries; std::map< OUString, StyleSheetEntryPtr > m_aStyleSheetEntriesMap; + std::map<OUString, 
OUString> m_ClonedTOCStylesMap; StyleSheetEntryPtr m_pCurrentEntry; PropertyMapPtr m_pDefaultParaProps, m_pDefaultCharProps; OUString m_sDefaultParaStyleName; //WW8 name @@ -287,6 +291,7 @@ struct StyleSheetTable_Impl void AppendLatentStyleProperty(const OUString& aName, Value const & rValue); /// Sets all properties of xStyle back to default. static void SetPropertiesToDefault(const uno::Reference<style::XStyle>& xStyle); + void ApplyClonedTOCStylesToXText(uno::Reference<text::XText> const& xText); }; @@ -993,7 +998,90 @@ void StyleSheetTable::ReApplyInheritedOutlineLevelFromChapterNumbering() } } +void StyleSheetTable_Impl::ApplyClonedTOCStylesToXText(uno::Reference<text::XText> const& xText) +{ + uno::Reference<container::XEnumerationAccess> const xEA(xText, uno::UNO_QUERY_THROW); + uno::Reference<container::XEnumeration> const xParaEnum(xEA->createEnumeration()); + + while (xParaEnum->hasMoreElements()) + { + uno::Reference<lang::XServiceInfo> const xElem(xParaEnum->nextElement(), uno::UNO_QUERY_THROW); + if (xElem->supportsService(u"com.sun.star.text.Paragraph")) + { + uno::Reference<beans::XPropertySet> const xPara(xElem, uno::UNO_QUERY_THROW); + OUString styleName; + if (xPara->getPropertyValue(u"ParaStyleName") >>= styleName) + { + auto const it(m_ClonedTOCStylesMap.find(styleName)); + if (it != m_ClonedTOCStylesMap.end()) + { + xPara->setPropertyValue(u"ParaStyleName", uno::Any(it->second)); + } + } + } + else if (xElem->supportsService(u"com.sun.star.text.TextTable")) + { + uno::Reference<text::XTextTable> const xTable(xElem, uno::UNO_QUERY_THROW); + uno::Sequence<OUString> const cells(xTable->getCellNames()); + for (OUString const& rCell : cells) + { + uno::Reference<text::XText> const xCell(xTable->getCellByName(rCell), uno::UNO_QUERY_THROW); + ApplyClonedTOCStylesToXText(xCell); + } + } + } +} + +/** + Replace the applied en-US Word built-in styles that were referenced from + TOC fields (also STYLEREF and likely AUTOTEXTLIST) with the localised clones. 
+ + With the style cloned, and the clone referenced, the ToX should work in + Writer and also, when exported to DOCX, in Word. + */ +void StyleSheetTable::ApplyClonedTOCStyles() +{ + if (m_pImpl->m_ClonedTOCStylesMap.empty() + || !m_pImpl->m_bIsNewDoc) // avoid modifying pre-existing content + { + return; + } + SAL_INFO("writerfilter.dmapper", "Applying cloned styles to make TOC work"); + // ignore header / footer, irrelevant for ToX + // text frames + uno::Reference<text::XTextFramesSupplier> const xDocTFS(m_pImpl->m_xTextDocument, uno::UNO_QUERY_THROW); + uno::Reference<container::XEnumerationAccess> const xFrames(xDocTFS->getTextFrames(), uno::UNO_QUERY_THROW); + uno::Reference<container::XEnumeration> const xFramesEnum(xFrames->createEnumeration()); + while (xFramesEnum->hasMoreElements()) + { + uno::Reference<text::XText> const xFrame(xFramesEnum->nextElement(), uno::UNO_QUERY_THROW); + m_pImpl->ApplyClonedTOCStylesToXText(xFrame); + } + // body + uno::Reference<text::XText> const xBody(m_pImpl->m_xTextDocument->getText()); + m_pImpl->ApplyClonedTOCStylesToXText(xBody); +} + +void StyleSheetTable::CloneTOCStyle(FontTablePtr const& rFontTable, StyleSheetEntryPtr const pStyle, OUString const& rNewName) +{ + StyleSheetEntryPtr const pClone(new StyleSheetEntry(*pStyle)); + pClone->sStyleIdentifierD = rNewName; + pClone->sStyleName = rNewName; + pClone->sConvertedStyleName = ConvertStyleName(rNewName); + m_pImpl->m_aStyleSheetEntries.push_back(pClone); + // add it so it will be found if referenced from another TOC + m_pImpl->m_aStyleSheetEntriesMap.emplace(rNewName, pClone); + m_pImpl->m_ClonedTOCStylesMap.emplace(pStyle->sStyleName, rNewName); + std::vector<StyleSheetEntryPtr> const styles{ pClone }; + return ApplyStyleSheetsImpl(rFontTable, styles); +} + void StyleSheetTable::ApplyStyleSheets( const FontTablePtr& rFontTable ) +{ + return ApplyStyleSheetsImpl(rFontTable, m_pImpl->m_aStyleSheetEntries); +} + +void StyleSheetTable::ApplyStyleSheetsImpl(const 
FontTablePtr& rFontTable, std::vector<StyleSheetEntryPtr> const& rEntries) { try { @@ -1013,7 +1101,7 @@ void StyleSheetTable::ApplyStyleSheets( const FontTablePtr& rFontTable ) std::vector< ::std::pair<OUString, uno::Reference<style::XStyle>> > aMissingFollow; std::vector<std::pair<OUString, uno::Reference<style::XStyle>>> aMissingLink; std::vector<beans::PropertyValue> aTableStylesVec; - for( auto& pEntry : m_pImpl->m_aStyleSheetEntries ) + for (auto& pEntry : rEntries) { if( pEntry->nStyleTypeCode == STYLE_TYPE_UNKNOWN && !pEntry->sStyleName.isEmpty() ) pEntry->nStyleTypeCode = STYLE_TYPE_PARA; // unspecified style types are considered paragraph styles @@ -1413,30 +1501,6 @@ const StyleSheetEntryPtr & StyleSheetTable::GetCurrentEntry() const return m_pImpl->m_pCurrentEntry; } -/** - This is a heuristic to find Word's w:styleId value from localised style name. - It's not clear how exactly it works, but apparently Word stores into - w:styleId some filtered representation of the localised style name. - Tragically there are references to the localised style name itself in TOC - fields. - Hopefully this works and a complete map of >100 built-in style names - localised to all languages isn't needed. 
-*/ -static auto FilterChars(OUString const& rStyleName) -> OUString -{ - OUStringBuffer ret; - sal_Int32 index(0); - while (index < rStyleName.getLength()) - { - auto const c(rStyleName.iterateCodePoints(&index)); - if (rtl::isAsciiAlphanumeric(c)) - { - ret.appendUtf32(c); - } - } - return ret.makeStringAndClear(); -} - OUString StyleSheetTable::ConvertStyleName( const OUString& rWWName, bool bExtendedSearch) { OUString sRet( rWWName ); @@ -1444,10 +1508,6 @@ OUString StyleSheetTable::ConvertStyleName( const OUString& rWWName, bool bExten { //search for the rWWName in the IdentifierD of the existing styles and convert the sStyleName member auto findIt = m_pImpl->m_aStyleSheetEntriesMap.find(rWWName); - if (findIt == m_pImpl->m_aStyleSheetEntriesMap.end()) - { - findIt = m_pImpl->m_aStyleSheetEntriesMap.find(FilterChars(rWWName)); - } if (findIt != m_pImpl->m_aStyleSheetEntriesMap.end()) { if (!findIt->second->sConvertedStyleName.isEmpty()) diff --git a/writerfilter/source/dmapper/StyleSheetTable.hxx b/writerfilter/source/dmapper/StyleSheetTable.hxx index 5dcf84b789bb..e2f79d863e8f 100644 --- a/writerfilter/source/dmapper/StyleSheetTable.hxx +++ b/writerfilter/source/dmapper/StyleSheetTable.hxx @@ -74,6 +74,7 @@ public: PropertyMapPtr GetMergedInheritedProperties(const StyleSheetTablePtr& pStyleSheetTable); StyleSheetEntry(); + StyleSheetEntry(StyleSheetEntry const&) = default; virtual ~StyleSheetEntry() override; }; @@ -98,6 +99,8 @@ public: StyleSheetEntryPtr FindDefaultParaStyle(); OUString ConvertStyleName( const OUString& rWWName, bool bExtendedSearch = false ); + void CloneTOCStyle(FontTablePtr const& rFontTable, StyleSheetEntryPtr const pStyle, OUString const& rName); + void ApplyClonedTOCStyles(); OUString getOrCreateCharStyle( PropertyValueVector_t& rCharProperties, bool bAlwaysCreate ); @@ -117,6 +120,8 @@ private: virtual void lcl_entry(writerfilter::Reference<Properties>::Pointer_t ref) override; void applyDefaults(bool bParaProperties); + + void 
ApplyStyleSheetsImpl(const FontTablePtr& rFontTable, std::vector<StyleSheetEntryPtr> const& rEntries); };