include/oox/core/filterdetect.hxx          |    1 
 include/test/bootstrapfixture.hxx          |    2 -
 oox/source/core/filterdetect.cxx           |   32 +++++++++++++++++++
 oox/source/core/xmlfilterbase.cxx          |    7 +++-
 sw/qa/extras/ooxmlexport/ooxmlexport.cxx   |    2 -
 sw/qa/extras/ooxmlexport/ooxmlexport2.cxx  |   10 +++---
 sw/qa/extras/ooxmlexport/ooxmlexport20.cxx |   22 +++++++++++--
 test/source/bootstrapfixture.cxx           |   48 +++++++++++++++++++++++++++--
 test/source/unoapi_test.cxx                |   31 +++---------------
 9 files changed, 116 insertions(+), 39 deletions(-)

New commits:
commit 29bb66c78d56b5784544860bafb06907e1cd2774
Author:     Justin Luth <justin.l...@collabora.com>
AuthorDate: Tue Feb 11 22:14:13 2025 -0500
Commit:     Miklos Vajna <vmik...@collabora.com>
CommitDate: Mon Feb 24 08:31:07 2025 +0100

    tdf#165207 tdf#164201 docx: always use errata uri in docProps/core.xml
    
    This patch replaces my import-focused
    25.8 commit d97085cc6cd2bdc3b6723d1960d0ec5fa0a48165 which said:
        This fixes a 7.6 regression
        from commit e66ddcd4b66923bc835bd7c5f5c784a809a420a2.
    
        At import, the base filter was treating too many documents
        as if they were limited to Word 2007 format,
        and thus reducing their compatibilityMode to 12 on export.
    
        This import case is matched in a LOT of unit tests.
        However, it doesn't manifest itself in
            saveAndReload(mpFilter) (or DECLARE_OOXMLEXPORT_TEST)
        because the mpFilter string set in the Test class with
            SwModelTestBase(..."ooxmlexport/data/", "Office Open XML Text"),
        forces saving in ISOIEC_29500_2008 mode
        and thus unit tests basically NEVER round-trip as "MS Word 2007 XML".
    
        However, the general user was almost always round-tripping
        these as MS Word 2007 XML / compat12 since LO 7.6.
    
    The export portion of this patch
    fixes a long-standing bug that became more prominent in 7.6
    when mstahl renamed docx filters to promote Word 2010,
    and then again in 25.2 when my bug 164201 patches
    told command-line convert-to to export using Word 2010 filter,
    and finally in 25.8 when interactively round-tripping existing files.
    
    So what does this patch fix?
    Basically, all of the meta-data was not being read by MS Word,
    which primarily means no create/modify dates, subject, title etc.
    Every time we saved to DOCX format (except DOCX 2007), we caused Word
    to be incapable of understanding anything in docProps/core.xml.
    Telltale sign: when MS Word round-trips with added docProps/core0.xml.
    
    The spec says that DOCX 2010 should be using
        officedocuments/2006/relationships/metadata/core-properties
    and thus our untouched 2010-era coding
    specified all of that for import and export.
    
    But reality seems to be much different than the spec,
    (and some errata-sounding documentation agrees),
    so throw the spec out the window and always write
        package/2006/relationships/metadata/core-properties
    (but only for DOCX - Excel and Powerpoint follow the spec...)
    
    In order to do that, I needed to re-write the import logic
    as already suggested by mstahl in
    https://gerrit.libreoffice.org/c/core/+/178048
    
    Bug 165207 doesn't affect LO in any way.
    If there was already a unit test writing out to _rels/.rels
    I would have added a test for this, but since there isn't
    I won't bother since I don't see any value in doing so.
    Plus, we already have validity checks to cover this kind of stuff.
    
    make CppunitTest_sw_ooxmlexport20 CPPUNIT_TEST_NAME=testTdf158855
    
    A uiwriter4 test depends on the previous patch in this bug report.
    
    officeotron sounds like they WANT to report invalidation errors
    based only on spec, so no point in trying to "submit a patch".
    
    Change-Id: Ib76803bf0c9f3791f9078846f00c118099ef67cd
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/181463
    Tested-by: Jenkins
    Reviewed-by: Justin Luth <jl...@mail.com>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/182037
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>
    Tested-by: Justin Luth <jl...@mail.com>

diff --git a/include/oox/core/filterdetect.hxx 
b/include/oox/core/filterdetect.hxx
index 8b01bcc5f285..2c907be56330 100644
--- a/include/oox/core/filterdetect.hxx
+++ b/include/oox/core/filterdetect.hxx
@@ -82,6 +82,7 @@ public:
     virtual void SAL_CALL characters( const OUString& aChars ) override;
 
 private:
+    void parseSettings(const AttributeList& rAttribs);
     void                parseRelationship( const AttributeList& rAttribs );
 
     OUString            getFilterNameFromContentType( std::u16string_view 
rContentType, std::u16string_view rFileName );
diff --git a/include/test/bootstrapfixture.hxx 
b/include/test/bootstrapfixture.hxx
index f512cabc7784..449534035eb4 100644
--- a/include/test/bootstrapfixture.hxx
+++ b/include/test/bootstrapfixture.hxx
@@ -61,7 +61,7 @@ public:
 
     virtual void setUp() override;
 
-    void validate(const OUString& rURL, ValidationFormat) const;
+    void validate(const OUString& rURL, std::u16string_view rFilter) const;
 
     // Allows to exclude tests dependent on color depth of the default virtual 
device
     static sal_uInt16 getDefaultDeviceBitCount();
diff --git a/oox/source/core/filterdetect.cxx b/oox/source/core/filterdetect.cxx
index 33c95c7e2ca3..ccd879f4494b 100644
--- a/oox/source/core/filterdetect.cxx
+++ b/oox/source/core/filterdetect.cxx
@@ -88,6 +88,15 @@ void SAL_CALL FilterDetectDocHandler::startFastElement(
     AttributeList aAttribs( rAttribs );
     switch ( nElement )
     {
+        // cases for word/settings.xml
+        case W_TOKEN(settings):
+        case W_TOKEN(compat):
+            break;
+        case W_TOKEN(compatSetting):
+            if (!maContextStack.empty() && (maContextStack.back() == 
W_TOKEN(compat)))
+                parseSettings(aAttribs);
+            break;
+
         // cases for _rels/.rels
         case PR_TOKEN( Relationships ):
         break;
@@ -142,6 +151,18 @@ void SAL_CALL FilterDetectDocHandler::characters( const 
OUString& /*aChars*/ )
 {
 }
 
+void FilterDetectDocHandler::parseSettings(const AttributeList& rAttribs)
+{
+    // tdf#131936 Remember filter when opening file as 'Office Open XML Text'
+    if 
(rAttribs.getStringDefaulted(W_TOKEN(name)).equalsIgnoreAsciiCase("compatibilityMode"))
+    {
+        const sal_Int32 nVal = rAttribs.getInteger(W_TOKEN(val), 12); // 
default to Word 2007
+        // if specified multiple times, highest value wins
+        if (nVal > 12 && maOOXMLVariant == OOXMLVariant::ECMA_Transitional)
+            maOOXMLVariant = OOXMLVariant::ISO_Transitional; // Word 2010+
+    }
+}
+
 void FilterDetectDocHandler::parseRelationship( const AttributeList& rAttribs )
 {
     OUString aType = rAttribs.getStringDefaulted( XML_Type);
@@ -438,6 +459,7 @@ OUString SAL_CALL FilterDetect::detect( Sequence< 
PropertyValue >& rMediaDescSeq
             aParser.registerNamespace( NMSP_packageRel );
             aParser.registerNamespace( NMSP_officeRel );
             aParser.registerNamespace( NMSP_packageContentTypes );
+            aParser.registerNamespace(NMSP_doc); // for W_TOKEN
 
             OUString aFileName;
             aMediaDescriptor[utl::MediaDescriptor::PROP_URL] >>= aFileName;
@@ -447,6 +469,16 @@ OUString SAL_CALL FilterDetect::detect( Sequence< 
PropertyValue >& rMediaDescSeq
             /*  Parse '_rels/.rels' to get the target path and 
'[Content_Types].xml'
                 to determine the content type of the part at the target path. 
*/
             aParser.parseStream( aZipStorage, u"_rels/.rels"_ustr );
+            try
+            {
+                // Text documents can't use .rels to determine maOOXMLVariant. 
Use compatibilityMode
+                 aParser.parseStream(aZipStorage, u"word/settings.xml"_ustr);
+            }
+            catch(const Exception&)
+            {
+                // not a MS Word text document, or file might not exist
+            }
+            // Order is critical: .rels and then settings.xml must be parsed 
before [Content_Types]
             aParser.parseStream( aZipStorage, u"[Content_Types].xml"_ustr );
         }
     }
diff --git a/oox/source/core/xmlfilterbase.cxx 
b/oox/source/core/xmlfilterbase.cxx
index 38a51cc4b2d4..8cb7c5213514 100644
--- a/oox/source/core/xmlfilterbase.cxx
+++ b/oox/source/core/xmlfilterbase.cxx
@@ -27,6 +27,7 @@
 #include <com/sun/star/beans/Pair.hpp>
 #include <com/sun/star/embed/XRelationshipAccess.hpp>
 #include <com/sun/star/frame/XModel.hpp>
+#include <com/sun/star/text/XTextDocument.hpp>
 #include <com/sun/star/xml/sax/XFastSAXSerializable.hpp>
 #include <com/sun/star/xml/sax/XSAXSerializable.hpp>
 #include <com/sun/star/xml/sax/Writer.hpp>
@@ -632,8 +633,12 @@ writeCoreProperties( XmlFilterBase& rSelf, const 
Reference< XDocumentProperties
         = bRemovePersonalInfo
           && 
!SvtSecurityOptions::IsOptionSet(SvtSecurityOptions::EOption::DocWarnKeepDocUserInfo);
 
+    // Although the spec says that ISOIEC_29500_2008 must use the 
"officedocument" URI,
+    // in MS errata they admit that DOCX just uses the old "package" URI from 
ECMA_376_1ST_EDITION.
+
     OUString sValue;
-    if( rSelf.getVersion() == oox::core::ISOIEC_29500_2008  )
+    const bool bDocx = 
dynamic_cast<text::XTextDocument*>(rSelf.getModel().get());
+    if (!bDocx && rSelf.getVersion() == oox::core::ISOIEC_29500_2008)
     {
         // The lowercase "officedocument" is intentional and according to the 
spec
         // (although most other places are written "officeDocument")
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport.cxx 
b/sw/qa/extras/ooxmlexport/ooxmlexport.cxx
index 64901fcf7359..6aee5bd0babd 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport.cxx
@@ -568,7 +568,7 @@ DECLARE_OOXMLEXPORT_TEST(testParagraphMark2, 
"paragraph-mark2.docx")
 CPPUNIT_TEST_FIXTURE(Test, testParagraphMarkNonempty)
 {
     loadAndSave("paragraph-mark-nonempty.odt");
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     CPPUNIT_ASSERT_EQUAL(1, getPages());
     xmlDocUniquePtr pXmlDoc = parseExport(u"word/document.xml"_ustr);
     // There were two <w:sz> elements, make sure the 40 one is dropped and the 
20 one is kept.
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx 
b/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
index ef7fc436e132..f5d9efc4f516 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
@@ -44,7 +44,7 @@ public:
 CPPUNIT_TEST_FIXTURE(Test, testPageGraphicBackground)
 {
     loadAndReload("page-graphic-background.odt");
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     CPPUNIT_ASSERT_EQUAL(1, getPages());
     // No idea how the graphic background should be exported (seems there is no
     // way to do a non-tiling export to OOXML), but at least the background
@@ -165,7 +165,7 @@ CPPUNIT_TEST_FIXTURE(Test, testZoom)
     verify();
 
     // Validation test: order of elements were wrong.
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     xmlDocUniquePtr pXmlDoc = parseExport(u"word/styles.xml"_ustr);
     // Order was: rsid, next.
     int nNext = getXPathPosition(pXmlDoc, "/w:styles/w:style[3]", "next");
@@ -181,7 +181,7 @@ CPPUNIT_TEST_FIXTURE(Test, testZoom)
 CPPUNIT_TEST_FIXTURE(Test, defaultTabStopNotInStyles)
 {
     loadAndReload("empty.odt");
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     CPPUNIT_ASSERT_EQUAL(1, getPages());
 // The default tab stop was mistakenly exported to a style.
 // xray ThisComponent.StyleFamilies(1)(0).ParaTabStop
@@ -253,13 +253,13 @@ CPPUNIT_TEST_FIXTURE(Test, testFdo38244)
     verify();
     saveAndReload(mpFilter);
     verify();
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
 }
 
 CPPUNIT_TEST_FIXTURE(Test, testCommentsNested)
 {
     loadAndReload("comments-nested.odt");
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     CPPUNIT_ASSERT_EQUAL(1, getPages());
     uno::Reference<beans::XPropertySet> xOuter = getProperty< 
uno::Reference<beans::XPropertySet> >(getRun(getParagraph(1), 2), 
u"TextField"_ustr);
     CPPUNIT_ASSERT_EQUAL(u"Outer"_ustr, getProperty<OUString>(xOuter, 
u"Content"_ustr));
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport20.cxx 
b/sw/qa/extras/ooxmlexport/ooxmlexport20.cxx
index fc4640425f88..7ef3f92031cd 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport20.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport20.cxx
@@ -15,6 +15,9 @@
 #include <com/sun/star/style/LineSpacing.hpp>
 #include <com/sun/star/style/LineSpacingMode.hpp>
 
+#include <sfx2/docfile.hxx>
+#include <sfx2/docfilt.hxx>
+
 #include <pam.hxx>
 #include <unotxdoc.hxx>
 #include <docsh.hxx>
@@ -1059,15 +1062,28 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf158855)
     // Check that the import doesn't produce an extra empty paragraph before a 
page break
     CPPUNIT_ASSERT_EQUAL(2, getPages()); // was 3
     CPPUNIT_ASSERT_EQUAL(2, getParagraphs()); // was 3
-    uno::Reference<text::XTextTable>(getParagraphOrTable(1), 
uno::UNO_QUERY_THROW);
+
+    uno::Reference<text::XTextTable> xTableImport(getParagraphOrTable(1), 
uno::UNO_QUERY_THROW);
     getParagraph(2, u"Next page"_ustr); // was empty, with the 3rd being "Next 
page"
 
-    saveAndReload(mpFilter);
+    // tdf#164201 the table was shifting to left of the page margin because it 
became compat12
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(9), getProperty<sal_Int32>(xTableImport, 
u"LeftMargin"_ustr));
+    CPPUNIT_ASSERT_EQUAL(OUString("Office Open XML Text"),
+                         
getSwDocShell()->GetMedium()->GetFilter()->GetFilterName());
+
+    saveAndReload(getSwDocShell()->GetMedium()->GetFilter()->GetFilterName());
 
     CPPUNIT_ASSERT_EQUAL(2, getPages());
     CPPUNIT_ASSERT_EQUAL(2, getParagraphs());
-    uno::Reference<text::XTextTable>(getParagraphOrTable(1), 
uno::UNO_QUERY_THROW);
+    uno::Reference<text::XTextTable> xTableExport(getParagraphOrTable(1), 
uno::UNO_QUERY_THROW);
     getParagraph(2, u"Next page"_ustr);
+
+    // tdf#164201 instead of "From left: 0" (aka 9), it was "From Left: 
-0.19cm" (aka -191)
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(9), getProperty<sal_Int32>(xTableExport, 
u"LeftMargin"_ustr));
+
+    xmlDocUniquePtr pXmlSettings = parseExport(u"word/settings.xml"_ustr);
+    assertXPath(pXmlSettings, "//w:compat/w:compatSetting[1]", "name", 
u"compatibilityMode");
+    assertXPath(pXmlSettings, "//w:compat/w:compatSetting[1]", "val", u"15");
 }
 
 CPPUNIT_TEST_FIXTURE(Test, testTdf158971)
diff --git a/test/source/bootstrapfixture.cxx b/test/source/bootstrapfixture.cxx
index 32ebbd01a9e0..a34cc7e4473b 100644
--- a/test/source/bootstrapfixture.cxx
+++ b/test/source/bootstrapfixture.cxx
@@ -166,7 +166,7 @@ OUString filterOut(const OUString& s, std::u16string_view 
excludedSubstr)
     return result;
 }
 
-OUString filterValidationResults(const OUString& s)
+OUString filterValidationResults(const OUString& s, std::u16string_view 
rFilter)
 {
     OUString result = s;
     // In ECMA-376-1 Second Edition, 2008, there is the following restriction 
for oleObj:
@@ -189,13 +189,55 @@ OUString filterValidationResults(const OUString& s)
     //
     // But officeotron only knows the old version...
     result = filterOut(result, u"Invalid content was found starting with 
element 'p:pic'. No child element is expected at this point.");
+
+    if (rFilter == u"Office Open XML Text")
+    {
+        /* While the spec says the core-properties relationship must be
+         *     officedocument/2006/relationships/metadata/core-properties
+         * MS Word actually just writes the ECMA-376-1ST EDITION version
+         * for both ECMA_Transitional and ISO_Transitional formats.
+         *
+         * officeotron doesn't care that clause 15.2.12.1 fails on all 
MSWord-produced output.
+         */
+        result = filterOut(result, u"Entry with MIME type 
\"application/vnd.openxmlformats-package.core-properties+xml\" has unrecognized 
relationship type 
\"http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties\";
 (see ISO/IEC 29500-1:2008, Clause 15.2.12.1)");
+    }
+
     return result;
 }
 }
 #endif
 
-void test::BootstrapFixture::validate(const OUString& rPath, 
test::ValidationFormat eFormat) const
+void test::BootstrapFixture::validate(const OUString& rPath, 
std::u16string_view rFilter) const
 {
+    test::ValidationFormat eFormat = test::ODF;
+    if (rFilter == u"Calc Office Open XML")
+        eFormat = test::OOXML;
+    else if (rFilter == u"Office Open XML Text")
+        eFormat = test::OOXML;
+    else if (rFilter == u"Impress Office Open XML")
+        eFormat = test::OOXML;
+    else if (rFilter == u"writer8")
+        eFormat = test::ODF;
+    else if (rFilter == u"calc8")
+        eFormat = test::ODF;
+    else if (rFilter == u"impress8")
+        eFormat = test::ODF;
+    else if (rFilter == u"draw8")
+        eFormat = test::ODF;
+    else if (rFilter == u"OpenDocument Text Flat XML")
+        eFormat = test::ODF;
+    else if (rFilter == u"MS Word 97")
+        eFormat = test::MSBINARY;
+    else if (rFilter == u"MS Excel 97")
+        eFormat = test::MSBINARY;
+    else if (rFilter == u"MS PowerPoint 97")
+        eFormat = test::MSBINARY;
+    else
+    {
+        SAL_INFO("test", "BootstrapFixture::validate: unknown filter");
+        return;
+    }
+
 #if HAVE_EXPORT_VALIDATION
     OUString var;
     if( eFormat == test::OOXML )
@@ -266,7 +308,7 @@ void test::BootstrapFixture::validate(const OUString& 
rPath, test::ValidationFor
 
     if( eFormat == test::OOXML && !aContentOUString.isEmpty() )
     {
-        aContentOUString = filterValidationResults(aContentOUString);
+        aContentOUString = filterValidationResults(aContentOUString, rFilter);
         // check for validation errors here
         sal_Int32 nIndex = aContentOUString.lastIndexOf(grand_total);
         if(nIndex == -1)
diff --git a/test/source/unoapi_test.cxx b/test/source/unoapi_test.cxx
index a849ea6d2af1..035bf8c1f5fb 100644
--- a/test/source/unoapi_test.cxx
+++ b/test/source/unoapi_test.cxx
@@ -180,31 +180,12 @@ void UnoApiTest::save(const OUString& rFilter, const 
char* pPassword)
 
     if (!mbSkipValidation)
     {
-        if (rFilter == "Calc Office Open XML")
-            validate(maTempFile.GetFileName(), test::OOXML);
-        /*
-        // too many validation errors right now
-        else if (rFilter == "Office Open XML Text")
-            validate(maTempFile.GetFileName(), test::OOXML);
-        */
-        else if (rFilter == "Impress Office Open XML")
-            validate(maTempFile.GetFileName(), test::OOXML);
-        else if (rFilter == "writer8")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "calc8")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "impress8")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "draw8")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "OpenDocument Text Flat XML")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "MS Word 97")
-            validate(maTempFile.GetFileName(), test::MSBINARY);
-        else if (rFilter == "MS Excel 97")
-            validate(maTempFile.GetFileName(), test::MSBINARY);
-        else if (rFilter == "MS PowerPoint 97")
-            validate(maTempFile.GetFileName(), test::MSBINARY);
+        if (rFilter == "Office Open XML Text")
+        {
+            // do nothing: too many validation errors right now
+        }
+        else
+            validate(maTempFile.GetFileName(), rFilter);
     }
 }
 

Reply via email to