writerfilter/source/dmapper/DomainMapper_Impl.cxx |   16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

New commits:
commit d632d86579467941ce8b3dda1dbd46c83a92877a
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Wed May 15 17:45:16 2024 +0200
Commit:     Caolán McNamara <caolan.mcnam...@collabora.com>
CommitDate: Sat May 18 22:28:27 2024 +0200

    writerfilter: fix parsing of invalid STYLEREF field
    
    forum-mso-en-3309.docx contains a funny field that doesn't follow
    the grammar in the OOXML spec:
    
    STYLEREF \t "Heading 1" \* MERGEFORMAT
    
    Word can nevertheless evaluate it and find the paragraph, so make the
    parser a bit more flexible: add a list of known switches that don't
    take arguments, so that any token following one of these becomes a
    field argument instead of being attached to the switch.
    For now this is done only for STYLEREF.
    
    (regression from commit d4fdafa103bfea94a279d7069ddc50ba92f67d01)
    
    Change-Id: Ic42cd2be58fd65a817946e21a9661d357b02a99a
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/167697
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>
    (cherry picked from commit 5ae1379fcdd00228e683ae90991e275f570cd92d)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/167733
    Reviewed-by: Caolán McNamara <caolan.mcnam...@collabora.com>

diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
index 20aa003b4101..4c5dff02fcf7 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
@@ -5433,13 +5433,27 @@ std::tuple<OUString, std::vector<OUString>, std::vector<OUString> > splitFieldCo
         OUString const token =
             lcl_ExtractToken(rCommand, nStartIndex, bHaveToken, bIsSwitch);
         assert(nStartIndex <= rCommand.size());
+        static std::map<OUString, std::set<OUString>> const noArgumentSwitches 
= {
+            { u"STYLEREF"_ustr,
+              { u"\l"_ustr, u"\n"_ustr, u"\p"_ustr, u"\r"_ustr, u"\t"_ustr, 
u"\w"_ustr } }
+        };
         if (bHaveToken)
         {
             if (sType.isEmpty())
             {
                 sType = token.toAsciiUpperCase();
             }
-            else if (bIsSwitch || !switches.empty())
+            else if (bIsSwitch)
+            {
+                switches.push_back(token);
+            }
+            // evidently Word evaluates 'STYLEREF \t "Heading 1" \* MERGEFORMAT'
+            // despite the grammar specifying that the style name must
+            // precede switches like '\t'; try to approximate that here
+            // by checking for known switches that don't expect arguments
+            else if (auto const it = noArgumentSwitches.find(sType);
+                !switches.empty() && (it == noArgumentSwitches.end()
+                                    || 
it->second.find(switches.back().toAsciiLowerCase()) == it->second.end()))
             {
                 switches.push_back(token);
             }

Reply via email to