writerfilter/source/dmapper/DomainMapper_Impl.cxx | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-)
New commits: commit d632d86579467941ce8b3dda1dbd46c83a92877a Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Wed May 15 17:45:16 2024 +0200 Commit: Caolán McNamara <caolan.mcnam...@collabora.com> CommitDate: Sat May 18 22:28:27 2024 +0200 writerfilter: fix parsing of invalid STYLEREF field forum-mso-en-3309.docx contains a funny field that doesn't follow the grammar in the OOXML spec: STYLEREF \l "Heading 1" \* MERGEFORMAT Word can evaluate it and find the paragraph, so make the parser a bit more flexible, by adding known switches that don't have arguments, so that any argument following these becomes a field argument, for now only for STYLEREF. (regression from commit d4fdafa103bfea94a279d7069ddc50ba92f67d01) Change-Id: Ic42cd2be58fd65a817946e21a9661d357b02a99a Reviewed-on: https://gerrit.libreoffice.org/c/core/+/167697 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> (cherry picked from commit 5ae1379fcdd00228e683ae90991e275f570cd92d) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/167733 Reviewed-by: Caolán McNamara <caolan.mcnam...@collabora.com> diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index 20aa003b4101..4c5dff02fcf7 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -5433,13 +5433,27 @@ std::tuple<OUString, std::vector<OUString>, std::vector<OUString> > splitFieldCo OUString const token = lcl_ExtractToken(rCommand, nStartIndex, bHaveToken, bIsSwitch); assert(nStartIndex <= rCommand.size()); + static std::map<OUString, std::set<OUString>> const noArgumentSwitches = { + { u"STYLEREF"_ustr, + { u"\l"_ustr, u"\n"_ustr, u"\p"_ustr, u"\r"_ustr, u"\t"_ustr, u"\w"_ustr } } + }; if (bHaveToken) { if (sType.isEmpty()) { sType = token.toAsciiUpperCase(); } - else if (bIsSwitch || !switches.empty()) + else if (bIsSwitch) + { + switches.push_back(token); + } + // evidently Word 
evaluates 'STYLEREF \l "Heading 1" \* MERGEFORMAT' + // despite the grammar specifying that the style name must + // precede switches like '\l'; try to approximate that here + // by checking for known switches that don't expect arguments + else if (auto const it = noArgumentSwitches.find(sType); + !switches.empty() && (it == noArgumentSwitches.end() + || it->second.find(switches.back().toAsciiLowerCase()) == it->second.end())) { switches.push_back(token); }