commit 554eb63d3967fb88a38452114265d8a1bd5fee04 Author: Thibaut Cuvelier <tcuvel...@lyx.org> Date: Fri Feb 7 00:52:18 2025 +0100
InsetERT: recognise when the contents look sufficiently like XML to attempt outputting them as raw XML. This behaviour is closer to how LyX 2.3 and earlier versions handled ERTs for DocBook: they were always output as SGML. This no longer makes sense for the new DocBook support (you can export basically any document to DocBook and have reasonable output, while previously you had to use a DocBook template), but this heuristic should capture most previous use cases. (If the ERT is just XML: output it. Otherwise, it's probably not meant to be output as-is -- LaTeX, text, etc.) --- src/insets/InsetERT.cpp | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src/insets/InsetERT.cpp b/src/insets/InsetERT.cpp index baf3b023ae..bfececdbab 100644 --- a/src/insets/InsetERT.cpp +++ b/src/insets/InsetERT.cpp @@ -30,6 +30,7 @@ #include "support/TempFile.h" #include "Encoding.h" +#include <algorithm> #include <sstream> #include <regex> #include <iostream> @@ -129,6 +130,7 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const // Try to recognise some commands to have a nicer DocBook output. bool output_as_comment = true; + docstring os_trimmed = trim(os.str()); // First step: some commands have a direct mapping to DocBook, mostly because the mapping is simply text or // an XML entity. @@ -137,8 +139,6 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const // recognised should simply be put in comments: have a list of elements that are either already recognised or are // not yet recognised? Global transformations like \string should then come first.) { - docstring os_trimmed = trim(os.str()); - // Rewrite \"u to \"{u}. static regex const regNoBraces(R"(^\\\W\w)"); if (regex_search(to_utf8(os_trimmed), regNoBraces)) { @@ -190,6 +190,43 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const } } + // Third step: maybe this is XML, after all. 
+ // Reminder: &lt; is <, &gt; is >. + if (prefixIs(os_trimmed, from_ascii("&lt;")) && + suffixIs(os_trimmed, from_ascii("&gt;"))) { + // To avoid false positives, ensure that the contents are only full XML tags, like: + // `<revhistory>`. This means that, in some cases, the user might expect this case + // to be triggered, but we decline to output it as raw XML to avoid errors. For + // instance: ERT[<revhistory] ERT[>]. It's quite unlikely for LaTeX code to have + // exactly the same number of < and >, but well-formed XML always does. This check + // does not enforce that the full ERT contains a valid XML excerpt: there might be + // a tag opening without a closing, like: ERT[<revhistory>] ERT[</revhistory>]. + auto count_substrings = [&os_trimmed](const docstring & substring) -> int { + // Hypothesis: no overlapping sequence. This is perfectly fine for this use case. + int occurrences = 0; + std::string::size_type pos = 0; + while ((pos = os_trimmed.find(substring, pos)) != std::string::npos) { + ++occurrences; + pos += substring.length(); + } + return occurrences; + }; + const int num_open_tags = count_substrings(from_ascii("&lt;")); + const int num_close_tags = count_substrings(from_ascii("&gt;")); + + if (num_close_tags == num_open_tags) { + // Decide this ERT is close enough to well-formed XML: unescape + // XML elements and output the string as-is (to avoid that + // XMLStream escapes the characters again). + xs << XMLStream::ESCAPE_NONE + << subst(subst(subst(os.str(), + from_ascii("&lt;"), from_ascii("<")), + from_ascii("&gt;"), from_ascii(">")), + from_ascii("&amp;"), from_ascii("&")); + output_as_comment = false; + } + } + // Otherwise, output the ERT as a comment with the appropriate escaping if the command is not recognised. if (output_as_comment) { xs << XMLStream::ESCAPE_NONE << "<!-- "; -- lyx-cvs mailing list lyx-cvs@lists.lyx.org https://lists.lyx.org/mailman/listinfo/lyx-cvs