commit f70409b3b00cca400c01ea2a28b174e360b9024f
Author: Thibaut Cuvelier <tcuvel...@lyx.org>
Date:   Mon Nov 4 02:07:41 2024 +0100

    MathStream: perform the conversion for MathML per-character for MathML Core 
in case there is an active font.
    
    "Per-character" is performed as a user might see it: you need to find 
entities before doing the mapping!
---
 src/mathed/InsetMathBoldSymbol.cpp |  14 ++-
 src/mathed/InsetMathBox.cpp        |   6 +-
 src/mathed/InsetMathBrace.cpp      |   4 +-
 src/mathed/InsetMathChar.cpp       |   4 +-
 src/mathed/InsetMathSize.cpp       |   4 +-
 src/mathed/InsetMathSymbol.cpp     |   9 +-
 src/mathed/MathStream.cpp          | 215 ++++++++++++++++++++++++++++++++++++-
 src/mathed/MathStream.h            |  42 ++++++++
 8 files changed, 284 insertions(+), 14 deletions(-)

diff --git a/src/mathed/InsetMathBoldSymbol.cpp 
b/src/mathed/InsetMathBoldSymbol.cpp
index cd78ff7a68..ea9be7edf8 100644
--- a/src/mathed/InsetMathBoldSymbol.cpp
+++ b/src/mathed/InsetMathBoldSymbol.cpp
@@ -110,9 +110,17 @@ void InsetMathBoldSymbol::write(TeXMathStream & os) const
 
 void InsetMathBoldSymbol::mathmlize(MathMLStream & ms) const
 {
-       ms << MTagInline("mstyle", "mathvariant='bold'")
-          << cell(0)
-          << ETagInline("mstyle");
+       if (ms.version() == MathMLVersion::mathmlCore) {
+               // All three kinds have the same meaning (and are recognised in
+               // MathFontInfo::fromMacro).
+               MathFontInfo old_font = 
ms.fontInfo().mergeWith(MathFontInfo::fromMacro(from_ascii("boldsymbol")));
+               ms << cell(0);
+               ms.fontInfo() = old_font;
+       } else {
+               ms << MTagInline("mstyle", "mathvariant='bold'")
+                  << cell(0)
+                  << ETagInline("mstyle");
+       }
 }
 
 
diff --git a/src/mathed/InsetMathBox.cpp b/src/mathed/InsetMathBox.cpp
index 578370ffd1..17415adcbf 100644
--- a/src/mathed/InsetMathBox.cpp
+++ b/src/mathed/InsetMathBox.cpp
@@ -406,9 +406,9 @@ void InsetMathBoxed::infoize(odocstream & os) const
 
 void InsetMathBoxed::mathmlize(MathMLStream & ms) const
 {
-       ms << MTag("mrow", "class='boxed'");
-       ms << cell(0);
-       ms << ETag("mrow");
+       ms << MTag("mrow", "class='boxed'")
+          << cell(0)
+          << ETag("mrow");
 }
 
 
diff --git a/src/mathed/InsetMathBrace.cpp b/src/mathed/InsetMathBrace.cpp
index 4455c42a17..544bbf63ec 100644
--- a/src/mathed/InsetMathBrace.cpp
+++ b/src/mathed/InsetMathBrace.cpp
@@ -102,7 +102,9 @@ void InsetMathBrace::octave(OctaveStream & os) const
 
 void InsetMathBrace::mathmlize(MathMLStream & ms) const
 {
-       ms << MTag("mrow") << cell(0) << ETag("mrow");
+       ms << MTag("mrow")
+       << cell(0)
+       << ETag("mrow");
 }
 
 
diff --git a/src/mathed/InsetMathChar.cpp b/src/mathed/InsetMathChar.cpp
index 801cab60aa..4ff2e2a462 100644
--- a/src/mathed/InsetMathChar.cpp
+++ b/src/mathed/InsetMathChar.cpp
@@ -262,7 +262,7 @@ void InsetMathChar::mathmlize(MathMLStream & ms) const
 
        if (ms.inText()) {
                if (entity.empty())
-                       ms << char_;
+                       ms << StartRespectFont() << char_ << StopRespectFont();
                else
                        ms << from_ascii(entity);
                return;
@@ -279,7 +279,7 @@ void InsetMathChar::mathmlize(MathMLStream & ms) const
                (isAlphaASCII(char_) || Encodings::isMathAlpha(char_))
                        ? "mi" : "mo";
        ms << MTagInline(type, std::string(type) == "mo" ? "stretchy='false'" : 
"")
-          << char_type(char_)
+          << StartRespectFont() << char_type(char_) << StopRespectFont()
           << ETagInline(type);
 }
 
diff --git a/src/mathed/InsetMathSize.cpp b/src/mathed/InsetMathSize.cpp
index 5055f2d73e..97308c752f 100644
--- a/src/mathed/InsetMathSize.cpp
+++ b/src/mathed/InsetMathSize.cpp
@@ -85,7 +85,9 @@ void InsetMathSize::mathmlize(MathMLStream & ms) const
        stringstream attrs;
        attrs << "displaystyle='" << (dispstyle ? "true" : "false")
                << "' scriptlevel='" << scriptlevel << "'";
-       ms << MTag("mstyle", attrs.str()) << cell(0) << ETag("mstyle");
+       ms << MTag("mstyle", attrs.str())
+       << cell(0)
+       << ETag("mstyle");
 }
 
 
diff --git a/src/mathed/InsetMathSymbol.cpp b/src/mathed/InsetMathSymbol.cpp
index 14e84fdfd2..69d6b03d98 100644
--- a/src/mathed/InsetMathSymbol.cpp
+++ b/src/mathed/InsetMathSymbol.cpp
@@ -161,11 +161,16 @@ void InsetMathSymbol::mathmlize(MathMLStream & ms) const
        // FIXME We may need to do more interesting things
        // with MathMLtype.
        ms << MTagInline(sym_->MathMLtype());
-       if (sym_->xmlname == "x")
+       if (sym_->xmlname == "x") {
                // unknown so far
                ms << name();
-       else
+       } else if (strcmp(sym_->MathMLtype(), "mi") == 0) {
+               // If it's a character or a Greek letter (i.e. "mi"), map to a 
font.
+               ms << StartRespectFont() << sym_->xmlname << StopRespectFont();
+       } else {
+               // Operators do not have font variants.
                ms << sym_->xmlname;
+       }
        ms << ETagInline(sym_->MathMLtype());
 }
 
diff --git a/src/mathed/MathStream.cpp b/src/mathed/MathStream.cpp
index c6f78f72c1..ea1459e5d8 100644
--- a/src/mathed/MathStream.cpp
+++ b/src/mathed/MathStream.cpp
@@ -27,6 +27,8 @@
 #include <cstring>
 #include <FontInfo.h>
 
+#include "support/lstrings.h"
+
 using namespace std;
 
 namespace lyx {
@@ -69,7 +71,8 @@ MathFontInfo MathFontInfo::fromMacro(const docstring& tag)
                font.shape_ = MATH_UP_SHAPE;
        else if (tag == "frak" || tag == "mathfrak")
                font.family_ = MATH_FRAKTUR_FAMILY;
-       else if (tag == "mathbf" || tag == "textbf")
+       else if (tag == "mathbf" || tag == "textbf"
+                       || tag == "boldsymbol" || tag == "bm" || tag == "hm")
                font.series_ = MATH_BOLD_SERIES;
        else if (tag == "mathbb" || tag == "mathbbm"
                         || tag == "mathds")
@@ -193,6 +196,139 @@ std::string MathFontInfo::toHTMLSpanClass() const
 }
 
 
+docstring MathFontInfo::convertCharacterToUnicodeEntityWithFont(const 
docstring & c, bool in_text) const
+{
+       if (c.size() <= 1) {
+               return c;
+       }
+       // Otherwise, it's an entity, like 0x1d44e (as a hexadecimal number).
+       return from_ascii("&#") + convertCharacterToUnicodeWithFont(c, in_text) 
+ from_ascii(";");
+}
+
+
+docstring MathFontInfo::convertCharacterToUnicodeWithFont(const docstring & c, 
bool in_text) const
+{
+       MathVariantList const & mvl = mathedVariantList();
+
+       // If this character is unknown, exit early.
+       const auto it = mvl.find(support::ascii_lowercase(c));
+       if (it == mvl.end()) {
+               return c;
+       }
+
+       // Check for the best variant. Heuristically:
+       // - First check the font type: normal, script, fraktur, etc. This is 
the
+    //   most constraining factor.
+       // - Second, check for shape and series.
+       // If the variant for one factor does not exist, ignore it and continue
+    // the search. Hence, we store the copies of family, shape, and series.
+       UnicodeVariants const & variants = it->second;
+
+       MathFontFamily family = family_;
+       MathFontSeries series = series_;
+       MathFontShape shape = shape_;
+
+       if (family == MATH_INHERIT_FAMILY) {
+               family = MATH_NORMAL_FAMILY;
+       }
+       if (series == MATH_INHERIT_SERIES) {
+               series = MATH_MEDIUM_SERIES;
+       }
+       if (shape == MATH_INHERIT_SHAPE) {
+               shape = in_text ? MATH_UP_SHAPE : MATH_ITALIC_SHAPE;
+       }
+
+       if (family == MATH_MONOSPACE_FAMILY) {
+               if (!variants.monospace.empty()) return variants.monospace;
+               family = MATH_NORMAL_FAMILY;
+       }
+
+       if (family == MATH_DOUBLE_STRUCK_FAMILY) {
+               if (!variants.double_struck.empty()) return 
variants.double_struck;
+               family = MATH_NORMAL_FAMILY;
+       }
+
+       if (family == MATH_FRAKTUR_FAMILY) {
+               if (series == MATH_BOLD_SERIES) {
+                       if (!variants.bold_fraktur.empty()) return 
variants.bold_fraktur;
+                       series = MATH_MEDIUM_SERIES;
+               }
+
+               if (series == MATH_MEDIUM_SERIES) {
+                       if (!variants.fraktur.empty()) return variants.fraktur;
+               }
+
+               family = MATH_NORMAL_FAMILY;
+       }
+
+       if (family == MATH_SCRIPT_FAMILY) {
+               if (series == MATH_BOLD_SERIES) {
+                       if (!variants.bold_script.empty()) return 
variants.bold_script;
+                       series = MATH_MEDIUM_SERIES;
+               }
+
+               if (series == MATH_MEDIUM_SERIES) {
+                       if (!variants.script.empty()) return variants.script;
+               }
+
+               family = MATH_NORMAL_FAMILY;
+       }
+
+       if (family == MATH_SANS_FAMILY) {
+               if (series == MATH_BOLD_SERIES) {
+                       if (shape == MATH_UP_SHAPE) {
+                               if (!variants.bold_sans.empty()) return 
variants.bold_sans;
+                       } else {
+                               if (!variants.bold_italic_sans.empty()) return 
variants.bold_italic_sans;
+                       }
+                       series = MATH_MEDIUM_SERIES;
+               }
+
+               if (series == MATH_MEDIUM_SERIES) {
+                       if (shape == MATH_UP_SHAPE) {
+                               if (!variants.sans.empty()) return 
variants.sans;
+                       } else {
+                               if (!variants.italic_sans.empty()) return 
variants.italic_sans;
+                       }
+               }
+
+               family = MATH_NORMAL_FAMILY;
+       }
+
+       if (family != MATH_NORMAL_FAMILY) {
+               LYXERR(Debug::MATHED,
+                               "Unexpected case in 
MathFontInfo::convertCharacterToUnicodeWithFont"
+                               <<"(c = " << to_ascii(c) << ", in_text = " << 
in_text << "), unrecognised family: "
+                               << "family_ = " << family_ << ", series = " << 
series_ << ", shape = " << shape_);
+               // Continue processing to return a value that matches the other 
constraints.
+       }
+
+       if (series == MATH_BOLD_SERIES) {
+               if (shape == MATH_UP_SHAPE) {
+                       if (!variants.bold.empty()) return variants.bold;
+               } else {
+                       if (!variants.bold_italic.empty()) return 
variants.bold_italic;
+               }
+               series = MATH_MEDIUM_SERIES;
+       }
+
+       if (series == MATH_MEDIUM_SERIES) {
+               if (shape == MATH_UP_SHAPE) {
+                       if (!variants.character.empty()) return 
variants.character;
+               } else {
+                       if (!variants.italic.empty()) return variants.italic;
+               }
+       }
+
+       // The previous cases should have matched, unless this code is not up 
to date.
+       LYXERR(Debug::MATHED,
+                       "Unexpected case in 
MathFontInfo::convertCharacterToUnicodeWithFont"
+                       <<"(c = " << c << ", in_text = " << in_text << "), 
unrecognised series/shape: "
+                       << "family_ = " << family_ << ", series = " << series_ 
<< ", shape = " << shape_);
+       return variants.character;
+}
+
+
 NormalStream & operator<<(NormalStream & ns, MathAtom const & at)
 {
        at->normalize(ns);
@@ -515,7 +651,68 @@ MathMLStream & operator<<(MathMLStream & ms, MathData 
const & ar)
 MathMLStream & operator<<(MathMLStream & ms, docstring const & s)
 {
        ms.beforeText();
-       ms.os_ << s;
+       if (!ms.respect_font_) {
+               // Fonts are not respected (e.g. while writing tags): output verbatim.
+               ms.os_ << s;
+       } else {
+               // Only care about fonts if they are currently enabled.
+               if (ms.version() == MathMLVersion::mathmlCore) {
+                       // New case: MathML uses Unicode characters to indicate 
fonts.
+                       // If possible, avoid doing the mapping: it involves 
looking up a hash
+                       // table and doing a lot of conditions *per character*
+                       bool needs_no_mapping =
+                               (ms.current_font_.family() == 
MathFontInfo::MathFontFamily::MATH_INHERIT_FAMILY ||
+                                       ms.current_font_.family() == 
MathFontInfo::MathFontFamily::MATH_NORMAL_FAMILY) &&
+                               (ms.current_font_.series() == 
MathFontInfo::MathFontSeries::MATH_INHERIT_SERIES ||
+                                       ms.current_font_.series() == 
MathFontInfo::MathFontSeries::MATH_MEDIUM_SERIES) &&
+                               (ms.current_font_.shape() == 
MathFontInfo::MathFontShape::MATH_INHERIT_SHAPE ||
+                                       (ms.in_mtext_ && 
ms.current_font_.shape() == MathFontInfo::MathFontShape::MATH_UP_SHAPE) ||
+                                       (!ms.in_mtext_ && 
ms.current_font_.shape() == MathFontInfo::MathFontShape::MATH_ITALIC_SHAPE));
+                       if (needs_no_mapping) {
+                               ms.os_ << s;
+                       } else {
+                               // Perform the conversion character per 
character (which might
+                               // mean consume a complete Greek entity!).
+                               docstring buf;
+                               bool within_entity = false;
+                               for (const char_type c : s) {
+                                       if (!within_entity && c == '&') { // 
New entity.
+                                               within_entity = true;
+                                       } else if (within_entity && c == '#') { 
// Still new entity.
+                                               // Nothing to do: 
unicode_alphanum_variants only has
+                                               // the code point, not the full 
XML/HTML entity.
+                                       } else if (within_entity && c == ';') { 
// End of entity.
+                                               if (!buf.empty() && buf[0] == 'x') {
+                                                       // An HTML entity is 
typically &#x3B1;, but
+                                                       // 
unicode_alphanum_variants has 0x3B1.
+                                                       buf.insert(0, 
from_ascii("0"));
+                                               }
+                                               ms.os_ << 
ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText());
+                                               buf.clear();
+                                               within_entity = false;
+                                       } else if (within_entity) { // Within 
new entity.
+                                               buf += c;
+                                       } else {
+                                               buf = docstring(1, c); // (count, char) order: exactly one copy of c.
+                                               ms.os_ << 
ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText());
+                                               buf.clear();
+                                       }
+
+                                       if (!within_entity && !buf.empty()) {
+                                               lyxerr << "Assertion failed in 
MathMLStream::operator<<(docstring): not reading an entity "
+                                                      << "while the buffer is 
not empty (" << buf << ")";
+                                       }
+                               }
+                               if (!buf.empty()) {
+                                       lyxerr << "Assertion failed in 
MathMLStream::operator<<(docstring): the buffer is not empty (" << buf << ")";
+                                       ms.os_ << 
ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText());
+                               }
+                       }
+               } else {
+                       // Old case (MathML3): MathML uses mathvariant to 
indicate fonts.
+                       ms.os_ << s;
+               }
+       }
        return ms;
 }
 
@@ -606,6 +803,20 @@ MathMLStream & operator<<(MathMLStream & ms, CTag const & 
t)
 }
 
 
+MathMLStream & operator<<(MathMLStream & ms, StartRespectFont)
+{
+       ms.respect_font_ = true;
+       return ms;
+}
+
+
+MathMLStream & operator<<(MathMLStream & ms, StopRespectFont)
+{
+       ms.respect_font_ = false;
+       return ms;
+}
+
+
 //////////////////////////////////////////////////////////////////////
 
 
diff --git a/src/mathed/MathStream.h b/src/mathed/MathStream.h
index d611ec8c7c..4ae9b7e2fc 100644
--- a/src/mathed/MathStream.h
+++ b/src/mathed/MathStream.h
@@ -88,6 +88,32 @@ public:
        /// Transforms this font into a class attribute for the HTML span tag.
        std::string toHTMLSpanClass() const;
 
+       /// Converts the character into the closest Unicode character that 
encodes
+       /// this font. If there is only a partial mapping, parts of the mapping 
are
+    /// applied. For instance, take the character C and a bold-italic font.
+    /// - If there is a bold-italic mapping for this character, it is returned.
+    /// - If there is only a bold mapping for this character, a bold character
+    ///   is returned. This font encoding is the closest one to the font.
+    /// - If there are two mappings (one bold, one italic), one of them is
+    ///   returned (arbitrary choice between the two).
+    /// - If there are no mappings, the original character is returned.
+    /// The mappings are defined in the global variable theMathVariantList.
+    ///
+    /// The character is supposed to be a single Latin letter (a-z, A-Z) or
+    /// digit (0-9) or the entity encoding a Greek character (0x3b1-0x3c9
+       /// for lower case, 0x391-0x3a9 for upper case), exactly like the
+    /// `unicode_alphanum_variants` file.
+       ///
+       /// If in_text, the default shape is up. If not in_text, the default 
shape
+       /// is italic. This behaviour matches that of MathMLStream::in_text_.
+       [[nodiscard]]
+       docstring convertCharacterToUnicodeWithFont(const docstring & c, bool 
in_text) const;
+       /// Converts the character into the closest Unicode character that 
encodes
+       /// this font as an entity if the character is not ASCII.
+       /// Also see convertCharacterToUnicodeWithFont.
+       [[nodiscard]]
+       docstring convertCharacterToUnicodeEntityWithFont(const docstring & c, 
bool in_text) const;
+
 private:
        MathFontFamily family_;
        MathFontSeries series_;
@@ -438,6 +464,14 @@ public:
 };
 
 
+/// Signalling elements for font handling. They do not output anything per se,
+/// they alter the state of the stream to either start or stop respecting
+/// fonts (i.e. output Unicode entities encoding the font, such as
+/// "Mathematical Italic Small A" &#x1d44e;).
+struct StartRespectFont{};
+struct StopRespectFont{};
+
+
 /// Throw MathExportException to signal that the attempt to export
 /// some math in the current format did not succeed. E.g., we can't
 /// export xymatrix as MathML, so that will throw, and we'll fall back
@@ -503,6 +537,8 @@ private:
        MathStyle font_math_style_;
        /// Current font (which might be nested).
        MathFontInfo current_font_;
+       /// whether the output shall respect the current font
+       bool respect_font_ = false;
        ///
        friend class SetMode;
        friend MathMLStream & operator<<(MathMLStream &, MathAtom const &);
@@ -513,6 +549,8 @@ private:
        friend MathMLStream & operator<<(MathMLStream &, ETag const &);
        friend MathMLStream & operator<<(MathMLStream &, ETagInline const &);
        friend MathMLStream & operator<<(MathMLStream &, CTag const &);
+       friend MathMLStream & operator<<(MathMLStream &, StartRespectFont);
+       friend MathMLStream & operator<<(MathMLStream &, StopRespectFont);
 };
 
 ///
@@ -537,6 +575,10 @@ MathMLStream & operator<<(MathMLStream &, ETag const &);
 MathMLStream & operator<<(MathMLStream &, ETagInline const &);
 ///
 MathMLStream & operator<<(MathMLStream &, CTag const &);
+/// Starts respecting fonts until meeting StopRespectFont.
+MathMLStream & operator<<(MathMLStream &, StartRespectFont);
+/// Stops respecting fonts.
+MathMLStream & operator<<(MathMLStream &, StopRespectFont);
 
 
 /// A simpler version of ModeSpecifier, for MathML
-- 
lyx-cvs mailing list
lyx-cvs@lists.lyx.org
https://lists.lyx.org/mailman/listinfo/lyx-cvs

Reply via email to