commit f70409b3b00cca400c01ea2a28b174e360b9024f Author: Thibaut Cuvelier <tcuvel...@lyx.org> Date: Mon Nov 4 02:07:41 2024 +0100
MathStream: perform the conversion for MathML per-character for MathML Core in case there is an active font. "Per-character" is performed as a user might see it: you need to find entities before doing the mapping! --- src/mathed/InsetMathBoldSymbol.cpp | 14 ++- src/mathed/InsetMathBox.cpp | 6 +- src/mathed/InsetMathBrace.cpp | 4 +- src/mathed/InsetMathChar.cpp | 4 +- src/mathed/InsetMathSize.cpp | 4 +- src/mathed/InsetMathSymbol.cpp | 9 +- src/mathed/MathStream.cpp | 215 ++++++++++++++++++++++++++++++++++++- src/mathed/MathStream.h | 42 ++++++++ 8 files changed, 284 insertions(+), 14 deletions(-) diff --git a/src/mathed/InsetMathBoldSymbol.cpp b/src/mathed/InsetMathBoldSymbol.cpp index cd78ff7a68..ea9be7edf8 100644 --- a/src/mathed/InsetMathBoldSymbol.cpp +++ b/src/mathed/InsetMathBoldSymbol.cpp @@ -110,9 +110,17 @@ void InsetMathBoldSymbol::write(TeXMathStream & os) const void InsetMathBoldSymbol::mathmlize(MathMLStream & ms) const { - ms << MTagInline("mstyle", "mathvariant='bold'") - << cell(0) - << ETagInline("mstyle"); + if (ms.version() == MathMLVersion::mathmlCore) { + // All three kinds have the same meaning (and are recognised in + // MathFontInfo::fromMacro). 
+ MathFontInfo old_font = ms.fontInfo().mergeWith(MathFontInfo::fromMacro(from_ascii("boldsymbol"))); + ms << cell(0); + ms.fontInfo() = old_font; + } else { + ms << MTagInline("mstyle", "mathvariant='bold'") + << cell(0) + << ETagInline("mstyle"); + } } diff --git a/src/mathed/InsetMathBox.cpp b/src/mathed/InsetMathBox.cpp index 578370ffd1..17415adcbf 100644 --- a/src/mathed/InsetMathBox.cpp +++ b/src/mathed/InsetMathBox.cpp @@ -406,9 +406,9 @@ void InsetMathBoxed::infoize(odocstream & os) const void InsetMathBoxed::mathmlize(MathMLStream & ms) const { - ms << MTag("mrow", "class='boxed'"); - ms << cell(0); - ms << ETag("mrow"); + ms << MTag("mrow", "class='boxed'") + << cell(0) + << ETag("mrow"); } diff --git a/src/mathed/InsetMathBrace.cpp b/src/mathed/InsetMathBrace.cpp index 4455c42a17..544bbf63ec 100644 --- a/src/mathed/InsetMathBrace.cpp +++ b/src/mathed/InsetMathBrace.cpp @@ -102,7 +102,9 @@ void InsetMathBrace::octave(OctaveStream & os) const void InsetMathBrace::mathmlize(MathMLStream & ms) const { - ms << MTag("mrow") << cell(0) << ETag("mrow"); + ms << MTag("mrow") + << cell(0) + << ETag("mrow"); } diff --git a/src/mathed/InsetMathChar.cpp b/src/mathed/InsetMathChar.cpp index 801cab60aa..4ff2e2a462 100644 --- a/src/mathed/InsetMathChar.cpp +++ b/src/mathed/InsetMathChar.cpp @@ -262,7 +262,7 @@ void InsetMathChar::mathmlize(MathMLStream & ms) const if (ms.inText()) { if (entity.empty()) - ms << char_; + ms << StartRespectFont() << char_ << StopRespectFont(); else ms << from_ascii(entity); return; @@ -279,7 +279,7 @@ void InsetMathChar::mathmlize(MathMLStream & ms) const (isAlphaASCII(char_) || Encodings::isMathAlpha(char_)) ? "mi" : "mo"; ms << MTagInline(type, std::string(type) == "mo" ? 
"stretchy='false'" : "") - << char_type(char_) + << StartRespectFont() << char_type(char_) << StopRespectFont() << ETagInline(type); } diff --git a/src/mathed/InsetMathSize.cpp b/src/mathed/InsetMathSize.cpp index 5055f2d73e..97308c752f 100644 --- a/src/mathed/InsetMathSize.cpp +++ b/src/mathed/InsetMathSize.cpp @@ -85,7 +85,9 @@ void InsetMathSize::mathmlize(MathMLStream & ms) const stringstream attrs; attrs << "displaystyle='" << (dispstyle ? "true" : "false") << "' scriptlevel='" << scriptlevel << "'"; - ms << MTag("mstyle", attrs.str()) << cell(0) << ETag("mstyle"); + ms << MTag("mstyle", attrs.str()) + << cell(0) + << ETag("mstyle"); } diff --git a/src/mathed/InsetMathSymbol.cpp b/src/mathed/InsetMathSymbol.cpp index 14e84fdfd2..69d6b03d98 100644 --- a/src/mathed/InsetMathSymbol.cpp +++ b/src/mathed/InsetMathSymbol.cpp @@ -161,11 +161,16 @@ void InsetMathSymbol::mathmlize(MathMLStream & ms) const // FIXME We may need to do more interesting things // with MathMLtype. ms << MTagInline(sym_->MathMLtype()); - if (sym_->xmlname == "x") + if (sym_->xmlname == "x") { // unknown so far ms << name(); - else + } else if (strcmp(sym_->MathMLtype(), "mi") == 0) { + // If it's a character or a Greek letter (i.e. "mi"), map to a font. + ms << StartRespectFont() << sym_->xmlname << StopRespectFont(); + } else { + // Operators do not have font variants. 
ms << sym_->xmlname; + } ms << ETagInline(sym_->MathMLtype()); } diff --git a/src/mathed/MathStream.cpp b/src/mathed/MathStream.cpp index c6f78f72c1..ea1459e5d8 100644 --- a/src/mathed/MathStream.cpp +++ b/src/mathed/MathStream.cpp @@ -27,6 +27,8 @@ #include <cstring> #include <FontInfo.h> +#include "support/lstrings.h" + using namespace std; namespace lyx { @@ -69,7 +71,8 @@ MathFontInfo MathFontInfo::fromMacro(const docstring& tag) font.shape_ = MATH_UP_SHAPE; else if (tag == "frak" || tag == "mathfrak") font.family_ = MATH_FRAKTUR_FAMILY; - else if (tag == "mathbf" || tag == "textbf") + else if (tag == "mathbf" || tag == "textbf" + || tag == "boldsymbol" || tag == "bm" || tag == "hm") font.series_ = MATH_BOLD_SERIES; else if (tag == "mathbb" || tag == "mathbbm" || tag == "mathds") @@ -193,6 +196,139 @@ std::string MathFontInfo::toHTMLSpanClass() const } +docstring MathFontInfo::convertCharacterToUnicodeEntityWithFont(const docstring & c, bool in_text) const +{ + if (c.size() <= 1) { + return c; + } + // Otherwise, it's an entity, like 0x1d44e (as a hexadecimal number). + return from_ascii("&#") + convertCharacterToUnicodeWithFont(c, in_text) + from_ascii(";"); +} + + +docstring MathFontInfo::convertCharacterToUnicodeWithFont(const docstring & c, bool in_text) const +{ + MathVariantList const & mvl = mathedVariantList(); + + // If this character is unknown, exit early. + const auto it = mvl.find(support::ascii_lowercase(c)); + if (it == mvl.end()) { + return c; + } + + // Check for the best variant. Heuristically: + // - First check the font type: normal, script, fraktur, etc. This is the + // most constraining factor. + // - Second, check for shape and series. + // If the variant for one factor does not exist, ignore it and continue + // the search. Hence, we store the copies of family, shape, and series. 
+ UnicodeVariants const & variants = it->second; + + MathFontFamily family = family_; + MathFontSeries series = series_; + MathFontShape shape = shape_; + + if (family == MATH_INHERIT_FAMILY) { + family = MATH_NORMAL_FAMILY; + } + if (series == MATH_INHERIT_SERIES) { + series = MATH_MEDIUM_SERIES; + } + if (shape == MATH_INHERIT_SHAPE) { + shape = in_text ? MATH_UP_SHAPE : MATH_ITALIC_SHAPE; + } + + if (family == MATH_MONOSPACE_FAMILY) { + if (!variants.monospace.empty()) return variants.monospace; + family = MATH_NORMAL_FAMILY; + } + + if (family == MATH_DOUBLE_STRUCK_FAMILY) { + if (!variants.double_struck.empty()) return variants.double_struck; + family = MATH_NORMAL_FAMILY; + } + + if (family == MATH_FRAKTUR_FAMILY) { + if (series == MATH_BOLD_SERIES) { + if (!variants.bold_fraktur.empty()) return variants.bold_fraktur; + series = MATH_MEDIUM_SERIES; + } + + if (series == MATH_MEDIUM_SERIES) { + if (!variants.fraktur.empty()) return variants.fraktur; + } + + family = MATH_NORMAL_FAMILY; + } + + if (family == MATH_SCRIPT_FAMILY) { + if (series == MATH_BOLD_SERIES) { + if (!variants.bold_script.empty()) return variants.bold_script; + series = MATH_MEDIUM_SERIES; + } + + if (series == MATH_MEDIUM_SERIES) { + if (!variants.script.empty()) return variants.script; + } + + family = MATH_NORMAL_FAMILY; + } + + if (family == MATH_SANS_FAMILY) { + if (series == MATH_BOLD_SERIES) { + if (shape == MATH_UP_SHAPE) { + if (!variants.bold_sans.empty()) return variants.bold_sans; + } else { + if (!variants.bold_italic_sans.empty()) return variants.bold_italic_sans; + } + series = MATH_MEDIUM_SERIES; + } + + if (series == MATH_MEDIUM_SERIES) { + if (shape == MATH_UP_SHAPE) { + if (!variants.sans.empty()) return variants.sans; + } else { + if (!variants.italic_sans.empty()) return variants.italic_sans; + } + } + + family = MATH_NORMAL_FAMILY; + } + + if (family != MATH_NORMAL_FAMILY) { + LYXERR(Debug::MATHED, + "Unexpected case in MathFontInfo::convertCharacterToUnicodeWithFont" 
+ <<"(c = " << to_ascii(c) << ", in_text = " << in_text << "), unrecognised family: " + << "family_ = " << family_ << ", series = " << series_ << ", shape = " << shape_); + // Continue processing to return a value that matches the other constraints. + } + + if (series == MATH_BOLD_SERIES) { + if (shape == MATH_UP_SHAPE) { + if (!variants.bold.empty()) return variants.bold; + } else { + if (!variants.bold_italic.empty()) return variants.bold_italic; + } + series = MATH_MEDIUM_SERIES; + } + + if (series == MATH_MEDIUM_SERIES) { + if (shape == MATH_UP_SHAPE) { + if (!variants.character.empty()) return variants.character; + } else { + if (!variants.italic.empty()) return variants.italic; + } + } + + // The previous cases should have matched, unless this code is not up to date. + LYXERR(Debug::MATHED, + "Unexpected case in MathFontInfo::convertCharacterToUnicodeWithFont" + <<"(c = " << c << ", in_text = " << in_text << "), unrecognised series/shape: " + << "family_ = " << family_ << ", series = " << series_ << ", shape = " << shape_); + return variants.character; +} + + NormalStream & operator<<(NormalStream & ns, MathAtom const & at) { at->normalize(ns); @@ -515,7 +651,68 @@ MathMLStream & operator<<(MathMLStream & ms, MathData const & ar) MathMLStream & operator<<(MathMLStream & ms, docstring const & s) { ms.beforeText(); - ms.os_ << s; + if (!ms.respect_font_) { + // Ignore fonts for now. This is especially useful for tags. + ms.os_ << s; + } else { + // Only care about fonts if they are currently enabled. + if (ms.version() == MathMLVersion::mathmlCore) { + // New case: MathML uses Unicode characters to indicate fonts. 
+ // If possible, avoid doing the mapping: it involves looking up a hash + // table and doing a lot of conditions *per character* + bool needs_no_mapping = + (ms.current_font_.family() == MathFontInfo::MathFontFamily::MATH_INHERIT_FAMILY || + ms.current_font_.family() == MathFontInfo::MathFontFamily::MATH_NORMAL_FAMILY) && + (ms.current_font_.series() == MathFontInfo::MathFontSeries::MATH_INHERIT_SERIES || + ms.current_font_.series() == MathFontInfo::MathFontSeries::MATH_MEDIUM_SERIES) && + (ms.current_font_.shape() == MathFontInfo::MathFontShape::MATH_INHERIT_SHAPE || + (ms.in_mtext_ && ms.current_font_.shape() == MathFontInfo::MathFontShape::MATH_UP_SHAPE) || + (!ms.in_mtext_ && ms.current_font_.shape() == MathFontInfo::MathFontShape::MATH_ITALIC_SHAPE)); + if (needs_no_mapping) { + ms.os_ << s; + } else { + // Perform the conversion character per character (which might + // mean consume a complete Greek entity!). + docstring buf; + bool within_entity = false; + for (const char_type c : s) { + if (!within_entity && c == '&') { // New entity. + within_entity = true; + } else if (within_entity && c == '#') { // Still new entity. + // Nothing to do: unicode_alphanum_variants only has + // the code point, not the full XML/HTML entity. + } else if (within_entity && c == ';') { // End of entity. + if (buf.starts_with('x')) { + // An HTML entity is typically &#x3B1;, but + // unicode_alphanum_variants has 0x3B1. NOTE(review): when the lookup fails, the "0x..." form is returned unchanged and gets wrapped as "&#0x...;", which is not a valid character reference -- confirm and handle in convertCharacterToUnicodeEntityWithFont. + buf.insert(0, from_ascii("0")); + } + ms.os_ << ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText()); + buf.clear(); + within_entity = false; + } else if (within_entity) { // Within new entity. 
+ buf += c; + } else { + buf = docstring(1, c); + ms.os_ << ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText()); + buf.clear(); + } + + if (!within_entity && !buf.empty()) { + lyxerr << "Assertion failed in MathMLStream::operator<<(docstring): not reading an entity " + << "while the buffer is not empty (" << buf << ")"; + } + } + if (!buf.empty()) { + lyxerr << "Assertion failed in MathMLStream::operator<<(docstring): the buffer is not empty (" << buf << ")"; + ms.os_ << ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText()); + } + } + } else { + // Old case (MathML3): MathML uses mathvariant to indicate fonts. + ms.os_ << s; + } + } return ms; } @@ -606,6 +803,20 @@ MathMLStream & operator<<(MathMLStream & ms, CTag const & t) } +MathMLStream & operator<<(MathMLStream & ms, StartRespectFont) +{ + ms.respect_font_ = true; + return ms; +} + + +MathMLStream & operator<<(MathMLStream & ms, StopRespectFont) +{ + ms.respect_font_ = false; + return ms; +} + + ////////////////////////////////////////////////////////////////////// diff --git a/src/mathed/MathStream.h b/src/mathed/MathStream.h index d611ec8c7c..4ae9b7e2fc 100644 --- a/src/mathed/MathStream.h +++ b/src/mathed/MathStream.h @@ -88,6 +88,32 @@ public: /// Transforms this font into a class attribute for the HTML span tag. std::string toHTMLSpanClass() const; + /// Converts the character into the closest Unicode character that encodes + /// this font. If there is only a partial mapping, parts of the mapping are + /// applied. For instance, take the character C and a bold-italic font. + /// - If there is a bold-italic mapping for this character, it is returned. + /// - If there is only a bold mapping for this character, a bold character + /// is returned. This font encoding is the closest one to the font. + /// - If there are two mappings (one bold, one italic), one of them is + /// returned (arbitrary choice between the two). 
+ /// - If there are no mappings, the original character is returned. + /// The mappings are defined in the global variable theMathVariantList. + /// + /// The character is supposed to be a single Latin letter (a-z, A-Z) or + /// digit (0-9) or the entity encoding a Greek character (0x3b1-0x3c9 + /// for lower case, 0x391-0x3a9 for upper case), exactly like the + /// `unicode_alphanum_variants` file. + /// + /// If in_text, the default shape is up. If not in_text, the default shape + /// is italic. This behaviour matches that of MathMLStream::in_text_. + [[nodiscard]] + docstring convertCharacterToUnicodeWithFont(const docstring & c, bool in_text) const; + /// Converts the character into the closest Unicode character that encodes + /// this font as an entity if the character is not ASCII. + /// Also see convertCharacterToUnicodeWithFont. + [[nodiscard]] + docstring convertCharacterToUnicodeEntityWithFont(const docstring & c, bool in_text) const; + private: MathFontFamily family_; MathFontSeries series_; @@ -438,6 +464,14 @@ public: }; +/// Signalling elements for font handling. They do not output anything per se, +/// they alter the state of the stream to either start or stop respecting +/// fonts (i.e. output Unicode entities encoding the font, such as +/// "Mathematical Italic Small A" &#x1d44e;). +struct StartRespectFont{}; +struct StopRespectFont{}; + + /// Throw MathExportException to signal that the attempt to export /// some math in the current format did not succeed. E.g., we can't /// export xymatrix as MathML, so that will throw, and we'll fall back @@ -503,6 +537,8 @@ private: MathStyle font_math_style_; /// Current font (which might be nested). 
MathFontInfo current_font_; + /// whether the output shall respect the current font + bool respect_font_ = false; /// friend class SetMode; friend MathMLStream & operator<<(MathMLStream &, MathAtom const &); @@ -513,6 +549,8 @@ private: friend MathMLStream & operator<<(MathMLStream &, ETag const &); friend MathMLStream & operator<<(MathMLStream &, ETagInline const &); friend MathMLStream & operator<<(MathMLStream &, CTag const &); + friend MathMLStream & operator<<(MathMLStream &, StartRespectFont); + friend MathMLStream & operator<<(MathMLStream &, StopRespectFont); }; /// @@ -537,6 +575,10 @@ MathMLStream & operator<<(MathMLStream &, ETag const &); MathMLStream & operator<<(MathMLStream &, ETagInline const &); /// MathMLStream & operator<<(MathMLStream &, CTag const &); +/// Starts respecting fonts until meeting StopRespectFont. +MathMLStream & operator<<(MathMLStream &, StartRespectFont); +/// Stops respecting fonts. +MathMLStream & operator<<(MathMLStream &, StopRespectFont); /// A simpler version of ModeSpecifier, for MathML -- lyx-cvs mailing list lyx-cvs@lists.lyx.org https://lists.lyx.org/mailman/listinfo/lyx-cvs