We currently emit too many encoding changes in the LaTeX output. This triggers an inputenc bug: http://bugzilla.lyx.org/show_bug.cgi?id=3235. The generated output is also incorrect: as the attached file shows, it is impossible to export that document to LaTeX.
The attached patch fixes this bug and two other problems I discovered. It is not completely finished yet (insets deriving from InsetText other than tabular need a bit of work), but both the most complicated inputenc testing document I have from Dov and the attached file work fine with it. The patch is probably too complicated to understand as a whole, so I'd like to put it in bit by bit. José, when do you want to release the next beta, and when should I start sending the bits? Georg
heb-test-multilang-par-greek.lyx
Description: application/lyx
Index: src/paragraph.h =================================================================== --- src/paragraph.h (Revision 17412) +++ src/paragraph.h (Arbeitskopie) @@ -31,6 +31,7 @@ namespace lyx { class Buffer; class BufferParams; class Counters; +class Encoding; class InsetBase; class InsetBibitem; class LaTeXFeatures; @@ -125,7 +126,7 @@ public: /// bool simpleTeXOnePar(Buffer const &, BufferParams const &, - LyXFont const & outerfont, odocstream &, + LyXFont const &, Encoding const &, odocstream &, TexRow & texrow, OutputParams const &) const; /// Can we drop the standard paragraph wrapper? Index: src/paragraph_pimpl.C =================================================================== --- src/paragraph_pimpl.C (Revision 17412) +++ src/paragraph_pimpl.C (Arbeitskopie) @@ -482,7 +482,6 @@ void Paragraph::Pimpl::simpleTeXSpecialC odocstream & os, TexRow & texrow, OutputParams const & runparams, - LyXFont & font, LyXFont & running_font, LyXFont & basefont, LyXFont const & outerfont, @@ -524,15 +523,16 @@ void Paragraph::Pimpl::simpleTeXSpecialC } else { if (open_font) { column += running_font.latexWriteEndChanges( - os, basefont, basefont, bparams); + os, basefont, basefont); open_font = false; } - basefont = owner_->getLayoutFont(bparams, outerfont); - running_font = basefont; - if (font.family() == LyXFont::TYPEWRITER_FAMILY) + if (running_font.family() == LyXFont::TYPEWRITER_FAMILY) os << '~'; + basefont = owner_->getLayoutFont(bparams, outerfont); + running_font = basefont; + if (runparams.moving_arg) os << "\\protect "; @@ -578,7 +578,7 @@ void Paragraph::Pimpl::simpleTeXSpecialC // some insets cannot be inside a font change command if (open_font && inset->noFontChange()) { column += running_font.latexWriteEndChanges( - os, basefont, basefont, bparams); + os, basefont, basefont); open_font = false; basefont = owner_->getLayoutFont(bparams, outerfont); running_font = basefont; @@ -634,7 +634,7 @@ void Paragraph::Pimpl::simpleTeXSpecialC break; } // Typewriter 
font also has them - if (font.family() == LyXFont::TYPEWRITER_FAMILY) { + if (running_font.family() == LyXFont::TYPEWRITER_FAMILY) { os.put(c); break; } @@ -659,7 +659,7 @@ void Paragraph::Pimpl::simpleTeXSpecialC case '-': // "--" in Typewriter mode -> "-{}-" if (i <= size() - 2 && getChar(i + 1) == '-' - && font.family() == LyXFont::TYPEWRITER_FAMILY) { + && running_font.family() == LyXFont::TYPEWRITER_FAMILY) { os << "-{}"; column += 2; } else { @@ -711,7 +711,7 @@ void Paragraph::Pimpl::simpleTeXSpecialC // I assume this is hack treating typewriter as verbatim // FIXME UNICODE: This can fail if c cannot be encoded // in the current encoding. - if (font.family() == LyXFont::TYPEWRITER_FAMILY) { + if (running_font.family() == LyXFont::TYPEWRITER_FAMILY) { if (c != '\0') { os.put(c); } @@ -738,7 +738,7 @@ void Paragraph::Pimpl::simpleTeXSpecialC } if (pnr == phrases_nr && c != '\0') { - Encoding const & encoding = getEncoding(bparams, doc_encoding, font); + Encoding const & encoding = getEncoding(bparams, doc_encoding, running_font); if (i < size() - 1) { char_type next = getChar(i + 1); if (Encodings::isCombiningChar(next)) { Index: src/paragraph_pimpl.h =================================================================== --- src/paragraph_pimpl.h (Revision 17412) +++ src/paragraph_pimpl.h (Arbeitskopie) @@ -141,7 +141,7 @@ public: void simpleTeXSpecialChars(Buffer const &, BufferParams const &, Encoding const &, odocstream &, TexRow & texrow, OutputParams const &, - LyXFont & font, LyXFont & running_font, + LyXFont & running_font, LyXFont & basefont, LyXFont const & outerfont, bool & open_font, Index: src/tabular.C =================================================================== --- src/tabular.C (Revision 17412) +++ src/tabular.C (Arbeitskopie) @@ -25,6 +25,7 @@ #include "cursor.h" #include "debug.h" #include "LaTeXFeatures.h" +#include "language.h" #include "lyxlex.h" #include "outputparams.h" #include "paragraph.h" @@ -2149,7 +2150,7 @@ int 
LyXTabular::TeXRow(odocstream & os, } ++ret; } - + for (col_type j = 0; j < columns_; ++j) { if (isPartOfMultiColumn(i, j)) continue; @@ -2171,7 +2172,12 @@ int LyXTabular::TeXRow(odocstream & os, if (!isLastCellInRow(cell)) { // not last cell in row os << " & "; } + Paragraph const & last = inset->paragraphs().back(); + if (!last.empty()) + runparams.encoding = last.getFontSettings(buf.params(), + last.size() - 1).language()->encoding(); ++cell; + } os << "\\tabularnewline"; if (row_info[i].bottom_space_default) { Index: src/outputparams.C =================================================================== --- src/outputparams.C (Revision 17412) +++ src/outputparams.C (Arbeitskopie) @@ -19,7 +19,7 @@ namespace lyx { OutputParams::OutputParams() : flavor(LATEX), nice(false), moving_arg(false), - local_font(0), free_spacing(false), use_babel(false), + local_font(0), encoding(0), free_spacing(false), use_babel(false), linelen(0), depth(0), exportdata(new ExportData), inComment(false), Index: src/lyxfont.C =================================================================== --- src/lyxfont.C (Revision 17412) +++ src/lyxfont.C (Arbeitskopie) @@ -23,7 +23,6 @@ #include "LColor.h" #include "lyxlex.h" #include "lyxrc.h" -#include "output_latex.h" #include "support/lstrings.h" @@ -738,16 +737,14 @@ void LyXFont::lyxWriteChanges(LyXFont co /// Writes the head of the LaTeX needed to impose this font // Returns number of chars written. 
int LyXFont::latexWriteStartChanges(odocstream & os, LyXFont const & base, - LyXFont const & prev, - BufferParams const & bparams) const + LyXFont const & prev) const { bool env = false; - int count = switchEncoding(os, bparams, *(prev.language()->encoding()), - *(language()->encoding())); + int count = 0; if (language()->babel() != base.language()->babel() && language() != prev.language()) { - if (isRightToLeft() != prev.isRightToLeft()) { + if (isRightToLeft() != base.isRightToLeft()) { if (isRightToLeft()) { os << "\\R{"; count += 3; @@ -836,8 +833,7 @@ int LyXFont::latexWriteStartChanges(odoc // Returns number of chars written // This one corresponds to latexWriteStartChanges(). (Asger) int LyXFont::latexWriteEndChanges(odocstream & os, LyXFont const & base, - LyXFont const & next, - BufferParams const & bparams) const + LyXFont const & next) const { int count = 0; bool env = false; @@ -901,8 +897,6 @@ int LyXFont::latexWriteEndChanges(odocst os << '}'; ++count; } - count += switchEncoding(os, bparams, *(language()->encoding()), - *(next.language()->encoding())); return count; } Index: src/outputparams.h =================================================================== --- src/outputparams.h (Revision 17412) +++ src/outputparams.h (Arbeitskopie) @@ -21,6 +21,7 @@ namespace lyx { +class Encoding; class ExportData; class LyXFont; @@ -68,6 +69,9 @@ public: */ mutable std::string document_language; + /// Current stream encoding. Only used for LaTeX. + mutable Encoding const * encoding; + /** free_spacing == true means that the inset is in a free-spacing paragraph. */ Index: src/lyxfont.h =================================================================== --- src/lyxfont.h (Revision 17412) +++ src/lyxfont.h (Arbeitskopie) @@ -300,16 +300,14 @@ public: font state active now. 
*/ int latexWriteStartChanges(odocstream &, LyXFont const & base, - LyXFont const & prev, - BufferParams const &) const; + LyXFont const & prev) const; /** Writes the tail of the LaTeX needed to change to this font. Returns number of chars written. Base is the font state we want to achieve. */ int latexWriteEndChanges(odocstream &, LyXFont const & base, - LyXFont const & next, - BufferParams const &) const; + LyXFont const & next) const; /// Build GUI description of font state Index: src/paragraph.C =================================================================== --- src/paragraph.C (Revision 17412) +++ src/paragraph.C (Arbeitskopie) @@ -33,6 +33,7 @@ #include "lyxrow.h" #include "messages.h" #include "outputparams.h" +#include "output_latex.h" #include "paragraph_funcs.h" #include "rowpainter.h" @@ -935,6 +936,7 @@ int Paragraph::endTeXParParams(BufferPar bool Paragraph::simpleTeXOnePar(Buffer const & buf, BufferParams const & bparams, LyXFont const & outerfont, + Encoding const & prev_encoding, odocstream & os, TexRow & texrow, OutputParams const & runparams) const { @@ -963,7 +965,6 @@ bool Paragraph::simpleTeXOnePar(Buffer c // As long as we are in the label, this font is the base font of the // label. Before the first body character it is set to the base font // of the body. - // This must be identical to basefont in TeXOnePar(). LyXFont basefont; // output change tracking marks only if desired, @@ -1009,13 +1010,14 @@ bool Paragraph::simpleTeXOnePar(Buffer c // Computed only once per paragraph since bparams.encoding() is expensive Encoding const & doc_encoding = bparams.encoding(); + for (pos_type i = 0; i < size(); ++i) { // First char in paragraph or after label? 
if (i == body_pos) { if (body_pos > 0) { if (open_font) { column += running_font.latexWriteEndChanges( - os, basefont, basefont, bparams); + os, basefont, basefont); open_font = false; } basefont = getLayoutFont(bparams, outerfont); @@ -1054,10 +1056,10 @@ bool Paragraph::simpleTeXOnePar(Buffer c changeType, output); runningChangeType = changeType; - value_type c = getChar(i); + value_type const c = getChar(i); // Fully instantiated font - LyXFont font = getFont(bparams, i, outerfont); + LyXFont const font = getFont(bparams, i, outerfont); LyXFont const last_font = running_font; @@ -1068,37 +1070,43 @@ bool Paragraph::simpleTeXOnePar(Buffer c { column += running_font.latexWriteEndChanges( os, basefont, - (i == body_pos-1) ? basefont : font, - bparams); + (i == body_pos-1) ? basefont : font); running_font = basefont; open_font = false; } + // Switch file encoding if necessary + column += switchEncoding(os, bparams, + (i == 0) ? + prev_encoding : + *(last_font.language()->encoding()), + *(font.language()->encoding())); + // Do we need to change font? if ((font != running_font || font.language() != running_font.language()) && i != body_pos - 1) { - column += font.latexWriteStartChanges( - os, basefont, last_font, bparams); + column += font.latexWriteStartChanges(os, basefont, + last_font); running_font = font; open_font = true; } if (c == ' ') { - // Do not print the separation of the optional argument - if (i != body_pos - 1) { - if (pimpl_->simpleTeXBlanks(bparams, - doc_encoding, os, texrow, - i, column, font, *style)) - // A surrogate pair was output. We - // must not call simpleTeXSpecialChars - // in this iteration, since - // simpleTeXBlanks incremented i, and - // simpleTeXSpecialChars would output - // the combining character again. 
- continue; - } + if (i == body_pos - 1) + // Do not print the separation of the + // optional argument + continue; + if (pimpl_->simpleTeXBlanks(bparams, + doc_encoding, os, texrow, + i, column, font, *style)) + // A surrogate pair was output. We must not + // call simpleTeXSpecialChars in this + // iteration, since simpleTeXBlanks + // incremented i, and simpleTeXSpecialChars + // would output the combining character again. + continue; } OutputParams rp = runparams; @@ -1106,32 +1114,19 @@ bool Paragraph::simpleTeXOnePar(Buffer c rp.local_font = &font; rp.intitle = style->intitle; pimpl_->simpleTeXSpecialChars(buf, bparams, doc_encoding, os, - texrow, rp, font, running_font, + texrow, rp, running_font, basefont, outerfont, open_font, runningChangeType, *style, i, column, c); } // If we have an open font definition, we have to close it if (open_font) { -#ifdef FIXED_LANGUAGE_END_DETECTION - if (next_) { - running_font - .latexWriteEndChanges(os, basefont, - next_->getFont(bparams, 0, outerfont), - bparams); - } else { - running_font.latexWriteEndChanges(os, basefont, - basefont, bparams); - } -#else #ifdef WITH_WARNINGS //#warning For now we ALWAYS have to close the foreign font settings if they are //#warning there as we start another \selectlanguage with the next paragraph if //#warning we are in need of this. 
This should be fixed sometime (Jug) #endif - running_font.latexWriteEndChanges(os, basefont, basefont, - bparams); -#endif + running_font.latexWriteEndChanges(os, basefont, basefont); } column += Changes::latexMarkChange(os, Index: src/output_latex.C =================================================================== --- src/output_latex.C (Revision 17412) +++ src/output_latex.C (Arbeitskopie) @@ -96,30 +96,30 @@ TeXEnvironment(Buffer const & buf, LyXLayout_ptr const & style = pit->layout(); - Language const * language = pit->getParLanguage(bparams); - Language const * doc_language = bparams.language; - Language const * previous_language = + Language const * const par_language = pit->getParLanguage(bparams); + Language const * const doc_language = bparams.language; + Language const * const prev_par_language = (pit != paragraphs.begin()) ? boost::prior(pit)->getParLanguage(bparams) : doc_language; - if (language->babel() != previous_language->babel()) { + if (par_language->babel() != prev_par_language->babel()) { if (!lyxrc.language_command_end.empty() && - previous_language->babel() != doc_language->babel()) { + prev_par_language->babel() != doc_language->babel()) { os << from_ascii(subst( lyxrc.language_command_end, "$$lang", - previous_language->babel())) + prev_par_language->babel())) << '\n'; texrow.newline(); } if (lyxrc.language_command_end.empty() || - language->babel() != doc_language->babel()) { + par_language->babel() != doc_language->babel()) { os << from_ascii(subst( lyxrc.language_command_begin, "$$lang", - language->babel())) + par_language->babel())) << '\n'; texrow.newline(); } @@ -255,14 +255,26 @@ TeXOnePar(Buffer const & buf, OutputParams runparams = runparams_in; runparams.moving_arg |= style->needprotect; - Language const * language = pit->getParLanguage(bparams); - Language const * doc_language = bparams.language; - Language const * previous_language = - (pit != paragraphs.begin()) - ? 
boost::prior(pit)->getParLanguage(bparams) - : doc_language; + Language const * const par_language = pit->getParLanguage(bparams); + Language const * const doc_language = bparams.language; + Language const * prev_par_language; + Language const * previous_language; + if (pit == paragraphs.begin()) { + prev_par_language = doc_language; + previous_language = doc_language; + } else { + Paragraph const & prev = *boost::prior(pit); + prev_par_language = prev.getParLanguage(bparams); + // We don't need the fully instantiated font, since the + // language is never inherited. + previous_language = prev.getFontSettings( + bparams, prev.size() - 1).language(); + } + Encoding const * const encoding = runparams.encoding ? + runparams.encoding : + previous_language->encoding(); - if (language->babel() != previous_language->babel() + if (par_language->babel() != prev_par_language->babel() // check if we already put language command in TeXEnvironment() && !(style->isEnvironment() && (pit == paragraphs.begin() || @@ -271,45 +283,27 @@ TeXOnePar(Buffer const & buf, || boost::prior(pit)->getDepth() < pit->getDepth()))) { if (!lyxrc.language_command_end.empty() && - previous_language->babel() != doc_language->babel()) + prev_par_language->babel() != doc_language->babel()) { os << from_ascii(subst(lyxrc.language_command_end, "$$lang", - previous_language->babel())) + prev_par_language->babel())) << '\n'; texrow.newline(); } if (lyxrc.language_command_end.empty() || - language->babel() != doc_language->babel()) + par_language->babel() != doc_language->babel()) { os << from_ascii(subst( lyxrc.language_command_begin, "$$lang", - language->babel())) + par_language->babel())) << '\n'; texrow.newline(); } } - LyXFont const outerfont = - outerFont(std::distance(paragraphs.begin(), pit), - paragraphs); - // This must be identical to basefont in Paragraph::simpleTeXOnePar - LyXFont basefont = (pit->beginOfBody() > 0) ? 
- pit->getLabelFont(bparams, outerfont) : - pit->getLayoutFont(bparams, outerfont); - Encoding const & outer_encoding(*(outerfont.language()->encoding())); - // FIXME we switch from the outer encoding to the encoding of - // this paragraph, since I could not figure out the correct - // logic to take the encoding of the previous paragraph into - // account. This may result in some unneeded encoding changes. - if (switchEncoding(os, bparams, outer_encoding, - *(basefont.language()->encoding()))) { - os << '\n'; - texrow.newline(); - } - // In an inset with unlimited length (all in one row), // don't allow any special options in the paragraph if (!pit->forceDefaultParagraphs()) { @@ -360,9 +354,14 @@ TeXOnePar(Buffer const & buf, break; } + LyXFont const outerfont = + outerFont(std::distance(paragraphs.begin(), pit), + paragraphs); + // FIXME UNICODE os << from_utf8(everypar); bool need_par = pit->simpleTeXOnePar(buf, bparams, outerfont, + *encoding, os, texrow, runparams); // Make sure that \\par is done with the font of the last @@ -434,7 +433,7 @@ TeXOnePar(Buffer const & buf, } if (boost::next(pit) == paragraphs.end() - && language->babel() != doc_language->babel()) { + && par_language->babel() != doc_language->babel()) { // Since \selectlanguage write the language to the aux file, // we need to reset the language at the end of footnote or // float. @@ -452,17 +451,10 @@ TeXOnePar(Buffer const & buf, os << from_ascii(subst( lyxrc.language_command_end, "$$lang", - language->babel())); + par_language->babel())); pending_newline = true; } - // FIXME we switch from the encoding of this paragraph to the - // outer encoding, since I could not figure out the correct logic - // to take the encoding of the next paragraph into account. - // This may result in some unneeded encoding changes. 
- basefont = pit->getLayoutFont(bparams, outerfont); - switchEncoding(os, bparams, *(basefont.language()->encoding()), - outer_encoding); if (pending_newline) { os << '\n'; texrow.newline();