Here is a small experiment I did. It is known that parsing very large files is slowed down by the fact that we do a whole lot of string compares instead of a good old binary search like in lyxlex.
Typically, when loading a big file from Herbert, Buffer::parseSingleLyXformat2Token takes 25.6% of the time, and a big part of that is due to 2114004 string comparisons. It occurred to me that this could be alleviated by reordering the tokens in the chained if statements. I whipped up the following simple script:

--counttokens----
#!/bin/sh
grep '^\\' $1 | sed -e 's/^\(\\[^ ]*\) .*$/\1/'|sort|uniq -c|sort -k1,1nr
--------

And running it on the book, I get:

ginette: ./counttokens fichiers/Versuch.lyx
   7767 \layout
   6239 \begin_inset
   6239 \end_inset
   5138 \latex
   4841 \backslash
   2638 \emph
   2555 \family
   2540 \noun
   2537 \color
   2534 \bar
   2534 \shape
   2420 \size
   1733 \series
    277 \begin_float
    277 \end_float
[..]

Therefore I moved the font, inset and backslash (useful for ERT) tests to the top of the method. The result is that Buffer::parseSingleLyXformat2Token takes 16.2% of the time, and the 404503 string comparisons now take less time than InsertChar, which makes more sense. I attach the patch instead of committing it, because I want to have a virtual nod from Lars first. I would not want to break the carefully crafted compatibility reading... Note that this is tested with lyxstring. Results with normal strings may be completely different... JMarc
Index: src/ChangeLog =================================================================== RCS file: /usr/local/lyx/cvsroot/lyx-devel/src/ChangeLog,v retrieving revision 1.651 diff -u -r1.651 ChangeLog --- src/ChangeLog 27 Mar 2002 23:27:12 -0000 1.651 +++ src/ChangeLog 27 Mar 2002 23:29:00 -0000 @@ -1,3 +1,9 @@ +2002-03-28 Jean-Marc Lasgouttes <[EMAIL PROTECTED]> + + * buffer.C (parseSingleLyXformat2Token): reorder a bit the tests + in order to reduce drastically the number of comparisons needed to + parse a large document + 2002-03-27 Jean-Marc Lasgouttes <[EMAIL PROTECTED]> * lyxfunc.C (getStatus): return 'disabled' early for LFUN_NOACTION Index: src/buffer.C =================================================================== RCS file: /usr/local/lyx/cvsroot/lyx-devel/src/buffer.C,v retrieving revision 1.322 diff -u -r1.322 buffer.C --- src/buffer.C 25 Mar 2002 11:15:25 -0000 1.322 +++ src/buffer.C 27 Mar 2002 23:29:11 -0000 @@ -564,6 +564,102 @@ #endif #ifndef NO_COMPABILITY + } else if (token == "\\end_inset") { + lyxerr << "Solitary \\end_inset. Missing \\begin_inset?.\n" + << "Last inset read was: " << last_inset_read + << endl; + // Simply ignore this. The insets do not have + // to read this. + // But insets should read it, it is a part of + // the inset isn't it? Lgb. 
+ } else if (token == "\\begin_inset") { +#ifndef NO_COMPABILITY + insertErtContents(par, pos, false); + ert_stack.push(ert_comp); + ert_comp = ErtComp(); +#endif + readInset(lex, par, pos, font); +#ifndef NO_COMPABILITY + ert_comp = ert_stack.top(); + ert_stack.pop(); + insertErtContents(par, pos); +#endif + } else if (token == "\\family") { + lex.next(); + font.setLyXFamily(lex.getString()); + } else if (token == "\\series") { + lex.next(); + font.setLyXSeries(lex.getString()); + } else if (token == "\\shape") { + lex.next(); + font.setLyXShape(lex.getString()); + } else if (token == "\\size") { + lex.next(); + font.setLyXSize(lex.getString()); +#ifndef NO_COMPABILITY + } else if (token == "\\latex") { + lex.next(); + string const tok = lex.getString(); + if (tok == "no_latex") { + // Do the insetert. + insertErtContents(par, pos); + } else if (tok == "latex") { + ert_comp.active = true; + ert_comp.font = font; + } else if (tok == "default") { + // Do the insetert. + insertErtContents(par, pos); + } else { + lex.printError("Unknown LaTeX font flag " + "`$$Token'"); + } +#endif + } else if (token == "\\lang") { + lex.next(); + string const tok = lex.getString(); + Language const * lang = languages.getLanguage(tok); + if (lang) { + font.setLanguage(lang); + } else { + font.setLanguage(params.language); + lex.printError("Unknown language `$$Token'"); + } + } else if (token == "\\numeric") { + lex.next(); + font.setNumber(font.setLyXMisc(lex.getString())); + } else if (token == "\\emph") { + lex.next(); + font.setEmph(font.setLyXMisc(lex.getString())); + } else if (token == "\\bar") { + lex.next(); + string const tok = lex.getString(); + // This is dirty, but gone with LyX3. 
(Asger) + if (tok == "under") + font.setUnderbar(LyXFont::ON); + else if (tok == "no") + font.setUnderbar(LyXFont::OFF); + else if (tok == "default") + font.setUnderbar(LyXFont::INHERIT); + else + lex.printError("Unknown bar font flag " + "`$$Token'"); + } else if (token == "\\noun") { + lex.next(); + font.setNoun(font.setLyXMisc(lex.getString())); + } else if (token == "\\color") { + lex.next(); + font.setLyXColor(lex.getString()); + } else if (token == "\\backslash") { +#ifndef NO_COMPABILITY + if (ert_comp.active) { + ert_comp.contents += "\\"; + } else { +#endif + par->insertChar(pos, '\\', font); + ++pos; +#ifndef NO_COMPABILITY + } +#endif } else if (token == "\\begin_float") { insertErtContents(par, pos); //insertErtContents(par, pos, false); @@ -951,71 +1047,6 @@ } else if (token == "\\float_placement") { lex.nextToken(); params.float_placement = lex.getString(); - } else if (token == "\\family") { - lex.next(); - font.setLyXFamily(lex.getString()); - } else if (token == "\\series") { - lex.next(); - font.setLyXSeries(lex.getString()); - } else if (token == "\\shape") { - lex.next(); - font.setLyXShape(lex.getString()); - } else if (token == "\\size") { - lex.next(); - font.setLyXSize(lex.getString()); -#ifndef NO_COMPABILITY - } else if (token == "\\latex") { - lex.next(); - string const tok = lex.getString(); - if (tok == "no_latex") { - // Do the insetert. - insertErtContents(par, pos); - } else if (tok == "latex") { - ert_comp.active = true; - ert_comp.font = font; - } else if (tok == "default") { - // Do the insetert. 
- insertErtContents(par, pos); - } else { - lex.printError("Unknown LaTeX font flag " - "`$$Token'"); - } -#endif - } else if (token == "\\lang") { - lex.next(); - string const tok = lex.getString(); - Language const * lang = languages.getLanguage(tok); - if (lang) { - font.setLanguage(lang); - } else { - font.setLanguage(params.language); - lex.printError("Unknown language `$$Token'"); - } - } else if (token == "\\numeric") { - lex.next(); - font.setNumber(font.setLyXMisc(lex.getString())); - } else if (token == "\\emph") { - lex.next(); - font.setEmph(font.setLyXMisc(lex.getString())); - } else if (token == "\\bar") { - lex.next(); - string const tok = lex.getString(); - // This is dirty, but gone with LyX3. (Asger) - if (tok == "under") - font.setUnderbar(LyXFont::ON); - else if (tok == "no") - font.setUnderbar(LyXFont::OFF); - else if (tok == "default") - font.setUnderbar(LyXFont::INHERIT); - else - lex.printError("Unknown bar font flag " - "`$$Token'"); - } else if (token == "\\noun") { - lex.next(); - font.setNoun(font.setLyXMisc(lex.getString())); - } else if (token == "\\color") { - lex.next(); - font.setLyXColor(lex.getString()); } else if (token == "\\align") { int tmpret = lex.findToken(string_align); if (tmpret == -1) ++tmpret; @@ -1062,26 +1093,6 @@ lex.eatLine(); par->params().labelWidthString(lex.getString()); // do not delete this token, it is still needed! - } else if (token == "\\end_inset") { - lyxerr << "Solitary \\end_inset. Missing \\begin_inset?.\n" - << "Last inset read was: " << last_inset_read - << endl; - // Simply ignore this. The insets do not have - // to read this. - // But insets should read it, it is a part of - // the inset isn't it? Lgb. 
- } else if (token == "\\begin_inset") { -#ifndef NO_COMPABILITY - insertErtContents(par, pos, false); - ert_stack.push(ert_comp); - ert_comp = ErtComp(); -#endif - readInset(lex, par, pos, font); -#ifndef NO_COMPABILITY - ert_comp = ert_stack.top(); - ert_stack.pop(); - insertErtContents(par, pos); -#endif } else if (token == "\\SpecialChar") { LyXLayout const & layout = textclasslist[params.textclass][par->layout()]; @@ -1157,17 +1168,6 @@ par->bibkey = new InsetBibKey(p); } par->bibkey->read(this, lex); - } else if (token == "\\backslash") { -#ifndef NO_COMPABILITY - if (ert_comp.active) { - ert_comp.contents += "\\"; - } else { -#endif - par->insertChar(pos, '\\', font); - ++pos; -#ifndef NO_COMPABILITY - } -#endif } else if (token == "\\the_end") { #ifndef NO_COMPABILITY // If we still have some ert active here we have to insert