Am Donnerstag, 4. Oktober 2018 10:00:42 CEST schrieb Scott Kostyshak <skost...@lyx.org>: > On Thu, Oct 04, 2018 at 10:27:17AM +0200, Kornel Benko wrote: > > Am Donnerstag, 4. Oktober 2018 10:17:43 CEST schrieb Kornel Benko > > <kor...@lyx.org>: > > > Both, lyx2.4 and lyx2.3 > > > To reproduce: > > > > > > 1.) Open (or create) a small lyx-file. (Say 3 lines) > > > 2.) select the whole buffer > > > 3.) in findadv search for a string (ignore format or not) > > > ==> crash > > > > > > LASSERT() is called at lyxfind.cpp in stringifyFromForSearch() > > > LASSERT(cur.pos() >= 0 && cur.pos() <= cur.lastpos(), > > > return docstring()); > > > > > > In my case > > > cur.pos() == 12 > > > cur.lastpos() == 11 > > > len == -1 > > > > > > > OK, selecting the lines so, that the last selected line is longer than the > > first selected, > > then we get the crash. Looks like cur.pos() and cur.lastpos() are referring > > to different lines. > > I can reproduce. It seems I can reproduce even on 2.1.0, which is > surprising since I would have guessed this bug would be more common. > Well, good find! Probably a lot of people have run into this and it > hasn't been reported (?) [1]. > > Scott
I am fighting with regex and language distinction. Our LaTeX output does not specify enough info for the find routines. Playing with this led to the crash. Now I have a working solution. Would you like to test it? With the setting 'not ignore format', you should be able to find, for instance, a string with a specified language. Unfortunately, there are many possible 'features' which have to be the same as in the searched string. So searching with the regex [a-z]* finds only words in the document language that are not bold, emphasized or otherwise formatted. Kornel
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 936ea24..1452f6c 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -797,10 +797,11 @@ static docstring buffer_to_latex(Buffer & buffer) runparams.nice = true; runparams.flavor = OutputParams::LATEX; runparams.linelen = 80; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; + runparams.for_search = true; pit_type const endpit = buffer.paragraphs().size(); for (pit_type pit = 0; pit != endpit; ++pit) { TeXOnePar(buffer, buffer.text(), pit, os, runparams); LYXERR(Debug::FIND, "searchString up to here: " << ods.str()); } @@ -841,19 +842,105 @@ static size_t identifyLeading(string const & s) { string t = s; // @TODO Support \item[text] // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to seach for colored text too - while (regex_replace(t, t, REGEX_BOS "\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)\\*?\\{", "") + while (regex_replace(t, t, REGEX_BOS "\\\\foreignlanguage\\{[a-z]+\\}\\{", "")) + ; + while (regex_replace(t, t, REGEX_BOS "\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)\\{", "") || regex_replace(t, t, REGEX_BOS "\\$", "") || regex_replace(t, t, REGEX_BOS "\\\\\\[ ", "") || regex_replace(t, t, REGEX_BOS "\\\\item ", "") || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\} ", "")) - LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); + ; + LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); return s.find(t); } +static int findclosing(string p, int start, int end) +{ + int skip = 0; + int depth = 0; + for (int i = start; i < end; i += 1 + skip) { + char c; + c = p[i]; + skip = 0; + if (c == '\\') skip = 1; + else if (c == '{') depth++; + else if (c == '}') { + 
if (depth == 0) return(i); + --depth; + } + } + return(-1); +} + + +static string correctlanguagesetting(string par, bool from_regex) +{ + static string langstart = "\\foreignlanguage{"; + static int llen = langstart.length(); + static bool removefirstlang = false; + + int parlen = par.length(); + string result = par; + + while ((parlen > 0) && (par[parlen-1] == '\n')) { + parlen--; + } + if (par.compare(0, llen, langstart) == 0) { + if (from_regex) { + removefirstlang = false; + } + int i = findclosing(par, llen, par.length()); + if (removefirstlang) { + if (i < 0) + result = ""; + else { + int closepos = findclosing(par, i+2, par.length()); + if (closepos > 0) { + result = par.substr(i+2, closepos-i-2) + par.substr(closepos+1, parlen - closepos-1); + } + else { + result = par.substr(i+2, parlen-i-2); + } + } + } + else if (i > 0) { + // skip '}{' after the language spec + int closepos = findclosing(par, i+2, par.length()); + size_t insertpos = par.find(langstart, i+2); + if (closepos < 0) { + if (insertpos == string::npos) { + // there are no closing in par, and no next lang spec + result = par.substr(0, parlen) + "}"; + } + else { + // Add '}' at insertpos only, because closing is missing + result = par.substr(0,insertpos) + "}" + par.substr(insertpos, parlen-insertpos); + } + } + else if ((size_t) closepos > insertpos) { + // Add '}' at insertpos and remove from closepos if closepos > insertpos + result = par.substr(0,insertpos) + "}" + par.substr(insertpos, closepos - insertpos) + par.substr(closepos+1, parlen -closepos-1); + } + } + else { + result = par; + // For i == 0, it is empty language spec + // and for i < 0 it is Error + } + } + else { + if (from_regex) { + removefirstlang = true; + } + } + // Either not found language spec,or is single and closed spec or empty + return(result); +} + // Remove trailing closure of math, macros and environments, so to catch parts of them. 
static int identifyClosing(string & t) { int open_braces = 0; @@ -885,20 +972,23 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & par_as_string = normalize(ds, !use_regexp); open_braces = 0; close_wildcards = 0; size_t lead_size = 0; + // correct the language settings + par_as_string = correctlanguagesetting(par_as_string, true); if (opt.ignoreformat) { if (!use_regexp) { // if par_as_string_nolead were emty, // the following call to findAux will always *find* the string // in the checked data, and thus always using the slow // examining of the current text part. par_as_string_nolead = par_as_string; } } else { lead_size = identifyLeading(par_as_string); + LYXERR(Debug::FIND, "Lead_size: " << lead_size); lead_as_string = par_as_string.substr(0, lead_size); par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); } if (!use_regexp) { @@ -1106,13 +1196,14 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const t.replace(pos, 1, " "); // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to seach for colored text too LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); - while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)(\\{\\})+", "")) + while (regex_replace(t, t, "\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)(\\{\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); + while (regex_replace(t, t, "\\\\foreignlanguage\\{[a-z]+\\}(\\{(\\\\item )?\\})+", "")); // FIXME - check what preceeds the brace if (hack_braces) { if (opt.ignoreformat) while (regex_replace(t, t, "\\{", "_x_<") || regex_replace(t, 
t, "\\}", "_x_>")) @@ -1183,19 +1274,23 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) runparams.nice = false; runparams.flavor = OutputParams::LATEX; runparams.linelen = 8000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; + runparams.for_search = true; if (cur.inTexted()) { // @TODO what about searching beyond/across paragraph breaks ? pos_type endpos = cur.paragraph().size(); if (len != -1 && endpos > cur.pos() + len) endpos = cur.pos() + len; TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams, string(), cur.pos(), endpos); LYXERR(Debug::FIND, "Latexified text: '" << lyx::to_utf8(ods.str()) << "'"); + string s = correctlanguagesetting(lyx::to_utf8(ods.str()), false); + LYXERR(Debug::FIND, "Latexified text: '" << s << "'"); + return(lyx::from_utf8(s)); } else if (cur.inMathed()) { // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly for (int s = cur.depth() - 1; s >= 0; --s) { CursorSlice const & cs = cur[s]; if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) { @@ -1391,12 +1486,12 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) docstring stringifyFromForSearch(FindAndReplaceOptions const & opt, DocIterator const & cur, int len) { - LASSERT(cur.pos() >= 0 && cur.pos() <= cur.lastpos(), - return docstring()); + if (cur.pos() < 0 || cur.pos() > cur.lastpos()) + return docstring(); if (!opt.ignoreformat) return latexifyFromCursor(cur, len); else return stringifyFromCursor(cur, len); } diff --git a/src/output_latex.cpp b/src/output_latex.cpp index f73990d..96f51df 100644 --- a/src/output_latex.cpp +++ b/src/output_latex.cpp @@ -812,14 +812,16 @@ void TeXOnePar(Buffer const & buf, && priorpar->layout().isEnvironment() && (priorpar->getDepth() > par.getDepth() || (priorpar->getDepth() == par.getDepth() && priorpar->layout() != par.layout())); Language const * const prev_language = - (priorpar && 
!priorpar->isPassThru()) - ? (use_prev_env_language ? state->prev_env_language_ - : priorpar->getParLanguage(bparams)) - : outer_language; + runparams_in.for_search ? + languages.getLanguage("ignore") + :(priorpar && !priorpar->isPassThru()) + ? (use_prev_env_language ? state->prev_env_language_ + : priorpar->getParLanguage(bparams)) + : outer_language; bool const use_polyglossia = runparams.use_polyglossia; string const par_lang = use_polyglossia ? getPolyglossiaEnvName(par_language): par_language->babel(); string const prev_lang = use_polyglossia ? @@ -852,11 +854,12 @@ void TeXOnePar(Buffer const & buf, bool const in_polyglossia_rtl_env = use_polyglossia && runparams.local_font != 0 && outer_language->rightToLeft() && !par_language->rightToLeft(); - bool const localswitch = text.inset().forceLocalFontSwitch() + bool const localswitch = runparams_in.for_search + || text.inset().forceLocalFontSwitch() || (using_begin_end && text.inset().forcePlainLayout()) || in_polyglossia_rtl_env; if (localswitch) { lang_begin_command = use_polyglossia ? "\\text$$lang$$opts{" : lyxrc.language_command_local;
signature.asc
Description: This is a digitally signed message part.