Re: Crash in findadv

Kornel Benko Thu, 04 Oct 2018 12:19:22 -0700

Am Donnerstag, 4. Oktober 2018 10:00:42 CEST schrieb Scott Kostyshak 
<skost...@lyx.org>:
> On Thu, Oct 04, 2018 at 10:27:17AM +0200, Kornel Benko wrote:
> > Am Donnerstag, 4. Oktober 2018 10:17:43 CEST schrieb Kornel Benko 
> > <kor...@lyx.org>:
> > > Both, lyx2.4 and lyx2.3
> > > To reproduce:
> > > 
> > > 1.) Open (or create) a small lyx-file. (Say 3 lines)
> > > 2.) select the whole buffer
> > > 3.) in findadv search for a string (ignore format or not)
> > >   ==> crash
> > > 
> > > LASSERT() is called at lyxfind.cpp in stringifyFromForSearch()
> > >   LASSERT(cur.pos() >= 0 && cur.pos() <= cur.lastpos(),
> > >                 return docstring());
> > > 
> > > In my case
> > >   cur.pos() == 12
> > >   cur.lastpos() == 11
> > >   len == -1
> > > 
> > 
> > OK, selecting the lines so, that the last selected line is longer than the 
> > first selected,
> > then we get the crash. Looks like cur.pos() and cur.lastpos() are referring 
> > to different lines.
> 
> I can reproduce. It seems I can reproduce even on 2.1.0, which is
> surprising since I would have guessed this bug would be more common.
> Well, good find! Probably a lot of people have run into this and it
> hasn't been reported (?) [1].
> 
> Scott


I am fighting with regex and language distinction. Our LaTeX output does not
specify enough info for the find routines. Playing with this led to the crash.

Now, I have a working solution. Would you like to test?

With 'ignore format' switched off, you should be able to find, for instance, a
string with a specified language.
Unfortunately, there are many possible 'features' which have to be the
same as in the searched string.

So searching for the regex [a-z]* matches only words in the document language,
not bold/emphasized ones or whatever.

        Kornel

diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index 936ea24..1452f6c 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -797,10 +797,11 @@ static docstring buffer_to_latex(Buffer & buffer)
 	runparams.nice = true;
 	runparams.flavor = OutputParams::LATEX;
 	runparams.linelen = 80; //lyxrc.plaintext_linelen;
 	// No side effect of file copying and image conversion
 	runparams.dryrun = true;
+	runparams.for_search = true;
 	pit_type const endpit = buffer.paragraphs().size();
 	for (pit_type pit = 0; pit != endpit; ++pit) {
 		TeXOnePar(buffer, buffer.text(), pit, os, runparams);
 		LYXERR(Debug::FIND, "searchString up to here: " << ods.str());
 	}
@@ -841,19 +842,105 @@ static size_t identifyLeading(string const & s)
 {
 	string t = s;
 	// @TODO Support \item[text]
 	// Kornel: Added textsl, textsf, textit, texttt and noun
 	// + allow to seach for colored text too
-	while (regex_replace(t, t, REGEX_BOS "\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)\\*?\\{", "")
+	while (regex_replace(t, t, REGEX_BOS "\\\\foreignlanguage\\{[a-z]+\\}\\{", ""))
+		;
+	while (regex_replace(t, t, REGEX_BOS "\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)\\{", "")
 	       || regex_replace(t, t, REGEX_BOS "\\$", "")
 	       || regex_replace(t, t, REGEX_BOS "\\\\\\[ ", "")
 	       || regex_replace(t, t, REGEX_BOS "\\\\item ", "")
 	       || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\} ", ""))
-		LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
+	       ;
+	LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
 	return s.find(t);
 }
 
+static int findclosing(string p, int start, int end)
+{
+	int skip = 0;
+	int depth = 0;
+	for (int i = start; i < end; i += 1 + skip) {
+		char c;
+		c = p[i];
+		skip = 0;
+		if (c == '\\') skip = 1;
+		else if (c == '{') depth++;
+		else if (c == '}') {
+			if (depth == 0) return(i);
+			--depth;
+		}
+	}
+	return(-1);
+}
+
+
+static string correctlanguagesetting(string par, bool from_regex)
+{
+	static string langstart = "\\foreignlanguage{";
+	static int llen = langstart.length();
+	static bool removefirstlang = false;
+
+	int parlen = par.length();
+	string result = par;
+
+	while ((parlen > 0) && (par[parlen-1] == '\n')) {
+		parlen--;
+	}
+	if (par.compare(0, llen, langstart) == 0) {
+		if (from_regex) {
+			removefirstlang = false;
+		}
+		int i = findclosing(par, llen, par.length());
+		if (removefirstlang) {
+			if (i < 0)
+				result = "";
+			else {
+				int closepos = findclosing(par, i+2, par.length());
+				if (closepos > 0) {
+					result = par.substr(i+2, closepos-i-2) + par.substr(closepos+1, parlen - closepos-1);
+				}
+				else {
+					result = par.substr(i+2, parlen-i-2);
+				}
+			}
+		}
+		else if (i > 0) {
+			// skip '}{' after the language spec
+			int closepos = findclosing(par, i+2, par.length());
+			size_t insertpos = par.find(langstart, i+2);
+			if (closepos < 0) {
+				if (insertpos == string::npos) {
+					// there are no closing in par, and no next lang spec
+					result = par.substr(0, parlen) + "}";
+				}
+				else {
+					// Add '}' at insertpos only, because closing is missing
+					result = par.substr(0,insertpos) + "}" + par.substr(insertpos, parlen-insertpos);
+				}
+			}
+			else if ((size_t) closepos > insertpos) {
+				// Add '}' at insertpos and remove from closepos if closepos > insertpos
+				result = par.substr(0,insertpos) + "}" + par.substr(insertpos, closepos - insertpos) + par.substr(closepos+1, parlen -closepos-1);
+			}
+		}
+		else {
+			result = par;
+			// For i == 0, it is empty language spec
+			// and for i < 0 it is Error
+		}
+	}
+	else {
+		if (from_regex) {
+			removefirstlang = true;
+		}
+	}
+	// Either not found language spec,or is single and closed spec or empty
+	return(result);
+}
+
 
 // Remove trailing closure of math, macros and environments, so to catch parts of them.
 static int identifyClosing(string & t)
 {
 	int open_braces = 0;
@@ -885,20 +972,23 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
 	par_as_string = normalize(ds, !use_regexp);
 	open_braces = 0;
 	close_wildcards = 0;
 
 	size_t lead_size = 0;
+	// correct the language settings
+	par_as_string = correctlanguagesetting(par_as_string, true);
 	if (opt.ignoreformat) {
 		if (!use_regexp) {
 			// if par_as_string_nolead were emty,
 			// the following call to findAux will always *find* the string
 			// in the checked data, and thus always using the slow
 			// examining of the current text part.
 			par_as_string_nolead = par_as_string;
 		}
 	} else {
 		lead_size = identifyLeading(par_as_string);
+		LYXERR(Debug::FIND, "Lead_size: " << lead_size);
 		lead_as_string = par_as_string.substr(0, lead_size);
 		par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
 	}
 
 	if (!use_regexp) {
@@ -1106,13 +1196,14 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
 		t.replace(pos, 1, " ");
 	// Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
 	// Kornel: Added textsl, textsf, textit, texttt and noun
 	// + allow to seach for colored text too
 	LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t);
-	while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)(\\{\\})+", ""))
+	while (regex_replace(t, t, "\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)(\\{\\})+", ""))
 		LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
 
+	while (regex_replace(t, t, "\\\\foreignlanguage\\{[a-z]+\\}(\\{(\\\\item )?\\})+", ""));
 	// FIXME - check what preceeds the brace
 	if (hack_braces) {
 		if (opt.ignoreformat)
 			while (regex_replace(t, t, "\\{", "_x_<")
 			       || regex_replace(t, t, "\\}", "_x_>"))
@@ -1183,19 +1274,23 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
 	runparams.nice = false;
 	runparams.flavor = OutputParams::LATEX;
 	runparams.linelen = 8000; //lyxrc.plaintext_linelen;
 	// No side effect of file copying and image conversion
 	runparams.dryrun = true;
+	runparams.for_search = true;
 
 	if (cur.inTexted()) {
 		// @TODO what about searching beyond/across paragraph breaks ?
 		pos_type endpos = cur.paragraph().size();
 		if (len != -1 && endpos > cur.pos() + len)
 			endpos = cur.pos() + len;
 		TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
 			  string(), cur.pos(), endpos);
 		LYXERR(Debug::FIND, "Latexified text: '" << lyx::to_utf8(ods.str()) << "'");
+		string s = correctlanguagesetting(lyx::to_utf8(ods.str()), false);
+		LYXERR(Debug::FIND, "Latexified text: '" << s << "'");
+		return(lyx::from_utf8(s));
 	} else if (cur.inMathed()) {
 		// Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly
 		for (int s = cur.depth() - 1; s >= 0; --s) {
 			CursorSlice const & cs = cur[s];
 			if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) {
@@ -1391,12 +1486,12 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
 
 
 docstring stringifyFromForSearch(FindAndReplaceOptions const & opt,
 				 DocIterator const & cur, int len)
 {
-	LASSERT(cur.pos() >= 0 && cur.pos() <= cur.lastpos(),
-	        return docstring());
+	if (cur.pos() < 0 || cur.pos() > cur.lastpos())
+	        return docstring();
 	if (!opt.ignoreformat)
 		return latexifyFromCursor(cur, len);
 	else
 		return stringifyFromCursor(cur, len);
 }
diff --git a/src/output_latex.cpp b/src/output_latex.cpp
index f73990d..96f51df 100644
--- a/src/output_latex.cpp
+++ b/src/output_latex.cpp
@@ -812,14 +812,16 @@ void TeXOnePar(Buffer const & buf,
 			&& priorpar->layout().isEnvironment()
 			&& (priorpar->getDepth() > par.getDepth()
 			    || (priorpar->getDepth() == par.getDepth()
 				    && priorpar->layout() != par.layout()));
 	Language const * const prev_language =
-		(priorpar && !priorpar->isPassThru())
-		? (use_prev_env_language ? state->prev_env_language_
-					 : priorpar->getParLanguage(bparams))
-		: outer_language;
+		runparams_in.for_search ?
+			languages.getLanguage("ignore")
+		:(priorpar && !priorpar->isPassThru())
+			? (use_prev_env_language ? state->prev_env_language_
+					 	: priorpar->getParLanguage(bparams))
+			: outer_language;
 
 	bool const use_polyglossia = runparams.use_polyglossia;
 	string const par_lang = use_polyglossia ?
 		getPolyglossiaEnvName(par_language): par_language->babel();
 	string const prev_lang = use_polyglossia ?
@@ -852,11 +854,12 @@ void TeXOnePar(Buffer const & buf,
 	bool const in_polyglossia_rtl_env =
 		use_polyglossia
 		&& runparams.local_font != 0
 		&& outer_language->rightToLeft()
 		&& !par_language->rightToLeft();
-	bool const localswitch = text.inset().forceLocalFontSwitch()
+	bool const localswitch = runparams_in.for_search
+			|| text.inset().forceLocalFontSwitch()
 			|| (using_begin_end && text.inset().forcePlainLayout())
 			|| in_polyglossia_rtl_env;
 	if (localswitch) {
 		lang_begin_command = use_polyglossia ?
 			    "\\text$$lang$$opts{" : lyxrc.language_command_local;

signature.asc
Description: This is a digitally signed message part.

Re: Crash in findadv

Reply via email to