Hello,

Please find attached a preliminary patch implementing the idea.
Now, each paragraph that does not contain any match is stringified only
once, and then we move on to the next one. When a paragraph containing a
match is found, it is searched more carefully, position by position.

The improvement w.r.t. the old unconditional pos-by-pos search is significant
(a search in the math manual completes in a few seconds with debug enabled).

Forward search seems OK; backward search still has problems, but I'd like to
collect preliminary comments about the approach.

Also, I had to add a stringify() method to Paragraph that mimics the old
behaviour of Paragraph::asString(), as the latter now seems
to have been switched to a ToC-oriented usage.

Bye,

   T.
Tommaso Cucinotta ha scritto:
I think I can easily modify it so as to realise a hierarchical sequential search (not a real binary search). Instead of advancing one position at a time, exporting from the current position to the end of the paragraph, and then searching for a matching segment *at the start*, I would advance one paragraph at a time, export the entire paragraph, and search for a match anywhere *inside* it; then, if a match is found, I would proceed recursively, advancing one position at a time to find the exact
position at which the match begins.

I'll see if I can play a little bit tonight with the idea.

   T.

Abdelrazak Younes ha scritto:
Yes that too ;-)

That's also why the binary search is the most practical choice...

Abdel.






--
Tommaso Cucinotta, Computer Engineering PhD, Researcher
ReTiS Lab, Scuola Superiore Sant'Anna, Pisa, Italy
Tel +39 050 882 024, Fax +39 050 882 003
http://feanor.sssup.it/~tommaso

Index: src/lyxfind.cpp
===================================================================
--- src/lyxfind.cpp	(revisione 27615)
+++ src/lyxfind.cpp	(copia locale)
@@ -45,6 +45,7 @@
 #include "support/lstrings.h"
 
 #include <boost/regex.hpp>
+#include <boost/next_prior.hpp>
 
 using namespace std;
 using namespace lyx::support;
@@ -534,10 +535,14 @@
 	/** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv
 	 ** constructor as opt.search, under the opt.* options settings.
 	 **
+	 ** @param at_begin
+	 ** 	If set, then match is searched only against beginning of text starting at cur.
+	 ** 	If unset, then match is searched anywhere in text starting at cur.
+	 ** 
 	 ** @return
 	 ** The length of the matching text, or zero if no match was found.
 	 **/
-	int operator()(DocIterator const & cur, int len = -1) const;
+	int operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
 
 public:
 	/// buffer
@@ -564,6 +569,8 @@
 	string par_as_string;
 	// regular expression to use for searching
 	boost::regex regexp;
+	// same as regexp, but prefixed with a ".*"
+	boost::regex regexp2;
 	// unmatched open braces in the search string/regexp
 	int open_braces;
 	// number of (.*?) subexpressions added at end of search regexp for closing
@@ -625,21 +632,30 @@
 		par_as_string = string("\\`") + par_as_string;
 		LYXERR(Debug::DEBUG, "Replaced text (to be used as regex): " << par_as_string);
 		regexp = boost::regex(par_as_string);
+		regexp2 = boost::regex(string(".*") + par_as_string);
 	}
 }
 
-int MatchStringAdv::operator()(DocIterator const & cur, int len) const
+
+int MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
 {
 	docstring docstr = stringifyFromForSearch(opt, buf, cur, len);
 	LYXERR(Debug::DEBUG, "Matching against     '" << lyx::to_utf8(docstr) << "'");
 	string str = normalize(docstr);
 	LYXERR(Debug::DEBUG, "After normalization: '" << str << "'");
 	if (! opt.regexp) {
-		if (str.substr(0, par_as_string.size()) == par_as_string)
-			return par_as_string.size();
+		if (at_begin) {
+			if (str.substr(0, par_as_string.size()) == par_as_string)
+				return par_as_string.size();
+		} else {
+			size_t pos = str.find(par_as_string);
+			if (pos != string::npos)
+				return par_as_string.size();
+		}
 	} else {
 		// Try all possible regexp matches, until one that verifies the braces match test is found
-		boost::sregex_iterator re_it(str.begin(), str.end(), regexp);
+		boost::regex const *p_regexp = at_begin ? &regexp : &regexp2;
+		boost::sregex_iterator re_it(str.begin(), str.end(), *p_regexp);
 		boost::sregex_iterator re_it_end;
 		for (; re_it != re_it_end; ++re_it) {
 			boost::match_results<string::const_iterator> const & m = *re_it;
@@ -693,7 +709,16 @@
 			Paragraph const & par = cur.paragraph();
 			// TODO what about searching beyond/across paragraph breaks ?
 			// TODO Try adding a AS_STR_INSERTS as last arg
-			return par.asString(cur.pos(), ( len == -1 || cur.pos() + len > int(par.size()) ) ? int(par.size()) : cur.pos() + len, AS_STR_INSETS);
+			pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ? int(par.size()) : cur.pos() + len;
+			OutputParams runparams(&cur.buffer()->params().encoding());
+			odocstringstream os;
+			runparams.nice = true;
+			runparams.flavor = OutputParams::LATEX;
+			runparams.linelen = 100000; //lyxrc.plaintext_linelen;
+			// No side effect of file copying and image conversion
+			runparams.dryrun = true;
+			LYXERR(Debug::DEBUG, "Stringifying with cur: " << cur << ", from pos: " << cur.pos() << ", end: " << end);
+			return par.stringify(cur.pos(), end, AS_STR_INSETS, runparams);
 	} else if (cur.inMathed()) {
 			odocstringstream os;
 			CursorSlice cs = cur.top();
@@ -795,7 +820,7 @@
 	size_t d;
 	DocIterator old_cur(cur.buffer());
 	do {
-	  LYXERR(Debug::DEBUG, "Forwarding one step (searching for innermost match)");
+		LYXERR(Debug::DEBUG, "Forwarding one step (searching for innermost match)");
 		d = cur.depth();
 		old_cur = cur;
 		cur.forwardPos();
@@ -823,58 +848,93 @@
 	return len;
 }
 
+
 /// Finds forward
 int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
 {
 	if (!cur)
 		return 0;
-	for (; cur; cur.forwardPos()) {
-		// odocstringstream ods;
-		// ods << _("Searching ... ")
-		//     << (cur.bottom().lastpit() - cur.bottom().pit()) * 100 / total;
-		// cur.message(ods.str());
-		if (match(cur))
-			return findAdvFinalize(cur, match);
-	}
+	int wrap_answer;
+	do {
+		while (cur && !match(cur, -1, false)) {
+			if (cur.pit() < cur.lastpit())
+				cur.forwardPar();
+			else {
+				cur.forwardPos();
+			}
+		}
+		for (; cur; cur.forwardPos()) {
+			if (match(cur))
+				return findAdvFinalize(cur, match);
+		}
+		wrap_answer = frontend::Alert::prompt(
+			_("Wrap search ?"),
+			_("End of document reached while searching forward\n"
+				"\n"
+				"Continue searching from beginning ?"),
+			0, 1, _("&Yes"), _("&No"));
+		cur.clear();
+		cur.push_back(CursorSlice(match.buf.inset()));
+	} while (wrap_answer == 0);
 	return 0;
 }
 
+
 /// Finds backwards
-int findBackwardsAdv(DocIterator & cur, MatchStringAdv const & match)
-{
+int findBackwardsAdv(DocIterator & cur, MatchStringAdv const & match) {
 	//	if (cur.pos() > 0 || cur.depth() > 0)
 	//		cur.backwardPos();
 	DocIterator cur_orig(cur);
 	if (match(cur_orig))
 		findAdvFinalize(cur_orig, match);
-	// int total = cur.bottom().pit() + 1;
-	for (; cur; cur.backwardPos()) {
-		// odocstringstream ods;
-		// ods << _("Searching ... ") << (total - cur.bottom().pit()) * 100 / total;
-		// cur.message(ods.str());
-		if (match(cur)) {
-			// Find the most backward consecutive match within same
-			// paragraph while searching backwards.
-			int pit = cur.pit();
-			int old_len;
-			DocIterator old_cur;
-			int len = findAdvFinalize(cur, match);
-			do {
-				old_cur = cur;
-				old_len = len;
+	//	int total = cur.bottom().pit() + 1;
+	int wrap_answer;
+	do {
+		// TODO No! This does not work as it stands.
+		bool pit_changed = false;
+		while (cur && !match(cur, -1, false)) {
+			if (cur.pit() > 0)
+				--cur.pit();
+			else {
 				cur.backwardPos();
-				LYXERR(Debug::DEBUG, "old_cur: " << old_cur
-						<< ", old_len: " << len << ", cur: " << cur);
-			} while (cur && cur.pit() == pit && match(cur)
-				 && (len = findAdvFinalize(cur, match)) > old_len);
-			cur = old_cur;
-			len = old_len;
-			LYXERR(Debug::DEBUG, "cur_orig    : " << cur_orig);
-			LYXERR(Debug::DEBUG, "cur         : " << cur);
-			if (cur != cur_orig)
-				return len;
+				if (cur)
+					cur.pos() = 0;
+			}
+			pit_changed = true;
 		}
-	}
+		if (cur && pit_changed)
+			cur.pos() = cur.lastpos();
+		for (; cur; cur.backwardPos()) {
+			if (match(cur)) {
+				// Find the most backward consecutive match within same paragraph while searching backwards.
+				int pit = cur.pit();
+				int old_len;
+				DocIterator old_cur;
+				int len = findAdvFinalize(cur, match);
+				do {
+					old_cur = cur;
+					old_len = len;
+					cur.backwardPos();
+					LYXERR(Debug::DEBUG, "old_cur: " << old_cur << ", old_len=" << len << ", cur: " << cur);
+				} while (cur && cur.pit() == pit && match(cur)
+					&& (len = findAdvFinalize(cur, match)) > old_len);
+				cur = old_cur;
+				len = old_len;
+				LYXERR(Debug::DEBUG, "cur_orig    : " << cur_orig);
+				LYXERR(Debug::DEBUG, "cur         : " << cur);
+				if (cur != cur_orig)
+					return len;
+			}
+		}
+		wrap_answer = frontend::Alert::prompt(
+			_("Wrap search ?"),
+			_("Beginning of document reached while searching backwards\n"
+				"\n"
+				"Continue searching from end ?"),
+			0, 1, _("&Yes"), _("&No"));
+		cur = doc_iterator_end(&match.buf);
+		cur.backwardPos();
+	} while (wrap_answer == 0);
 	return 0;
 }
 

Index: src/Paragraph.cpp
===================================================================
--- src/Paragraph.cpp	(revisione 27622)
+++ src/Paragraph.cpp	(copia locale)
@@ -2440,6 +2440,29 @@
 }
 
 
+docstring Paragraph::stringify(pos_type beg, pos_type end, int options, OutputParams & runparams) const
+{
+	odocstringstream os;
+
+	if (beg == 0 
+		&& options & AS_STR_LABEL
+		&& !d->params_.labelString().empty())
+		os << d->params_.labelString() << ' ';
+
+	for (pos_type i = beg; i < end; ++i) {
+		char_type const c = d->text_[i];
+		if (isPrintable(c) || c == '\t'
+		    || (c == '\n' && options & AS_STR_NEWLINES))
+			os.put(c);
+		else if (c == META_INSET && options & AS_STR_INSETS) {
+			getInset(i)->plaintext(os, runparams);
+		}
+	}
+
+	return os.str();
+}
+
+
 void Paragraph::setInsetOwner(Inset const * inset)
 {
 	d->inset_owner_ = inset;
Index: src/lyxfind.h
===================================================================
--- src/lyxfind.h	(revisione 27615)
+++ src/lyxfind.h	(copia locale)
@@ -68,22 +68,23 @@
 
 class FindAdvOptions {
 public:
-  FindAdvOptions(
-                 docstring const & search,
-                 bool casesensitive,
-                 bool matchword,
-                 bool forward,
-                 bool expandmacros,
-                 bool ignoreformat,
-                 bool regexp);
-  FindAdvOptions() {}
-  docstring search;
-  bool casesensitive;
-  bool matchword;
-  bool forward;
-  bool expandmacros;
-  bool ignoreformat;
-  bool regexp;
+	FindAdvOptions(
+		docstring const & search,
+		bool casesensitive,
+		bool matchword,
+		bool forward,
+		bool expandmacros,
+		bool ignoreformat,
+		bool regexp
+	);
+	FindAdvOptions() {  }
+	docstring search;
+	bool casesensitive;
+	bool matchword;
+	bool forward;
+	bool expandmacros;
+	bool ignoreformat;
+	bool regexp;
 };
 
 /// Write a FindAdvOptions instance to a stringstream
@@ -105,8 +106,11 @@
  ** This is useful for computing opt.search from the SearchAdvDialog controller (ControlSearchAdv).
  ** Ideally, this should not be needed, and the opt.search field should become a Text const &.
  **/
-docstring stringifyFromForSearch(FindAdvOptions const & opt,
-    Buffer const & buf, DocIterator const & cur, int len = -1);
+docstring stringifyFromForSearch(
+	FindAdvOptions const & opt,
+	Buffer const & buf,
+	DocIterator const & cur,
+	int len = -1);
 
 } // namespace lyx
 
Index: src/Paragraph.h
===================================================================
--- src/Paragraph.h	(revisione 27615)
+++ src/Paragraph.h	(copia locale)
@@ -124,6 +124,10 @@
 	docstring asString(pos_type beg, pos_type end,
 		int options = AS_STR_NONE) const;
 
+	/// Extract only the explicitly visible text (without any formatting),
+	/// descending into insets
+	docstring stringify(pos_type beg, pos_type end, int options, OutputParams & runparams) const;
+
 	///
 	void write(std::ostream &, BufferParams const &,
 		   depth_type & depth) const;

Reply via email to