Tommaso Cucinotta <tomm...@lyx.org> writes: | Hi, > | I just "enjoyed" extending the Find Advanced feature to exploit multi-cores. > | It exploits QThreadPool to create "worker" threads (as many as the | underlying | physical CPUs detected by QThreadPool). Each thread searches in a single | paragraph, then "queries" the next paragraph to be searched from a | common synchronized "monitor" (see FindAdvHits::nextRange()), then | keeps going. In the end, the | first logically sequential match is returned (i.e., no alteration on the | current behavior/semantics of Find Advanced). So, if a thread searching | later paragraphs finds a hit while another thread that is searching prior | paragraphs is still searching, then the result delivery will wait till the | latter one either finds nothing, or it finds its own match (which will be | returned, in this case). > | From a preliminary/rough test, finding "The above bib" in UserGuide.lyx | (matching on the very last sentence of the document), takes | -) 15 secs with the current trunk | -) 8 secs with this patch applied > | ... on an Intel Core2 Duo P9600@2.66GHz with this configure line ... | ac_cs_config="'--prefix=/usr/local/lyx-trunk' | --with-version-suffix=-trunk' '--disable-debug' | --disable-stdlib-debug' '--enable-threads' '--without-included-boost'" > | Please, find attached the corresponding patch, in case you have any | comments. > | A couple of things that deserve your attention: > | 1) currently, I can make this work only if I enable threading in | boost, i.e., see | beginning of patch applying to configure.ac. I guess this might | slow down | some parts of LyX on single-processors, but, who owns such a weird | machine | nowadays :-) ? We might also add a configure-time option that | enables/disables | the thread-safe boost and the parallelized part of FindAdv, if | desirable | (automatically enabled on multi-cores & disabled on single-cores). 
> | 2) there may be cases in which finding the first hit takes actually | more time, because | of a subsequent long paragraph where searching takes a lot more | time. This | should go away once I merge my old "abort" feature that allows | searching threads | to finish earlier if cancelled by users (it will become | cancellable also by another | thread that found the first hit). > | 3) Still preliminary: the backwards search is not parallel yet > | 4) The patch includes a synchronized log facility (SyncErr) that I | need in this case, | otherwise the messages from the various threads intermix badly | with each other | while debugging > | Any comments welcome. > | Thanks, > | T. > > | Index: configure.ac | =================================================================== | --- configure.ac (revisione 40135) | +++ configure.ac (copia locale) | @@ -291,7 +291,6 @@ | #endif | #define BOOST_ENABLE_ASSERT_HANDLER 1 | | -#define BOOST_DISABLE_THREADS 1 | #define BOOST_NO_WREGEX 1 | #define BOOST_NO_WSTRING 1 | | Index: src/lyxfind.cpp | =================================================================== | --- src/lyxfind.cpp (revisione 40135) | +++ src/lyxfind.cpp (copia locale) | @@ -49,14 +49,39 @@ | #include "support/lassert.h" | #include "support/lstrings.h" | | -#include "support/regex.h" | -#include <boost/next_prior.hpp> | +#include "boost/regex.hpp" | +#include "boost/next_prior.hpp" | | +#include <QThread> | +#include <QThreadPool> | +#include <QMutex> | +#include <QMutexLocker> | +#include <QWaitCondition>
Oh, how I dislike the Qt-fication of LyX... | + | using namespace std; | using namespace lyx::support; | +using namespace boost; | | namespace lyx { | | +#include <unistd.h> | +#include <sys/syscall.h> | +#include <sys/types.h> | + | +static pid_t gettid() { | + pid_t tid; | + tid = syscall(SYS_gettid); | + return tid; | +} Should be placed in support somewhere. Also, you should not need it. Why not use the thread ids from the threading library? (Right, debugging code.) | + | +static QMutex dbg_mtx; | +#define SyncErr(lev, expr) do { \ | + if (lyx::lyxerr.debugging(lev)) { \ | + QMutexLocker lock(&dbg_mtx); \ | + LYXERR(lev, "thread=" << gettid() << ": " << expr); \ | + } \ | + } while (0) | + For debugging, OK... for a permanent solution, make LYXERR do the locking. The use of this macro really muddles the whole patch. | namespace { | | bool parse_bool(docstring & howto) | @@ -489,59 +514,62 @@ | | typedef vector<pair<string, string> > Escapes; | | +struct StaticEscapes { | + Escapes regexp_escapes; | + Escapes lyx_unescapes; | + Escapes regexp_latex_escapes; | + StaticEscapes() { | + regexp_escapes.push_back(pair<string, string>("$", "\\$")); | + regexp_escapes.push_back(pair<string, string>("{", "\\{")); | + regexp_escapes.push_back(pair<string, string>("}", "\\}")); | + regexp_escapes.push_back(pair<string, string>("[", "\\[")); | + regexp_escapes.push_back(pair<string, string>("]", "\\]")); | + regexp_escapes.push_back(pair<string, string>("(", "\\(")); | + regexp_escapes.push_back(pair<string, string>(")", "\\)")); | + regexp_escapes.push_back(pair<string, string>("+", "\\+")); | + regexp_escapes.push_back(pair<string, string>("*", "\\*")); | + regexp_escapes.push_back(pair<string, string>(".", "\\.")); | + regexp_escapes.push_back(pair<string, string>("\\", "(?:\\\\|\\\\backslash)")); | + regexp_escapes.push_back(pair<string, string>("~", "(?:\\\\textasciitilde|\\\\sim)")); | + regexp_escapes.push_back(pair<string, string>("^", 
"(?:\\^|\\\\textasciicircum\\{\\}|\\\\mathcircumflex)")); | + | + lyx_unescapes.push_back(pair<string, string>("\\%", "%")); | + lyx_unescapes.push_back(pair<string, string>("\\mathcircumflex ", "^")); | + lyx_unescapes.push_back(pair<string, string>("\\mathcircumflex", "^")); | + lyx_unescapes.push_back(pair<string, string>("\\backslash ", "\\")); | + lyx_unescapes.push_back(pair<string, string>("\\backslash", "\\")); | + lyx_unescapes.push_back(pair<string, string>("\\\\{", "_x_<")); | + lyx_unescapes.push_back(pair<string, string>("\\\\}", "_x_>")); | + lyx_unescapes.push_back(pair<string, string>("\\sim ", "~")); | + lyx_unescapes.push_back(pair<string, string>("\\sim", "~")); | + | + regexp_latex_escapes.push_back(pair<string, string>("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\})")); | + regexp_latex_escapes.push_back(pair<string, string>("(<?!\\\\\\\\textbackslash)\\{", "\\\\\\{")); | + regexp_latex_escapes.push_back(pair<string, string>("(<?!\\\\\\\\textbackslash\\\\\\{)\\}", "\\\\\\}")); | + regexp_latex_escapes.push_back(pair<string, string>("\\[", "\\{\\[\\}")); | + regexp_latex_escapes.push_back(pair<string, string>("\\]", "\\{\\]\\}")); | + regexp_latex_escapes.push_back(pair<string, string>("\\^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\mathcircumflex)")); | + regexp_latex_escapes.push_back(pair<string, string>("%", "\\\\\\%")); | + } | +}; | + | +static StaticEscapes escapes; | + | /// A map of symbols and their escaped equivalent needed within a regex. 
| /// @note Beware of order | Escapes const & get_regexp_escapes() | { | - static Escapes escape_map; | - if (escape_map.empty()) { | - escape_map.push_back(pair<string, string>("$", "\\$")); | - escape_map.push_back(pair<string, string>("{", "\\{")); | - escape_map.push_back(pair<string, string>("}", "\\}")); | - escape_map.push_back(pair<string, string>("[", "\\[")); | - escape_map.push_back(pair<string, string>("]", "\\]")); | - escape_map.push_back(pair<string, string>("(", "\\(")); | - escape_map.push_back(pair<string, string>(")", "\\)")); | - escape_map.push_back(pair<string, string>("+", "\\+")); | - escape_map.push_back(pair<string, string>("*", "\\*")); | - escape_map.push_back(pair<string, string>(".", "\\.")); | - escape_map.push_back(pair<string, string>("\\", "(?:\\\\|\\\\backslash)")); | - escape_map.push_back(pair<string, string>("~", "(?:\\\\textasciitilde|\\\\sim)")); | - escape_map.push_back(pair<string, string>("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\mathcircumflex)")); | - } | - return escape_map; | + return escapes.regexp_escapes; | } | | /// A map of lyx escaped strings and their unescaped equivalent. | Escapes const & get_lyx_unescapes() { | - static Escapes escape_map; | - if (escape_map.empty()) { | - escape_map.push_back(pair<string, string>("\\%", "%")); | - escape_map.push_back(pair<string, string>("\\mathcircumflex ", "^")); | - escape_map.push_back(pair<string, string>("\\mathcircumflex", "^")); | - escape_map.push_back(pair<string, string>("\\backslash ", "\\")); | - escape_map.push_back(pair<string, string>("\\backslash", "\\")); | - escape_map.push_back(pair<string, string>("\\\\{", "_x_<")); | - escape_map.push_back(pair<string, string>("\\\\}", "_x_>")); | - escape_map.push_back(pair<string, string>("\\sim ", "~")); | - escape_map.push_back(pair<string, string>("\\sim", "~")); | - } | - return escape_map; | + return escapes.lyx_unescapes; | } | | /// A map of escapes turning a regexp matching text to one matching latex. 
| Escapes const & get_regexp_latex_escapes() { | - static Escapes escape_map; | - if (escape_map.empty()) { | - escape_map.push_back(pair<string, string>("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\})")); | - escape_map.push_back(pair<string, string>("(<?!\\\\\\\\textbackslash)\\{", "\\\\\\{")); | - escape_map.push_back(pair<string, string>("(<?!\\\\\\\\textbackslash\\\\\\{)\\}", "\\\\\\}")); | - escape_map.push_back(pair<string, string>("\\[", "\\{\\[\\}")); | - escape_map.push_back(pair<string, string>("\\]", "\\{\\]\\}")); | - escape_map.push_back(pair<string, string>("\\^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\mathcircumflex)")); | - escape_map.push_back(pair<string, string>("%", "\\\\\\%")); | - } | - return escape_map; | + return escapes.regexp_latex_escapes; | } | | /** @todo Probably the maps need to be migrated to regexps, in order to distinguish if | @@ -549,19 +577,19 @@ | **/ | string apply_escapes(string s, Escapes const & escape_map) | { | - LYXERR(Debug::FIND, "Escaping: '" << s << "'"); | + SyncErr(Debug::FIND, "Escaping: '" << s << "'"); | Escapes::const_iterator it; | for (it = escape_map.begin(); it != escape_map.end(); ++it) { | -// LYXERR(Debug::FIND, "Escaping " << it->first << " as " << it->second); | +// SyncErr(Debug::FIND, "Escaping " << it->first << " as " << it->second); | unsigned int pos = 0; | while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) { | s.replace(pos, it->first.length(), it->second); | - LYXERR(Debug::FIND, "After escape: " << s); | + SyncErr(Debug::FIND, "After escape: " << s); | pos += it->second.length(); | -// LYXERR(Debug::FIND, "pos: " << pos); | +// SyncErr(Debug::FIND, "pos: " << pos); | } | } | - LYXERR(Debug::FIND, "Escaped : '" << s << "'"); | + SyncErr(Debug::FIND, "Escaped : '" << s << "'"); | return s; | } | | @@ -576,38 +604,38 @@ | size_t new_pos = s.find("\\regexp{", pos); | if (new_pos == string::npos) | new_pos = s.size(); | - LYXERR(Debug::FIND, "new_pos: " << new_pos); 
| + SyncErr(Debug::FIND, "new_pos: " << new_pos); | string t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes()); | - LYXERR(Debug::FIND, "t [lyx]: " << t); | + SyncErr(Debug::FIND, "t [lyx]: " << t); | t = apply_escapes(t, get_regexp_escapes()); | - LYXERR(Debug::FIND, "t [rxp]: " << t); | + SyncErr(Debug::FIND, "t [rxp]: " << t); | s.replace(pos, new_pos - pos, t); | new_pos = pos + t.size(); | - LYXERR(Debug::FIND, "Regexp after escaping: " << s); | - LYXERR(Debug::FIND, "new_pos: " << new_pos); | + SyncErr(Debug::FIND, "Regexp after escaping: " << s); | + SyncErr(Debug::FIND, "new_pos: " << new_pos); | if (new_pos == s.size()) | break; | // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes) | size_t end_pos = s.find("\\endregexp{}}", new_pos + 8); | - LYXERR(Debug::FIND, "end_pos: " << end_pos); | + SyncErr(Debug::FIND, "end_pos: " << end_pos); | t = s.substr(new_pos + 8, end_pos - (new_pos + 8)); | - LYXERR(Debug::FIND, "t in regexp : " << t); | + SyncErr(Debug::FIND, "t in regexp : " << t); | t = apply_escapes(t, get_lyx_unescapes()); | - LYXERR(Debug::FIND, "t in regexp [lyx]: " << t); | + SyncErr(Debug::FIND, "t in regexp [lyx]: " << t); | if (match_latex) { | t = apply_escapes(t, get_regexp_latex_escapes()); | - LYXERR(Debug::FIND, "t in regexp [ltx]: " << t); | + SyncErr(Debug::FIND, "t in regexp [ltx]: " << t); | } | if (end_pos == s.size()) { | s.replace(new_pos, end_pos - new_pos, t); | pos = s.size(); | - LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s); | + SyncErr(Debug::FIND, "Regexp after \\regexp{} removal: " << s); | break; | } | s.replace(new_pos, end_pos + 13 - new_pos, t); | - LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s); | + SyncErr(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s); | pos = new_pos + t.size(); | - LYXERR(Debug::FIND, "pos: " << pos); | + SyncErr(Debug::FIND, "pos: " << pos); | } | return s; | } | @@ -616,10 +644,10 @@ 
| bool regex_replace(string const & s, string & t, string const & searchstr, | string const & replacestr) | { | - lyx::regex e(searchstr); | + boost::regex e(searchstr); | ostringstream oss; | ostream_iterator<char, char> it(oss); | - lyx::regex_replace(it, s.begin(), s.end(), e, replacestr); | + boost::regex_replace(it, s.begin(), s.end(), e, replacestr); | // tolerate t and s be references to the same variable | bool rv = (s != oss.str()); | t = oss.str(); | @@ -640,7 +668,7 @@ | { | int open_pars = 0; | string::const_iterator it = beg; | - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'"); | + SyncErr(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'"); | for (; it != end; ++it) { | // Skip escaped braces in the count | if (*it == '\\') { | @@ -651,19 +679,19 @@ | ++open_pars; | } else if (*it == '}') { | if (open_pars == 0) { | - LYXERR(Debug::FIND, "Found unmatched closed brace"); | + SyncErr(Debug::FIND, "Found unmatched closed brace"); | return false; | } else | --open_pars; | } | } | if (open_pars != unmatched) { | - LYXERR(Debug::FIND, "Found " << open_pars | + SyncErr(Debug::FIND, "Found " << open_pars | << " instead of " << unmatched | << " unmatched open braces at the end of count"); | return false; | } | - LYXERR(Debug::FIND, "Braces match as expected"); | + SyncErr(Debug::FIND, "Braces match as expected"); | return true; | } | | @@ -714,9 +742,9 @@ | // normalized string to search | string par_as_string; | // regular expression to use for searching | - lyx::regex regexp; | + boost::regex regexp; | // same as regexp, but prefixed with a ".*" | - lyx::regex regexp2; | + boost::regex regexp2; | // leading format material as string | string lead_as_string; | // par_as_string after removal of lead_as_string | @@ -745,7 +773,7 @@ | pit_type const endpit = buffer.paragraphs().size(); | for (pit_type pit = 0; pit != endpit; ++pit) { | TeXOnePar(buffer, buffer.text(), 
pit, os, runparams); | - LYXERR(Debug::FIND, "searchString up to here: " << ods.str()); | + SyncErr(Debug::FIND, "searchString up to here: " << ods.str()); | } | return ods.str(); | } | @@ -763,7 +791,7 @@ | runparams.dryrun = true; | for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { | Paragraph const & par = buffer.paragraphs().at(pit); | - LYXERR(Debug::FIND, "Adding to search string: '" | + SyncErr(Debug::FIND, "Adding to search string: '" | << par.stringify(pos_type(0), par.size(), | AS_STR_INSETS, runparams) | << "'"); | @@ -784,7 +812,7 @@ | || regex_replace(t, t, "^\\\\\\[ ", "") | || regex_replace(t, t, "^\\\\item ", "") | || regex_replace(t, t, "^\\\\begin\\{[a-zA-Z_]*\\*?\\} ", "")) | - LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); | + SyncErr(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); | return s.find(t); | } | | @@ -793,7 +821,7 @@ | static int identifyClosing(string & t) { | int open_braces = 0; | do { | - LYXERR(Debug::FIND, "identifyClosing(): t now is '" << t << "'"); | + SyncErr(Debug::FIND, "identifyClosing(): t now is '" << t << "'"); | if (regex_replace(t, t, "(.*[^\\\\])\\$\\'", "$1")) | continue; | if (regex_replace(t, t, "(.*[^\\\\]) \\\\\\]\\'", "$1")) | @@ -831,20 +859,20 @@ | if (!use_regexp) { | open_braces = identifyClosing(par_as_string); | identifyClosing(par_as_string_nolead); | - LYXERR(Debug::FIND, "Open braces: " << open_braces); | - LYXERR(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'"); | + SyncErr(Debug::FIND, "Open braces: " << open_braces); | + SyncErr(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'"); | } else { | string lead_as_regexp; | if (lead_size > 0) { | // @todo No need to search for \regexp{} insets in leading material | lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat); | 
par_as_string = par_as_string_nolead; | - LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); | - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); | + SyncErr(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); | + SyncErr(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); | } | par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat); | // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them. | - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); | + SyncErr(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); | if ( | // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex) | regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2") | @@ -858,19 +886,19 @@ | ) { | ++close_wildcards; | } | - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); | - LYXERR(Debug::FIND, "Open braces: " << open_braces); | - LYXERR(Debug::FIND, "Close .*? : " << close_wildcards); | - LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); | + SyncErr(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); | + SyncErr(Debug::FIND, "Open braces: " << open_braces); | + SyncErr(Debug::FIND, "Close .*? 
: " << close_wildcards); | + SyncErr(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); | // If entered regexp must match at begin of searched string buffer | string regexp_str = string("\\`") + lead_as_regexp + par_as_string; | - LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); | - regexp = lyx::regex(regexp_str); | + SyncErr(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); | + regexp = boost::regex(regexp_str); | | // If entered regexp may match wherever in searched string buffer | string regexp2_str = string("\\`.*") + lead_as_regexp + ".*" + par_as_string; | - LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); | - regexp2 = lyx::regex(regexp2_str); | + SyncErr(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); | + regexp2 = boost::regex(regexp2_str); | } | } | | @@ -878,14 +906,14 @@ | int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const | { | docstring docstr = stringifyFromForSearch(opt, cur, len); | - LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); | + SyncErr(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); | string str = normalize(docstr, true); | - LYXERR(Debug::FIND, "After normalization: '" << str << "'"); | + SyncErr(Debug::FIND, "After normalization: '" << str << "'"); | if (! 
use_regexp) { | - LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='" << par_as_string << "', str='" << str << "'"); | - LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='" << lead_as_string << "', par_as_string_nolead='" << par_as_string_nolead << "'"); | + SyncErr(Debug::FIND, "Searching in normal mode: par_as_string='" << par_as_string << "', str='" << str << "'"); | + SyncErr(Debug::FIND, "Searching in normal mode: lead_as_string='" << lead_as_string << "', par_as_string_nolead='" << par_as_string_nolead << "'"); | if (at_begin) { | - LYXERR(Debug::FIND, "size=" << par_as_string.size() << ", substr='" << str.substr(0, par_as_string.size()) << "'"); | + SyncErr(Debug::FIND, "size=" << par_as_string.size() << ", substr='" << str.substr(0, par_as_string.size()) << "'"); | if (str.substr(0, par_as_string.size()) == par_as_string) | return par_as_string.size(); | } else { | @@ -894,7 +922,7 @@ | return par_as_string.size(); | } | } else { | - LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); | + SyncErr(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); | // Try all possible regexp matches, | //until one that verifies the braces match test is found | regex const *p_regexp = at_begin ? &regexp : &regexp2; | @@ -926,7 +954,7 @@ | int MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const | { | int res = findAux(cur, len, at_begin); | - LYXERR(Debug::FIND, | + SyncErr(Debug::FIND, | "res=" << res << ", at_begin=" << at_begin << ", matchword=" << opt.matchword << ", inTexted=" << cur.inTexted()); | if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted()) | return res; | @@ -935,7 +963,7 @@ | par.isWordSeparator(cur.pos() - 1) : true; | bool ws_right = cur.pos() + res < par.size() ? 
| par.isWordSeparator(cur.pos() + res) : true; | - LYXERR(Debug::FIND, | + SyncErr(Debug::FIND, | "cur.pos()=" << cur.pos() << ", res=" << res | << ", separ: " << ws_left << ", " << ws_right | << endl); | @@ -963,20 +991,20 @@ | while ((pos = t.find("\n")) != string::npos) | t.replace(pos, 1, " "); | // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify | - LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); | + SyncErr(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); | while (regex_replace(t, t, "\\\\(emph|textbf|subsubsection|subsection|section|subparagraph|paragraph|part)(\\{\\})+", "")) | - LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); | + SyncErr(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); | | // FIXME - check what preceeds the brace | if (hack_braces) { | if (opt.ignoreformat) | while (regex_replace(t, t, "\\{", "_x_<") | || regex_replace(t, t, "\\}", "_x_>")) | - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); | + SyncErr(Debug::FIND, "After {} replacement: '" << t << "'"); | else | while (regex_replace(t, t, "\\\\\\{", "_x_<") | || regex_replace(t, t, "\\\\\\}", "_x_>")) | - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); | + SyncErr(Debug::FIND, "After {} replacement: '" << t << "'"); | } | | return t; | @@ -985,7 +1013,7 @@ | | docstring stringifyFromCursor(DocIterator const & cur, int len) | { | - LYXERR(Debug::FIND, "Stringifying with len=" << len << " from cursor at pos: " << cur); | + SyncErr(Debug::FIND, "Stringifying with len=" << len << " from cursor at pos: " << cur); | if (cur.inTexted()) { | Paragraph const & par = cur.paragraph(); | // TODO what about searching beyond/across paragraph breaks ? 
| @@ -999,7 +1027,7 @@ | runparams.linelen = 100000; //lyxrc.plaintext_linelen; | // No side effect of file copying and image conversion | runparams.dryrun = true; | - LYXERR(Debug::FIND, "Stringifying with cur: " | + SyncErr(Debug::FIND, "Stringifying with cur: " | << cur << ", from pos: " << cur.pos() << ", end: " << end); | return par.stringify(cur.pos(), end, AS_STR_INSETS, runparams); | } else if (cur.inMathed()) { | @@ -1011,10 +1039,10 @@ | ? md.end() : md.begin() + cs.pos() + len ); | for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it) | s = s + asString(*it); | - LYXERR(Debug::FIND, "Stringified math: '" << s << "'"); | + SyncErr(Debug::FIND, "Stringified math: '" << s << "'"); | return s; | } | - LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur); | + SyncErr(Debug::FIND, "Don't know how to stringify from here: " << cur); | return docstring(); | } | | @@ -1025,8 +1053,8 @@ | */ | docstring latexifyFromCursor(DocIterator const & cur, int len) | { | - LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); | - LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" | + SyncErr(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); | + SyncErr(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" | << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); | Buffer const & buf = *cur.buffer(); | LASSERT(buf.params().isLatex(), /* */); | @@ -1048,7 +1076,7 @@ | endpos = cur.pos() + len; | TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams, | string(), cur.pos(), endpos); | - LYXERR(Debug::FIND, "Latexified text: '" << lyx::to_utf8(ods.str()) << "'"); | + SyncErr(Debug::FIND, "Latexified text: '" << lyx::to_utf8(ods.str()) << "'"); | } else if (cur.inMathed()) { | // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly | for (int s = cur.depth() - 1; s >= 0; --s) { | @@ 
-1078,9 +1106,9 @@ | break; | } | } | - LYXERR(Debug::FIND, "Latexified math: '" << lyx::to_utf8(ods.str()) << "'"); | + SyncErr(Debug::FIND, "Latexified math: '" << lyx::to_utf8(ods.str()) << "'"); | } else { | - LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur); | + SyncErr(Debug::FIND, "Don't know how to stringify from here: " << cur); | } | return ods.str(); | } | @@ -1097,23 +1125,23 @@ | size_t d; | DocIterator old_cur(cur.buffer()); | do { | - LYXERR(Debug::FIND, "Forwarding one step (searching for innermost match)"); | + SyncErr(Debug::FIND, "Forwarding one step (searching for innermost match)"); | d = cur.depth(); | old_cur = cur; | cur.forwardPos(); | } while (cur && cur.depth() > d && match(cur) > 0); | cur = old_cur; | LASSERT(match(cur) > 0, /* */); | - LYXERR(Debug::FIND, "Ok"); | + SyncErr(Debug::FIND, "Ok"); | | // Compute the match length | int len = 1; | if (cur.pos() + len > cur.lastpos()) | return 0; | - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); | + SyncErr(Debug::FIND, "verifying unmatch with len = " << len); | while (cur.pos() + len <= cur.lastpos() && match(cur, len) == 0) { | ++len; | - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); | + SyncErr(Debug::FIND, "verifying unmatch with len = " << len); | } | // Length of matched text (different from len param) | int old_len = match(cur, len); | @@ -1122,26 +1150,26 @@ | while ((new_len = match(cur, len + 1)) > old_len) { | ++len; | old_len = new_len; | - LYXERR(Debug::FIND, "verifying match with len = " << len); | + SyncErr(Debug::FIND, "verifying match with len = " << len); | } | return len; | } | | | /// Finds forward | -int findForwardAdv(DocIterator & cur, MatchStringAdv & match) | +int findForwardAdv(DocIterator & cur, MatchStringAdv const & match, DocIterator const & cur_end) | { | if (!cur) | return 0; | - while (cur) { | - LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); | + while (cur && cur < cur_end) { | + SyncErr(Debug::FIND, 
"findForwardAdv() cur: " << cur); | int match_len = match(cur, -1, false); | - LYXERR(Debug::FIND, "match_len: " << match_len); | - if (match_len) { | - for (; cur; cur.forwardPos()) { | - LYXERR(Debug::FIND, "Advancing cur: " << cur); | + SyncErr(Debug::FIND, "match_len: " << match_len); | + if (match_len && cur < cur_end) { | + for (; cur && cur < cur_end; cur.forwardPos()) { | + SyncErr(Debug::FIND, "Advancing cur: " << cur); | int match_len = match(cur); | - LYXERR(Debug::FIND, "match_len: " << match_len); | + SyncErr(Debug::FIND, "match_len: " << match_len); | if (match_len) { | // Sometimes in finalize we understand it wasn't a match | // and we need to continue the outest loop | @@ -1150,16 +1178,16 @@ | return len; | } | } | - if (!cur) | + if (!cur || cur == cur_end) | return 0; | } | if (cur.pit() < cur.lastpit()) { | - LYXERR(Debug::FIND, "Advancing par: cur=" << cur); | + SyncErr(Debug::FIND, "Advancing par: cur=" << cur); | cur.forwardPar(); | } else { | // This should exit nested insets, if any, or otherwise undefine the currsor. 
| cur.pos() = cur.lastpos();
| - LYXERR(Debug::FIND, "Advancing pos: cur=" << cur);
| + SyncErr(Debug::FIND, "Advancing pos: cur=" << cur);
| cur.forwardPos();
| }
| }
| @@ -1175,7 +1203,7 @@
| int len = findAdvFinalize(tmp_cur, match);
| Inset & inset = cur.inset();
| for (; cur != cur_begin; cur.backwardPos()) {
| - LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
| + SyncErr(Debug::FIND, "findMostBackwards(): cur=" << cur);
| DocIterator new_cur = cur;
| new_cur.backwardPos();
| if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur))
| @@ -1185,7 +1213,7 @@
| break;
| len = new_len;
| }
| - LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur);
| + SyncErr(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur);
| return len;
| }
|
| @@ -1212,11 +1240,11 @@
| cur.pos() = cur.lastpos();
| else
| cur.pos() = cur_orig.pos();
| - LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur);
| + SyncErr(Debug::FIND, "findBackAdv2: cur: " << cur);
| DocIterator cur_prev_iter;
| do {
| found_match = match(cur);
| - LYXERR(Debug::FIND, "findBackAdv3: found_match="
| + SyncErr(Debug::FIND, "findBackAdv3: found_match="
| << found_match << ", cur: " << cur);
| if (found_match)
| return findMostBackwards(cur, match);
| @@ -1282,13 +1310,13 @@
| static bool firstUppercase(DocIterator const & cur) {
| char_type ch1, ch2;
| if (cur.pos() >= cur.lastpos() - 1) {
| - LYXERR(Debug::FIND, "No upper-case at cur: " << cur);
| + SyncErr(Debug::FIND, "No upper-case at cur: " << cur);
| return false;
| }
| ch1 = cur.paragraph().getChar(cur.pos());
| ch2 = cur.paragraph().getChar(cur.pos()+1);
| bool result = isUpperCase(ch1) && isLowerCase(ch2);
| - LYXERR(Debug::FIND, "firstUppercase(): "
| + SyncErr(Debug::FIND, "firstUppercase(): "
| << "ch1=" << ch1 << "(" << char(ch1) << "), ch2="
| << ch2 << "(" << char(ch2) << ")"
| << ", result=" << result << ", cur=" << cur);
| @@ -1322,7 +1350,7 @@
| || sel_beg.pit() != sel_end.pit())
| return;
| int sel_len = sel_end.pos() - sel_beg.pos();
| - LYXERR(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end
| + SyncErr(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end
| << ", sel_len: " << sel_len << endl);
| if (sel_len == 0)
| return;
| @@ -1352,12 +1380,12 @@
| repl_buffer.changeLanguage(
| repl_buffer.language(),
| cur.getFont().language());
| - LYXERR(Debug::FIND, "Replacing by pasteParagraphList()ing repl_buffer");
| - LYXERR(Debug::FIND, "Before pasteParagraphList() cur=" << cur << endl);
| + SyncErr(Debug::FIND, "Replacing by pasteParagraphList()ing repl_buffer");
| + SyncErr(Debug::FIND, "Before pasteParagraphList() cur=" << cur << endl);
| cap::pasteParagraphList(cur, repl_buffer.paragraphs(),
| repl_buffer.params().documentClassPtr(),
| bv->buffer().errorList("Paste"));
| - LYXERR(Debug::FIND, "After pasteParagraphList() cur=" << cur << endl);
| + SyncErr(Debug::FIND, "After pasteParagraphList() cur=" << cur << endl);
| sel_len = repl_buffer.paragraphs().begin()->size();
| } else if (cur.inMathed()) {
| TexRow texrow;
| @@ -1371,29 +1399,174 @@
| TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams);
| //repl_buffer.getSourceCode(ods, 0, repl_buffer.paragraphs().size(), false);
| docstring repl_latex = ods.str();
| - LYXERR(Debug::FIND, "Latexified replace_buffer: '" << repl_latex << "'");
| + SyncErr(Debug::FIND, "Latexified replace_buffer: '" << repl_latex << "'");
| string s;
| regex_replace(to_utf8(repl_latex), s, "\\$(.*)\\$", "$1");
| regex_replace(s, s, "\\\\\\[(.*)\\\\\\]", "$1");
| repl_latex = from_utf8(s);
| - LYXERR(Debug::FIND, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth());
| + SyncErr(Debug::FIND, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth());
| MathData ar(cur.buffer());
| asArray(repl_latex, ar, Parse::NORMAL);
| cur.insert(ar);
| sel_len = ar.size();
| - LYXERR(Debug::FIND, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
| + SyncErr(Debug::FIND, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
| }
| if (cur.pos() >= sel_len)
| cur.pos() -= sel_len;
| else
| cur.pos() = 0;
| - LYXERR(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
| + SyncErr(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
| bv->putSelectionAt(DocIterator(cur), sel_len, !opt.forward);
| bv->processUpdateFlags(Update::Force);
| bv->buffer().updatePreviews();
| }
|
|
| +struct FindAdvHit {
| + DocIterator dit;
| + int match_len; //< -1 means invalid FindAdvHit contents
| +
| +
| + FindAdvHit()
| + : match_len(-1) { }
| +
| +
| + FindAdvHit(DocIterator const & dit, int match_len)
| + : dit(dit), match_len(match_len) { }
| +};
| +
| +/// Synchronized vector of hits.
| +/// Allows worker threads to push their search results here,
| +/// and the parent to wait for the first hit to become available.
| +class FindAdvHits {
| + std::map<int, FindAdvHit> hits_;
| + mutable QMutex hits_mtx_;
| + QWaitCondition found_;
| + DocIterator cur_;
| + int first_id_; ///< Id of first par search result to be waited for
| + int last_id_; ///< Id of last par still being searched
| +
| + bool exists_(int id) const {
| + std::map<int, FindAdvHit>::const_iterator it = hits_.find(id);
| + return it != hits_.end();
| + }
| +
| +public:
| + FindAdvHits() {
| + first_id_ = 0;
| + last_id_ = -1;
| + }
| +
| +
| + void clear(DocIterator const & cur) {
| + QMutexLocker lock(&hits_mtx_);
| + first_id_ = 0;
| + last_id_ = -1;
| + hits_.clear();
| + cur_ = cur;
| + }
| +
| +
| + int nextRange(DocIterator & cur_beg, DocIterator & cur_end) {
| + QMutexLocker lock(&hits_mtx_);
| + if (!cur_)
| + return -1;
| + cur_beg = cur_;
| + cur_.forwardPar();
| + if (cur_)
| + cur_.pos() = 0;
| + cur_end = cur_;
| + ++last_id_;
| + SyncErr(Debug::FIND, "Returning range from cur_beg=" << cur_beg << " to cur_end=" << cur_end << ", last_id_=" << last_id_);
| + return last_id_;
| + }
| +
| +
| + void insert(int id, FindAdvHit const & hit) {
| + QMutexLocker lock(&hits_mtx_);
| + // @todo Insertion and immediate removal can be avoided, but...
| + hits_[id] = hit;
| + if (id == first_id_) {
| + while (exists_(first_id_) && hits_[first_id_].match_len == 0) {
| + SyncErr(Debug::FIND, "Erasing id=" << first_id_);
| + hits_.erase(first_id_);
| + ++first_id_;
| + SyncErr(Debug::FIND, "first_id_=" << first_id_);
| + }
| + SyncErr(Debug::FIND, "first_id_=" << first_id_ << ", last_id_=" << last_id_);
| + /* if ((exists_(first_id_) && hits_[first_id_].match_len > 0) */
| + /* || first_id_ > last_id_) */
| + }
| + SyncErr(Debug::FIND, "Waking up");
| + found_.wakeOne();
| + }
| +
| +
| + int waitForFirstHit(DocIterator & dit) {
| + QMutexLocker lock(&hits_mtx_);
| + SyncErr(Debug::FIND, "Searching for first_id_=" << first_id_ << ", last_id_=" << last_id_);
| + std::map<int, FindAdvHit>::const_iterator it = hits_.find(first_id_);
| + while (it == hits_.end() && (last_id_ == -1 || first_id_ <= last_id_)) {
| + SyncErr(Debug::FIND, "waiting for found_");
| + found_.wait(&hits_mtx_);
| + SyncErr(Debug::FIND, "Searching for first_id_=" << first_id_);
| + it = hits_.find(first_id_);
| + SyncErr(Debug::FIND, "Woken up: it==end is " << (it == hits_.end()));
| + }
| + if (it == hits_.end() || it->second.match_len == 0)
| + return 0;
| + FindAdvHit const & hit = it->second;
| + dit = hit.dit;
| + return hit.match_len;
| + }
| +};
| +
| +
| +static FindAdvHits hits;
| +
| +
| +class FindAdvThread : public QRunnable {
| + MatchStringAdv const & matchAdv_;
| +
| +public:
| + FindAdvThread(MatchStringAdv const & matchAdv)
| + : matchAdv_(matchAdv) { }
| +
| + void run() {
| + do {
| + DocIterator dit;
| + DocIterator dit_end;
| + int id = hits.nextRange(dit, dit_end);
| + if (id < 0)
| + break;
| + SyncErr(Debug::FIND, "Calling findForwardAdv() from " << dit << " to " << dit_end << ", id_=" << id);
| + int match_len = findForwardAdv(dit, matchAdv_, dit_end);
| + SyncErr(Debug::FIND, "findForwardAdv() result (id=" << id << "): dit=" << dit << ", match_len=" << match_len);
| + hits.insert(id, FindAdvHit(dit, match_len));
| + /// @todo REVIEW EXIT CONDITION!!!
| + if (match_len > 0)
| + break;
| + } while (true);
| + }
| +};
| +
| +
| +int findForwardAdvPar(DocIterator & cur, MatchStringAdv const & match) {
| + QRunnable *p_thread;
| + hits.clear(cur);
| + for (int i = 0; i < QThread::idealThreadCount(); ++i) {
| + // Ownership taken by QThreadPool on start()
| + SyncErr(Debug::FIND, "Starting findadv thread");
| + p_thread = new FindAdvThread(match);
| + QThreadPool::globalInstance()->start(p_thread);
| + }
| + int match_len = hits.waitForFirstHit(cur);
| + SyncErr(Debug::FIND, "Waiting for children - match_len=" << match_len);
| + QThreadPool::globalInstance()->waitForDone();
| + return match_len;
| +}
| +
| +
| /// Perform a FindAdv operation.
| bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
| {
| @@ -1404,12 +1577,12 @@
| MatchStringAdv matchAdv(bv->buffer(), opt);
| findAdvReplace(bv, opt, matchAdv);
| cur = bv->cursor();
| - if (opt.forward)
| - match_len = findForwardAdv(cur, matchAdv);
| - else
| - match_len = findBackwardsAdv(cur, matchAdv);
| + if (opt.forward) {
| + match_len = findForwardAdvPar(cur, matchAdv);
| + } else
| + match_len = findBackwardsAdv(cur, matchAdv);
| } catch (...) {
| - // This may only be raised by lyx::regex()
| + // This may only be raised by boost::regex()
| bv->message(_("Invalid regular expression!"));
| return false;
| }
| @@ -1421,7 +1594,7 @@
|
| bv->message(_("Match found!"));
|
| - LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len);
| + SyncErr(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len);
| bv->putSelectionAt(cur, match_len, !opt.forward);
|
| return true;
| @@ -1440,7 +1613,7 @@
| << opt.keep_case << ' '
| << int(opt.scope);
|
| - LYXERR(Debug::FIND, "built: " << os.str());
| + SyncErr(Debug::FIND, "built: " << os.str());
|
| return os;
| }
| @@ -1448,7 +1621,7 @@
|
| istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
| {
| - LYXERR(Debug::FIND, "parsing");
| + SyncErr(Debug::FIND, "parsing");
| string s;
| string line;
| getline(is, line);
| @@ -1460,7 +1633,7 @@
| break;
| getline(is, line);
| }
| - LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
| + SyncErr(Debug::FIND, "file_buf_name: '" << s << "'");
| opt.find_buf_name = from_utf8(s);
| is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat;
| is.get(); // Waste space before replace string
| @@ -1474,13 +1647,13 @@
| break;
| getline(is, line);
| }
| - LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
| + SyncErr(Debug::FIND, "repl_buf_name: '" << s << "'");
| opt.repl_buf_name = from_utf8(s);
| is >> opt.keep_case;
| int i;
| is >> i;
| opt.scope = FindAndReplaceOptions::SearchScope(i);
| - LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' '
| + SyncErr(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' '
| << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case);
| return is;
| }