We use UTF-8 encoded strings. In order to fully use regular expressions in FindAdv, we need the created regex to be case sensitive. This works well for ASCII characters, but fails on
The attempt to use docstring works better on Unix systems, but there are some problems: 1.) regexes are still unable to handle case-insensitivity for e.g. Cyrillic characters, although one can already use constructs like [а-и] here. 2.) under MinGW we get an 'invalid regex' error. This means that using docstring does not help much. The next attempt is using QString. This works pretty well, but uses classes introduced in Qt5. The differences in lyxfind.cpp are big, so using '#ifdef's does not seem appropriate. (See attached) So the question is: Can we drop support for Qt4 in master? Or does someone have a better solution? Kornel
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 4ba14ee088..e89f806ee9 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -49,10 +49,11 @@ #include "support/lstrings.h" #include "support/textutils.h" #include <map> #include <regex> +#include <QRegularExpression> using namespace std; using namespace lyx::support; namespace lyx { @@ -791,26 +792,26 @@ bool regex_replace(string const & s, string & t, string const & searchstr, ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'. ** ** @param unmatched ** Number of open braces that must remain open at the end for the verification to succeed. **/ -bool braces_match(string::const_iterator const & beg, - string::const_iterator const & end, +bool braces_match(QString const & beg, int unmatched = 0) { int open_pars = 0; - string::const_iterator it = beg; - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'"); - for (; it != end; ++it) { + LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'"); + int lastidx = beg.size(); + for (int i=0; i < lastidx; ++i) { // Skip escaped braces in the count - if (*it == '\\') { - ++it; - if (it == end) + QChar c = beg.at(i); + if (c == '\\') { + ++i; + if (i >= lastidx) break; - } else if (*it == '{') { + } else if (c == '{') { ++open_pars; - } else if (*it == '}') { + } else if (c == '}') { if (open_pars == 0) { LYXERR(Debug::FIND, "Found unmatched closed brace"); return false; } else --open_pars; @@ -835,10 +836,11 @@ public: MatchResult(): match_len(0),match2end(0), pos(0) {}; }; /** The class performing a match between a position in the document and the FindAdvOptions. 
**/ + class MatchStringAdv { public: MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt); /** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv @@ -850,10 +852,12 @@ public: ** ** @return ** The length of the matching text, or zero if no match was found. **/ MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; + bool regexIsValid; + string regexError; public: /// buffer lyx::Buffer * p_buf; /// first buffer on which search was started @@ -880,13 +884,13 @@ private: **/ string normalize(docstring const & s, bool hack_braces) const; // normalized string to search string par_as_string; // regular expression to use for searching - regex regexp; + QRegularExpression regexp; // same as regexp, but prefixed with a ".*?" - regex regexp2; + QRegularExpression regexp2; // leading format material as string string lead_as_string; // par_as_string after removal of lead_as_string string par_as_string_nolead; // unmatched open braces in the search string/regexp @@ -2880,13 +2884,13 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & break; } lng -= 2; open_braces++; } - else + else break; -} + } if (lng < par_as_string.size()) par_as_string = par_as_string.substr(0,lng); /* // save '\.' regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_"); @@ -2921,40 +2925,74 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & regexp_str = "(" + lead_as_regexp + ")" + par_as_string; regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string; } LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + QRegularExpression::PatternOptions popts; if (! 
opt.casesensitive) { - regexp = regex(regexp_str, std::regex_constants::icase); - regexp2 = regex(regexp2_str, std::regex_constants::icase); + popts = QRegularExpression::CaseInsensitiveOption; } else { - regexp = regex(regexp_str); - regexp2 = regex(regexp2_str); + popts = QRegularExpression::NoPatternOption; + } + regexp = QRegularExpression(QString::fromStdString(regexp_str), popts); + regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts); + regexError = ""; + if (regexp.isValid() && regexp2.isValid()) { + regexIsValid = true; + // Check '{', '}' pairs inside the regex + int balanced = 0; + int skip = 1; + for (int i = 0; i < par_as_string.size(); i+= skip) { + char c = par_as_string[i]; + if (c == '\\') { + skip = 2; + continue; + } + if (c == '{') + balanced++; + else if (c == '}') { + balanced--; + if (balanced < 0) + break; + } + skip = 1; + } + if (balanced != 0) { + regexIsValid = false; + regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\""; + } + } + else { + regexIsValid = false; + if (!regexp.isValid()) + regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString(); + if (!regexp2.isValid()) + regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString(); } } } // Count number of characters in string // {]} ==> 1 // \& ==> 1 // --- ==> 1 // \\[a-zA-Z]+ ==> 1 -static int computeSize(string s, int len) +static int computeSize(QStringRef s, int len) { if (len == 0) return 0; int skip = 1; int count = 0; for (int i = 0; i < len; i += skip, count++) { if (s[i] == '\\') { skip = 2; - if (isalpha(s[i+1])) { + if (s.at(i+1).isLetter()) { for (int j = 2; i+j < len; j++) { - if (! 
isalpha(s[i+j])) { - if (s[i+j] == ' ') + if (s.at(i+j).isLetter()) { + if (s.at(i+j) == ' ') skip++; else if ((s[i+j] == '{') && s[i+j+1] == '}') skip += 2; else if ((s[i+j] == '{') && (i + j + 1 >= len)) skip++; @@ -3011,63 +3049,61 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); LYXERR(Debug::FIND, "After normalization: '" << str << "'"); if (use_regexp) { LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); - regex const *p_regexp; - regex_constants::match_flag_type flags; + QString qstr = QString::fromStdString(str); + QRegularExpression const *p_regexp; + QRegularExpression::MatchType flags = QRegularExpression::NormalMatch; if (at_begin) { - flags = regex_constants::match_continuous; p_regexp = ®exp; } else { - flags = regex_constants::match_default; p_regexp = ®exp2; } - sregex_iterator re_it(str.begin(), str.end(), *p_regexp, flags); - if (re_it == sregex_iterator()) + QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags); + if (!match.hasMatch()) return mres; - match_results<string::const_iterator> const & m = *re_it; - // Check braces on segments that matched all (.*?) subexpressions, // except the last "padding" one inserted by lyx. - for (size_t i = 1; i < m.size() - 1; ++i) - if (!braces_match(m[i].first, m[i].second, open_braces)) + for (int i = 1; i < match.lastCapturedIndex(); ++i) + if (!braces_match(match.captured(i), open_braces)) return mres; // Exclude from the returned match length any length // due to close wildcards added at end of regexp // and also the length of the leading (e.g. 
'\emph{}') // // Whole found string, including the leading: m[0].second - m[0].first // Size of the leading string: m[1].second - m[1].first int leadingsize = 0; - if (m.size() > 1) - leadingsize = m[1].second - m[1].first; + if (match.lastCapturedIndex() > 0) + leadingsize = match.capturedEnd(1) - match.capturedStart(1); int result; - for (size_t i = 0; i < m.size(); i++) { - LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long"); + + int lastidx = match.lastCapturedIndex(); + for (int i = 0; i <= lastidx; i++) { + LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long"); } if (close_wildcards == 0) - result = m[0].second - m[0].first; - + result = match.capturedEnd(0) - match.capturedStart(0); else - result = m[m.size() - close_wildcards].first - m[0].first; + result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0); - size_t pos = m.position(size_t(0)); + size_t pos = match.capturedStart(0); // Ignore last closing characters while (result > 0) { - if (str[pos+result-1] == '}') + if (qstr.at(pos+result-1) == '}') --result; else break; } if (result > leadingsize) result -= leadingsize; else result = 0; - mres.match_len = computeSize(str.substr(pos+leadingsize,result), result); - mres.match2end = str.size() - pos - leadingsize; + mres.match_len = computeSize(QStringRef(&qstr, pos+leadingsize,result), result); + mres.match2end = qstr.size() - pos - leadingsize; mres.pos = pos+leadingsize; return mres; } // else !use_regexp: but all code paths above return @@ -3777,10 +3813,14 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) if (theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0) return false; try { MatchStringAdv matchAdv(bv->buffer(), opt); + if (!matchAdv.regexIsValid) { + bv->message(lyx::from_utf8(matchAdv.regexError)); + return(false); + } int length = bv->cursor().selectionEnd().pos() - 
bv->cursor().selectionBegin().pos(); if (length > 0) bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward); num_replaced += findAdvReplace(bv, opt, matchAdv); cur = bv->cursor();
pgpdnwcNsgoi3.pgp
Description: Digitale Signatur von OpenPGP
-- lyx-devel mailing list lyx-devel@lists.lyx.org http://lists.lyx.org/mailman/listinfo/lyx-devel