We use utf-8 encoded strings.
In order to fully use regular expressions in FindAdv, we need the created regex
to be case
sensitive. This works well for ASCII characters, but fails on non-ASCII ones.

The attempt to use docstring works better on Unix systems, but there are some
problems.
1.) regexes are still unable to handle case-insensitivity for e.g. Cyrillic
characters,
although one can already use constructs like [а-и] here.
2.) under MinGW we get an 'invalid regex' error.
This means that using docstring does not help much.

The next attempt is using QString. This works pretty well, but uses classes
introduced in Qt5.

The differences in lyxfind.cpp are big, so using '#ifdef's does not seem
appropriate.
(See attached)

So the question is: Can we drop support for Qt4 in master?
Or does someone have a better solution?

        Kornel
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index 4ba14ee088..e89f806ee9 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -49,10 +49,11 @@
 #include "support/lstrings.h"
 #include "support/textutils.h"
 
 #include <map>
 #include <regex>
+#include <QRegularExpression>
 
 using namespace std;
 using namespace lyx::support;
 
 namespace lyx {
@@ -791,26 +792,26 @@ bool regex_replace(string const & s, string & t, string const & searchstr,
  ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'.
  **
  ** @param unmatched
  ** Number of open braces that must remain open at the end for the verification to succeed.
  **/
-bool braces_match(string::const_iterator const & beg,
-		  string::const_iterator const & end,
+bool braces_match(QString const & beg,
 		  int unmatched = 0)
 {
 	int open_pars = 0;
-	string::const_iterator it = beg;
-	LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'");
-	for (; it != end; ++it) {
+	LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'");
+	int lastidx = beg.size();
+	for (int i=0; i < lastidx; ++i) {
 		// Skip escaped braces in the count
-		if (*it == '\\') {
-			++it;
-			if (it == end)
+		QChar c = beg.at(i);
+		if (c == '\\') {
+			++i;
+			if (i >= lastidx)
 				break;
-		} else if (*it == '{') {
+		} else if (c == '{') {
 			++open_pars;
-		} else if (*it == '}') {
+		} else if (c == '}') {
 			if (open_pars == 0) {
 				LYXERR(Debug::FIND, "Found unmatched closed brace");
 				return false;
 			} else
 				--open_pars;
@@ -835,10 +836,11 @@ public:
 	MatchResult(): match_len(0),match2end(0), pos(0) {};
 };
 
 /** The class performing a match between a position in the document and the FindAdvOptions.
  **/
+
 class MatchStringAdv {
 public:
 	MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt);
 
 	/** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv
@@ -850,10 +852,12 @@ public:
 	 **
 	 ** @return
 	 ** The length of the matching text, or zero if no match was found.
 	 **/
 	MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+	bool regexIsValid;
+	string regexError;
 
 public:
 	/// buffer
 	lyx::Buffer * p_buf;
 	/// first buffer on which search was started
@@ -880,13 +884,13 @@ private:
 	 **/
 	string normalize(docstring const & s, bool hack_braces) const;
 	// normalized string to search
 	string par_as_string;
 	// regular expression to use for searching
-	regex regexp;
+	QRegularExpression regexp;
 	// same as regexp, but prefixed with a ".*?"
-	regex regexp2;
+	QRegularExpression regexp2;
 	// leading format material as string
 	string lead_as_string;
 	// par_as_string after removal of lead_as_string
 	string par_as_string_nolead;
 	// unmatched open braces in the search string/regexp
@@ -2880,13 +2884,13 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
 							break;
 					}
 					lng -= 2;
 					open_braces++;
 				}
-	else
+				else
 					break;
-}
+			}
 			if (lng < par_as_string.size())
 				par_as_string = par_as_string.substr(0,lng);
 			/*
 			// save '\.'
 			regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_");
@@ -2921,40 +2925,74 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
 			regexp_str = "(" + lead_as_regexp + ")" + par_as_string;
 			regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string;
 		}
 		LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
 		LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
+		QRegularExpression::PatternOptions popts;
 		if (! opt.casesensitive) {
-			regexp = regex(regexp_str, std::regex_constants::icase);
-			regexp2 = regex(regexp2_str, std::regex_constants::icase);
+			popts = QRegularExpression::CaseInsensitiveOption;
 		}
 		else {
-			regexp = regex(regexp_str);
-			regexp2 = regex(regexp2_str);
+			popts = QRegularExpression::NoPatternOption;
+		}
+		regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
+		regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
+		regexError = "";
+		if (regexp.isValid() && regexp2.isValid()) {
+			regexIsValid = true;
+			// Check '{', '}' pairs inside the regex
+			int balanced = 0;
+			int skip = 1;
+			for (int i = 0; i < par_as_string.size(); i+= skip) {
+				char c = par_as_string[i];
+				if (c == '\\') {
+					skip = 2;
+					continue;
+				}
+				if (c == '{')
+					balanced++;
+				else if (c == '}') {
+					balanced--;
+					if (balanced < 0)
+						break;
+				}
+				skip = 1;
+			}
+			if (balanced != 0) {
+				regexIsValid = false;
+				regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
+			}
+		}
+		else {
+			regexIsValid = false;
+			if (!regexp.isValid())
+				regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
+			if (!regexp2.isValid())
+				regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
 		}
 	}
 }
 
 
 // Count number of characters in string
 // {]} ==> 1
 // \&  ==> 1
 // --- ==> 1
 // \\[a-zA-Z]+ ==> 1
-static int computeSize(string s, int len)
+static int computeSize(QStringRef s, int len)
 {
 	if (len == 0)
 		return 0;
 	int skip = 1;
 	int count = 0;
 	for (int i = 0; i < len; i += skip, count++) {
 		if (s[i] == '\\') {
 			skip = 2;
-			if (isalpha(s[i+1])) {
+			if (s.at(i+1).isLetter()) {
 				for (int j = 2;  i+j < len; j++) {
-					if (! isalpha(s[i+j])) {
-						if (s[i+j] == ' ')
+					if (s.at(i+j).isLetter()) {
+						if (s.at(i+j) == ' ')
 							skip++;
 						else if ((s[i+j] == '{') && s[i+j+1] == '}')
 							skip += 2;
 						else if ((s[i+j] == '{') && (i + j + 1 >= len))
 							skip++;
@@ -3011,63 +3049,61 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 	LYXERR(Debug::FIND, "Matching against     '" << lyx::to_utf8(docstr) << "'");
 	LYXERR(Debug::FIND, "After normalization: '" << str << "'");
 
 	if (use_regexp) {
 		LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
-		regex const *p_regexp;
-		regex_constants::match_flag_type flags;
+		QString qstr = QString::fromStdString(str);
+		QRegularExpression const *p_regexp;
+		QRegularExpression::MatchType flags = QRegularExpression::NormalMatch;
 		if (at_begin) {
-			flags = regex_constants::match_continuous;
 			p_regexp = &regexp;
 		} else {
-			flags = regex_constants::match_default;
 			p_regexp = &regexp2;
 		}
-		sregex_iterator re_it(str.begin(), str.end(), *p_regexp, flags);
-		if (re_it == sregex_iterator())
+		QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags);
+		if (!match.hasMatch())
 			return mres;
-		match_results<string::const_iterator> const & m = *re_it;
-
 		// Check braces on segments that matched all (.*?) subexpressions,
 		// except the last "padding" one inserted by lyx.
-		for (size_t i = 1; i < m.size() - 1; ++i)
-			if (!braces_match(m[i].first, m[i].second, open_braces))
+		for (int i = 1; i < match.lastCapturedIndex(); ++i)
+			if (!braces_match(match.captured(i), open_braces))
 				return mres;
 
 		// Exclude from the returned match length any length
 		// due to close wildcards added at end of regexp
 		// and also the length of the leading (e.g. '\emph{}')
 		//
 		// Whole found string, including the leading: m[0].second - m[0].first
 		// Size of the leading string: m[1].second - m[1].first
 		int leadingsize = 0;
-		if (m.size() > 1)
-			leadingsize = m[1].second - m[1].first;
+		if (match.lastCapturedIndex() > 0)
+			leadingsize = match.capturedEnd(1) - match.capturedStart(1);
 		int result;
-		for (size_t i = 0; i < m.size(); i++) {
-			LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long");
+
+		int lastidx = match.lastCapturedIndex();
+		for (int i = 0; i <= lastidx; i++) {
+			LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long");
 		}
 		if (close_wildcards == 0)
-			result = m[0].second - m[0].first;
-
+			result = match.capturedEnd(0) - match.capturedStart(0);
 		else
-			result =  m[m.size() - close_wildcards].first - m[0].first;
+			result =  match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0);
 
-		size_t pos = m.position(size_t(0));
+		size_t pos = match.capturedStart(0);
 		// Ignore last closing characters
 		while (result > 0) {
-			if (str[pos+result-1] == '}')
+			if (qstr.at(pos+result-1) == '}')
 				--result;
 			else
 				break;
 		}
 		if (result > leadingsize)
 			result -= leadingsize;
 		else
 			result = 0;
-		mres.match_len = computeSize(str.substr(pos+leadingsize,result), result);
-		mres.match2end = str.size() - pos - leadingsize;
+		mres.match_len = computeSize(QStringRef(&qstr, pos+leadingsize,result), result);
+		mres.match2end = qstr.size() - pos - leadingsize;
 		mres.pos = pos+leadingsize;
 		return mres;
 	}
 
 	// else !use_regexp: but all code paths above return
@@ -3777,10 +3813,14 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
 	if (theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0)
 		return false;
 
 	try {
 		MatchStringAdv matchAdv(bv->buffer(), opt);
+		if (!matchAdv.regexIsValid) {
+			bv->message(lyx::from_utf8(matchAdv.regexError));
+			return(false);
+		}
 		int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos();
 		if (length > 0)
 			bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
 		num_replaced += findAdvReplace(bv, opt, matchAdv);
 		cur = bv->cursor();

Attachment: pgpdnwcNsgoi3.pgp
Description: Digitale Signatur von OpenPGP

-- 
lyx-devel mailing list
lyx-devel@lists.lyx.org
http://lists.lyx.org/mailman/listinfo/lyx-devel

Reply via email to