commit 675072471730e52fe4f561b7a9e4f99da7f92483
Author: Kornel Benko <[email protected]>
Date:   Mon Jan 4 07:16:59 2021 +0100

    Amend(2) 8c67cb8c: (FindAdv: Try to make regex search with format enabled 
somehow faster)
    
    Use innermost nesting to start searches.
    Some fine tuning to determine correct match.
    (If the regex contains '(\S)\1' at the end, then this regex would also
    match '}}', which is often found at the end of the examined string.
    We have to disable this invalid match.)
---
 src/lyxfind.cpp |  306 ++++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 215 insertions(+), 91 deletions(-)

diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index 94a54bc..c728af1 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -745,6 +745,7 @@ string escape_for_regex(string s, bool match_latex)
                        new_pos = s.size();
                string t;
                if (new_pos > pos) {
+                       // outside regexp
                        LYXERR(Debug::FIND, "new_pos: " << new_pos);
                        t = apply_escapes(s.substr(pos, new_pos - pos), 
get_lyx_unescapes());
                        LYXERR(Debug::FIND, "t [lyx]: " << t);
@@ -875,6 +876,12 @@ static MatchResult::range interpretMatch(MatchResult 
&oldres, MatchResult &newre
     return MatchResult::newIsTooFar;
   if ((newres.match_len == oldres.match_len) && (newres.match2end == 
oldres.match2end))
     return MatchResult::newIsBetter;
+  if ((newres.match_len == oldres.match_len) && (newres.match2end -2 == 
oldres.match2end)) {
+    // The string contained for instance "\usepackage...fontenc ..."
+    // and now after moved 9 char forward contains "ge...{fontenc} ..."
+    // so we accept it as OK
+    return MatchResult::newIsBetter;
+  }
   return MatchResult::newIsInvalid;
 }
 
@@ -947,6 +954,7 @@ private:
        // number of (.*?) subexpressions added at end of search regexp for 
closing
        // environments, math mode, styles, etc...
        int close_wildcards;
+public:
        // Are we searching with regular expressions ?
        bool use_regexp;
 };
@@ -1415,6 +1423,7 @@ static void buildAccentsMap()
   accents["LaTeX"]         = getutf8(0xf0012);
   accents["latexe"]        = getutf8(0xf0013);
   accents["LaTeXe"]        = getutf8(0xf0013);
+  accents["lyxarrow"]      = getutf8(0xf0020);
   accents["backslash lyx"]           = getutf8(0xf0010);       // Used logos 
inserted with starting \backslash
   accents["backslash LyX"]           = getutf8(0xf0010);
   accents["backslash tex"]           = getutf8(0xf0011);
@@ -1423,6 +1432,7 @@ static void buildAccentsMap()
   accents["backslash LaTeX"]         = getutf8(0xf0012);
   accents["backslash latexe"]        = getutf8(0xf0013);
   accents["backslash LaTeXe"]        = getutf8(0xf0013);
+  accents["backslash lyxarrow"]      = getutf8(0xf0020);
   accents["ddot{\\imath}"] = "ï";
   buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY",
                       "äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ"); // umlaut
@@ -1488,7 +1498,7 @@ void Intervall::removeAccents()
     buildAccentsMap();
   static regex const 
accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
          
"cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
-      "|((i|imath|jmath|cdot|[a-z]+space)|((backslash 
)?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?)))(?![a-zA-Z]))");
+      "|((i|imath|jmath|cdot|[a-z]+space)|((backslash 
)?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow)))(?![a-zA-Z]))");
   smatch sub;
   for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != 
end; ++itacc) {
     sub = *itacc;
@@ -2024,6 +2034,7 @@ void LatexInfo::buildEntries(bool isPatternString)
         if (interval_.par[optend] != '{') {
           closings = 0;
           found.parenthesiscount = 0;
+          found.head = "\\" + key;
         }
         else
           closings = found.parenthesiscount;
@@ -2042,9 +2053,17 @@ void LatexInfo::buildEntries(bool isPatternString)
         found._tokensize = found.head.length();
         found._dataStart = found._tokenstart + found.head.length();
         if (found.keytype == KeyInfo::doRemove) {
-          int endpar = 2 + interval_.findclosing(found._dataStart, 
interval_.par.length(), '{', '}', closings);
-          found._dataStart = endpar;
-          found._tokensize = found._dataStart - found._tokenstart;
+          if (closings > 0) {
+            size_t endpar = 2 + interval_.findclosing(found._dataStart, 
interval_.par.length(), '{', '}', closings);
+           if (endpar >= interval_.par.length())
+             found._dataStart = interval_.par.length();
+           else
+             found._dataStart = endpar;
+            found._tokensize = found._dataStart - found._tokenstart;
+          }
+          else {
+            found._dataStart = found._tokenstart + found._tokensize;
+          } 
           closings = 0;
         }
         if (interval_.par.substr(found._dataStart-1, 
15).compare("\\endarguments{}") == 0) {
@@ -2420,8 +2439,11 @@ int LatexInfo::dispatch(ostringstream &os, int 
previousStart, KeyInfo &actual)
     }
     case KeyInfo::isSize: {
       if (actual.disabled || (interval_.par[actual._dataStart] != '{') || 
(interval_.par[actual._dataStart-1] == ' ')) {
-        processRegion(actual._dataEnd, actual._dataEnd+1); /* remove possibly 
following {} */
-        interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+        if (actual.parenthesiscount == 0)
+          interval_.addIntervall(actual._tokenstart, actual._dataEnd);
+        else {
+          interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+        }
         nextKeyIdx = getNextKey();
       } else {
         // Here _dataStart points to '{', so correct it
@@ -3050,7 +3072,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, 
FindAndReplaceOptions const &
        }
 }
 
-
+#if 0
 // Count number of characters in string
 // {]} ==> 1
 // \&  ==> 1
@@ -3110,6 +3132,7 @@ static int computeSize(string s, int len)
        }
        return count;
 }
+#endif
 
 MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool 
at_begin) const
 {
@@ -3213,19 +3236,61 @@ MatchResult MatchStringAdv::findAux(DocIterator const & 
cur, int len, bool at_be
                        result = 0;
 #if QTSEARCH
                mres.match_prefix = match.capturedEnd(2) - 
match.capturedStart(2);
-               // mres.match_len = computeSize(QStringRef(&qstr, 
pos+leadingsize,result), result) - mres.match_prefix;
                mres.match_len = match.capturedEnd(0) - match.capturedEnd(2);
-               // mres.match2end = qstr.size() - pos - leadingsize - 
mres.match_prefix;
-               mres.match2end = qstr.size() - match.capturedEnd(0);
+               // because of different number of closing at end of string
+               // we have to 'unify' the length of the post-match.
+               // Done by ignoring closing parenthesis and linefeeds at string 
end
+               int matchend = match.capturedEnd(0);
+               while (mres.match_len > 0) {
+                 QChar c = qstr.at(matchend - 1);
+                 if ((c == '\n') || (c == '}') || (c == '{')) {
+                   mres.match_len--;
+                   matchend--;
+                 }
+                 else
+                   break;
+               }
+               size_t strsize = qstr.size();
+               while (strsize > (size_t) match.capturedEnd(0)) {
+                       QChar c = qstr.at(strsize-1);
+                       if ((c == '\n') || (c == '}')) {
+                               --strsize;
+                       }
+                       else
+                               break;
+               }
+               // LYXERR0(qstr.toStdString());
+               mres.match2end = strsize - matchend;
                mres.pos = match.capturedStart(2);
 #else
                mres.match_prefix = m[2].second - m[2].first;
-               // mres.match_len = 
computeSize(str.substr(pos+leadingsize,result), result) - mres.match_prefix;
                mres.match_len = m[0].second - m[2].second;
-               // mres.match2end = str.size() - pos - leadingsize - 
mres.match_prefix;
-               mres.match2end = str.size() - m[0].second;
-               mres.pos = m[2].first;
+               // ignore closing parenthesis and linefeeds at string end
+               size_t strend = m[0].second - m[0].first;
+               int matchend = strend;
+               while (mres.match_len > 0) {
+                 char c = str.at(matchend - 1);
+                 if ((c == '\n') || (c == '}') || (c == '{')) {
+                   mres.match_len--;
+                   matchend--;
+                 }
+                 else
+                   break;
+               }
+               size_t strsize = str.size();
+               while (strsize > strend) {
+                       if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == 
'\n')) {
+                               --strsize;
+                       }
+                       else
+                               break;
+               }
+               // LYXERR0(str);
+               mres.match2end = strsize - matchend;
+               mres.pos = m[2].first - m[0].first;;
 #endif
+               if (mres.match2end < 0)
+                 mres.match_len = 0;
                mres.leadsize = leadingsize;
                return mres;
        }
@@ -3477,25 +3542,57 @@ docstring latexifyFromCursor(DocIterator const & cur, 
int len)
        return ods.str();
 }
 
+#if 0
+// Debugging output
+static void displayMResult(MatchResult &mres, int increment)
+{
+  LYXERR0( "pos: " << mres.pos << " increment " << increment);
+  LYXERR0( "leadsize: " << mres.leadsize);
+  LYXERR0( "match_len: " << mres.match_len);
+  LYXERR0( "match_prefix: " << mres.match_prefix);
+  LYXERR0( "match2end: " << mres.match2end);
+}
+       #define displayMres(s,i) displayMResult(s,i);
+#else
+       #define displayMres(s,i)
+#endif
 
 /** Finalize an advanced find operation, advancing the cursor to the innermost
  ** position that matches, plus computing the length of the matching text to
  ** be selected
  **/
-int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
+int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int 
expected_len)
 {
        // Search the foremost position that matches (avoids find of entire math
        // inset when match at start of it)
        size_t d;
        DocIterator old_cur(cur.buffer());
+       MatchResult mres;
        do {
                LYXERR(Debug::FIND, "Forwarding one step (searching for 
innermost match)");
                d = cur.depth();
                old_cur = cur;
                cur.forwardPos();
-       } while (cur && cur.depth() > d && match(cur).match_len > 0);
+               if (!cur)
+                       break;
+               if (cur.depth() > d)
+                       continue;
+               if (cur.depth() == d)
+                       break;
+               mres = match(cur);
+               displayMres(mres, 1);
+               if (expected_len > 0) {
+                       if (mres.match_len < expected_len)
+                               break;
+               }
+               else {
+                       if (mres.match_len <= 0)
+                               break;
+               }
+       } while (1);
        cur = old_cur;
-       int max_match = match(cur).match_len;     /* match valid only if not 
searching whole words */
+       mres = match(cur);      /* match valid only if not searching whole 
words */
+       int max_match = mres.match_len;
        if (max_match <= 0) return 0;
        LYXERR(Debug::FIND, "Ok");
 
@@ -3503,7 +3600,9 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv 
const & match)
         int len = 1;
        if (cur.pos() + len > cur.lastpos())
          return 0;
-       if (match.opt.matchword) {
+       // regexp should use \w+, \S+, or \b(some string)\b
+       // to search for whole words
+       if (match.opt.matchword && !match.use_regexp) {
           LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
           while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len 
<= 0) {
             ++len;
@@ -3523,28 +3622,31 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv 
const & match)
           if (old_match == 0)
             len = 0;
         }
-       else {
-         int minl = 1;
-         int maxl = cur.lastpos() - cur.pos();
-         // Greedy behaviour while matching regexps
-         while (maxl > minl) {
-           int actual_match = match(cur, len).match_len;
-           if (actual_match >= max_match) {
-             // actual_match > max_match _can_ happen,
-             // if the search area splits
-             // some following word so that the regex
-             // (e.g. 'r.*r\b' matches 'r' from the middle of the
-             // splitted word)
-             // This means, the len value is too big
-             maxl = len;
-             len = (int)((maxl + minl)/2);
-           }
-           else {
-             // (actual_match < max_match)
-             minl = len + 1;
-             len = (int)((maxl + minl)/2);
-           }
-         }
+        else {
+          int minl = 1;
+          int maxl = cur.lastpos() - cur.pos();
+          // Greedy behaviour while matching regexps
+          while (maxl > minl) {
+            int actual_match = match(cur, len).match_len;
+            if (actual_match >= max_match) {
+              // actual_match > max_match _can_ happen,
+              // if the search area splits
+              // some following word so that the regex
+              // (e.g. 'r.*r\b' matches 'r' from the middle of the
+              // splitted word)
+              // This means, the len value is too big
+              maxl = len;
+              if (maxl - minl < 4)
+                len = (int)((maxl + minl)/2);
+              else
+                len = (int)(minl + (maxl - minl + 3)/4);
+            }
+            else {
+              // (actual_match < max_match)
+              minl = len + 1;
+              len = (int)((maxl + minl)/2);
+            }
+          }
           old_cur = cur;
           // Search for real start of matched characters
           while (len > 1) {
@@ -3582,29 +3684,33 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv 
const & match)
        return len;
 }
 
-#if 0
-static void displayMResult(MatchResult &mres)
-{
-  LYXERR0( "pos: " << mres.pos);
-  LYXERR0( "leadsize: " << mres.leadsize);
-  LYXERR0( "match_len: " << mres.match_len);
-  LYXERR0( "match_prefix: " << mres.match_prefix);
-  LYXERR0( "match2end: " << mres.match2end);
-}
-       #define displayMres(s) displayMResult(s);
-#else
-       #define displayMres(s)
-#endif
-
 /// Finds forward
 int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
 {
        if (!cur)
                return 0;
        while (!theApp()->longOperationCancelled() && cur) {
+               {
+                 // forward to
+                 size_t d;
+                 DocIterator old_cur(cur.buffer());
+                 do {
+                   d = cur.depth();
+                   old_cur = cur;
+                   cur.forwardPos();
+                   if (!cur)
+                     break;
+                   if (cur.depth() > d)
+                     continue;
+                   if (cur.depth() == d)
+                     break;
+                 } while (1);
+                 cur = old_cur;
+               }
+
                LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
                MatchResult mres = match(cur, -1, false);
-               displayMres(mres)
+               displayMres(mres,-1)
                int match_len = mres.match_len;
                if ((mres.pos > 100000) || (mres.match2end > 100000) || 
(match_len > 100000)) {
                        LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " 
<< match_len << ", " << mres.match2end);
@@ -3613,11 +3719,11 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv 
const & match)
                if (match_len > 0) {
                        // Try to find the begin of searched string
                        int increment;
-                        int firstInvalid = 100000;
-                        if (mres.match_prefix + mres.pos - mres.leadsize > 0)
-                          increment = (mres.match_prefix + mres.pos - 
mres.leadsize)/2;
-                        else
-                          increment = 10;
+                       int firstInvalid = 100000;
+                       if (mres.match_prefix + mres.pos - mres.leadsize > 1)
+                         increment = (mres.match_prefix + mres.pos - 
mres.leadsize + 1)*3/4;
+                       else
+                         increment = 10;
                        LYXERR(Debug::FIND, "Set increment to " << increment);
                        while (increment > 0) {
                                DocIterator old_cur = cur;
@@ -3631,38 +3737,56 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv 
const & match)
                                }
                                else {
                                        MatchResult mres2 = match(cur, -1, 
false);
-                                       displayMres(mres2)
-                                          switch (interpretMatch(mres, mres2)) 
{
-                                          case MatchResult::newIsTooFar:
-                                            // behind the expected match
-                                            firstInvalid = increment;
-                                            cur = old_cur;
-                                            increment /= 2;
-                                            break;
-                                          case MatchResult::newIsBetter:
-                                            // not reached yet
-                                            mres = mres2;
-                                            firstInvalid -= increment;
-                                            if (increment > firstInvalid/2)
-                                              increment = firstInvalid/2;
-                                            break;
-                                          default:
-                                            // Handle not like 
MatchResult::newIsTooFar
-                                            LYXERR0( "Something is wrong: 
Increment = " << increment << " match_prefix = " << mres.match_prefix);
-                                            firstInvalid--;
-                                            increment = firstInvalid -1;
-                                            cur = old_cur;
-                                            break;
-                                          }
+                                       displayMres(mres2,increment)
+                                       switch (interpretMatch(mres, mres2)) {
+                                       case MatchResult::newIsTooFar:
+                                         // behind the expected match
+                                         firstInvalid = increment;
+                                         cur = old_cur;
+                                         increment /= 2;
+                                         break;
+                                       case MatchResult::newIsBetter:
+                                         // not reached yet, but 
cur.pos()+increment is better
+                                         mres = mres2;
+                                         firstInvalid -= increment;
+                                         if (increment > firstInvalid*3/4)
+                                           increment = firstInvalid*3/4;
+                                         if ((mres2.pos == mres2.leadsize) && 
(increment >= mres2.match_prefix)) {
+                                           if (increment >= mres2.match_prefix)
+                                             increment = 
(mres2.match_prefix+1)*3/4;
+                                         }
+                                         break;
+                                       default:
+                                         // Todo@
+                                         // Handle not like 
MatchResult::newIsTooFar
+                                         // LYXERR0( "Something is wrong: 
Increment = " << increment << " match_prefix = " << mres.match_prefix);
+                                         firstInvalid--;
+                                         increment = increment*3/4;
+                                         cur = old_cur;
+                                         break;
+                                       }
                                }
                        }
-                        // LYXERR0("Leaving first loop");
+                       // LYXERR0("Leaving first loop");
+                       {
+                         LYXERR(Debug::FIND, "Finalizing 1");
+                         int len = findAdvFinalize(cur, match, mres.match_len);
+                         if (len > 0)
+                           return len;
+                         else {
+                           // try next possible match
+                           cur.forwardPos();
+                           continue;
+                         }
+                       }
+                       // The following code is never reached,
+                       // but parts of it may be needed in the future
                        int match_len_zero_count = 0;
                        MatchResult mres3;
                        for (int i = 0; !theApp()->longOperationCancelled() && 
cur; cur.forwardPos()) {
                                if (i++ > 3) {
                                        mres3 = match(cur, -1, false);
-                                       displayMres(mres3)
+                                       displayMres(mres3, 1)
                                        int remaining_len = mres3.match_len;
                                        if (remaining_len <= 0) {
                                                // Apparently the searched 
string is not in the remaining part
@@ -3674,19 +3798,19 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv 
const & match)
                                }
                                LYXERR(Debug::FIND, "Advancing cur: " << cur);
                                mres3 = match(cur, 1);
-                               displayMres(mres3)
+                               displayMres(mres3, 1)
                                int match_len3 = mres3.match_len;
                                if (match_len3 < 0)
                                        continue;
                                mres3 = match(cur);
-                               displayMres(mres3)
+                               displayMres(mres3, 1)
                                int match_len2 = mres3.match_len;
                                LYXERR(Debug::FIND, "match_len2: " << 
match_len2);
                                if (match_len2 > 0) {
                                        // Sometimes in finalize we understand 
it wasn't a match
                                        // and we need to continue the outest 
loop
-                                       LYXERR(Debug::FIND, "Finalizing");
-                                       int len = findAdvFinalize(cur, match);
+                                       LYXERR(Debug::FIND, "Finalizing 2");
+                                       int len = findAdvFinalize(cur, match, 
mres.match_len);
                                        if (len > 0) {
                                                return len;
                                        }
@@ -3724,7 +3848,7 @@ int findMostBackwards(DocIterator & cur, MatchStringAdv 
const & match)
 {
        DocIterator cur_begin = doc_iterator_begin(cur.buffer());
        DocIterator tmp_cur = cur;
-       int len = findAdvFinalize(tmp_cur, match);
+       int len = findAdvFinalize(tmp_cur, match, -1);
        Inset & inset = cur.inset();
        for (; cur != cur_begin; cur.backwardPos()) {
                LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
@@ -3732,7 +3856,7 @@ int findMostBackwards(DocIterator & cur, MatchStringAdv 
const & match)
                new_cur.backwardPos();
                if (new_cur == cur || &new_cur.inset() != &inset || 
!match(new_cur).match_len)
                        break;
-               int new_len = findAdvFinalize(new_cur, match);
+               int new_len = findAdvFinalize(new_cur, match, -1);
                if (new_len == len)
                        break;
                len = new_len;
-- 
lyx-cvs mailing list
[email protected]
http://lists.lyx.org/mailman/listinfo/lyx-cvs

Reply via email to