sc/inc/queryiter.hxx              |   10 +++++--
 sc/qa/unit/ucalc_sort.cxx         |   32 ++++++++++++-------------
 sc/source/core/data/queryiter.cxx |   48 +++++++++++++++++++++++++-------------
 3 files changed, 55 insertions(+), 35 deletions(-)

New commits:
commit 1d8c31ea2555b339d5a4bc5449d40d546045435b
Author:     Luboš Luňák <l.lu...@collabora.com>
AuthorDate: Fri May 6 11:53:09 2022 +0200
Commit:     Luboš Luňák <l.lu...@collabora.com>
CommitDate: Tue May 10 16:25:21 2022 +0200

    sort out query iterator's BinarySearch() corner cases handling
    
    If the code detects the range is not actually properly sorted,
    set position to the first row and return false to force
    linear search. If the searched for value belongs before the first
    item, do the same, in which case the linear search should see
    that it is this case (before this case nRow was set to the first
    row regardless of where the searched for value belonged).
    
    Change-Id: I8ff346783d93d74ff409b19aea394e202885647d
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/134100
    Tested-by: Jenkins
    Reviewed-by: Luboš Luňák <l.lu...@collabora.com>

diff --git a/sc/inc/queryiter.hxx b/sc/inc/queryiter.hxx
index 847241124a2e..f151bba90ab6 100644
--- a/sc/inc/queryiter.hxx
+++ b/sc/inc/queryiter.hxx
@@ -141,9 +141,13 @@ protected:
 
     /* Only works if no regular expression is involved, only searches for rows 
in one column,
        and only the first query entry is considered with simple conditions 
SC_LESS,SC_LESS_EQUAL,
-       SC_EQUAL (sorted ascending) or SC_GREATER,SC_GREATER_EQUAL (sorted 
descending). Delivers
-       a starting point, continue with e.g. GetThis() and GetNext() 
afterwards. Introduced
-       for FindEqualOrSortedLastInRange(). */
+       SC_EQUAL (sorted ascending) or SC_GREATER,SC_GREATER_EQUAL (sorted 
descending). It
+       delivers a starting point set to nRow, i.e. the last row that either 
matches the searched
+       for value, or the last row that matches the condition. Continue with 
e.g. GetThis() and
+       GetNext() afterwards. Returns false if the searched for value is not in 
the search range
+       or if the range is not properly sorted, with nRow in that case set to 
the first row or after
+       the last row. In that case use GetFirst().
+    */
     bool BinarySearch();
 
 public:
diff --git a/sc/qa/unit/ucalc_sort.cxx b/sc/qa/unit/ucalc_sort.cxx
index e99b0a38cec3..b381d6195b04 100644
--- a/sc/qa/unit/ucalc_sort.cxx
+++ b/sc/qa/unit/ucalc_sort.cxx
@@ -2176,26 +2176,26 @@ void TestSort::testQueryBinarySearch()
     }
 
     {
-        // All values are larger than 0, so this should be an error.
+        // All values are larger than 0, so there should be no match.
         m_pDoc->SetFormula( formulaAddress, "=MATCH(0;" + ascendingRangeName + 
";1)",
             formula::FormulaGrammar::GRAM_NATIVE_UI);
         CPPUNIT_ASSERT_EQUAL( FormulaError::NotAvailable, m_pDoc->GetErrCode( 
formulaAddress ));
 
         ScQueryParam param = makeSearchParam( range, ascendingCol, 
SC_LESS_EQUAL, 0 );
         TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, 
param, false );
-        CPPUNIT_ASSERT(it.BinarySearch());
+        CPPUNIT_ASSERT(!it.BinarySearch());
         CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow());
     }
     {
         ScQueryParam param = makeSearchParam( range, ascendingCol, SC_LESS, 0 
);
         TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, 
param, false );
-        CPPUNIT_ASSERT(it.BinarySearch());
+        CPPUNIT_ASSERT(!it.BinarySearch());
         CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow());
     }
     {
         ScQueryParam param = makeSearchParam( range, ascendingCol, SC_EQUAL, 0 
);
         TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, 
param, false );
-        CPPUNIT_ASSERT(it.BinarySearch());
+        CPPUNIT_ASSERT(!it.BinarySearch());
         CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow());
     }
 
@@ -2242,45 +2242,45 @@ void TestSort::testQueryBinarySearch()
     }
 
     {
-        // Descending, all values are smaller than 10, so this should be an 
error.
+        // Descending, all values are smaller than 10, so there should be no 
match.
         m_pDoc->SetFormula( formulaAddress, "=MATCH(10;" + descendingRangeName 
+ ";-1)",
             formula::FormulaGrammar::GRAM_NATIVE_UI);
         CPPUNIT_ASSERT_EQUAL( FormulaError::NotAvailable, m_pDoc->GetErrCode( 
formulaAddress ));
 
         ScQueryParam param = makeSearchParam( range, descendingCol, 
SC_GREATER_EQUAL, 10 );
         TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, 
param, false );
-        CPPUNIT_ASSERT(it.BinarySearch());
+        CPPUNIT_ASSERT(!it.BinarySearch());
         CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow());
     }
     {
         ScQueryParam param = makeSearchParam( range, descendingCol, 
SC_GREATER, 10 );
         TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, 
param, false );
-        CPPUNIT_ASSERT(it.BinarySearch());
+        CPPUNIT_ASSERT(!it.BinarySearch());
         CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow());
     }
 
     {
-        // Search as ascending but use descending range (=search will not 
work).
+        // Search as ascending but use descending range, will return no match.
         ScQueryParam param = makeSearchParam( range, descendingCol, 
SC_LESS_EQUAL, 1 );
         TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, 
param, false );
-        CPPUNIT_ASSERT(it.BinarySearch());
-        // CPPUNIT_ASSERT_EQUAL(SCROW(10), it.GetRow());
+        CPPUNIT_ASSERT(!it.BinarySearch());
+        CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow());
     }
 
     {
-        // Search as descending but use ascending range (=search will not 
work).
+        // Search as descending but use ascending range, will return no match.
         ScQueryParam param = makeSearchParam( range, ascendingCol, 
SC_GREATER_EQUAL, 9 );
         TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, 
param, false );
-        CPPUNIT_ASSERT(it.BinarySearch());
-        // CPPUNIT_ASSERT_EQUAL(SCROW(10), it.GetRow());
+        CPPUNIT_ASSERT(!it.BinarySearch());
+        CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow());
     }
 
     {
-        // SC_EQUAL with descending is considered an error.
+        // SC_EQUAL with descending is considered an error, will return no 
match.
         ScQueryParam param = makeSearchParam( range, descendingCol, SC_EQUAL, 
9 );
         TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, 
param, false );
-        CPPUNIT_ASSERT(it.BinarySearch());
-        // CPPUNIT_ASSERT_EQUAL(SCROW(10), it.GetRow());
+        CPPUNIT_ASSERT(!it.BinarySearch());
+        CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow());
     }
 
     m_pDoc->DeleteTab(0);
diff --git a/sc/source/core/data/queryiter.cxx 
b/sc/source/core/data/queryiter.cxx
index 73a700f108f8..6f8f6483140a 100644
--- a/sc/source/core/data/queryiter.cxx
+++ b/sc/source/core/data/queryiter.cxx
@@ -291,6 +291,7 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
 
     assert(nTab < rDoc.GetTableCount() && "index out of bounds, FIX IT");
     nCol = maParam.nCol1;
+    nRow = maParam.nRow1;
 
     if (nCol >= rDoc.maTabs[nTab]->GetAllocatedColumnsCount())
         return false;
@@ -309,7 +310,6 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
     bool bFirstStringIgnore = bIgnoreMismatchOnLeadingStrings &&
         !maParam.bHasHeader && bByString;
 
-    nRow = maParam.nRow1;
     if (maParam.bHasHeader)
         ++nRow;
 
@@ -364,7 +364,6 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
     // Bookkeeping values for breaking up the binary search in case the data
     // range isn't strictly sorted.
     size_t nLastInRange = nLo;
-    size_t nFirstLastInRange = nLastInRange;
     double fLastInRangeValue = bAscending ?
         -(::std::numeric_limits<double>::max()) :
             ::std::numeric_limits<double>::max();
@@ -400,6 +399,7 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
     sal_Int32 nRes = 0;
     std::optional<size_t> found;
     bool bDone = false;
+    bool orderBroken = false;
     while (nLo <= nHi && !bDone)
     {
         size_t nMid = (nLo+nHi)/2;
@@ -438,7 +438,7 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
                     else if (fLastInRangeValue >= nCellVal)
                     {
                         // not strictly sorted, continue with GetThis()
-                        nLastInRange = nFirstLastInRange;
+                        orderBroken = true;
                         bDone = true;
                     }
                 }
@@ -457,7 +457,7 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
                     else if (fLastInRangeValue <= nCellVal)
                     {
                         // not strictly sorted, continue with GetThis()
-                        nLastInRange = nFirstLastInRange;
+                        orderBroken = true;
                         bDone = true;
                     }
                 }
@@ -481,7 +481,7 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
                 else if (nTmp > 0)
                 {
                     // not strictly sorted, continue with GetThis()
-                    nLastInRange = nFirstLastInRange;
+                    orderBroken = true;
                     bDone = true;
                 }
             }
@@ -497,7 +497,7 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
                 else if (nTmp < 0)
                 {
                     // not strictly sorted, continue with GetThis()
-                    nLastInRange = nFirstLastInRange;
+                    orderBroken = true;
                     bDone = true;
                 }
             }
@@ -564,18 +564,32 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
         }
     }
 
-    if (found)
+    bool isInRange;
+    if (orderBroken)
+    {
+        // Reset position to the first row in range and force caller
+        // to search from start.
+        nLo = aIndexer.getLowIndex();
+        isInRange = false;
+    }
+    else if (found)
+    {
         nLo = *found;
+        isInRange = true;
+    }
     else
     {
-        // If all hits didn't result in a moving limit there's something
-        // strange, e.g. data range not properly sorted, or only identical
-        // values encountered, which doesn't mean there aren't any others in
-        // between... leave it to GetThis(). The condition for this would be
-        // if (nLastInRange == nFirstLastInRange) nLo = nFirstLastInRange;
-        // Else, in case no exact match was found, we step back for a
-        // subsequent GetThis() to find the last in range. Effectively this is
-        // --nLo with nLastInRange == nLo-1. Both conditions combined yield:
+        // Not nothing was found and the search position is at the start,
+        // then the possible match would need to be before the data range.
+        // In that case return false to force the caller to search from the 
start
+        // and detect this.
+        isInRange = nLo != aIndexer.getLowIndex();
+        // If nothing was found, that is either because there is no value
+        // that would match exactly, or the data range is not properly sorted
+        // and we failed to detect (doing so reliably would require a linear 
scan).
+        // Set the position to the last one that was in matching range (i.e. 
before
+        // where the exact match would be), and leave sorting it out to 
GetThis()
+        // or whatever the caller uses.
         nLo = nLastInRange;
     }
 
@@ -584,7 +598,7 @@ bool ScQueryCellIteratorBase< accessType, queryType 
>::BinarySearch()
     {
         nRow = aCellData.second;
         maCurPos = aIndexer.getPosition(nLo);
-        return true;
+        return isInRange;
     }
     else
     {
@@ -625,6 +639,8 @@ bool ScQueryCellIterator< accessType 
>::FindEqualOrSortedLastInRange( SCCOL& nFo
             maParam.mbRangeLookup = false;
             bFound = GetThis();
         }
+        else // Not sorted properly, or before the range (in which case 
GetFirst() will be simple).
+            bFound = GetFirst();
     }
     else
     {

Reply via email to