sc/inc/queryiter.hxx | 10 +++++-- sc/qa/unit/ucalc_sort.cxx | 32 ++++++++++++------------- sc/source/core/data/queryiter.cxx | 48 +++++++++++++++++++++++++------------- 3 files changed, 55 insertions(+), 35 deletions(-)
New commits: commit 1d8c31ea2555b339d5a4bc5449d40d546045435b Author: Luboš Luňák <l.lu...@collabora.com> AuthorDate: Fri May 6 11:53:09 2022 +0200 Commit: Luboš Luňák <l.lu...@collabora.com> CommitDate: Tue May 10 16:25:21 2022 +0200 sort out query iterator's BinarySearch() corner cases handling If the code detects the range is not actually properly sorted, set position to the first row and return false to force linear search. If the searched for value belongs before the first item, do the same, in which case the linear search should see that it is this case (before this case nRow was set to the first row regardless of where the searched for value belonged). Change-Id: I8ff346783d93d74ff409b19aea394e202885647d Reviewed-on: https://gerrit.libreoffice.org/c/core/+/134100 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lu...@collabora.com> diff --git a/sc/inc/queryiter.hxx b/sc/inc/queryiter.hxx index 847241124a2e..f151bba90ab6 100644 --- a/sc/inc/queryiter.hxx +++ b/sc/inc/queryiter.hxx @@ -141,9 +141,13 @@ protected: /* Only works if no regular expression is involved, only searches for rows in one column, and only the first query entry is considered with simple conditions SC_LESS,SC_LESS_EQUAL, - SC_EQUAL (sorted ascending) or SC_GREATER,SC_GREATER_EQUAL (sorted descending). Delivers - a starting point, continue with e.g. GetThis() and GetNext() afterwards. Introduced - for FindEqualOrSortedLastInRange(). */ + SC_EQUAL (sorted ascending) or SC_GREATER,SC_GREATER_EQUAL (sorted descending). It + delivers a starting point set to nRow, i.e. the last row that either matches the searched + for value, or the last row that matches the condition. Continue with e.g. GetThis() and + GetNext() afterwards. Returns false if the searched for value is not in the search range + or if the range is not properly sorted, with nRow in that case set to the first row or after + the last row. In that case use GetFirst(). + */ bool BinarySearch(); public: diff --git a/sc/qa/unit/ucalc_sort.cxx b/sc/qa/unit/ucalc_sort.cxx index e99b0a38cec3..b381d6195b04 100644 --- a/sc/qa/unit/ucalc_sort.cxx +++ b/sc/qa/unit/ucalc_sort.cxx @@ -2176,26 +2176,26 @@ void TestSort::testQueryBinarySearch() } { - // All values are larger than 0, so this should be an error. + // All values are larger than 0, so there should be no match. m_pDoc->SetFormula( formulaAddress, "=MATCH(0;" + ascendingRangeName + ";1)", formula::FormulaGrammar::GRAM_NATIVE_UI); CPPUNIT_ASSERT_EQUAL( FormulaError::NotAvailable, m_pDoc->GetErrCode( formulaAddress )); ScQueryParam param = makeSearchParam( range, ascendingCol, SC_LESS_EQUAL, 0 ); TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, param, false ); - CPPUNIT_ASSERT(it.BinarySearch()); + CPPUNIT_ASSERT(!it.BinarySearch()); CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow()); } { ScQueryParam param = makeSearchParam( range, ascendingCol, SC_LESS, 0 ); TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, param, false ); - CPPUNIT_ASSERT(it.BinarySearch()); + CPPUNIT_ASSERT(!it.BinarySearch()); CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow()); } { ScQueryParam param = makeSearchParam( range, ascendingCol, SC_EQUAL, 0 ); TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, param, false ); - CPPUNIT_ASSERT(it.BinarySearch()); + CPPUNIT_ASSERT(!it.BinarySearch()); CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow()); } @@ -2242,45 +2242,45 @@ void TestSort::testQueryBinarySearch() } { - // Descending, all values are smaller than 10, so this should be an error. + // Descending, all values are smaller than 10, so there should be no match. m_pDoc->SetFormula( formulaAddress, "=MATCH(10;" + descendingRangeName + ";-1)", formula::FormulaGrammar::GRAM_NATIVE_UI); CPPUNIT_ASSERT_EQUAL( FormulaError::NotAvailable, m_pDoc->GetErrCode( formulaAddress )); ScQueryParam param = makeSearchParam( range, descendingCol, SC_GREATER_EQUAL, 10 ); TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, param, false ); - CPPUNIT_ASSERT(it.BinarySearch()); + CPPUNIT_ASSERT(!it.BinarySearch()); CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow()); } { ScQueryParam param = makeSearchParam( range, descendingCol, SC_GREATER, 10 ); TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, param, false ); - CPPUNIT_ASSERT(it.BinarySearch()); + CPPUNIT_ASSERT(!it.BinarySearch()); CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow()); } { - // Search as ascending but use descending range (=search will not work). + // Search as ascending but use descending range, will return no match. ScQueryParam param = makeSearchParam( range, descendingCol, SC_LESS_EQUAL, 1 ); TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, param, false ); - CPPUNIT_ASSERT(it.BinarySearch()); - // CPPUNIT_ASSERT_EQUAL(SCROW(10), it.GetRow()); + CPPUNIT_ASSERT(!it.BinarySearch()); + CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow()); } { - // Search as descending but use ascending range (=search will not work). + // Search as descending but use ascending range, will return no match. ScQueryParam param = makeSearchParam( range, ascendingCol, SC_GREATER_EQUAL, 9 ); TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, param, false ); - CPPUNIT_ASSERT(it.BinarySearch()); - // CPPUNIT_ASSERT_EQUAL(SCROW(10), it.GetRow()); + CPPUNIT_ASSERT(!it.BinarySearch()); + CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow()); } { - // SC_EQUAL with descending is considered an error. + // SC_EQUAL with descending is considered an error, will return no match. ScQueryParam param = makeSearchParam( range, descendingCol, SC_EQUAL, 9 ); TestQueryIterator it( *m_pDoc, m_pDoc->GetNonThreadedContext(), 0, param, false ); - CPPUNIT_ASSERT(it.BinarySearch()); - // CPPUNIT_ASSERT_EQUAL(SCROW(10), it.GetRow()); + CPPUNIT_ASSERT(!it.BinarySearch()); + CPPUNIT_ASSERT_EQUAL(SCROW(0), it.GetRow()); } m_pDoc->DeleteTab(0); diff --git a/sc/source/core/data/queryiter.cxx b/sc/source/core/data/queryiter.cxx index 73a700f108f8..6f8f6483140a 100644 --- a/sc/source/core/data/queryiter.cxx +++ b/sc/source/core/data/queryiter.cxx @@ -291,6 +291,7 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() assert(nTab < rDoc.GetTableCount() && "index out of bounds, FIX IT"); nCol = maParam.nCol1; + nRow = maParam.nRow1; if (nCol >= rDoc.maTabs[nTab]->GetAllocatedColumnsCount()) return false; @@ -309,7 +310,6 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() bool bFirstStringIgnore = bIgnoreMismatchOnLeadingStrings && !maParam.bHasHeader && bByString; - nRow = maParam.nRow1; if (maParam.bHasHeader) ++nRow; @@ -364,7 +364,6 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() // Bookkeeping values for breaking up the binary search in case the data // range isn't strictly sorted. size_t nLastInRange = nLo; - size_t nFirstLastInRange = nLastInRange; double fLastInRangeValue = bAscending ? -(::std::numeric_limits<double>::max()) : ::std::numeric_limits<double>::max(); @@ -400,6 +399,7 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() sal_Int32 nRes = 0; std::optional<size_t> found; bool bDone = false; + bool orderBroken = false; while (nLo <= nHi && !bDone) { size_t nMid = (nLo+nHi)/2; @@ -438,7 +438,7 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() else if (fLastInRangeValue >= nCellVal) { // not strictly sorted, continue with GetThis() - nLastInRange = nFirstLastInRange; + orderBroken = true; bDone = true; } } @@ -457,7 +457,7 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() else if (fLastInRangeValue <= nCellVal) { // not strictly sorted, continue with GetThis() - nLastInRange = nFirstLastInRange; + orderBroken = true; bDone = true; } } @@ -481,7 +481,7 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() else if (nTmp > 0) { // not strictly sorted, continue with GetThis() - nLastInRange = nFirstLastInRange; + orderBroken = true; bDone = true; } } @@ -497,7 +497,7 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() else if (nTmp < 0) { // not strictly sorted, continue with GetThis() - nLastInRange = nFirstLastInRange; + orderBroken = true; bDone = true; } } @@ -564,18 +564,32 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() } } - if (found) + bool isInRange; + if (orderBroken) + { + // Reset position to the first row in range and force caller + // to search from start. + nLo = aIndexer.getLowIndex(); + isInRange = false; + } + else if (found) + { nLo = *found; + isInRange = true; + } else { - // If all hits didn't result in a moving limit there's something - // strange, e.g. data range not properly sorted, or only identical - // values encountered, which doesn't mean there aren't any others in - // between... leave it to GetThis(). The condition for this would be - // if (nLastInRange == nFirstLastInRange) nLo = nFirstLastInRange; - // Else, in case no exact match was found, we step back for a - // subsequent GetThis() to find the last in range. Effectively this is - // --nLo with nLastInRange == nLo-1. Both conditions combined yield: + // Not nothing was found and the search position is at the start, + // then the possible match would need to be before the data range. + // In that case return false to force the caller to search from the start + // and detect this. + isInRange = nLo != aIndexer.getLowIndex(); + // If nothing was found, that is either because there is no value + // that would match exactly, or the data range is not properly sorted + // and we failed to detect (doing so reliably would require a linear scan). + // Set the position to the last one that was in matching range (i.e. before + // where the exact match would be), and leave sorting it out to GetThis() + // or whatever the caller uses. nLo = nLastInRange; } @@ -584,7 +598,7 @@ bool ScQueryCellIteratorBase< accessType, queryType >::BinarySearch() { nRow = aCellData.second; maCurPos = aIndexer.getPosition(nLo); - return true; + return isInRange; } else { @@ -625,6 +639,8 @@ bool ScQueryCellIterator< accessType >::FindEqualOrSortedLastInRange( SCCOL& nFo maParam.mbRangeLookup = false; bFound = GetThis(); } + else // Not sorted properly, or before the range (in which case GetFirst() will be simple). + bFound = GetFirst(); } else {