sc/inc/queryiter.hxx | 32 ++++++-- sc/source/core/data/queryiter.cxx | 142 ++++++++++++++++++++++++++++--------- sc/source/core/tool/interpr1.cxx | 71 +++++++++++++----- sc/source/core/tool/rangecache.cxx | 1 4 files changed, 187 insertions(+), 59 deletions(-)
New commits: commit e98a22b4e4f9c5a9249e634dd0489d30d9de2bb1 Author: Luboš Luňák <l.lu...@collabora.com> AuthorDate: Fri May 6 20:50:32 2022 +0200 Commit: Luboš Luňák <l.lu...@collabora.com> CommitDate: Wed May 11 11:48:37 2022 +0200 use ScSortedRangeCache also for generic queries This enables use for e.g. VLOOKUP or COUNTIFS (as COUNTIFS does not use the specialized code for COUNTIF). Change-Id: Idad7503750d421f3f1c9ac34dfe95393fa3ead15 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/134124 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lu...@collabora.com> diff --git a/sc/inc/queryiter.hxx b/sc/inc/queryiter.hxx index 2b542233d7cd..c27aadf92c68 100644 --- a/sc/inc/queryiter.hxx +++ b/sc/inc/queryiter.hxx @@ -60,7 +60,8 @@ template<> class ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::Direct > { protected: - ScQueryCellIteratorAccessSpecific( ScDocument& rDocument, const ScQueryParam& rParam ); + ScQueryCellIteratorAccessSpecific( ScDocument& rDocument, ScInterpreterContext& rContext, + const ScQueryParam& rParam ); // Initialize position for new column. void InitPos(); // Increase position (next row). @@ -74,6 +75,7 @@ protected: PositionType maCurPos; ScQueryParam maParam; ScDocument& rDoc; + ScInterpreterContext& mrContext; SCTAB nTab; SCCOL nCol; SCROW nRow; @@ -92,8 +94,10 @@ class ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache public: void SetSortedRangeCache( const ScSortedRangeCache& cache ); protected: - ScQueryCellIteratorAccessSpecific( ScDocument& rDocument, const ScQueryParam& rParam ); - void InitPos(); + ScQueryCellIteratorAccessSpecific( ScDocument& rDocument, ScInterpreterContext& rContext, + const ScQueryParam& rParam ); + void InitPosStart(); + void InitPosFinish( SCROW beforeRow, SCROW lastRow ); void IncPos(); void IncBlock() { IncPos(); } // Cannot skip entire block, not linear. @@ -102,19 +106,19 @@ protected: PositionType maCurPos; ScQueryParam maParam; ScDocument& rDoc; + ScInterpreterContext& mrContext; SCTAB nTab; SCCOL nCol; SCROW nRow; const ScSortedRangeCache* sortedCache; size_t sortedCachePos; + size_t sortedCachePosLast; class SortedCacheIndexer; typedef std::pair<ScRefCellValue, SCROW> BinarySearchCellType; SortedCacheIndexer MakeBinarySearchIndexer(const sc::CellStoreType& rCells, SCROW nStartRow, SCROW nEndRow); -private: - void UpdatePos(); }; // Data and functionality for specific types of query. @@ -150,7 +154,6 @@ protected: nTestEqualConditionFulfilled = nTestEqualConditionEnabled | nTestEqualConditionMatched }; - ScInterpreterContext& mrContext; sal_uInt8 nStopOnMismatch; sal_uInt8 nTestEqualCondition; bool bAdvanceQuery; @@ -160,16 +163,18 @@ protected: using AccessBase::maCurPos; using AccessBase::maParam; using AccessBase::rDoc; + using AccessBase::mrContext; using AccessBase::nTab; using AccessBase::nCol; using AccessBase::nRow; - using AccessBase::InitPos; using AccessBase::IncPos; using AccessBase::IncBlock; using typename AccessBase::BinarySearchCellType; using AccessBase::MakeBinarySearchIndexer; using TypeBase::HandleItemFound; + void InitPos(); + // The actual query function. It will call HandleItemFound() for any matching type // and return if HandleItemFound() returns true. void PerformQuery(); @@ -248,6 +253,7 @@ class ScQueryCellIterator // Make base members directly visible here (templated bases need 'this->'). using Base::maParam; using Base::rDoc; + using Base::mrContext; using Base::nTab; using Base::nCol; using Base::nRow; @@ -303,6 +309,18 @@ public: typedef ScQueryCellIterator< ScQueryCellIteratorAccess::Direct > ScQueryCellIteratorDirect; +class ScQueryCellIteratorSortedCache + : public ScQueryCellIterator< ScQueryCellIteratorAccess::SortedCache > +{ + typedef ScQueryCellIterator< ScQueryCellIteratorAccess::SortedCache > Base; +public: + ScQueryCellIteratorSortedCache(ScDocument& rDocument, ScInterpreterContext& rContext, + SCTAB nTable, const ScQueryParam& aParam, bool bMod) + : Base( rDocument, rContext, nTable, aParam, bMod ) {} + // Returns true if this iterator can be used for the given query. + static bool CanBeUsed(const ScQueryParam& aParam); +}; + template<> class ScQueryCellIteratorTypeSpecific< ScQueryCellIteratorType::CountIf > diff --git a/sc/source/core/data/queryiter.cxx b/sc/source/core/data/queryiter.cxx index 184e86ada37a..438128ef5f47 100644 --- a/sc/source/core/data/queryiter.cxx +++ b/sc/source/core/data/queryiter.cxx @@ -60,8 +60,7 @@ template< ScQueryCellIteratorAccess accessType, ScQueryCellIteratorType queryType > ScQueryCellIteratorBase< accessType, queryType >::ScQueryCellIteratorBase(ScDocument& rDocument, ScInterpreterContext& rContext, SCTAB nTable, const ScQueryParam& rParam, bool bMod ) - : AccessBase( rDocument, rParam ) - , mrContext( rContext ) + : AccessBase( rDocument, rContext, rParam ) , nStopOnMismatch( nStopOnMismatchDisabled ) , nTestEqualCondition( nTestEqualConditionDisabled ) , bAdvanceQuery( false ) @@ -234,6 +233,47 @@ void ScQueryCellIteratorBase< accessType, queryType >::PerformQuery() } } +template< ScQueryCellIteratorAccess accessType, ScQueryCellIteratorType queryType > +void ScQueryCellIteratorBase< accessType, queryType >::InitPos() +{ + if constexpr( accessType != ScQueryCellIteratorAccess::SortedCache ) + AccessBase::InitPos(); + else + { + // This should be all in AccessBase::InitPos(), but that one can't call + // BinarySearch(), so do it this way instead. + AccessBase::InitPosStart(); + ScQueryOp& op = maParam.GetEntry(0).eOp; + SCROW beforeRow = -1; + SCROW lastRow = -1; + if( op == SC_EQUAL ) + { + if( BinarySearch( nCol )) + { + // BinarySearch() searches for the last item that matches. Now we + // also need to find the first item where to start. Find the last + // non-matching position using SC_LESS and the start position + // is the one after it. + lastRow = nRow; + ScQueryOp saveOp = op; + op = SC_LESS; + if( BinarySearch( nCol )) + beforeRow = nRow; + // If BinarySearch() returns false, there was no match, which means + // there's no value smaller. In that case BinarySearch() has set + // the position to the first row in the range. + op = saveOp; // back to SC_EQUAL + } + } + else + { // The range is from the start up to and including the last matching. + if( BinarySearch( nCol )) + lastRow = nRow; + } + AccessBase::InitPosFinish( beforeRow, lastRow ); + } +} + template< ScQueryCellIteratorAccess accessType, ScQueryCellIteratorType queryType > void ScQueryCellIteratorBase< accessType, queryType >::AdvanceQueryParamEntryField() { @@ -799,9 +839,11 @@ bool ScQueryCellIterator< accessType >::FindEqualOrSortedLastInRange( SCCOL& nFo // Direct linear cell access using mdds. ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::Direct > - ::ScQueryCellIteratorAccessSpecific( ScDocument& rDocument, const ScQueryParam& rParam ) + ::ScQueryCellIteratorAccessSpecific( ScDocument& rDocument, + ScInterpreterContext& rContext, const ScQueryParam& rParam ) : maParam( rParam ) , rDoc( rDocument ) + , mrContext( rContext ) { } @@ -1008,9 +1050,11 @@ ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::Direct >::MakeBina // Sorted access using ScSortedRangeCache. ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache > - ::ScQueryCellIteratorAccessSpecific( ScDocument& rDocument, const ScQueryParam& rParam ) + ::ScQueryCellIteratorAccessSpecific( ScDocument& rDocument, + ScInterpreterContext& rContext, const ScQueryParam& rParam ) : maParam( rParam ) , rDoc( rDocument ) + , mrContext( rContext ) { } @@ -1020,31 +1064,54 @@ void ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache > sortedCache = &cache; } -void ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache >::InitPos() +// The idea in iterating using the sorted cache is that the iteration is instead done +// over indexes of the sorted cache (which is a stable sort of the cell contents) in the range +// that fits the query condition and then that is mapped to rows. This will result in iterating +// over only matching rows in their sorted order (and for equal rows in their row order). +void ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache >::InitPosStart() { - assert(sortedCache->getRange().aStart.Row() == maParam.nRow1); - assert(!(maParam.bHasHeader && maParam.bByRow)); - sortedCachePos = 0; - UpdatePos(); + ScRange aSortedRangeRange( nCol, maParam.nRow1, nTab, nCol, maParam.nRow2, nTab ); + ScQueryOp& op = maParam.GetEntry(0).eOp; + // We want all matching values first in the sort order, + bool descending = op == SC_GREATER || op == SC_GREATER_EQUAL; + SetSortedRangeCache( rDoc.GetSortedRangeCache( aSortedRangeRange, descending, &mrContext )); + // InitPosFinish() needs to be called after this, ScQueryCellIteratorBase::InitPos() + // will handle that } -void ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache >::IncPos() +void ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache >::InitPosFinish( + SCROW beforeRow, SCROW lastRow ) { - ++sortedCachePos; - UpdatePos(); + const ScColumn& rCol = rDoc.maTabs[nTab]->CreateColumnIfNotExists(nCol); + if(lastRow >= 0) + { + sortedCachePos = beforeRow >= 0 ? sortedCache->indexForRow(beforeRow) + 1 : 0; + sortedCachePosLast = sortedCache->indexForRow(lastRow); + if(sortedCachePos <= sortedCachePosLast) + { + nRow = sortedCache->rowForIndex(sortedCachePos); + maCurPos = rCol.maCells.position(nRow); + return; + } + } + // No rows, set to end. + sortedCachePos = sortedCachePosLast = 0; + maCurPos.first = rCol.maCells.end(); + maCurPos.second = 0; } -void ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache >::UpdatePos() +void ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache >::IncPos() { - // TODO optimize? - const ScColumn& rCol = rDoc.maTabs[nTab]->CreateColumnIfNotExists(nCol); - if(sortedCachePos < sortedCache->size()) + const ScColumn& rCol = rDoc.maTabs[nTab]->aCol[nCol]; + if(sortedCachePos < sortedCachePosLast) { + ++sortedCachePos; nRow = sortedCache->rowForIndex(sortedCachePos); maCurPos = rCol.maCells.position(nRow); } else { + // This will make PerformQuery() go to next column. maCurPos.first = rCol.maCells.end(); maCurPos.second = 0; } @@ -1125,6 +1192,25 @@ ScQueryCellIteratorAccessSpecific< ScQueryCellIteratorAccess::SortedCache >::Mak return SortedCacheIndexer(rCells, nStartRow, nEndRow, sortedCache); } +static bool CanBeUsedForSorterCache(const ScQueryParam& rParam) +{ + if(!rParam.GetEntry(0).bDoQuery || rParam.GetEntry(1).bDoQuery + || rParam.GetEntry(0).GetQueryItems().size() != 1 ) + return false; + if(rParam.eSearchType != utl::SearchParam::SearchType::Normal) + return false; + if(rParam.GetEntry(0).GetQueryItem().meType != ScQueryEntry::ByValue) + return false; + if(!rParam.bByRow) + return false; + if(rParam.bHasHeader) + return false; + if(rParam.GetEntry(0).eOp != SC_LESS && rParam.GetEntry(0).eOp != SC_LESS_EQUAL + && rParam.GetEntry(0).eOp != SC_GREATER && rParam.GetEntry(0).eOp != SC_GREATER_EQUAL + && rParam.GetEntry(0).eOp != SC_EQUAL) + return false; + return true; +} // Generic query implementation. @@ -1162,6 +1248,11 @@ bool ScQueryCellIterator< accessType >::GetNext() return GetThis(); } +bool ScQueryCellIteratorSortedCache::CanBeUsed(const ScQueryParam& rParam) +{ + return CanBeUsedForSorterCache(rParam); +} + // Countifs implementation. bool ScQueryCellIteratorTypeSpecific< ScQueryCellIteratorType::CountIf >::HandleItemFound() @@ -1188,22 +1279,7 @@ sal_uInt64 ScCountIfCellIterator< accessType >::GetCount() bool ScCountIfCellIteratorSortedCache::CanBeUsed(const ScQueryParam& rParam) { - if(!rParam.GetEntry(0).bDoQuery || rParam.GetEntry(1).bDoQuery - || rParam.GetEntry(0).GetQueryItems().size() != 1 ) - return false; - if(rParam.eSearchType != utl::SearchParam::SearchType::Normal) - return false; - if(rParam.GetEntry(0).GetQueryItem().meType != ScQueryEntry::ByValue) - return false; - if(!rParam.bByRow) - return false; - if(rParam.bHasHeader) - return false; - if(rParam.GetEntry(0).eOp != SC_LESS && rParam.GetEntry(0).eOp != SC_LESS_EQUAL - && rParam.GetEntry(0).eOp != SC_GREATER && rParam.GetEntry(0).eOp != SC_GREATER_EQUAL - && rParam.GetEntry(0).eOp != SC_EQUAL) - return false; - return true; + return CanBeUsedForSorterCache(rParam); } template<> @@ -1272,11 +1348,13 @@ sal_uInt64 ScCountIfCellIterator< ScQueryCellIteratorAccess::SortedCache >::GetC } template class ScQueryCellIterator< ScQueryCellIteratorAccess::Direct >; +template class ScQueryCellIterator< ScQueryCellIteratorAccess::SortedCache >; template class ScCountIfCellIterator< ScQueryCellIteratorAccess::Direct >; template class ScCountIfCellIterator< ScQueryCellIteratorAccess::SortedCache >; // gcc for some reason needs these too template class ScQueryCellIteratorBase< ScQueryCellIteratorAccess::Direct, ScQueryCellIteratorType::Generic >; +template class ScQueryCellIteratorBase< ScQueryCellIteratorAccess::SortedCache, ScQueryCellIteratorType::Generic >; template class ScQueryCellIteratorBase< ScQueryCellIteratorAccess::Direct, ScQueryCellIteratorType::CountIf >; template class ScQueryCellIteratorBase< ScQueryCellIteratorAccess::SortedCache, ScQueryCellIteratorType::CountIf >; diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index ec67766a26d1..92b4de1b3443 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -6150,17 +6150,35 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf } else { - ScQueryCellIteratorDirect aCellIter(mrDoc, mrContext, nTab1, rParam, false); - // Increment Entry.nField in iterator when switching to next column. - aCellIter.SetAdvanceQueryParamEntryField( true ); - if ( aCellIter.GetFirst() ) + if( ScQueryCellIteratorSortedCache::CanBeUsed( rParam )) { - do + ScQueryCellIteratorSortedCache aCellIter(mrDoc, mrContext, nTab1, rParam, false); + // Increment Entry.nField in iterator when switching to next column. + aCellIter.SetAdvanceQueryParamEntryField( true ); + if ( aCellIter.GetFirst() ) { - size_t nC = aCellIter.GetCol() + nColDiff; - size_t nR = aCellIter.GetRow() + nRowDiff; - ++vConditions[nC * nDimensionRows + nR]; - } while ( aCellIter.GetNext() ); + do + { + size_t nC = aCellIter.GetCol() + nColDiff; + size_t nR = aCellIter.GetRow() + nRowDiff; + ++vConditions[nC * nDimensionRows + nR]; + } while ( aCellIter.GetNext() ); + } + } + else + { + ScQueryCellIteratorDirect aCellIter(mrDoc, mrContext, nTab1, rParam, false); + // Increment Entry.nField in iterator when switching to next column. + aCellIter.SetAdvanceQueryParamEntryField( true ); + if ( aCellIter.GetFirst() ) + { + do + { + size_t nC = aCellIter.GetCol() + nColDiff; + size_t nR = aCellIter.GetRow() + nRowDiff; + ++vConditions[nC * nDimensionRows + nR]; + } while ( aCellIter.GetNext() ); + } } } if (nRefArrayPos != std::numeric_limits<size_t>::max()) @@ -9951,28 +9969,43 @@ utl::SearchParam::SearchType ScInterpreter::DetectSearchType( std::u16string_vie static bool lcl_LookupQuery( ScAddress & o_rResultPos, ScDocument& rDoc, ScInterpreterContext& rContext, const ScQueryParam & rParam, const ScQueryEntry & rEntry ) { - bool bFound = false; - ScQueryCellIteratorDirect aCellIter( rDoc, rContext, rParam.nTab, rParam, false); if (rEntry.eOp != SC_EQUAL) { // range lookup <= or >= SCCOL nCol; SCROW nRow; - bFound = aCellIter.FindEqualOrSortedLastInRange( nCol, nRow); - if (bFound) + ScQueryCellIteratorDirect aCellIter( rDoc, rContext, rParam.nTab, rParam, false); + if( aCellIter.FindEqualOrSortedLastInRange( nCol, nRow )) { o_rResultPos.SetCol( nCol); o_rResultPos.SetRow( nRow); + return true; } } - else if (aCellIter.GetFirst()) + else // EQUAL { - // EQUAL - bFound = true; - o_rResultPos.SetCol( aCellIter.GetCol()); - o_rResultPos.SetRow( aCellIter.GetRow()); + if( ScQueryCellIteratorSortedCache::CanBeUsed( rParam )) + { + ScQueryCellIteratorSortedCache aCellIter( rDoc, rContext, rParam.nTab, rParam, false); + if (aCellIter.GetFirst()) + { + o_rResultPos.SetCol( aCellIter.GetCol()); + o_rResultPos.SetRow( aCellIter.GetRow()); + return true; + } + } + else + { + ScQueryCellIteratorDirect aCellIter( rDoc, rContext, rParam.nTab, rParam, false); + if (aCellIter.GetFirst()) + { + o_rResultPos.SetCol( aCellIter.GetCol()); + o_rResultPos.SetRow( aCellIter.GetRow()); + return true; + } + } } - return bFound; + return false; } // tdf#121052: diff --git a/sc/source/core/tool/rangecache.cxx b/sc/source/core/tool/rangecache.cxx index facd906a2f3e..0bfeb971969c 100644 --- a/sc/source/core/tool/rangecache.cxx +++ b/sc/source/core/tool/rangecache.cxx @@ -35,7 +35,6 @@ ScSortedRangeCache::ScSortedRangeCache(ScDocument* pDoc, const ScRange& rRange, assert(maRange.aStart.Tab() == maRange.aEnd.Tab()); SCTAB nTab = maRange.aStart.Tab(); SCTAB nCol = maRange.aStart.Col(); - mSortedRows.reserve(maRange.aEnd.Row() - maRange.aStart.Row() + 1); for (SCROW nRow = maRange.aStart.Row(); nRow <= maRange.aEnd.Row(); ++nRow) { ScRefCellValue cell(pDoc->GetRefCellValue(ScAddress(nCol, nRow, nTab)));