sc/inc/cellform.hxx                    |   13 +--
 sc/inc/cellvalue.hxx                   |    2 
 sc/inc/queryevaluator.hxx              |    4 -
 sc/source/core/data/cellvalue.cxx      |   21 +++++
 sc/source/core/data/column3.cxx        |    2 
 sc/source/core/data/queryevaluator.cxx |  126 ++++++++++++++++++---------------
 sc/source/core/tool/cellform.cxx       |   63 +++++++++++-----
 sc/source/core/tool/interpr4.cxx       |    2 
 sc/source/core/tool/rangecache.cxx     |    5 -
 9 files changed, 150 insertions(+), 88 deletions(-)

New commits:
commit a958e09409b94b97c0d0aa67dd605d321dc42615
Author:     Noel Grandin <noel.gran...@collabora.co.uk>
AuthorDate: Wed Oct 23 16:22:14 2024 +0200
Commit:     Noel Grandin <noel.gran...@collabora.co.uk>
CommitDate: Fri Oct 25 08:20:51 2024 +0200

    speedup saving large XLS file with lots of query formula
    
    make more use of svl::SharedString instead of
    unnecessarily creating OUString that then need to be
    interned. We were spending all our time inside the
    the SharedStringPool::intern function.
    
    20s to 12s
    
    Change-Id: I344f0acfd462fdb6d78e392881f066a56844fa94
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/175538
    Tested-by: Jenkins
    Reviewed-by: Caolán McNamara <caolan.mcnam...@collabora.com>
    Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk>

diff --git a/sc/inc/cellform.hxx b/sc/inc/cellform.hxx
index a4a985e2d9cc..f1efb330746e 100644
--- a/sc/inc/cellform.hxx
+++ b/sc/inc/cellform.hxx
@@ -28,6 +28,7 @@ class ScAddress;
 class ScDocument;
 struct ScInterpreterContext;
 struct ScRefCellValue;
+namespace svl { class SharedStringPool; }
 
 class SC_DLLPUBLIC ScCellFormat
 {
@@ -43,18 +44,14 @@ public:
         const Color** ppColor, ScInterpreterContext* pContext, bool bNullVals 
= true,
         bool bFormula  = false );
 
-    // Note that if pShared is set and a value is returned that way, the 
returned OUString is empty.
-    static OUString GetInputString(
+    static svl::SharedString GetInputSharedString(
         const ScRefCellValue& rCell, sal_uInt32 nFormat, ScInterpreterContext* 
pContext,
-        const ScDocument& rDoc, const svl::SharedString** pShared = nullptr, 
bool bFiltering = false,
-        bool bForceSystemLocale = false );
+        const ScDocument& rDoc, svl::SharedStringPool& rStrPool,
+        bool bFiltering = false, bool bForceSystemLocale = false );
 
     static OUString GetInputString(
         const ScRefCellValue& rCell, sal_uInt32 nFormat, ScInterpreterContext* 
pContext,
-        const ScDocument& rDoc, bool bFiltering, bool bForceSystemLocale = 
false )
-    {
-        return GetInputString( rCell, nFormat, pContext, rDoc, nullptr, 
bFiltering, bForceSystemLocale );
-    }
+        const ScDocument& rDoc, bool bFiltering = false, bool 
bForceSystemLocale = false );
 
     static OUString GetOutputString(
         ScDocument& rDoc, const ScAddress& rPos, const ScRefCellValue& rCell );
diff --git a/sc/inc/cellvalue.hxx b/sc/inc/cellvalue.hxx
index d5a29c607c90..531f477acb2e 100644
--- a/sc/inc/cellvalue.hxx
+++ b/sc/inc/cellvalue.hxx
@@ -22,6 +22,7 @@ struct ScRefCellValue;
 namespace sc {
 struct ColumnBlockPosition;
 }
+namespace svl { class SharedStringPool; }
 
 /**
  * Store arbitrary cell value of any kind.  It only stores cell value and
@@ -174,6 +175,7 @@ public:
      *          ScEditUtil::GetString().
      */
     SC_DLLPUBLIC OUString getString( const ScDocument* pDoc ) const;
+    SC_DLLPUBLIC svl::SharedString getSharedString( const ScDocument* pDoc, 
svl::SharedStringPool& rStrPool ) const;
 
     /**
      * Retrieve a string value without modifying the states of any objects in
diff --git a/sc/inc/queryevaluator.hxx b/sc/inc/queryevaluator.hxx
index 6d3012141db8..a2cbc0848a3b 100644
--- a/sc/inc/queryevaluator.hxx
+++ b/sc/inc/queryevaluator.hxx
@@ -118,8 +118,8 @@ public:
                                const ScRefCellValue& rCell);
     static bool isQueryByString(ScQueryOp eOp, ScQueryEntry::QueryType eType,
                                 const ScRefCellValue& rCell);
-    OUString getCellString(const ScRefCellValue& rCell, SCROW nRow, SCCOL nCol,
-                           const svl::SharedString** sharedString);
+    OUString getCellString(const ScRefCellValue& rCell, SCROW nRow, SCCOL 
nCol);
+    svl::SharedString getCellSharedString(const ScRefCellValue& rCell, SCROW 
nRow, SCCOL nCol);
     static bool isMatchWholeCell(const ScDocument& rDoc, ScQueryOp eOp);
 };
 
diff --git a/sc/source/core/data/cellvalue.cxx 
b/sc/source/core/data/cellvalue.cxx
index 4330ea972992..2d51b57a827d 100644
--- a/sc/source/core/data/cellvalue.cxx
+++ b/sc/source/core/data/cellvalue.cxx
@@ -19,6 +19,7 @@
 #include <formula/token.hxx>
 #include <formula/errorcodes.hxx>
 #include <svl/sharedstring.hxx>
+#include <svl/sharedstringpool.hxx>
 
 namespace {
 
@@ -659,6 +660,26 @@ OUString ScRefCellValue::getString( const ScDocument* pDoc 
) const
     return getStringImpl(*this, pDoc);
 }
 
+svl::SharedString ScRefCellValue::getSharedString( const ScDocument* pDoc, 
svl::SharedStringPool& rStrPool ) const
+{
+    switch (getType())
+    {
+        case CELLTYPE_VALUE:
+            return rStrPool.intern(OUString::number(getDouble()));
+        case CELLTYPE_STRING:
+            return *getSharedString();
+        case CELLTYPE_EDIT:
+            if (auto pEditText = getEditText())
+                return rStrPool.intern(ScEditUtil::GetString(*pEditText, 
pDoc));
+            break;
+        case CELLTYPE_FORMULA:
+            return getFormula()->GetString();
+        default:
+            ;
+    }
+    return svl::SharedString::getEmptyString();
+}
+
 OUString ScRefCellValue::getRawString( const ScDocument& rDoc ) const
 {
     return getRawStringImpl(*this, rDoc);
diff --git a/sc/source/core/data/column3.cxx b/sc/source/core/data/column3.cxx
index 25f943f68955..9326fbab4d1f 100644
--- a/sc/source/core/data/column3.cxx
+++ b/sc/source/core/data/column3.cxx
@@ -3163,7 +3163,7 @@ double* ScColumn::GetValueCell( SCROW nRow )
 OUString ScColumn::GetInputString( const ScRefCellValue& aCell, SCROW nRow, 
bool bForceSystemLocale ) const
 {
     sal_uInt32 nFormat = GetNumberFormat(GetDoc().GetNonThreadedContext(), 
nRow);
-    return ScCellFormat::GetInputString(aCell, nFormat, nullptr, GetDoc(), 
nullptr, false, bForceSystemLocale);
+    return ScCellFormat::GetInputString(aCell, nFormat, nullptr, GetDoc(), 
false, bForceSystemLocale);
 }
 
 double ScColumn::GetValue( SCROW nRow ) const
diff --git a/sc/source/core/data/queryevaluator.cxx 
b/sc/source/core/data/queryevaluator.cxx
index 060716e08d92..4d3a02575066 100644
--- a/sc/source/core/data/queryevaluator.cxx
+++ b/sc/source/core/data/queryevaluator.cxx
@@ -295,8 +295,7 @@ std::pair<bool, bool> 
ScQueryEvaluator::compareByValue(const ScRefCellValue& rCe
     return std::pair<bool, bool>(bOk, bTestEqual);
 }
 
-OUString ScQueryEvaluator::getCellString(const ScRefCellValue& rCell, SCROW 
nRow, SCCOL nCol,
-                                         const svl::SharedString** 
sharedString)
+OUString ScQueryEvaluator::getCellString(const ScRefCellValue& rCell, SCROW 
nRow, SCCOL nCol)
 {
     if (rCell.getType() == CELLTYPE_FORMULA
         && rCell.getFormula()->GetErrCode() != FormulaError::NONE)
@@ -311,20 +310,50 @@ OUString ScQueryEvaluator::getCellString(const 
ScRefCellValue& rCell, SCROW nRow
             assert(pos.second); // inserted
             it = pos.first;
         }
-        *sharedString = &it->second;
-        return OUString();
+        return it->second.getString();
     }
     else if (rCell.getType() == CELLTYPE_STRING)
     {
-        *sharedString = rCell.getSharedString();
-        return OUString();
+        return rCell.getSharedString()->getString();
     }
     else
     {
         sal_uInt32 nFormat
             = mpContext ? mrTab.GetNumberFormat(*mpContext, ScAddress(nCol, 
nRow, mrTab.GetTab()))
                         : mrTab.GetNumberFormat(nCol, nRow);
-        return ScCellFormat::GetInputString(rCell, nFormat, mpContext, mrDoc, 
sharedString, true);
+        return ScCellFormat::GetInputString(rCell, nFormat, mpContext, mrDoc, 
true);
+    }
+}
+
+svl::SharedString ScQueryEvaluator::getCellSharedString(const ScRefCellValue& 
rCell, SCROW nRow,
+                                                        SCCOL nCol)
+{
+    if (rCell.getType() == CELLTYPE_FORMULA
+        && rCell.getFormula()->GetErrCode() != FormulaError::NONE)
+    {
+        // Error cell is evaluated as string (for now).
+        const FormulaError error = rCell.getFormula()->GetErrCode();
+        auto it = mCachedSharedErrorStrings.find(error);
+        if (it == mCachedSharedErrorStrings.end())
+        {
+            svl::SharedString str = 
mrStrPool.intern(ScGlobal::GetErrorString(error));
+            auto pos = mCachedSharedErrorStrings.insert({ error, str });
+            assert(pos.second); // inserted
+            it = pos.first;
+        }
+        return it->second;
+    }
+    else if (rCell.getType() == CELLTYPE_STRING)
+    {
+        return *rCell.getSharedString();
+    }
+    else
+    {
+        sal_uInt32 nFormat
+            = mpContext ? mrTab.GetNumberFormat(*mpContext, ScAddress(nCol, 
nRow, mrTab.GetTab()))
+                        : mrTab.GetNumberFormat(nCol, nRow);
+        return ScCellFormat::GetInputSharedString(rCell, nFormat, mpContext, 
mrDoc, mrStrPool,
+                                                  true);
     }
 }
 
@@ -705,16 +734,14 @@ std::pair<bool, bool> 
ScQueryEvaluator::processEntry(SCROW nRow, SCCOL nCol, ScR
             }
         }
     }
-    const svl::SharedString* cellSharedString = nullptr;
-    std::optional<OUString> oCellString;
+    svl::SharedString cellSharedString;
     const bool bFastCompareByString = isFastCompareByString(rEntry);
     if (rEntry.eOp == SC_EQUAL && rItems.size() >= 10 && bFastCompareByString)
     {
         // The same as above but for strings. Try to optimize the case when
         // it's a svl::SharedString comparison. That happens when SC_EQUAL is 
used
         // and simple matching is used, see compareByString()
-        if (!oCellString)
-            oCellString = getCellString(aCell, nRow, rEntry.nField, 
&cellSharedString);
+        cellSharedString = getCellSharedString(aCell, nRow, rEntry.nField);
         // Allow also checking ScQueryEntry::ByValue if the cell is not 
numeric,
         // as in that case isQueryByNumeric() would be false and 
isQueryByString() would
         // be true because of SC_EQUAL making isTextMatchOp() true.
@@ -722,51 +749,45 @@ std::pair<bool, bool> 
ScQueryEvaluator::processEntry(SCROW nRow, SCCOL nCol, ScR
         // For ScQueryEntry::ByString check that the cell is represented by a 
shared string,
         // which means it's either a string cell or a formula error. This is 
not as
         // generous as isQueryByString() but it should be enough and better be 
safe.
-        if (cellSharedString != nullptr)
+        if (rItems.size() >= 100)
         {
-            if (rItems.size() >= 100)
+            // Sort, cache and binary search for the string in items.
+            // Since each SharedString is identified by pointer value,
+            // sorting by pointer value is enough.
+            if (mCachedSortedItemStrings.size() <= nEntryIndex)
             {
-                // Sort, cache and binary search for the string in items.
-                // Since each SharedString is identified by pointer value,
-                // sorting by pointer value is enough.
-                if (mCachedSortedItemStrings.size() <= nEntryIndex)
+                mCachedSortedItemStrings.resize(nEntryIndex + 1);
+                auto& values = mCachedSortedItemStrings[nEntryIndex];
+                values.reserve(rItems.size());
+                for (const auto& rItem : rItems)
                 {
-                    mCachedSortedItemStrings.resize(nEntryIndex + 1);
-                    auto& values = mCachedSortedItemStrings[nEntryIndex];
-                    values.reserve(rItems.size());
-                    for (const auto& rItem : rItems)
+                    if (rItem.meType == ScQueryEntry::ByString
+                        || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
                     {
-                        if (rItem.meType == ScQueryEntry::ByString
-                            || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
-                        {
-                            values.push_back(mrParam.bCaseSens
-                                                 ? rItem.maString.getData()
-                                                 : 
rItem.maString.getDataIgnoreCase());
-                        }
+                        values.push_back(mrParam.bCaseSens ? 
rItem.maString.getData()
+                                                           : 
rItem.maString.getDataIgnoreCase());
                     }
-                    std::sort(values.begin(), values.end());
                 }
-                auto& values = mCachedSortedItemStrings[nEntryIndex];
-                const rtl_uString* string = mrParam.bCaseSens
-                                                ? cellSharedString->getData()
-                                                : 
cellSharedString->getDataIgnoreCase();
-                auto it = std::lower_bound(values.begin(), values.end(), 
string);
-                if (it != values.end() && *it == string)
-                    return std::make_pair(true, true);
+                std::sort(values.begin(), values.end());
             }
-            else
+            auto& values = mCachedSortedItemStrings[nEntryIndex];
+            const rtl_uString* string = mrParam.bCaseSens ? 
cellSharedString.getData()
+                                                          : 
cellSharedString.getDataIgnoreCase();
+            auto it = std::lower_bound(values.begin(), values.end(), string);
+            if (it != values.end() && *it == string)
+                return std::make_pair(true, true);
+        }
+        else
+        {
+            for (const auto& rItem : rItems)
             {
-                for (const auto& rItem : rItems)
+                if ((rItem.meType == ScQueryEntry::ByString
+                     || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
+                    && (mrParam.bCaseSens ? cellSharedString.getData() == 
rItem.maString.getData()
+                                          : 
cellSharedString.getDataIgnoreCase()
+                                                == 
rItem.maString.getDataIgnoreCase()))
                 {
-                    if ((rItem.meType == ScQueryEntry::ByString
-                         || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
-                        && (mrParam.bCaseSens
-                                ? cellSharedString->getData() == 
rItem.maString.getData()
-                                : cellSharedString->getDataIgnoreCase()
-                                      == rItem.maString.getDataIgnoreCase()))
-                    {
-                        return std::make_pair(true, true);
-                    }
+                    return std::make_pair(true, true);
                 }
             }
         }
@@ -794,15 +815,12 @@ std::pair<bool, bool> 
ScQueryEvaluator::processEntry(SCROW nRow, SCCOL nCol, ScR
         }
         else if (isQueryByString(rEntry.eOp, rItem.meType, aCell))
         {
-            if (!oCellString)
-                oCellString = getCellString(aCell, nRow, rEntry.nField, 
&cellSharedString);
+            cellSharedString = getCellSharedString(aCell, nRow, rEntry.nField);
             std::pair<bool, bool> aThisRes;
-            if (cellSharedString && bFastCompareByString) // fast
-                aThisRes = compareByString<true>(rEntry, rItem, 
cellSharedString, nullptr);
-            else if (cellSharedString)
-                aThisRes = compareByString(rEntry, rItem, cellSharedString, 
nullptr);
+            if (bFastCompareByString) // fast
+                aThisRes = compareByString<true>(rEntry, rItem, 
&cellSharedString, nullptr);
             else
-                aThisRes = compareByString(rEntry, rItem, nullptr, 
&*oCellString);
+                aThisRes = compareByString(rEntry, rItem, &cellSharedString, 
nullptr);
             aRes.first |= aThisRes.first;
             aRes.second |= aThisRes.second;
         }
diff --git a/sc/source/core/tool/cellform.cxx b/sc/source/core/tool/cellform.cxx
index c20175cd9a62..692b019586c2 100644
--- a/sc/source/core/tool/cellform.cxx
+++ b/sc/source/core/tool/cellform.cxx
@@ -21,6 +21,7 @@
 
 #include <svl/numformat.hxx>
 #include <svl/sharedstring.hxx>
+#include <svl/sharedstringpool.hxx>
 
 #include <formulacell.hxx>
 #include <document.hxx>
@@ -130,12 +131,10 @@ OUString ScCellFormat::GetString(
 
 OUString ScCellFormat::GetInputString(
     const ScRefCellValue& rCell, sal_uInt32 nFormat, ScInterpreterContext* 
pContext, const ScDocument& rDoc,
-    const svl::SharedString** pShared, bool bFiltering, bool 
bForceSystemLocale )
+    bool bFiltering, bool bForceSystemLocale )
 {
     ScInterpreterContext& rContext = pContext ? *pContext : 
rDoc.GetNonThreadedContext();
 
-    if(pShared != nullptr)
-        *pShared = nullptr;
     switch (rCell.getType())
     {
         case CELLTYPE_STRING:
@@ -160,35 +159,63 @@ OUString ScCellFormat::GetInputString(
                 rContext.NFGetInputLineString(pFC->GetValue(), nFormat, *str, 
bFiltering, bForceSystemLocale);
             }
             else
-            {
-                const svl::SharedString& shared = pFC->GetString();
-                // Allow callers to optimize by avoiding converting later back 
to OUString.
-                // To avoid refcounting that won't be needed, do not even 
return the OUString.
-                if( pShared != nullptr )
-                    *pShared = &shared;
-                else
-                    str = shared.getString();
-            }
+                str = pFC->GetString().getString();
 
             const FormulaError nErrCode = pFC->GetErrCode();
             if (nErrCode != FormulaError::NONE)
-            {
                 str.reset();
-                if( pShared != nullptr )
-                    *pShared = nullptr;
-            }
 
             return str ? std::move(*str) : svl::SharedString::EMPTY_STRING;
         }
         case CELLTYPE_NONE:
-            if( pShared != nullptr )
-                *pShared = &svl::SharedString::getEmptyString();
             return svl::SharedString::EMPTY_STRING;
         default:
             return svl::SharedString::EMPTY_STRING;
     }
 }
 
+svl::SharedString ScCellFormat::GetInputSharedString(
+    const ScRefCellValue& rCell, sal_uInt32 nFormat, ScInterpreterContext* 
pContext, const ScDocument& rDoc,
+    svl::SharedStringPool& rStrPool,
+    bool bFiltering, bool bForceSystemLocale )
+{
+    ScInterpreterContext& rContext = pContext ? *pContext : 
rDoc.GetNonThreadedContext();
+
+    switch (rCell.getType())
+    {
+        case CELLTYPE_STRING:
+        case CELLTYPE_EDIT:
+            return rCell.getSharedString(&rDoc, rStrPool);
+        case CELLTYPE_VALUE:
+        {
+            OUString str;
+            rContext.NFGetInputLineString(rCell.getDouble(), nFormat, str, 
bFiltering, bForceSystemLocale);
+            return rStrPool.intern(str);
+        }
+        break;
+        case CELLTYPE_FORMULA:
+        {
+            ScFormulaCell* pFC = rCell.getFormula();
+            const FormulaError nErrCode = pFC->GetErrCode();
+            if (nErrCode != FormulaError::NONE)
+                return svl::SharedString::getEmptyString();
+            else if (pFC->IsEmptyDisplayedAsString())
+                return svl::SharedString::getEmptyString();
+            else if (pFC->IsValue())
+            {
+                OUString str;
+                rContext.NFGetInputLineString(pFC->GetValue(), nFormat, str, 
bFiltering, bForceSystemLocale);
+                return rStrPool.intern(str);
+            }
+            else
+                return pFC->GetString();
+        }
+        case CELLTYPE_NONE:
+        default:
+            return svl::SharedString::getEmptyString();
+    }
+}
+
 OUString ScCellFormat::GetOutputString( ScDocument& rDoc, const ScAddress& 
rPos, const ScRefCellValue& rCell )
 {
     if (rCell.isEmpty())
diff --git a/sc/source/core/tool/interpr4.cxx b/sc/source/core/tool/interpr4.cxx
index 4f88ad3b4d27..d463ffdd81a6 100644
--- a/sc/source/core/tool/interpr4.cxx
+++ b/sc/source/core/tool/interpr4.cxx
@@ -253,7 +253,7 @@ void ScInterpreter::GetCellString( svl::SharedString& rStr, 
ScRefCellValue& rCel
     {
         case CELLTYPE_STRING:
         case CELLTYPE_EDIT:
-            rStr = mrStrPool.intern(rCell.getString(&mrDoc));
+            rStr = rCell.getSharedString(&mrDoc, mrStrPool);
         break;
         case CELLTYPE_FORMULA:
         {
diff --git a/sc/source/core/tool/rangecache.cxx 
b/sc/source/core/tool/rangecache.cxx
index 24a8a39313ff..f5e98343455b 100644
--- a/sc/source/core/tool/rangecache.cxx
+++ b/sc/source/core/tool/rangecache.cxx
@@ -186,10 +186,7 @@ ScSortedRangeCache::ScSortedRangeCache(ScDocument* pDoc, 
const ScRange& rRange,
                 assert(!ScQueryEvaluator::isQueryByValue(mQueryOp, mQueryType, 
cell));
                 if (ScQueryEvaluator::isQueryByString(mQueryOp, mQueryType, 
cell))
                 {
-                    const svl::SharedString* sharedString = nullptr;
-                    OUString string = evaluator.getCellString(cell, nRow, 
nCol, &sharedString);
-                    if (sharedString)
-                        string = sharedString->getString();
+                    OUString string = evaluator.getCellString(cell, nRow, 
nCol);
                     colrowData.push_back(ColRowData{ mRowSearch ? nRow : nCol, 
string });
                 }
             }

Reply via email to