include/o3tl/sorted_vector.hxx           |   57 +++++++++++++++++++++++--------
 sc/source/filter/inc/sheetdatabuffer.hxx |    9 ++++
 sc/source/filter/oox/sheetdatabuffer.cxx |   12 +++++-
 3 files changed, 64 insertions(+), 14 deletions(-)

New commits:
commit 6810aa937caca167a6dc1aa0ac63d5995a143b10
Author:     Luboš Luňák <l.lu...@collabora.com>
AuthorDate: Sun Mar 6 19:24:37 2022 +0100
Commit:     Luboš Luňák <l.lu...@collabora.com>
CommitDate: Mon Mar 7 10:40:20 2022 +0100

    faster bulk insert into o3tl::sorted_vector (tdf#117366)
    
    Repeated insert() into o3tl::sorted_vector can turn out to be
    pathological, repeatedly moving most items aside for each
    element inserted. Make it possible to insert a normal vector
    that's been pre-sorted and made unique. 31s->9s for loading
    tdf#117366.
    
    Change-Id: If3a0366dd240ad46c23f5f3dc04e781b8c4b2aa2
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/131085
    Tested-by: Jenkins
    Reviewed-by: Luboš Luňák <l.lu...@collabora.com>

diff --git a/include/o3tl/sorted_vector.hxx b/include/o3tl/sorted_vector.hxx
index 2bd8b7273a9a..13c0b25f8f0e 100644
--- a/include/o3tl/sorted_vector.hxx
+++ b/include/o3tl/sorted_vector.hxx
@@ -12,7 +12,9 @@
 
 #include <vector>
 #include <algorithm>
+#include <cassert>
 #include <functional>
+#include <iterator>
 #include <memory>
 #include <type_traits>
 
@@ -229,19 +231,32 @@ public:
 
     void insert(sorted_vector<Value,Compare,Find> const& rOther)
     {
-       // optimization for the rather common case that we are overwriting this 
with the contents
-       // of another sorted vector
-       if ( empty() )
-       {
-           m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), 
rOther.m_vector.end());
-       }
-       else
-       {
-           for (const_iterator it = rOther.m_vector.begin(); it != 
rOther.m_vector.end(); ++it)
-           {
-               insert(*it);
-           }
-       }
+        // optimization for the rather common case that we are overwriting 
this with the contents
+        // of another sorted vector
+        if ( empty() )
+            m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), 
rOther.m_vector.end());
+        else
+            insert_internal( rOther.m_vector );
+    }
+
+    void insert_sorted_unique_vector(const std::vector<Value>& rOther)
+    {
+        assert( std::is_sorted(rOther.begin(), rOther.end(), Compare()));
+        assert( std::unique(rOther.begin(), rOther.end(), compare_equal) == 
rOther.end());
+        if ( empty() )
+            m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), 
rOther.m_vector.end());
+        else
+            insert_internal( rOther );
+    }
+
+    void insert_sorted_unique_vector(std::vector<Value>&& rOther)
+    {
+        assert( std::is_sorted(rOther.begin(), rOther.end(), Compare()));
+        assert( std::unique(rOther.begin(), rOther.end(), compare_equal) == 
rOther.end());
+        if ( empty() )
+            m_vector.swap( rOther );
+        else
+            insert_internal( rOther );
     }
 
     /* Clear() elements in the vector, and free them one by one. */
@@ -266,6 +281,22 @@ public:
     }
 
 private:
+    static bool compare_equal( const Value& v1, const Value& v2 )
+    {   // Synthetize == check from < check for std::unique asserts above.
+        return !Compare()( v1, v2 ) && !Compare()( v2, v1 );
+    }
+
+    void insert_internal( const std::vector<Value>& rOther )
+    {
+        // Do a union in one pass rather than repeated insert() that could 
repeatedly
+        // move large amounts of data.
+        vector_t tmp;
+        tmp.reserve( m_vector.size() + rOther.size());
+        std::set_union( m_vector.begin(), m_vector.end(),
+                        rOther.begin(), rOther.end(),
+                        std::back_inserter( tmp ), Compare());
+        m_vector.swap( tmp );
+    }
 
     vector_t m_vector;
 };
diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx 
b/sc/source/filter/inc/sheetdatabuffer.hxx
index a673f91b66ec..7f4930bff450 100644
--- a/sc/source/filter/inc/sheetdatabuffer.hxx
+++ b/sc/source/filter/inc/sheetdatabuffer.hxx
@@ -201,8 +201,17 @@ private:
             return lhs.mnEndRow<rhs.mnStartRow;
         }
     };
+    struct StyleRowRangeCompEqual
+    {
+        bool operator() (const RowRangeStyle& lhs, const RowRangeStyle& rhs) 
const
+        {
+            return lhs.mnEndRow==rhs.mnStartRow;
+        }
+    };
     typedef ::o3tl::sorted_vector< RowRangeStyle, StyleRowRangeComp > 
RowStyles;
     typedef ::std::map< sal_Int32, RowStyles > ColStyles;
+    typedef ::std::vector< RowRangeStyle > TmpRowStyles;
+    typedef ::std::map< sal_Int32, TmpRowStyles > TmpColStyles;
     /** Stores information about a merged cell range. */
     struct MergedRange
     {
diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx 
b/sc/source/filter/oox/sheetdatabuffer.cxx
index d4d0ad87affd..b4d6158f66fe 100644
--- a/sc/source/filter/oox/sheetdatabuffer.cxx
+++ b/sc/source/filter/oox/sheetdatabuffer.cxx
@@ -353,6 +353,9 @@ void SheetDataBuffer::addColXfStyles()
         addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, 
rFormatKeyPair.second, rRangeList );
     }
     // gather all ranges that have the same style and apply them in bulk
+    // Collect data in unsorted vectors and sort them just once at the end
+    // instead of possibly slow repeated inserts.
+    TmpColStyles tmpStylesPerColumn;
     for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap )
     {
         for (const ScRange & rAddress : rRanges)
@@ -363,9 +366,16 @@ void SheetDataBuffer::addColXfStyles()
             aStyleRows.mnStartRow = rAddress.aStart.Row();
             aStyleRows.mnEndRow = rAddress.aEnd.Row();
             for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= 
rAddress.aEnd.Col(); ++nCol )
-               maStylesPerColumn[ nCol ].insert( aStyleRows );
+               tmpStylesPerColumn[ nCol ].push_back( aStyleRows );
         }
     }
+    for( auto& rowStyles : tmpStylesPerColumn )
+    {
+        TmpRowStyles& s = rowStyles.second;
+        std::sort( s.begin(), s.end(), StyleRowRangeComp());
+        s.erase( std::unique( s.begin(), s.end(), StyleRowRangeCompEqual()), 
s.end());
+        maStylesPerColumn[ rowStyles.first ].insert_sorted_unique_vector( 
std::move( s ));
+    }
 }
 
 void SheetDataBuffer::addColXfStyleProcessRowRanges()

Reply via email to