This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 39c69c766e9 [Optimize](Variant) optimize schema update performance (#45480) (#45731)
39c69c766e9 is described below

commit 39c69c766e9b17bf3bccd89410acb6be3931ecf3
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Sat Dec 21 23:41:03 2024 +0800

    [Optimize](Variant) optimize schema update performance (#45480) (#45731)
    
    (#45480)
---
 be/src/olap/rowset_builder.cpp    | 24 +++++++++++++-----------
 be/src/olap/tablet_schema.cpp     | 15 +++++++++++++++
 be/src/olap/tablet_schema.h       |  3 +++
 be/src/vec/common/schema_util.cpp |  5 ++---
 4 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp
index 1929ffbb78e..c668df4bd33 100644
--- a/be/src/olap/rowset_builder.cpp
+++ b/be/src/olap/rowset_builder.cpp
@@ -327,21 +327,22 @@ Status RowsetBuilder::commit_txn() {
     SCOPED_TIMER(_commit_txn_timer);
 
     const RowsetWriterContext& rw_ctx = _rowset_writer->context();
-    if (rw_ctx.tablet_schema->num_variant_columns() > 0) {
+    if (rw_ctx.tablet_schema->num_variant_columns() > 0 && _rowset->num_rows() 
> 0) {
         // Need to merge schema with `rw_ctx.merged_tablet_schema` in prior,
         // merged schema keeps the newest merged schema for the rowset, which 
is updated and merged
         // during flushing segments.
         if (rw_ctx.merged_tablet_schema != nullptr) {
             
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.merged_tablet_schema));
+        } else {
+            // We should merge rowset schema further, in case that the 
merged_tablet_schema maybe null
+            // when enable_memtable_on_sink_node is true, the 
merged_tablet_schema will not be passed to
+            // the destination backend.
+            // update tablet schema when meet variant columns, before 
commit_txn
+            // Eg. rowset schema:       A(int),    B(float),  C(int), D(int)
+            // _tabelt->tablet_schema:  A(bigint), B(double)
+            //  => update_schema:       A(bigint), B(double), C(int), D(int)
+            
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema));
         }
-        // We should merge rowset schema further, in case that the 
merged_tablet_schema maybe null
-        // when enable_memtable_on_sink_node is true, the merged_tablet_schema 
will not be passed to
-        // the destination backend.
-        // update tablet schema when meet variant columns, before commit_txn
-        // Eg. rowset schema:       A(int),    B(float),  C(int), D(int)
-        // _tabelt->tablet_schema:  A(bigint), B(double)
-        //  => update_schema:       A(bigint), B(double), C(int), D(int)
-        
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema));
     }
 
     // Transfer ownership of `PendingRowsetGuard` to `TxnManager`
@@ -379,7 +380,6 @@ Status BaseRowsetBuilder::cancel() {
 void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id,
                                                      const 
OlapTableSchemaParam* table_schema_param,
                                                      const TabletSchema& 
ori_tablet_schema) {
-    _tablet_schema->copy_from(ori_tablet_schema);
     // find the right index id
     int i = 0;
     auto indexes = table_schema_param->indexes();
@@ -388,11 +388,13 @@ void 
BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id,
             break;
         }
     }
-
     if (!indexes.empty() && !indexes[i]->columns.empty() &&
         indexes[i]->columns[0]->unique_id() >= 0) {
+        _tablet_schema->shawdow_copy_without_columns(ori_tablet_schema);
         _tablet_schema->build_current_tablet_schema(index_id, 
table_schema_param->version(),
                                                     indexes[i], 
ori_tablet_schema);
+    } else {
+        _tablet_schema->copy_from(ori_tablet_schema);
     }
     if (_tablet_schema->schema_version() > ori_tablet_schema.schema_version()) 
{
         // After schema change, should include extracted column
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 7da0f99537a..488e9755b23 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -1047,6 +1047,21 @@ void TabletSchema::copy_from(const TabletSchema& 
tablet_schema) {
     _table_id = tablet_schema.table_id();
 }
 
+void TabletSchema::shawdow_copy_without_columns(const TabletSchema& 
tablet_schema) {
+    *this = tablet_schema;
+    _field_path_to_index.clear();
+    _field_name_to_index.clear();
+    _field_id_to_index.clear();
+    _num_columns = 0;
+    _num_variant_columns = 0;
+    _num_null_columns = 0;
+    _num_key_columns = 0;
+    _cols.clear();
+    _vl_field_mem_size = 0;
+    // notice : do not ref columns
+    _column_cache_handlers.clear();
+}
+
 void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
     for (auto& col : _cols) {
         if (col->unique_id() < 0) {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index c9a0d45bd9b..75b3a78e183 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -300,6 +300,8 @@ public:
     // Must make sure the row column is always the last column
     void add_row_column();
     void copy_from(const TabletSchema& tablet_schema);
+    // lightweight copy, take care of lifecycle of TabletColumn
+    void shawdow_copy_without_columns(const TabletSchema& tablet_schema);
     void update_index_info_from(const TabletSchema& tablet_schema);
     std::string to_key() const;
     // Don't use.
@@ -481,6 +483,7 @@ public:
 private:
     friend bool operator==(const TabletSchema& a, const TabletSchema& b);
     friend bool operator!=(const TabletSchema& a, const TabletSchema& b);
+    TabletSchema(const TabletSchema&) = default;
 
     void clear_column_cache_handlers();
 
diff --git a/be/src/vec/common/schema_util.cpp 
b/be/src/vec/common/schema_util.cpp
index b373dbd1347..51a3ed8c317 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -431,9 +431,8 @@ Status get_least_common_schema(const 
std::vector<TabletSchemaSPtr>& schemas,
     // duplicated paths following the update_least_common_schema process.
     auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr& 
base_schema) {
         output_schema = std::make_shared<TabletSchema>();
-        output_schema->copy_from(*base_schema);
-        // Merge columns from other schemas
-        output_schema->clear_columns();
+        // not copy columns but only shadow copy other attributes
+        output_schema->shawdow_copy_without_columns(*base_schema);
         // Get all columns without extracted columns and collect variant col 
unique id
         for (const TabletColumnPtr& col : base_schema->columns()) {
             if (col->is_variant_type()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to