This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 34cf65bcc14 Fix inaccurate memory size for  metadata (#49147)
34cf65bcc14 is described below

commit 34cf65bcc14a37f29983ef2ab6a2357048cf3056
Author: wangbo <[email protected]>
AuthorDate: Wed Apr 2 11:21:48 2025 +0800

    Fix inaccurate memory size for  metadata (#49147)
    
    ### What problem does this PR solve?
    Accurately counting the memory of all class member variables is a
    complex task, so we first count the memory occupied by the size of the
    class.
---
 be/src/olap/metadata_adder.h                       |  40 ++++-
 be/src/olap/rowset/segment_v2/index_page.cpp       |   6 +-
 be/src/olap/rowset/segment_v2/index_page.h         |   1 -
 .../olap/rowset/segment_v2/ordinal_page_index.cpp  |   5 -
 be/src/olap/rowset/segment_v2/ordinal_page_index.h |   2 -
 be/src/olap/tablet_schema.cpp                      |   7 +-
 be/src/olap/tablet_schema.h                        |   1 -
 be/test/olap/metadata_adder_test.cpp               | 197 +++++++++++++++++++++
 8 files changed, 238 insertions(+), 21 deletions(-)

diff --git a/be/src/olap/metadata_adder.h b/be/src/olap/metadata_adder.h
index 5b5ba163224..8afa96acec0 100644
--- a/be/src/olap/metadata_adder.h
+++ b/be/src/olap/metadata_adder.h
@@ -146,13 +146,35 @@ public:
 protected:
     MetadataAdder(const MetadataAdder& other);
 
+    MetadataAdder(MetadataAdder&& other);
+
     virtual ~MetadataAdder();
 
     virtual int64_t get_metadata_size() const { return sizeof(T); }
 
     void update_metadata_size();
 
-    MetadataAdder<T>& operator=(const MetadataAdder<T>& other) = default;
+    MetadataAdder<T>& operator=(const MetadataAdder<T>& other) {
+        int64_t old_size = this->_current_meta_size;
+        this->_current_meta_size = other._current_meta_size;
+        int64_t size_diff = this->_current_meta_size - old_size;
+        add_mem_size(size_diff);
+
+        return *this;
+    }
+
+    MetadataAdder<T>& operator=(MetadataAdder<T>&& other) {
+        int64_t old_size = this->_current_meta_size;
+        this->_current_meta_size = other._current_meta_size;
+        int64_t size_diff = this->_current_meta_size - old_size;
+        add_mem_size(size_diff);
+
+        other.clear_memory();
+
+        return *this;
+    }
+
+    void clear_memory();
 
     int64_t _current_meta_size {0};
 
@@ -168,6 +190,15 @@ MetadataAdder<T>::MetadataAdder(const MetadataAdder<T>& 
other) {
     add_mem_size(this->_current_meta_size);
 }
 
+template <typename T>
+MetadataAdder<T>::MetadataAdder(MetadataAdder&& other) {
+    this->_current_meta_size = other._current_meta_size;
+    add_num(1);
+    add_mem_size(this->_current_meta_size);
+
+    other.clear_memory();
+}
+
 template <typename T>
 MetadataAdder<T>::MetadataAdder() {
     this->_current_meta_size = sizeof(T);
@@ -181,6 +212,13 @@ MetadataAdder<T>::~MetadataAdder() {
     add_num(-1);
 }
 
+template <typename T>
+void MetadataAdder<T>::clear_memory() {
+    int64_t old_size = _current_meta_size;
+    _current_meta_size = sizeof(T);
+    add_mem_size(_current_meta_size - old_size);
+}
+
 template <typename T>
 void MetadataAdder<T>::update_metadata_size() {
     int64_t old_size = _current_meta_size;
diff --git a/be/src/olap/rowset/segment_v2/index_page.cpp 
b/be/src/olap/rowset/segment_v2/index_page.cpp
index 1b033a9ff62..8fa3288b4d8 100644
--- a/be/src/olap/rowset/segment_v2/index_page.cpp
+++ b/be/src/olap/rowset/segment_v2/index_page.cpp
@@ -65,7 +65,7 @@ Status IndexPageBuilder::get_first_key(Slice* key) const {
 ///////////////////////////////////////////////////////////////////////////////
 
 int64_t IndexPageReader::get_metadata_size() const {
-    return sizeof(IndexPageReader) + _vl_field_mem_size;
+    return sizeof(IndexPageReader) + _footer.ByteSizeLong();
 }
 
 Status IndexPageReader::parse(const Slice& body, const IndexPageFooterPB& 
footer) {
@@ -84,11 +84,7 @@ Status IndexPageReader::parse(const Slice& body, const 
IndexPageFooterPB& footer
         }
         _keys.push_back(key);
         _values.push_back(value);
-        _vl_field_mem_size += sizeof(char) * key.size;
     }
-    _vl_field_mem_size +=
-            _keys.capacity() * sizeof(Slice) + _values.capacity() * 
sizeof(PagePointer);
-    _vl_field_mem_size += _footer.ByteSizeLong();
 
     update_metadata_size();
     _parsed = true;
diff --git a/be/src/olap/rowset/segment_v2/index_page.h 
b/be/src/olap/rowset/segment_v2/index_page.h
index 0ebf425fc5c..8e78f631f2b 100644
--- a/be/src/olap/rowset/segment_v2/index_page.h
+++ b/be/src/olap/rowset/segment_v2/index_page.h
@@ -118,7 +118,6 @@ private:
     IndexPageFooterPB _footer;
     std::vector<Slice> _keys;
     std::vector<PagePointer> _values;
-    int64_t _vl_field_mem_size {0};
 };
 
 class IndexPageIterator {
diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp 
b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
index 8faab35cebb..27345b59be2 100644
--- a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
+++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
@@ -133,11 +133,6 @@ Status OrdinalIndexReader::_load(bool use_page_cache, bool 
kept_in_memory,
     return Status::OK();
 }
 
-int64_t OrdinalIndexReader::get_metadata_size() const {
-    return sizeof(OrdinalIndexReader) + _ordinals.capacity() * 
sizeof(ordinal_t) +
-           _pages.capacity() * sizeof(PagePointer);
-}
-
 OrdinalPageIndexIterator OrdinalIndexReader::seek_at_or_before(ordinal_t 
ordinal) {
     int32_t left = 0;
     int32_t right = _num_pages - 1;
diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.h 
b/be/src/olap/rowset/segment_v2/ordinal_page_index.h
index df60edb12d1..660c97e315d 100644
--- a/be/src/olap/rowset/segment_v2/ordinal_page_index.h
+++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.h
@@ -97,8 +97,6 @@ private:
                  std::unique_ptr<OrdinalIndexPB> index_meta,
                  OlapReaderStatistics* index_load_stats);
 
-    int64_t get_metadata_size() const override;
-
 private:
     friend OrdinalPageIndexIterator;
 
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 2ffc4a2d1a3..601ce807737 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -858,7 +858,7 @@ TabletSchema::~TabletSchema() {
 }
 
 int64_t TabletSchema::get_metadata_size() const {
-    return sizeof(TabletSchema) + _vl_field_mem_size;
+    return sizeof(TabletSchema);
 }
 
 void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
@@ -1025,10 +1025,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& 
schema, bool ignore_extrac
 
         _cols.emplace_back(std::move(column));
         if (!_cols.back()->is_extracted_column()) {
-            _vl_field_mem_size +=
-                    sizeof(StringRef) + sizeof(char) * 
_cols.back()->name().size() + sizeof(size_t);
             _field_name_to_index.emplace(StringRef(_cols.back()->name()), 
_num_columns);
-            _vl_field_mem_size += sizeof(int32_t) * 2;
             _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
         }
         _num_columns++;
@@ -1087,7 +1084,6 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& 
schema, bool ignore_extrac
     
_row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
                                         
schema.row_store_column_unique_ids().end());
     _enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
-    _vl_field_mem_size += _row_store_column_unique_ids.capacity() * 
sizeof(int32_t);
     update_metadata_size();
 }
 
@@ -1108,7 +1104,6 @@ void TabletSchema::shawdow_copy_without_columns(const 
TabletSchema& tablet_schem
     _num_null_columns = 0;
     _num_key_columns = 0;
     _cols.clear();
-    _vl_field_mem_size = 0;
     // notice : do not ref columns
     _column_cache_handlers.clear();
 }
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 8d0f3f111de..9b350b45cff 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -611,7 +611,6 @@ private:
     // ATTN: For compability reason empty cids means all columns of tablet 
schema are encoded to row column
     std::vector<int32_t> _row_store_column_unique_ids;
     bool _enable_variant_flatten_nested = false;
-    int64_t _vl_field_mem_size {0}; // variable length field
 };
 
 bool operator==(const TabletSchema& a, const TabletSchema& b);
diff --git a/be/test/olap/metadata_adder_test.cpp 
b/be/test/olap/metadata_adder_test.cpp
new file mode 100644
index 00000000000..de096f9dc80
--- /dev/null
+++ b/be/test/olap/metadata_adder_test.cpp
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/metadata_adder.h"
+
+#include <gtest/gtest.h>
+
+#include "olap/rowset/segment_v2/zone_map_index.h"
+#include "olap/tablet_schema.h"
+#include "olap/tablet_schema_helper.h"
+
+namespace doris {
+
+class MetadataAdderTest : public testing::Test {
+public:
+    const std::string kTestDir = "./ut_dir/metadata_adder_test";
+
+    void SetUp() override {
+        std::string filter_str = ::testing::GTEST_FLAG(filter);
+        std::cout << "GTEST_FILTER: " << filter_str << std::endl;
+
+        if (filter_str != "MetadataAdderTest.*") {
+            GTEST_SKIP() << "Skipping this test when executing concurrently.";
+        }
+
+        auto st = io::global_local_filesystem()->delete_directory(kTestDir);
+        ASSERT_TRUE(st.ok()) << st;
+        st = io::global_local_filesystem()->create_directory(kTestDir);
+        ASSERT_TRUE(st.ok()) << st;
+    }
+
+    void TearDown() override {
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok());
+    }
+
+    MetadataAdderTest() = default;
+    ~MetadataAdderTest() override = default;
+};
+
+template <typename T>
+void test_construct_new_obj() {
+    int classSize = sizeof(T);
+
+    std::cout << "Class Name: " << typeid(T).name() << std::endl;
+
+    ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+    // 1 default construct
+    {
+        T t1;
+        ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize);
+        T t2;
+        ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+    }
+
+    ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+    // 2 copy construct
+    {
+        T t1;
+        T t2 = t1;
+        ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+    }
+
+    ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+
+    // 3 move construct
+    {
+        T t1;
+        T t2 = std::move(t1);
+        ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+    }
+
+    ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+
+    // 4 copy assignment
+    {
+        T t1;
+        T t2;
+        int before_size = MetadataAdder<T>::get_all_tablets_size();
+        t1 = t2;
+        ASSERT_TRUE(before_size == MetadataAdder<T>::get_all_tablets_size());
+        ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+    }
+
+    ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+
+    // 5 move assignment
+    {
+        T t1;
+        T t2;
+        int before_size = MetadataAdder<T>::get_all_tablets_size();
+        t1 = std::move(t2);
+        ASSERT_TRUE(before_size == MetadataAdder<T>::get_all_tablets_size());
+        ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+    }
+
+    ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+}
+
+TEST_F(MetadataAdderTest, metadata_adder_test) {
+    test_construct_new_obj<TabletSchema>();
+    test_construct_new_obj<TabletColumn>();
+    test_construct_new_obj<TabletIndex>();
+}
+
+TEST_F(MetadataAdderTest, meta_load_with_pb_test) {
+    {
+        auto fs = io::global_local_filesystem();
+        TabletColumnPtr int_column = create_int_key(0);
+        Field* int_field = FieldFactory::create(*int_column);
+
+        // 1 load first column
+        segment_v2::ColumnIndexMetaPB index_meta1;
+        std::string file1 = kTestDir + "/copy_obj1";
+        {
+            std::unique_ptr<segment_v2::ZoneMapIndexWriter> builder(nullptr);
+            
static_cast<void>(segment_v2::ZoneMapIndexWriter::create(int_field, builder));
+            for (int i = 0; i < 100; i++) {
+                builder->add_values((const uint8_t*)&i, 1);
+            }
+            static_cast<void>(builder->flush());
+            {
+                io::FileWriterPtr file_writer;
+                EXPECT_TRUE(fs->create_file(file1, &file_writer).ok());
+                EXPECT_TRUE(builder->finish(file_writer.get(), 
&index_meta1).ok());
+                EXPECT_TRUE(file_writer->close().ok());
+            }
+        }
+
+        
ASSERT_TRUE(MetadataAdder<segment_v2::ZoneMapIndexReader>::get_all_segments_size()
 == 0);
+
+        io::FileReaderSPtr file_reader;
+        EXPECT_TRUE(fs->open_file(file1, &file_reader).ok());
+        segment_v2::ZoneMapIndexReader zonemap_col_reader(
+                file_reader, index_meta1.zone_map_index().page_zone_maps());
+        Status status = zonemap_col_reader.load(true, false);
+
+        int mem_size = zonemap_col_reader.get_metadata_size();
+
+        
ASSERT_TRUE(MetadataAdder<segment_v2::ZoneMapIndexReader>::get_all_segments_size()
 ==
+                    mem_size);
+
+        // load second column
+        segment_v2::ColumnIndexMetaPB index_meta2;
+        TabletColumnPtr varchar_column = create_varchar_key(0);
+        Field* str_field = FieldFactory::create(*varchar_column);
+
+        std::string file2 = kTestDir + "/copy_obj2";
+        {
+            std::unique_ptr<segment_v2::ZoneMapIndexWriter> builder(nullptr);
+            
static_cast<void>(segment_v2::ZoneMapIndexWriter::create(str_field, builder));
+            std::vector<std::string> values1 = {"aaaa", "bbbb", "cccc", 
"dddd", "eeee", "ffff"};
+            for (auto& value : values1) {
+                Slice slice(value);
+                builder->add_values((const uint8_t*)&slice, 1);
+            }
+            static_cast<void>(builder->flush());
+            {
+                io::FileWriterPtr file_writer;
+                EXPECT_TRUE(fs->create_file(file2, &file_writer).ok());
+                EXPECT_TRUE(builder->finish(file_writer.get(), 
&index_meta2).ok());
+                EXPECT_TRUE(file_writer->close().ok());
+            }
+        }
+
+        io::FileReaderSPtr file_reader2;
+        EXPECT_TRUE(fs->open_file(file2, &file_reader2).ok());
+        segment_v2::ZoneMapIndexReader zonemap_col_reader2(
+                file_reader2, index_meta2.zone_map_index().page_zone_maps());
+        Status status2 = zonemap_col_reader2.load(true, false);
+
+        int mem_size2 = zonemap_col_reader2.get_metadata_size();
+
+        
ASSERT_TRUE(MetadataAdder<segment_v2::ZoneMapIndexReader>::get_all_segments_size()
 ==
+                    mem_size2 + mem_size);
+
+        delete int_field;
+        delete str_field;
+    }
+
+    
ASSERT_TRUE(MetadataAdder<segment_v2::ZoneMapIndexReader>::get_all_segments_size()
 == 0);
+}
+
+}; // namespace doris
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to