This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 34cf65bcc14 Fix inaccurate memory size for metadata (#49147)
34cf65bcc14 is described below
commit 34cf65bcc14a37f29983ef2ab6a2357048cf3056
Author: wangbo <[email protected]>
AuthorDate: Wed Apr 2 11:21:48 2025 +0800
Fix inaccurate memory size for metadata (#49147)
### What problem does this PR solve?
Accurately counting the memory of all class member variables is a
complex task, so we first count the memory occupied by the size of the
class.
---
be/src/olap/metadata_adder.h | 40 ++++-
be/src/olap/rowset/segment_v2/index_page.cpp | 6 +-
be/src/olap/rowset/segment_v2/index_page.h | 1 -
.../olap/rowset/segment_v2/ordinal_page_index.cpp | 5 -
be/src/olap/rowset/segment_v2/ordinal_page_index.h | 2 -
be/src/olap/tablet_schema.cpp | 7 +-
be/src/olap/tablet_schema.h | 1 -
be/test/olap/metadata_adder_test.cpp | 197 +++++++++++++++++++++
8 files changed, 238 insertions(+), 21 deletions(-)
diff --git a/be/src/olap/metadata_adder.h b/be/src/olap/metadata_adder.h
index 5b5ba163224..8afa96acec0 100644
--- a/be/src/olap/metadata_adder.h
+++ b/be/src/olap/metadata_adder.h
@@ -146,13 +146,35 @@ public:
protected:
MetadataAdder(const MetadataAdder& other);
+ MetadataAdder(MetadataAdder&& other);
+
virtual ~MetadataAdder();
virtual int64_t get_metadata_size() const { return sizeof(T); }
void update_metadata_size();
- MetadataAdder<T>& operator=(const MetadataAdder<T>& other) = default;
+ MetadataAdder<T>& operator=(const MetadataAdder<T>& other) {
+ int64_t old_size = this->_current_meta_size;
+ this->_current_meta_size = other._current_meta_size;
+ int64_t size_diff = this->_current_meta_size - old_size;
+ add_mem_size(size_diff);
+
+ return *this;
+ }
+
+ MetadataAdder<T>& operator=(MetadataAdder<T>&& other) {
+ int64_t old_size = this->_current_meta_size;
+ this->_current_meta_size = other._current_meta_size;
+ int64_t size_diff = this->_current_meta_size - old_size;
+ add_mem_size(size_diff);
+
+ other.clear_memory();
+
+ return *this;
+ }
+
+ void clear_memory();
int64_t _current_meta_size {0};
@@ -168,6 +190,15 @@ MetadataAdder<T>::MetadataAdder(const MetadataAdder<T>&
other) {
add_mem_size(this->_current_meta_size);
}
+template <typename T>
+MetadataAdder<T>::MetadataAdder(MetadataAdder&& other) {
+ this->_current_meta_size = other._current_meta_size;
+ add_num(1);
+ add_mem_size(this->_current_meta_size);
+
+ other.clear_memory();
+}
+
template <typename T>
MetadataAdder<T>::MetadataAdder() {
this->_current_meta_size = sizeof(T);
@@ -181,6 +212,13 @@ MetadataAdder<T>::~MetadataAdder() {
add_num(-1);
}
+template <typename T>
+void MetadataAdder<T>::clear_memory() {
+ int64_t old_size = _current_meta_size;
+ _current_meta_size = sizeof(T);
+ add_mem_size(_current_meta_size - old_size);
+}
+
template <typename T>
void MetadataAdder<T>::update_metadata_size() {
int64_t old_size = _current_meta_size;
diff --git a/be/src/olap/rowset/segment_v2/index_page.cpp
b/be/src/olap/rowset/segment_v2/index_page.cpp
index 1b033a9ff62..8fa3288b4d8 100644
--- a/be/src/olap/rowset/segment_v2/index_page.cpp
+++ b/be/src/olap/rowset/segment_v2/index_page.cpp
@@ -65,7 +65,7 @@ Status IndexPageBuilder::get_first_key(Slice* key) const {
///////////////////////////////////////////////////////////////////////////////
int64_t IndexPageReader::get_metadata_size() const {
- return sizeof(IndexPageReader) + _vl_field_mem_size;
+ return sizeof(IndexPageReader) + _footer.ByteSizeLong();
}
Status IndexPageReader::parse(const Slice& body, const IndexPageFooterPB&
footer) {
@@ -84,11 +84,7 @@ Status IndexPageReader::parse(const Slice& body, const
IndexPageFooterPB& footer
}
_keys.push_back(key);
_values.push_back(value);
- _vl_field_mem_size += sizeof(char) * key.size;
}
- _vl_field_mem_size +=
- _keys.capacity() * sizeof(Slice) + _values.capacity() *
sizeof(PagePointer);
- _vl_field_mem_size += _footer.ByteSizeLong();
update_metadata_size();
_parsed = true;
diff --git a/be/src/olap/rowset/segment_v2/index_page.h
b/be/src/olap/rowset/segment_v2/index_page.h
index 0ebf425fc5c..8e78f631f2b 100644
--- a/be/src/olap/rowset/segment_v2/index_page.h
+++ b/be/src/olap/rowset/segment_v2/index_page.h
@@ -118,7 +118,6 @@ private:
IndexPageFooterPB _footer;
std::vector<Slice> _keys;
std::vector<PagePointer> _values;
- int64_t _vl_field_mem_size {0};
};
class IndexPageIterator {
diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
index 8faab35cebb..27345b59be2 100644
--- a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
+++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
@@ -133,11 +133,6 @@ Status OrdinalIndexReader::_load(bool use_page_cache, bool
kept_in_memory,
return Status::OK();
}
-int64_t OrdinalIndexReader::get_metadata_size() const {
- return sizeof(OrdinalIndexReader) + _ordinals.capacity() *
sizeof(ordinal_t) +
- _pages.capacity() * sizeof(PagePointer);
-}
-
OrdinalPageIndexIterator OrdinalIndexReader::seek_at_or_before(ordinal_t
ordinal) {
int32_t left = 0;
int32_t right = _num_pages - 1;
diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.h
b/be/src/olap/rowset/segment_v2/ordinal_page_index.h
index df60edb12d1..660c97e315d 100644
--- a/be/src/olap/rowset/segment_v2/ordinal_page_index.h
+++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.h
@@ -97,8 +97,6 @@ private:
std::unique_ptr<OrdinalIndexPB> index_meta,
OlapReaderStatistics* index_load_stats);
- int64_t get_metadata_size() const override;
-
private:
friend OrdinalPageIndexIterator;
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 2ffc4a2d1a3..601ce807737 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -858,7 +858,7 @@ TabletSchema::~TabletSchema() {
}
int64_t TabletSchema::get_metadata_size() const {
- return sizeof(TabletSchema) + _vl_field_mem_size;
+ return sizeof(TabletSchema);
}
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
@@ -1025,10 +1025,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB&
schema, bool ignore_extrac
_cols.emplace_back(std::move(column));
if (!_cols.back()->is_extracted_column()) {
- _vl_field_mem_size +=
- sizeof(StringRef) + sizeof(char) *
_cols.back()->name().size() + sizeof(size_t);
_field_name_to_index.emplace(StringRef(_cols.back()->name()),
_num_columns);
- _vl_field_mem_size += sizeof(int32_t) * 2;
_field_id_to_index[_cols.back()->unique_id()] = _num_columns;
}
_num_columns++;
@@ -1087,7 +1084,6 @@ void TabletSchema::init_from_pb(const TabletSchemaPB&
schema, bool ignore_extrac
_row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
schema.row_store_column_unique_ids().end());
_enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
- _vl_field_mem_size += _row_store_column_unique_ids.capacity() *
sizeof(int32_t);
update_metadata_size();
}
@@ -1108,7 +1104,6 @@ void TabletSchema::shawdow_copy_without_columns(const
TabletSchema& tablet_schem
_num_null_columns = 0;
_num_key_columns = 0;
_cols.clear();
- _vl_field_mem_size = 0;
// notice : do not ref columns
_column_cache_handlers.clear();
}
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 8d0f3f111de..9b350b45cff 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -611,7 +611,6 @@ private:
// ATTN: For compability reason empty cids means all columns of tablet
schema are encoded to row column
std::vector<int32_t> _row_store_column_unique_ids;
bool _enable_variant_flatten_nested = false;
- int64_t _vl_field_mem_size {0}; // variable length field
};
bool operator==(const TabletSchema& a, const TabletSchema& b);
diff --git a/be/test/olap/metadata_adder_test.cpp
b/be/test/olap/metadata_adder_test.cpp
new file mode 100644
index 00000000000..de096f9dc80
--- /dev/null
+++ b/be/test/olap/metadata_adder_test.cpp
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/metadata_adder.h"
+
+#include <gtest/gtest.h>
+
+#include "olap/rowset/segment_v2/zone_map_index.h"
+#include "olap/tablet_schema.h"
+#include "olap/tablet_schema_helper.h"
+
+namespace doris {
+
+class MetadataAdderTest : public testing::Test {
+public:
+ const std::string kTestDir = "./ut_dir/metadata_adder_test";
+
+ void SetUp() override {
+ std::string filter_str = ::testing::GTEST_FLAG(filter);
+ std::cout << "GTEST_FILTER: " << filter_str << std::endl;
+
+ if (filter_str != "MetadataAdderTest.*") {
+ GTEST_SKIP() << "Skipping this test when executing concurrently.";
+ }
+
+ auto st = io::global_local_filesystem()->delete_directory(kTestDir);
+ ASSERT_TRUE(st.ok()) << st;
+ st = io::global_local_filesystem()->create_directory(kTestDir);
+ ASSERT_TRUE(st.ok()) << st;
+ }
+
+ void TearDown() override {
+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok());
+ }
+
+ MetadataAdderTest() = default;
+ ~MetadataAdderTest() override = default;
+};
+
+template <typename T>
+void test_construct_new_obj() {
+ int classSize = sizeof(T);
+
+ std::cout << "Class Name: " << typeid(T).name() << std::endl;
+
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+ // 1 default construct
+ {
+ T t1;
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize);
+ T t2;
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+ }
+
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+ // 2 copy construct
+ {
+ T t1;
+ T t2 = t1;
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+ }
+
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+
+ // 3 move construct
+ {
+ T t1;
+ T t2 = std::move(t1);
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+ }
+
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+
+ // 4 copy assignment
+ {
+ T t1;
+ T t2;
+ int before_size = MetadataAdder<T>::get_all_tablets_size();
+ t1 = t2;
+ ASSERT_TRUE(before_size == MetadataAdder<T>::get_all_tablets_size());
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+ }
+
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+
+ // 5 move assignment
+ {
+ T t1;
+ T t2;
+ int before_size = MetadataAdder<T>::get_all_tablets_size();
+ t1 = std::move(t2);
+ ASSERT_TRUE(before_size == MetadataAdder<T>::get_all_tablets_size());
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == classSize * 2);
+ }
+
+ ASSERT_TRUE(MetadataAdder<T>::get_all_tablets_size() == 0);
+}
+
+TEST_F(MetadataAdderTest, metadata_adder_test) {
+ test_construct_new_obj<TabletSchema>();
+ test_construct_new_obj<TabletColumn>();
+ test_construct_new_obj<TabletIndex>();
+}
+
+TEST_F(MetadataAdderTest, meta_load_with_pb_test) {
+ {
+ auto fs = io::global_local_filesystem();
+ TabletColumnPtr int_column = create_int_key(0);
+ Field* int_field = FieldFactory::create(*int_column);
+
+ // 1 load first column
+ segment_v2::ColumnIndexMetaPB index_meta1;
+ std::string file1 = kTestDir + "/copy_obj1";
+ {
+ std::unique_ptr<segment_v2::ZoneMapIndexWriter> builder(nullptr);
+
static_cast<void>(segment_v2::ZoneMapIndexWriter::create(int_field, builder));
+ for (int i = 0; i < 100; i++) {
+ builder->add_values((const uint8_t*)&i, 1);
+ }
+ static_cast<void>(builder->flush());
+ {
+ io::FileWriterPtr file_writer;
+ EXPECT_TRUE(fs->create_file(file1, &file_writer).ok());
+ EXPECT_TRUE(builder->finish(file_writer.get(),
&index_meta1).ok());
+ EXPECT_TRUE(file_writer->close().ok());
+ }
+ }
+
+
ASSERT_TRUE(MetadataAdder<segment_v2::ZoneMapIndexReader>::get_all_segments_size()
== 0);
+
+ io::FileReaderSPtr file_reader;
+ EXPECT_TRUE(fs->open_file(file1, &file_reader).ok());
+ segment_v2::ZoneMapIndexReader zonemap_col_reader(
+ file_reader, index_meta1.zone_map_index().page_zone_maps());
+ Status status = zonemap_col_reader.load(true, false);
+
+ int mem_size = zonemap_col_reader.get_metadata_size();
+
+
ASSERT_TRUE(MetadataAdder<segment_v2::ZoneMapIndexReader>::get_all_segments_size()
==
+ mem_size);
+
+ // load second column
+ segment_v2::ColumnIndexMetaPB index_meta2;
+ TabletColumnPtr varchar_column = create_varchar_key(0);
+ Field* str_field = FieldFactory::create(*varchar_column);
+
+ std::string file2 = kTestDir + "/copy_obj2";
+ {
+ std::unique_ptr<segment_v2::ZoneMapIndexWriter> builder(nullptr);
+
static_cast<void>(segment_v2::ZoneMapIndexWriter::create(str_field, builder));
+ std::vector<std::string> values1 = {"aaaa", "bbbb", "cccc",
"dddd", "eeee", "ffff"};
+ for (auto& value : values1) {
+ Slice slice(value);
+ builder->add_values((const uint8_t*)&slice, 1);
+ }
+ static_cast<void>(builder->flush());
+ {
+ io::FileWriterPtr file_writer;
+ EXPECT_TRUE(fs->create_file(file2, &file_writer).ok());
+ EXPECT_TRUE(builder->finish(file_writer.get(),
&index_meta2).ok());
+ EXPECT_TRUE(file_writer->close().ok());
+ }
+ }
+
+ io::FileReaderSPtr file_reader2;
+ EXPECT_TRUE(fs->open_file(file2, &file_reader2).ok());
+ segment_v2::ZoneMapIndexReader zonemap_col_reader2(
+ file_reader2, index_meta2.zone_map_index().page_zone_maps());
+ Status status2 = zonemap_col_reader2.load(true, false);
+
+ int mem_size2 = zonemap_col_reader2.get_metadata_size();
+
+
ASSERT_TRUE(MetadataAdder<segment_v2::ZoneMapIndexReader>::get_all_segments_size()
==
+ mem_size2 + mem_size);
+
+ delete int_field;
+ delete str_field;
+ }
+
+
ASSERT_TRUE(MetadataAdder<segment_v2::ZoneMapIndexReader>::get_all_segments_size()
== 0);
+}
+
+}; // namespace doris
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]