This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 83ea4ea984 [refractor](bitmap) bitmap serialize and deserialize refractor (#11921)
83ea4ea984 is described below

commit 83ea4ea984a462a2c7e1f8915c1a5ad36f21cd30
Author: camby <104178...@qq.com>
AuthorDate: Mon Aug 22 08:52:20 2022 +0800

    [refractor](bitmap) bitmap serialize and deserialize refractor (#11921)
    
    
    Co-authored-by: cambyzju <zhuxiaol...@baidu.com>
---
 be/src/vec/data_types/data_type_bitmap.cpp | 40 ++++++++++-----------
 be/test/vec/core/column_complex_test.cpp   | 56 ++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/be/src/vec/data_types/data_type_bitmap.cpp b/be/src/vec/data_types/data_type_bitmap.cpp
index cb1a2f2e45..5c49f9c65c 100644
--- a/be/src/vec/data_types/data_type_bitmap.cpp
+++ b/be/src/vec/data_types/data_type_bitmap.cpp
@@ -45,24 +45,22 @@ char* DataTypeBitMap::serialize(const IColumn& column, char* buf) const {
     auto& data_column = assert_cast<const ColumnBitmap&>(*ptr);
 
     // serialize the bitmap size array, row num saves at index 0
-    const auto row_num = column.size();
-    size_t bitmap_size_array[row_num + 1];
-    bitmap_size_array[0] = row_num;
-    for (size_t i = 0; i < row_num; ++i) {
+    size_t* meta_ptr = (size_t*)buf;
+    meta_ptr[0] = column.size();
+    for (size_t i = 0; i < meta_ptr[0]; ++i) {
         auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
-        bitmap_size_array[i + 1] = bitmap.getSizeInBytes();
+        meta_ptr[i + 1] = bitmap.getSizeInBytes();
     }
-    auto allocate_len_size = sizeof(size_t) * (row_num + 1);
-    memcpy(buf, bitmap_size_array, allocate_len_size);
-    buf += allocate_len_size;
+
     // serialize each bitmap
-    for (size_t i = 0; i < row_num; ++i) {
+    char* data_ptr = buf + sizeof(size_t) * (meta_ptr[0] + 1);
+    for (size_t i = 0; i < meta_ptr[0]; ++i) {
         auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
-        bitmap.write(buf);
-        buf += bitmap_size_array[i + 1];
+        bitmap.write(data_ptr);
+        data_ptr += meta_ptr[i + 1];
     }
 
-    return buf;
+    return data_ptr;
 }
 
 const char* DataTypeBitMap::deserialize(const char* buf, IColumn* column) const {
@@ -70,19 +68,17 @@ const char* DataTypeBitMap::deserialize(const char* buf, IColumn* column) const
     auto& data = data_column.get_data();
 
     // deserialize the bitmap size array
-    size_t row_num = *reinterpret_cast<const size_t*>(buf);
-    buf += sizeof(size_t);
-    size_t bitmap_size_array[row_num];
-    memcpy(bitmap_size_array, buf, sizeof(size_t) * row_num);
-    buf += sizeof(size_t) * row_num;
+    const size_t* meta_ptr = reinterpret_cast<const size_t*>(buf);
+
     // deserialize each bitmap
-    data.resize(row_num);
-    for (int i = 0; i < row_num; ++i) {
-        data[i].deserialize(buf);
-        buf += bitmap_size_array[i];
+    data.resize(meta_ptr[0]);
+    const char* data_ptr = buf + sizeof(size_t) * (meta_ptr[0] + 1);
+    for (size_t i = 0; i < meta_ptr[0]; ++i) {
+        data[i].deserialize(data_ptr);
+        data_ptr += meta_ptr[i + 1];
     }
 
-    return buf;
+    return data_ptr;
 }
 
 MutableColumnPtr DataTypeBitMap::create_column() const {
diff --git a/be/test/vec/core/column_complex_test.cpp b/be/test/vec/core/column_complex_test.cpp
index 1eb8cd906d..ce9e4d60f3 100644
--- a/be/test/vec/core/column_complex_test.cpp
+++ b/be/test/vec/core/column_complex_test.cpp
@@ -44,4 +44,60 @@ TEST(ColumnComplexTest, BasicTest) {
 TEST(ColumnComplexType, DataTypeBitmapTest) {
     std::make_shared<DataTypeBitMap>();
 }
+
+class ColumnBitmapTest : public testing::Test {
+public:
+    virtual void SetUp() override {}
+    virtual void TearDown() override {}
+
+    void check_bitmap_column(const IColumn& l, const IColumn& r) {
+        ASSERT_EQ(l.size(), r.size());
+        const auto& l_col = assert_cast<const ColumnBitmap&>(l);
+        const auto& r_col = assert_cast<const ColumnBitmap&>(r);
+        for (size_t i = 0; i < l_col.size(); ++i) {
+            auto& l_bitmap = const_cast<BitmapValue&>(l_col.get_element(i));
+            auto& r_bitmap = const_cast<BitmapValue&>(r_col.get_element(i));
+            ASSERT_EQ(l_bitmap.xor_cardinality(r_bitmap), 0);
+        }
+    }
+
+    void check_serialize_and_deserialize(MutableColumnPtr& col) {
+        auto column = assert_cast<ColumnBitmap*>(col.get());
+        auto size = _bitmap_type.get_uncompressed_serialized_bytes(*column);
+        std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+        auto result = _bitmap_type.serialize(*column, buf.get());
+        ASSERT_EQ(result, buf.get() + size);
+
+        auto column2 = _bitmap_type.create_column();
+        _bitmap_type.deserialize(buf.get(), column2.get());
+        check_bitmap_column(*column, *column2.get());
+    }
+
+private:
+    DataTypeBitMap _bitmap_type;
+};
+
+TEST_F(ColumnBitmapTest, SerializeAndDeserialize) {
+    auto column = _bitmap_type.create_column();
+
+    // empty column
+    check_serialize_and_deserialize(column);
+
+    // bitmap with lots of rows
+    const size_t row_size = 20000;
+    auto& data = assert_cast<ColumnBitmap&>(*column.get()).get_data();
+    data.resize(row_size);
+    check_serialize_and_deserialize(column);
+
+    // bitmap with values case 1
+    data[0].add(10);
+    data[0].add(1000000);
+    check_serialize_and_deserialize(column);
+
+    // bitmap with values case 2
+    data[row_size - 1].add(33333);
+    data[row_size - 1].add(0);
+    check_serialize_and_deserialize(column);
+}
+
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to