This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 83ea4ea984 [refractor](bitmap) bitmap serialize and deserialize refractor (#11921)
83ea4ea984 is described below

commit 83ea4ea984a462a2c7e1f8915c1a5ad36f21cd30
Author: camby <104178...@qq.com>
AuthorDate: Mon Aug 22 08:52:20 2022 +0800

    [refractor](bitmap) bitmap serialize and deserialize refractor (#11921)

    Co-authored-by: cambyzju <zhuxiaol...@baidu.com>
---
 be/src/vec/data_types/data_type_bitmap.cpp | 40 ++++++++++-----------
 be/test/vec/core/column_complex_test.cpp   | 56 ++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/be/src/vec/data_types/data_type_bitmap.cpp b/be/src/vec/data_types/data_type_bitmap.cpp
index cb1a2f2e45..5c49f9c65c 100644
--- a/be/src/vec/data_types/data_type_bitmap.cpp
+++ b/be/src/vec/data_types/data_type_bitmap.cpp
@@ -45,24 +45,22 @@ char* DataTypeBitMap::serialize(const IColumn& column, char* buf) const {
     auto& data_column = assert_cast<const ColumnBitmap&>(*ptr);

     // serialize the bitmap size array, row num saves at index 0
-    const auto row_num = column.size();
-    size_t bitmap_size_array[row_num + 1];
-    bitmap_size_array[0] = row_num;
-    for (size_t i = 0; i < row_num; ++i) {
+    size_t* meta_ptr = (size_t*)buf;
+    meta_ptr[0] = column.size();
+    for (size_t i = 0; i < meta_ptr[0]; ++i) {
         auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
-        bitmap_size_array[i + 1] = bitmap.getSizeInBytes();
+        meta_ptr[i + 1] = bitmap.getSizeInBytes();
     }
-    auto allocate_len_size = sizeof(size_t) * (row_num + 1);
-    memcpy(buf, bitmap_size_array, allocate_len_size);
-    buf += allocate_len_size;
+
     // serialize each bitmap
-    for (size_t i = 0; i < row_num; ++i) {
+    char* data_ptr = buf + sizeof(size_t) * (meta_ptr[0] + 1);
+    for (size_t i = 0; i < meta_ptr[0]; ++i) {
         auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
-        bitmap.write(buf);
-        buf += bitmap_size_array[i + 1];
+        bitmap.write(data_ptr);
+        data_ptr += meta_ptr[i + 1];
     }

-    return buf;
+    return data_ptr;
 }

 const char* DataTypeBitMap::deserialize(const char* buf, IColumn* column) const {
@@ -70,19 +68,17 @@ const char* DataTypeBitMap::deserialize(const char* buf, IColumn* column) const
     auto& data = data_column.get_data();

     // deserialize the bitmap size array
-    size_t row_num = *reinterpret_cast<const size_t*>(buf);
-    buf += sizeof(size_t);
-    size_t bitmap_size_array[row_num];
-    memcpy(bitmap_size_array, buf, sizeof(size_t) * row_num);
-    buf += sizeof(size_t) * row_num;
+    const size_t* meta_ptr = reinterpret_cast<const size_t*>(buf);
+
     // deserialize each bitmap
-    data.resize(row_num);
-    for (int i = 0; i < row_num; ++i) {
-        data[i].deserialize(buf);
-        buf += bitmap_size_array[i];
+    data.resize(meta_ptr[0]);
+    const char* data_ptr = buf + sizeof(size_t) * (meta_ptr[0] + 1);
+    for (size_t i = 0; i < meta_ptr[0]; ++i) {
+        data[i].deserialize(data_ptr);
+        data_ptr += meta_ptr[i + 1];
     }

-    return buf;
+    return data_ptr;
 }

 MutableColumnPtr DataTypeBitMap::create_column() const {
diff --git a/be/test/vec/core/column_complex_test.cpp b/be/test/vec/core/column_complex_test.cpp
index 1eb8cd906d..ce9e4d60f3 100644
--- a/be/test/vec/core/column_complex_test.cpp
+++ b/be/test/vec/core/column_complex_test.cpp
@@ -44,4 +44,60 @@ TEST(ColumnComplexTest, BasicTest) {
 TEST(ColumnComplexType, DataTypeBitmapTest) {
     std::make_shared<DataTypeBitMap>();
 }
+
+class ColumnBitmapTest : public testing::Test {
+public:
+    virtual void SetUp() override {}
+    virtual void TearDown() override {}
+
+    void check_bitmap_column(const IColumn& l, const IColumn& r) {
+        ASSERT_EQ(l.size(), r.size());
+        const auto& l_col = assert_cast<const ColumnBitmap&>(l);
+        const auto& r_col = assert_cast<const ColumnBitmap&>(r);
+        for (size_t i = 0; i < l_col.size(); ++i) {
+            auto& l_bitmap = const_cast<BitmapValue&>(l_col.get_element(i));
+            auto& r_bitmap = const_cast<BitmapValue&>(r_col.get_element(i));
+            ASSERT_EQ(l_bitmap.xor_cardinality(r_bitmap), 0);
+        }
+    }
+
+    void check_serialize_and_deserialize(MutableColumnPtr& col) {
+        auto column = assert_cast<ColumnBitmap*>(col.get());
+        auto size = _bitmap_type.get_uncompressed_serialized_bytes(*column);
+        std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+        auto result = _bitmap_type.serialize(*column, buf.get());
+        ASSERT_EQ(result, buf.get() + size);
+
+        auto column2 = _bitmap_type.create_column();
+        _bitmap_type.deserialize(buf.get(), column2.get());
+        check_bitmap_column(*column, *column2.get());
+    }
+
+private:
+    DataTypeBitMap _bitmap_type;
+};
+
+TEST_F(ColumnBitmapTest, SerializeAndDeserialize) {
+    auto column = _bitmap_type.create_column();
+
+    // empty column
+    check_serialize_and_deserialize(column);
+
+    // bitmap with lots of rows
+    const size_t row_size = 20000;
+    auto& data = assert_cast<ColumnBitmap&>(*column.get()).get_data();
+    data.resize(row_size);
+    check_serialize_and_deserialize(column);
+
+    // bitmap with values case 1
+    data[0].add(10);
+    data[0].add(1000000);
+    check_serialize_and_deserialize(column);
+
+    // bitmap with values case 2
+    data[row_size - 1].add(33333);
+    data[row_size - 1].add(0);
+    check_serialize_and_deserialize(column);
+}
+
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org