This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f4d49856c61 [test](beut) Add unit test cases for Block (#50480)
f4d49856c61 is described below

commit f4d49856c61535af36bd6f94f9796dd762823975
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Apr 29 18:51:49 2025 +0800

    [test](beut) Add unit test cases for Block (#50480)
---
 be/src/vec/core/block.cpp                          |  89 +---
 be/src/vec/core/block.h                            |  28 +-
 .../vec/exec/format/arrow/arrow_stream_reader.cpp  |   2 +-
 be/test/vec/core/block_test.cpp                    | 535 ++++++++++++++++++++-
 4 files changed, 538 insertions(+), 116 deletions(-)

diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 94e4ee75f36..d075b5531ab 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -22,6 +22,7 @@
 
 #include <fmt/format.h>
 #include <gen_cpp/data.pb.h>
+#include <glog/logging.h>
 #include <snappy.h>
 #include <streamvbyte.h>
 #include <sys/types.h>
@@ -383,19 +384,6 @@ size_t Block::rows() const {
     return 0;
 }
 
-std::string Block::each_col_size() const {
-    std::string ss;
-    for (const auto& elem : data) {
-        if (elem.column) {
-            ss += elem.column->size();
-            ss += " | ";
-        } else {
-            ss += "-1 | ";
-        }
-    }
-    return ss;
-}
-
 void Block::set_num_rows(size_t length) {
     if (rows() > length) {
         for (auto& elem : data) {
@@ -629,6 +617,7 @@ MutableColumns Block::mutate_columns() {
     size_t num_columns = data.size();
     MutableColumns columns(num_columns);
     for (size_t i = 0; i < num_columns; ++i) {
+        DCHECK(data[i].type);
         columns[i] = data[i].column ? (*std::move(data[i].column)).mutate()
                                     : data[i].type->create_column();
     }
@@ -645,16 +634,6 @@ void Block::set_columns(MutableColumns&& columns) {
     }
 }
 
-void Block::set_columns(const Columns& columns) {
-    DCHECK_GE(columns.size(), data.size())
-            << fmt::format("Invalid size of columns, columns size: {}, data size: {}",
-                           columns.size(), data.size());
-    size_t num_columns = data.size();
-    for (size_t i = 0; i < num_columns; ++i) {
-        data[i].column = columns[i];
-    }
-}
-
 Block Block::clone_with_columns(MutableColumns&& columns) const {
     Block res;
 
@@ -666,24 +645,6 @@ Block Block::clone_with_columns(MutableColumns&& columns) const {
     return res;
 }
 
-Block Block::clone_with_columns(const Columns& columns) const {
-    Block res;
-
-    size_t num_columns = data.size();
-
-    if (num_columns != columns.size()) {
-        throw Exception(Status::FatalError(
-                "Cannot clone block with columns because block has {} columns, but {} columns "
-                "given.",
-                num_columns, columns.size()));
-    }
-
-    for (size_t i = 0; i < num_columns; ++i) {
-        res.insert({columns[i], data[i].type, data[i].name});
-    }
-    return res;
-}
-
 Block Block::clone_without_columns(const std::vector<int>* column_offset) const {
     Block res;
 
@@ -701,16 +662,6 @@ Block Block::clone_without_columns(const std::vector<int>* column_offset) const
     return res;
 }
 
-Block Block::sort_columns() const {
-    Block sorted_block;
-
-    for (const auto& name : index_by_name) {
-        sorted_block.insert(data[name.second]);
-    }
-
-    return sorted_block;
-}
-
 const ColumnsWithTypeAndName& Block::get_columns_with_type_and_name() const {
     return data;
 }
@@ -743,14 +694,6 @@ void Block::clear() {
     row_same_bit.clear();
 }
 
-std::string Block::print_use_count() {
-    std::stringstream ss;
-    for (auto& d : data) {
-        ss << ", [" << d.name << ", " << d.column->use_count() << "]";
-    }
-    return ss.str();
-}
-
 void Block::clear_column_data(int64_t column_size) noexcept {
     SCOPED_SKIP_MEMORY_CHECK();
     // data.size() greater than column_size, means here have some
@@ -764,7 +707,6 @@ void Block::clear_column_data(int64_t column_size) noexcept {
         if (d.column) {
             // Temporarily disable reference count check because a column might be referenced multiple times within a block.
             // Queries like this: `select c, c from t1;`
-            // DCHECK_EQ(d.column->use_count(), 1) << " " << print_use_count();
             (*std::move(d.column)).assume_mutable()->clear();
         }
     }
@@ -878,15 +820,6 @@ void Block::filter_block_internal(Block* block, const IColumn::Filter& filter) {
     }
 }
 
-Block Block::copy_block(const std::vector<int>& column_offset) const {
-    ColumnsWithTypeAndName columns_with_type_and_name;
-    for (auto offset : column_offset) {
-        DCHECK(offset < data.size());
-        columns_with_type_and_name.emplace_back(data[offset]);
-    }
-    return columns_with_type_and_name;
-}
-
 Status Block::append_to_block_by_selector(MutableBlock* dst,
                                           const IColumn::Selector& selector) const {
     RETURN_IF_CATCH_EXCEPTION({
@@ -1059,15 +992,6 @@ void MutableBlock::swap(MutableBlock& another) noexcept {
     index_by_name.swap(another.index_by_name);
 }
 
-void MutableBlock::swap(MutableBlock&& another) noexcept {
-    SCOPED_SKIP_MEMORY_CHECK();
-    clear();
-    _columns = std::move(another._columns);
-    _data_types = std::move(another._data_types);
-    _names = std::move(another._names);
-    index_by_name = std::move(another.index_by_name);
-}
-
 void MutableBlock::add_row(const Block* block, int row) {
     const auto& block_data = block->get_columns_with_type_and_name();
     for (size_t i = 0; i < _columns.size(); ++i) {
@@ -1266,15 +1190,6 @@ void MutableBlock::clear_column_data() noexcept {
     }
 }
 
-void MutableBlock::reset_column_data() noexcept {
-    SCOPED_SKIP_MEMORY_CHECK();
-    _columns.clear();
-    for (int i = 0; i < _names.size(); i++) {
-        _columns.emplace_back(_data_types[i]->create_column());
-        index_by_name[_names[i]] = i;
-    }
-}
-
 void MutableBlock::initialize_index_by_name() {
     for (size_t i = 0, size = _names.size(); i < size; ++i) {
         index_by_name[_names[i]] = i;
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 59ad41d9ba7..466760f7ed2 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -183,8 +183,6 @@ public:
     /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
     size_t rows() const;
 
-    std::string each_col_size() const;
-
     // Cut the rows in block, use in LIMIT operation
     void set_num_rows(size_t length);
 
@@ -220,8 +218,6 @@ public:
     Columns get_columns() const;
     Columns get_columns_and_convert();
 
-    void set_columns(const Columns& columns);
-    Block clone_with_columns(const Columns& columns) const;
     Block clone_without_columns(const std::vector<int>* column_offset = nullptr) const;
 
     /** Get empty columns with the same types as in block. */
@@ -234,9 +230,6 @@ public:
     void set_columns(MutableColumns&& columns);
     Block clone_with_columns(MutableColumns&& columns) const;
 
-    /** Get a block with columns that have been rearranged in the order of their names. */
-    Block sort_columns() const;
-
     void clear();
     void swap(Block& other) noexcept;
     void swap(Block&& other) noexcept;
@@ -269,18 +262,9 @@ public:
     std::string dump_data(size_t begin = 0, size_t row_limit = 100,
                           bool allow_null_mismatch = false) const;
 
-    static std::string dump_column(ColumnPtr col, DataTypePtr type) {
-        ColumnWithTypeAndName type_name {col, type, ""};
-        Block b {type_name};
-        return b.dump_data(0, b.rows());
-    }
-
     /** Get one line data from block, only use in load data */
     std::string dump_one_line(size_t row, int column_end) const;
 
-    // copy a new block by the offset column
-    Block copy_block(const std::vector<int>& column_offset) const;
-
     Status append_to_block_by_selector(MutableBlock* dst, const IColumn::Selector& selector) const;
 
     // need exception safety
@@ -394,10 +378,6 @@ public:
 
     void clear_same_bit() { row_same_bit.clear(); }
 
-    // return string contains use_count() of each columns
-    // for debug purpose.
-    std::string print_use_count();
-
     // remove tmp columns in block
     // in inverted index apply logic, in order to optimize query performance,
     // we built some temporary columns into block
@@ -524,7 +504,7 @@ public:
     std::string dump_types() const {
         std::string res;
         for (auto type : _data_types) {
-            if (res.size()) {
+            if (!res.empty()) {
                 res += ", ";
             }
             res += type->get_name();
@@ -571,7 +551,7 @@ public:
     template <typename T>
     [[nodiscard]] Status merge_impl(T&& block) {
         // merge is not supported in dynamic block
-        if (_columns.size() == 0 && _data_types.size() == 0) {
+        if (_columns.empty() && _data_types.empty()) {
             _data_types = block.get_data_types();
             _names = block.get_names();
             _columns.resize(block.columns());
@@ -624,8 +604,6 @@ public:
 
     void swap(MutableBlock& other) noexcept;
 
-    void swap(MutableBlock&& other) noexcept;
-
     void add_row(const Block* block, int row);
     // Batch add row should return error status if allocate memory failed.
     Status add_rows(const Block* block, const uint32_t* row_begin, const uint32_t* row_end,
@@ -646,8 +624,6 @@ public:
 
     // columns resist. columns' inner data removed.
     void clear_column_data() noexcept;
-    // reset columns by types and names.
-    void reset_column_data() noexcept;
 
     size_t allocated_bytes() const;
 
diff --git a/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp b/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp
index efe8e36bf20..2ce7dd09069 100644
--- a/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp
+++ b/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp
@@ -97,7 +97,7 @@ Status ArrowStreamReader::get_next_block(Block* block, size_t* read_rows, bool*
             std::string column_name = batch.schema()->field(c)->name();
 
             try {
-                vectorized::ColumnWithTypeAndName& column_with_name =
+                const vectorized::ColumnWithTypeAndName& column_with_name =
                         block->get_by_name(column_name);
                 column_with_name.type->get_serde()->read_column_from_arrow(
                         column_with_name.column->assume_mutable_ref(), column, 0, num_rows, _ctzz);
diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp
index 7c876b00aa4..f1dd1c8f504 100644
--- a/be/test/vec/core/block_test.cpp
+++ b/be/test/vec/core/block_test.cpp
@@ -17,27 +17,40 @@
 
 #include "vec/core/block.h"
 
+#include <concurrentqueue.h>
+#include <gen_cpp/Descriptors_types.h>
+#include <gen_cpp/Metrics_types.h>
 #include <gen_cpp/segment_v2.pb.h>
 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
 
-#include <algorithm>
 #include <cmath>
+#include <cstddef>
 #include <cstdint>
+#include <memory>
 #include <string>
+#include <vector>
 
 #include "agent/be_exec_version_manager.h"
 #include "common/config.h"
+#include "common/object_pool.h"
 #include "gen_cpp/data.pb.h"
-#include "gtest/gtest_pred_impl.h"
+#include "runtime/define_primitive_type.h"
+#include "runtime/descriptor_helper.h"
+#include "runtime/descriptors.h"
+#include "testutil/column_helper.h"
 #include "util/bitmap_value.h"
+#include "vec/columns/column.h"
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_complex.h"
+#include "vec/columns/column_const.h"
 #include "vec/columns/column_decimal.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
 #include "vec/columns/column_vector.h"
 #include "vec/columns/columns_number.h"
+#include "vec/common/sip_hash.h"
 #include "vec/core/field.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type.h"
@@ -49,6 +62,7 @@
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
+#include "vec/data_types/data_type_struct.h"
 #include "vec/data_types/data_type_time_v2.h"
 #include "vec/runtime/vdatetime_value.h"
 
@@ -850,4 +864,521 @@ TEST(BlockTest, merge_with_shared_columns) {
     EXPECT_EQ(1034, src_block.rows());
 }
 
+namespace vectorized {
+template <typename T>
+void clear_blocks(moodycamel::ConcurrentQueue<T>& blocks,
+                  RuntimeProfile::Counter* memory_used_counter = nullptr);
+}
+
+TEST(BlockTest, clear_blocks) {
+    {
+        moodycamel::ConcurrentQueue<vectorized::Block> queue;
+
+        RuntimeProfile::Counter counter(TUnit::BYTES);
+        auto block1 = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+        auto block2 = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
+        counter.update(block1.allocated_bytes());
+        counter.update(block2.allocated_bytes());
+
+        ASSERT_EQ(counter.value(), block1.allocated_bytes() + 
block2.allocated_bytes());
+
+        queue.enqueue(std::move(block1));
+        queue.enqueue(std::move(block2));
+
+        vectorized::clear_blocks(queue, &counter);
+        EXPECT_EQ(counter.value(), 0);
+
+        ASSERT_FALSE(queue.try_dequeue(block1));
+    }
+
+    {
+        moodycamel::ConcurrentQueue<std::unique_ptr<vectorized::Block>> queue;
+
+        RuntimeProfile::Counter counter(TUnit::BYTES);
+        auto block1 = std::make_unique<vectorized::Block>(
+                
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3}));
+        auto block2 = std::make_unique<vectorized::Block>(
+                
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({4, 5, 6}));
+
+        counter.update(block1->allocated_bytes());
+        counter.update(block2->allocated_bytes());
+
+        ASSERT_EQ(counter.value(), block1->allocated_bytes() + 
block2->allocated_bytes());
+
+        queue.enqueue(std::move(block1));
+        queue.enqueue(std::move(block2));
+
+        vectorized::clear_blocks(queue, &counter);
+        EXPECT_EQ(counter.value(), 0);
+
+        ASSERT_FALSE(queue.try_dequeue(block1));
+    }
+}
+
+TEST(BlockTest, replace_by_position) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+    block.insert(0, 
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+                            {"a", "b", "c"}));
+
+    auto col = 
vectorized::ColumnHelper::create_column<vectorized::DataTypeInt32>({4, 5, 6});
+    block.replace_by_position(0, std::move(col));
+
+    ASSERT_EQ(block.get_by_position(0).column->get_int(0), 4);
+    ASSERT_EQ(block.get_by_position(0).column->get_int(1), 5);
+    ASSERT_EQ(block.get_by_position(0).column->get_int(2), 6);
+}
+
+TEST(BlockTest, compare_at) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+    auto ret = block.compare_at(0, 0, block, -1);
+    ASSERT_EQ(ret, 0);
+
+    ret = block.compare_at(0, 1, block, -1);
+    ASSERT_LT(ret, 0);
+
+    ret = block.compare_at(2, 1, block, -1);
+    ASSERT_GT(ret, 0);
+
+    
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeInt32>(
+            {2, 2, 1}));
+    std::vector<uint32_t> compare_columns {1, 0};
+
+    ret = block.compare_at(0, 1, &compare_columns, block, -1);
+    ASSERT_LT(ret, 0);
+
+    ret = block.compare_at(1, 2, &compare_columns, block, -1);
+    ASSERT_GT(ret, 0);
+}
+
+TEST(BlockTest, same_bit) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 1, 3});
+    std::vector<bool> same_bit = {false, true, false};
+    block.set_same_bit(same_bit.begin(), same_bit.end());
+    ASSERT_EQ(block.get_same_bit(0), false);
+    ASSERT_EQ(block.get_same_bit(1), true);
+    ASSERT_EQ(block.get_same_bit(2), false);
+
+    auto block2 = block;
+    block2.set_num_rows(2);
+    ASSERT_EQ(block2.rows(), 2);
+
+    int64_t length = 3;
+    block2.skip_num_rows(length);
+    ASSERT_EQ(1, length);
+    ASSERT_EQ(block2.rows(), 0);
+
+    block2 = block;
+    length = 1;
+    block2.skip_num_rows(length);
+    ASSERT_EQ(block2.rows(), 2);
+    ASSERT_EQ(block2.get_same_bit(0), true);
+
+    block2.skip_num_rows(length);
+    ASSERT_EQ(block2.rows(), 1);
+    ASSERT_EQ(block2.get_same_bit(0), false);
+
+    block.clear_same_bit();
+
+    ASSERT_EQ(block.get_same_bit(1), false);
+}
+
+TEST(BlockTest, dump) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+    auto types = block.dump_types();
+    ASSERT_TRUE(types.find("Int32") != std::string::npos);
+
+    
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+    types = block.dump_types();
+    ASSERT_TRUE(types.find("String") != std::string::npos);
+
+    block.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
+    types = block.dump_types();
+    ASSERT_TRUE(types.find("Float64") != std::string::npos);
+
+    auto names = block.get_names();
+    for (const auto& name : names) {
+        ASSERT_EQ(name, "column");
+    }
+
+    auto dumped_names = block.dump_names();
+    ASSERT_TRUE(dumped_names.find(", ") != std::string::npos);
+    ASSERT_TRUE(dumped_names.find("column") != std::string::npos);
+
+    block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+    
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+            {"a", "b", "c"}));
+
+    auto dumped_line = block.dump_one_line(1, 2);
+    ASSERT_EQ(dumped_line, "2 b");
+}
+
+TEST(BlockTest, merge_impl_ignore_overflow) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+    
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+    block.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
+    auto block2 = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+
+    auto mutable_block = vectorized::MutableBlock::build_mutable_block(&block);
+
+    auto st = mutable_block.merge_ignore_overflow(std::move(block2));
+    ASSERT_FALSE(st.ok());
+
+    block2 = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+    block2.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+    block2.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat32>({}));
+
+    EXPECT_ANY_THROW(st = 
mutable_block.merge_impl_ignore_overflow(std::move(block2)));
+}
+
+TEST(BlockTest, merge_impl) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+    
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+    block.insert(
+            
vectorized::ColumnHelper::create_nullable_column_with_name<vectorized::DataTypeFloat64>(
+                    {1.0}, {0}));
+
+    block.get_by_position(2).column = nullptr;
+
+    auto mutable_block = 
vectorized::MutableBlock::build_mutable_block(nullptr);
+
+    auto st = mutable_block.merge(std::move(block));
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    auto block2 = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+    block2.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+
+    st = mutable_block.merge_impl(std::move(block2));
+    ASSERT_FALSE(st);
+
+    block2 = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1});
+    block2.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"a"}));
+    block2.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
+
+    EXPECT_ANY_THROW(st = mutable_block.merge_impl(std::move(block2)));
+}
+
+TEST(BlockTest, ctor) {
+    TDescriptorTableBuilder builder;
+    TTupleDescriptorBuilder tuple_builder;
+    tuple_builder
+            .add_slot(
+                    
TSlotDescriptorBuilder().type(PrimitiveType::TYPE_INT).nullable(false).build())
+            .add_slot(TSlotDescriptorBuilder()
+                              .type(PrimitiveType::TYPE_STRING)
+                              .set_isMaterialized(false)
+                              .build());
+    tuple_builder.build(&builder);
+
+    auto t_table = builder.desc_tbl();
+
+    ObjectPool pool;
+    DescriptorTbl* tbl;
+    auto st = DescriptorTbl::create(&pool, t_table, &tbl);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    auto slots = tbl->get_tuple_descriptor(0)->slots();
+
+    std::vector<SlotDescriptor> slot_descs;
+    for (auto* slot : slots) {
+        slot_descs.push_back(*slot);
+    }
+
+    vectorized::Block block(slot_descs, 1);
+    ASSERT_EQ(block.columns(), 2);
+    ASSERT_EQ(block.get_by_position(0).type->get_type_id(), 
vectorized::TypeIndex::Int32);
+    ASSERT_EQ(block.get_by_position(1).type->get_type_id(), 
vectorized::TypeIndex::Nullable);
+
+    {
+        auto mutable_block =
+                
vectorized::MutableBlock::create_unique(tbl->get_tuple_descs(), 10, false);
+        ASSERT_EQ(mutable_block->columns(), 2);
+        auto mutable_block2 = vectorized::MutableBlock::create_unique();
+        mutable_block->swap(*mutable_block2);
+        ASSERT_EQ(mutable_block->columns(), 0);
+        ASSERT_EQ(mutable_block2->columns(), 2);
+    }
+
+    {
+        auto mutable_block =
+                
vectorized::MutableBlock::create_unique(tbl->get_tuple_descs(), 10, true);
+        ASSERT_EQ(mutable_block->columns(), 1);
+        auto mutable_block2 = vectorized::MutableBlock::create_unique();
+        mutable_block->swap(*mutable_block2);
+        ASSERT_EQ(mutable_block->columns(), 0);
+        ASSERT_EQ(mutable_block2->columns(), 1);
+    }
+}
+
+TEST(BlockTest, insert_erase) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+
+    auto column_with_name =
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({});
+    EXPECT_ANY_THROW(block.insert(2, column_with_name));
+    block.insert(0, column_with_name);
+
+    ASSERT_EQ(block.columns(), 2);
+    ASSERT_EQ(block.get_by_position(0).type->get_type_id(), 
vectorized::TypeIndex::String);
+
+    EXPECT_ANY_THROW(block.insert(3, std::move(column_with_name)));
+
+    column_with_name =
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({});
+    block.insert(0, std::move(column_with_name));
+    ASSERT_EQ(block.columns(), 3);
+    ASSERT_EQ(block.get_by_position(0).type->get_type_id(), 
vectorized::TypeIndex::Float64);
+
+    std::set<size_t> positions = {0, 2};
+    block.erase(positions);
+
+    ASSERT_EQ(block.columns(), 1);
+    ASSERT_EQ(block.get_by_position(0).type->get_type_id(), 
vectorized::TypeIndex::String);
+
+    block.erase_tail(0);
+    ASSERT_EQ(block.columns(), 0);
+
+    EXPECT_ANY_THROW(block.erase("column"));
+    column_with_name =
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({});
+    block.insert(0, column_with_name);
+    EXPECT_NO_THROW(block.erase("column"));
+    ASSERT_EQ(block.columns(), 0);
+
+    EXPECT_ANY_THROW(block.safe_get_by_position(0));
+
+    ASSERT_EQ(block.try_get_by_name("column"), nullptr);
+    EXPECT_ANY_THROW(block.get_by_name("column"));
+    EXPECT_ANY_THROW(block.get_position_by_name("column"));
+    block.insert(0, column_with_name);
+
+    EXPECT_NO_THROW(auto item = block.get_by_name("column"));
+    ASSERT_NE(block.try_get_by_name("column"), nullptr);
+    EXPECT_EQ(block.get_position_by_name("column"), 0);
+
+    block.insert({nullptr, nullptr, BeConsts::BLOCK_TEMP_COLUMN_PREFIX});
+    EXPECT_NO_THROW(auto item = 
block.get_by_name(BeConsts::BLOCK_TEMP_COLUMN_PREFIX));
+
+    block.erase_tmp_columns();
+    ASSERT_EQ(block.try_get_by_name(BeConsts::BLOCK_TEMP_COLUMN_PREFIX), 
nullptr);
+
+    {
+        // test const block
+        const auto const_block = block;
+        EXPECT_EQ(const_block.try_get_by_name("column2"), nullptr);
+        EXPECT_ANY_THROW(const_block.get_by_name("column2"));
+        EXPECT_ANY_THROW(const_block.get_position_by_name("column2"));
+
+        EXPECT_NO_THROW(auto item = const_block.get_by_name("column"));
+        ASSERT_NE(const_block.try_get_by_name("column"), nullptr);
+        EXPECT_EQ(const_block.get_position_by_name("column"), 0);
+    }
+
+    block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+    block.insert({nullptr, std::make_shared<vectorized::DataTypeString>(), 
"col1"});
+
+    block.insert({nullptr, std::make_shared<vectorized::DataTypeString>(), 
"col2"});
+
+    vectorized::MutableBlock mutable_block(&block);
+    mutable_block.erase("col1");
+    ASSERT_EQ(mutable_block.columns(), 2);
+
+    EXPECT_ANY_THROW(mutable_block.erase("col1"));
+    ASSERT_EQ(mutable_block.columns(), 2);
+    mutable_block.erase("col2");
+    ASSERT_EQ(mutable_block.columns(), 1);
+}
+
+TEST(BlockTest, check_number_of_rows) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+
+    block.insert({nullptr, nullptr, "null_col"});
+    EXPECT_NO_THROW(block.check_number_of_rows(true));
+    EXPECT_ANY_THROW(block.check_number_of_rows(false));
+
+    block.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"abc"}));
+    EXPECT_ANY_THROW(block.check_number_of_rows(true));
+
+    EXPECT_ANY_THROW(block.columns_bytes());
+
+    ASSERT_GT(block.allocated_bytes(), 0);
+}
+
+TEST(BlockTest, clear_column_mem_not_keep) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+    
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+            {"abc", "efg", "hij"}));
+
+    std::vector<bool> column_keep_flags {false, true};
+
+    auto block2 = block;
+    block2.clear_column_mem_not_keep(column_keep_flags, false);
+    ASSERT_EQ(block2.get_by_position(0).column->size(), 0);
+    ASSERT_EQ(block2.get_by_position(1).column->size(), 3);
+
+    block.clear_column_mem_not_keep(column_keep_flags, true);
+    ASSERT_EQ(block.get_by_position(0).column->size(), 3);
+    ASSERT_EQ(block.get_by_position(1).column->size(), 3);
+}
+
+TEST(BlockTest, filter) {
+    {
+        // normal filter
+        auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+        
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+                {"abc", "efg", "hij"}));
+
+        
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>(
+                {1, 1, 1}));
+        auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+        ASSERT_EQ(block.rows(), 3);
+
+        
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>(
+                {0, 1, 0}));
+        st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+        ASSERT_EQ(block.rows(), 1);
+
+        block.insert(
+                
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>({0}));
+        auto block2 = block;
+        st = vectorized::Block::filter_block(&block2, {0, 1}, 2, 2);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+        ASSERT_EQ(block2.rows(), 0);
+    }
+
+    {
+        // nullable filter column
+        auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+        
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+                {"abc", "efg", "hij"}));
+
+        
block.insert(vectorized::ColumnHelper::create_nullable_column_with_name<
+                     vectorized::DataTypeUInt8>({1, 1, 1}, {0, 1, 0}));
+        auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+        ASSERT_EQ(block.rows(), 2);
+    }
+
+    {
+        // const filter column
+        auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+        
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+                {"abc", "efg", "hij"}));
+
+        block.insert({vectorized::ColumnConst::create(vectorized::ColumnUInt8::create(1, 1), 3),
+                      std::make_shared<vectorized::DataTypeUInt8>(), "filter"});
+        auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+        ASSERT_EQ(block.rows(), 3);
+
+        block.insert({vectorized::ColumnConst::create(vectorized::ColumnUInt8::create(1, 0), 3),
+                      std::make_shared<vectorized::DataTypeUInt8>(), "filter2"});
+        st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+        ASSERT_EQ(block.rows(), 0);
+    }
+
+    {
+        // append_to_block_by_selector
+        auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+        
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+                {"abc", "efg", "hij"}));
+
+        vectorized::IColumn::Selector selector(2);
+        selector[0] = 0;
+        selector[1] = 2;
+
+        vectorized::MutableBlock mutable_block(block.clone_empty());
+        auto st = block.append_to_block_by_selector(&mutable_block, selector);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+        ASSERT_EQ(mutable_block.rows(), 2);
+        ASSERT_EQ(mutable_block.mutable_columns()[0]->get_int(0), 1);
+        ASSERT_EQ(mutable_block.mutable_columns()[0]->get_int(1), 3);
+    }
+}
+
+TEST(BlockTest, add_rows) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+    
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+            {"abc", "efg", "hij"}));
+
+    auto block2 = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({4});
+    block2.insert(
+            
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"lmn"}));
+
+    vectorized::MutableBlock mutable_block(&block);
+    mutable_block.add_row(&block2, 0);
+    ASSERT_EQ(mutable_block.rows(), 4);
+
+    vectorized::MutableBlock mutable_block2(&block2);
+    auto st = mutable_block2.add_rows(&block, {0, 2});
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    ASSERT_EQ(mutable_block2.rows(), 3);
+}
+
+TEST(BlockTest, others) {
+    auto block = 
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+    
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+            {"abc", "efg", "hij"}));
+
+    block.shrink_char_type_column_suffix_zero({1, 2});
+
+    SipHash hash;
+    block.update_hash(hash);
+
+    auto hash_value = hash.get64();
+    ASSERT_EQ(hash_value, 15311230698310402164ULL);
+
+    auto column = 
vectorized::ColumnHelper::create_column<vectorized::DataTypeString>(
+            {"lmn", "opq", "rst"});
+
+    block.replace_by_position(1, std::move(column));
+    SipHash hash2;
+    block.update_hash(hash2);
+    hash_value = hash2.get64();
+    ASSERT_EQ(hash_value, 16980601693179295243ULL);
+
+    std::vector<int> result_column_ids = {0, 1};
+    block.shuffle_columns(result_column_ids);
+    ASSERT_EQ(block.get_by_position(0).type->get_type_id(), 
vectorized::TypeIndex::Int32);
+    ASSERT_EQ(block.get_by_position(1).type->get_type_id(), 
vectorized::TypeIndex::String);
+
+    result_column_ids = {1, 0};
+    block.shuffle_columns(result_column_ids);
+    ASSERT_EQ(block.get_by_position(1).type->get_type_id(), 
vectorized::TypeIndex::Int32);
+    ASSERT_EQ(block.get_by_position(0).type->get_type_id(), 
vectorized::TypeIndex::String);
+
+    result_column_ids = {1};
+    block.shuffle_columns(result_column_ids);
+    ASSERT_EQ(block.get_by_position(0).type->get_type_id(), 
vectorized::TypeIndex::Int32);
+    ASSERT_EQ(block.columns(), 1);
+
+    vectorized::MutableBlock mutable_block(&block);
+    auto dumped = mutable_block.dump_data();
+    ASSERT_GT(dumped.size(), 0) << "Dumped data size: " << dumped.size();
+
+    mutable_block.clear_column_data();
+    ASSERT_EQ(mutable_block.get_column_by_position(0)->size(), 0);
+    ASSERT_TRUE(mutable_block.has("column"));
+    ASSERT_EQ(mutable_block.get_position_by_name("column"), 0);
+
+    auto dumped_names = mutable_block.dump_names();
+    ASSERT_TRUE(dumped_names.find("column") != std::string::npos);
+
+    block.clear_names();
+    dumped_names = block.dump_names();
+    ASSERT_TRUE(dumped_names.empty()) << "Dumped names: " << dumped_names;
+}
+
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to