This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f4d49856c61 [test](beut) Add unit test cases for Block (#50480)
f4d49856c61 is described below
commit f4d49856c61535af36bd6f94f9796dd762823975
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Apr 29 18:51:49 2025 +0800
[test](beut) Add unit test cases for Block (#50480)
---
be/src/vec/core/block.cpp | 89 +---
be/src/vec/core/block.h | 28 +-
.../vec/exec/format/arrow/arrow_stream_reader.cpp | 2 +-
be/test/vec/core/block_test.cpp | 535 ++++++++++++++++++++-
4 files changed, 538 insertions(+), 116 deletions(-)
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 94e4ee75f36..d075b5531ab 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -22,6 +22,7 @@
#include <fmt/format.h>
#include <gen_cpp/data.pb.h>
+#include <glog/logging.h>
#include <snappy.h>
#include <streamvbyte.h>
#include <sys/types.h>
@@ -383,19 +384,6 @@ size_t Block::rows() const {
return 0;
}
-std::string Block::each_col_size() const {
- std::string ss;
- for (const auto& elem : data) {
- if (elem.column) {
- ss += elem.column->size();
- ss += " | ";
- } else {
- ss += "-1 | ";
- }
- }
- return ss;
-}
-
void Block::set_num_rows(size_t length) {
if (rows() > length) {
for (auto& elem : data) {
@@ -629,6 +617,7 @@ MutableColumns Block::mutate_columns() {
size_t num_columns = data.size();
MutableColumns columns(num_columns);
for (size_t i = 0; i < num_columns; ++i) {
+ DCHECK(data[i].type);
columns[i] = data[i].column ? (*std::move(data[i].column)).mutate()
: data[i].type->create_column();
}
@@ -645,16 +634,6 @@ void Block::set_columns(MutableColumns&& columns) {
}
}
-void Block::set_columns(const Columns& columns) {
- DCHECK_GE(columns.size(), data.size())
- << fmt::format("Invalid size of columns, columns size: {}, data
size: {}",
- columns.size(), data.size());
- size_t num_columns = data.size();
- for (size_t i = 0; i < num_columns; ++i) {
- data[i].column = columns[i];
- }
-}
-
Block Block::clone_with_columns(MutableColumns&& columns) const {
Block res;
@@ -666,24 +645,6 @@ Block Block::clone_with_columns(MutableColumns&& columns) const {
return res;
}
-Block Block::clone_with_columns(const Columns& columns) const {
- Block res;
-
- size_t num_columns = data.size();
-
- if (num_columns != columns.size()) {
- throw Exception(Status::FatalError(
- "Cannot clone block with columns because block has {} columns,
but {} columns "
- "given.",
- num_columns, columns.size()));
- }
-
- for (size_t i = 0; i < num_columns; ++i) {
- res.insert({columns[i], data[i].type, data[i].name});
- }
- return res;
-}
-
Block Block::clone_without_columns(const std::vector<int>* column_offset)
const {
Block res;
@@ -701,16 +662,6 @@ Block Block::clone_without_columns(const std::vector<int>* column_offset) const
return res;
}
-Block Block::sort_columns() const {
- Block sorted_block;
-
- for (const auto& name : index_by_name) {
- sorted_block.insert(data[name.second]);
- }
-
- return sorted_block;
-}
-
const ColumnsWithTypeAndName& Block::get_columns_with_type_and_name() const {
return data;
}
@@ -743,14 +694,6 @@ void Block::clear() {
row_same_bit.clear();
}
-std::string Block::print_use_count() {
- std::stringstream ss;
- for (auto& d : data) {
- ss << ", [" << d.name << ", " << d.column->use_count() << "]";
- }
- return ss.str();
-}
-
void Block::clear_column_data(int64_t column_size) noexcept {
SCOPED_SKIP_MEMORY_CHECK();
// data.size() greater than column_size, means here have some
@@ -764,7 +707,6 @@ void Block::clear_column_data(int64_t column_size) noexcept {
if (d.column) {
// Temporarily disable reference count check because a column might be referenced multiple times within a block.
// Queries like this: `select c, c from t1;`
- // DCHECK_EQ(d.column->use_count(), 1) << " " << print_use_count();
(*std::move(d.column)).assume_mutable()->clear();
}
}
@@ -878,15 +820,6 @@ void Block::filter_block_internal(Block* block, const IColumn::Filter& filter) {
}
}
-Block Block::copy_block(const std::vector<int>& column_offset) const {
- ColumnsWithTypeAndName columns_with_type_and_name;
- for (auto offset : column_offset) {
- DCHECK(offset < data.size());
- columns_with_type_and_name.emplace_back(data[offset]);
- }
- return columns_with_type_and_name;
-}
-
Status Block::append_to_block_by_selector(MutableBlock* dst,
const IColumn::Selector& selector)
const {
RETURN_IF_CATCH_EXCEPTION({
@@ -1059,15 +992,6 @@ void MutableBlock::swap(MutableBlock& another) noexcept {
index_by_name.swap(another.index_by_name);
}
-void MutableBlock::swap(MutableBlock&& another) noexcept {
- SCOPED_SKIP_MEMORY_CHECK();
- clear();
- _columns = std::move(another._columns);
- _data_types = std::move(another._data_types);
- _names = std::move(another._names);
- index_by_name = std::move(another.index_by_name);
-}
-
void MutableBlock::add_row(const Block* block, int row) {
const auto& block_data = block->get_columns_with_type_and_name();
for (size_t i = 0; i < _columns.size(); ++i) {
@@ -1266,15 +1190,6 @@ void MutableBlock::clear_column_data() noexcept {
}
}
-void MutableBlock::reset_column_data() noexcept {
- SCOPED_SKIP_MEMORY_CHECK();
- _columns.clear();
- for (int i = 0; i < _names.size(); i++) {
- _columns.emplace_back(_data_types[i]->create_column());
- index_by_name[_names[i]] = i;
- }
-}
-
void MutableBlock::initialize_index_by_name() {
for (size_t i = 0, size = _names.size(); i < size; ++i) {
index_by_name[_names[i]] = i;
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 59ad41d9ba7..466760f7ed2 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -183,8 +183,6 @@ public:
/// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
size_t rows() const;
- std::string each_col_size() const;
-
// Cut the rows in block, use in LIMIT operation
void set_num_rows(size_t length);
@@ -220,8 +218,6 @@ public:
Columns get_columns() const;
Columns get_columns_and_convert();
- void set_columns(const Columns& columns);
- Block clone_with_columns(const Columns& columns) const;
Block clone_without_columns(const std::vector<int>* column_offset =
nullptr) const;
/** Get empty columns with the same types as in block. */
@@ -234,9 +230,6 @@ public:
void set_columns(MutableColumns&& columns);
Block clone_with_columns(MutableColumns&& columns) const;
- /** Get a block with columns that have been rearranged in the order of their names. */
- Block sort_columns() const;
-
void clear();
void swap(Block& other) noexcept;
void swap(Block&& other) noexcept;
@@ -269,18 +262,9 @@ public:
std::string dump_data(size_t begin = 0, size_t row_limit = 100,
bool allow_null_mismatch = false) const;
- static std::string dump_column(ColumnPtr col, DataTypePtr type) {
- ColumnWithTypeAndName type_name {col, type, ""};
- Block b {type_name};
- return b.dump_data(0, b.rows());
- }
-
/** Get one line data from block, only use in load data */
std::string dump_one_line(size_t row, int column_end) const;
- // copy a new block by the offset column
- Block copy_block(const std::vector<int>& column_offset) const;
-
Status append_to_block_by_selector(MutableBlock* dst, const
IColumn::Selector& selector) const;
// need exception safety
@@ -394,10 +378,6 @@ public:
void clear_same_bit() { row_same_bit.clear(); }
- // return string contains use_count() of each columns
- // for debug purpose.
- std::string print_use_count();
-
// remove tmp columns in block
// in inverted index apply logic, in order to optimize query performance,
// we built some temporary columns into block
@@ -524,7 +504,7 @@ public:
std::string dump_types() const {
std::string res;
for (auto type : _data_types) {
- if (res.size()) {
+ if (!res.empty()) {
res += ", ";
}
res += type->get_name();
@@ -571,7 +551,7 @@ public:
template <typename T>
[[nodiscard]] Status merge_impl(T&& block) {
// merge is not supported in dynamic block
- if (_columns.size() == 0 && _data_types.size() == 0) {
+ if (_columns.empty() && _data_types.empty()) {
_data_types = block.get_data_types();
_names = block.get_names();
_columns.resize(block.columns());
@@ -624,8 +604,6 @@ public:
void swap(MutableBlock& other) noexcept;
- void swap(MutableBlock&& other) noexcept;
-
void add_row(const Block* block, int row);
// Batch add row should return error status if allocate memory failed.
Status add_rows(const Block* block, const uint32_t* row_begin, const
uint32_t* row_end,
@@ -646,8 +624,6 @@ public:
// columns resist. columns' inner data removed.
void clear_column_data() noexcept;
- // reset columns by types and names.
- void reset_column_data() noexcept;
size_t allocated_bytes() const;
diff --git a/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp b/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp
index efe8e36bf20..2ce7dd09069 100644
--- a/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp
+++ b/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp
@@ -97,7 +97,7 @@ Status ArrowStreamReader::get_next_block(Block* block, size_t* read_rows, bool*
std::string column_name = batch.schema()->field(c)->name();
try {
- vectorized::ColumnWithTypeAndName& column_with_name =
+ const vectorized::ColumnWithTypeAndName& column_with_name =
block->get_by_name(column_name);
column_with_name.type->get_serde()->read_column_from_arrow(
column_with_name.column->assume_mutable_ref(), column,
0, num_rows, _ctzz);
diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp
index 7c876b00aa4..f1dd1c8f504 100644
--- a/be/test/vec/core/block_test.cpp
+++ b/be/test/vec/core/block_test.cpp
@@ -17,27 +17,40 @@
#include "vec/core/block.h"
+#include <concurrentqueue.h>
+#include <gen_cpp/Descriptors_types.h>
+#include <gen_cpp/Metrics_types.h>
#include <gen_cpp/segment_v2.pb.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
-#include <algorithm>
#include <cmath>
+#include <cstddef>
#include <cstdint>
+#include <memory>
#include <string>
+#include <vector>
#include "agent/be_exec_version_manager.h"
#include "common/config.h"
+#include "common/object_pool.h"
#include "gen_cpp/data.pb.h"
-#include "gtest/gtest_pred_impl.h"
+#include "runtime/define_primitive_type.h"
+#include "runtime/descriptor_helper.h"
+#include "runtime/descriptors.h"
+#include "testutil/column_helper.h"
#include "util/bitmap_value.h"
+#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_complex.h"
+#include "vec/columns/column_const.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
+#include "vec/common/sip_hash.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
@@ -49,6 +62,7 @@
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
+#include "vec/data_types/data_type_struct.h"
#include "vec/data_types/data_type_time_v2.h"
#include "vec/runtime/vdatetime_value.h"
@@ -850,4 +864,521 @@ TEST(BlockTest, merge_with_shared_columns) {
EXPECT_EQ(1034, src_block.rows());
}
+namespace vectorized {
+template <typename T>
+void clear_blocks(moodycamel::ConcurrentQueue<T>& blocks,
+ RuntimeProfile::Counter* memory_used_counter = nullptr);
+}
+
+TEST(BlockTest, clear_blocks) {
+ {
+ moodycamel::ConcurrentQueue<vectorized::Block> queue;
+
+ RuntimeProfile::Counter counter(TUnit::BYTES);
+ auto block1 =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+ auto block2 =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
+ counter.update(block1.allocated_bytes());
+ counter.update(block2.allocated_bytes());
+
+ ASSERT_EQ(counter.value(), block1.allocated_bytes() +
block2.allocated_bytes());
+
+ queue.enqueue(std::move(block1));
+ queue.enqueue(std::move(block2));
+
+ vectorized::clear_blocks(queue, &counter);
+ EXPECT_EQ(counter.value(), 0);
+
+ ASSERT_FALSE(queue.try_dequeue(block1));
+ }
+
+ {
+ moodycamel::ConcurrentQueue<std::unique_ptr<vectorized::Block>> queue;
+
+ RuntimeProfile::Counter counter(TUnit::BYTES);
+ auto block1 = std::make_unique<vectorized::Block>(
+
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3}));
+ auto block2 = std::make_unique<vectorized::Block>(
+
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({4, 5, 6}));
+
+ counter.update(block1->allocated_bytes());
+ counter.update(block2->allocated_bytes());
+
+ ASSERT_EQ(counter.value(), block1->allocated_bytes() +
block2->allocated_bytes());
+
+ queue.enqueue(std::move(block1));
+ queue.enqueue(std::move(block2));
+
+ vectorized::clear_blocks(queue, &counter);
+ EXPECT_EQ(counter.value(), 0);
+
+ ASSERT_FALSE(queue.try_dequeue(block1));
+ }
+}
+
+TEST(BlockTest, replace_by_position) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+ block.insert(0,
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"a", "b", "c"}));
+
+ auto col =
vectorized::ColumnHelper::create_column<vectorized::DataTypeInt32>({4, 5, 6});
+ block.replace_by_position(0, std::move(col));
+
+ ASSERT_EQ(block.get_by_position(0).column->get_int(0), 4);
+ ASSERT_EQ(block.get_by_position(0).column->get_int(1), 5);
+ ASSERT_EQ(block.get_by_position(0).column->get_int(2), 6);
+}
+
+TEST(BlockTest, compare_at) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+ auto ret = block.compare_at(0, 0, block, -1);
+ ASSERT_EQ(ret, 0);
+
+ ret = block.compare_at(0, 1, block, -1);
+ ASSERT_LT(ret, 0);
+
+ ret = block.compare_at(2, 1, block, -1);
+ ASSERT_GT(ret, 0);
+
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeInt32>(
+ {2, 2, 1}));
+ std::vector<uint32_t> compare_columns {1, 0};
+
+ ret = block.compare_at(0, 1, &compare_columns, block, -1);
+ ASSERT_LT(ret, 0);
+
+ ret = block.compare_at(1, 2, &compare_columns, block, -1);
+ ASSERT_GT(ret, 0);
+}
+
+TEST(BlockTest, same_bit) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 1, 3});
+ std::vector<bool> same_bit = {false, true, false};
+ block.set_same_bit(same_bit.begin(), same_bit.end());
+ ASSERT_EQ(block.get_same_bit(0), false);
+ ASSERT_EQ(block.get_same_bit(1), true);
+ ASSERT_EQ(block.get_same_bit(2), false);
+
+ auto block2 = block;
+ block2.set_num_rows(2);
+ ASSERT_EQ(block2.rows(), 2);
+
+ int64_t length = 3;
+ block2.skip_num_rows(length);
+ ASSERT_EQ(1, length);
+ ASSERT_EQ(block2.rows(), 0);
+
+ block2 = block;
+ length = 1;
+ block2.skip_num_rows(length);
+ ASSERT_EQ(block2.rows(), 2);
+ ASSERT_EQ(block2.get_same_bit(0), true);
+
+ block2.skip_num_rows(length);
+ ASSERT_EQ(block2.rows(), 1);
+ ASSERT_EQ(block2.get_same_bit(0), false);
+
+ block.clear_same_bit();
+
+ ASSERT_EQ(block.get_same_bit(1), false);
+}
+
+TEST(BlockTest, dump) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+ auto types = block.dump_types();
+ ASSERT_TRUE(types.find("Int32") != std::string::npos);
+
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+ types = block.dump_types();
+ ASSERT_TRUE(types.find("String") != std::string::npos);
+
+ block.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
+ types = block.dump_types();
+ ASSERT_TRUE(types.find("Float64") != std::string::npos);
+
+ auto names = block.get_names();
+ for (const auto& name : names) {
+ ASSERT_EQ(name, "column");
+ }
+
+ auto dumped_names = block.dump_names();
+ ASSERT_TRUE(dumped_names.find(", ") != std::string::npos);
+ ASSERT_TRUE(dumped_names.find("column") != std::string::npos);
+
+ block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"a", "b", "c"}));
+
+ auto dumped_line = block.dump_one_line(1, 2);
+ ASSERT_EQ(dumped_line, "2 b");
+}
+
+TEST(BlockTest, merge_impl_ignore_overflow) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+ block.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
+ auto block2 =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+
+ auto mutable_block = vectorized::MutableBlock::build_mutable_block(&block);
+
+ auto st = mutable_block.merge_ignore_overflow(std::move(block2));
+ ASSERT_FALSE(st.ok());
+
+ block2 =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+ block2.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+ block2.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat32>({}));
+
+ EXPECT_ANY_THROW(st =
mutable_block.merge_impl_ignore_overflow(std::move(block2)));
+}
+
+TEST(BlockTest, merge_impl) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+ block.insert(
+
vectorized::ColumnHelper::create_nullable_column_with_name<vectorized::DataTypeFloat64>(
+ {1.0}, {0}));
+
+ block.get_by_position(2).column = nullptr;
+
+ auto mutable_block =
vectorized::MutableBlock::build_mutable_block(nullptr);
+
+ auto st = mutable_block.merge(std::move(block));
+ ASSERT_TRUE(st.ok()) << st.to_string();
+
+ auto block2 =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+ block2.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
+
+ st = mutable_block.merge_impl(std::move(block2));
+ ASSERT_FALSE(st);
+
+ block2 =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1});
+ block2.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"a"}));
+ block2.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
+
+ EXPECT_ANY_THROW(st = mutable_block.merge_impl(std::move(block2)));
+}
+
+TEST(BlockTest, ctor) {
+ TDescriptorTableBuilder builder;
+ TTupleDescriptorBuilder tuple_builder;
+ tuple_builder
+ .add_slot(
+
TSlotDescriptorBuilder().type(PrimitiveType::TYPE_INT).nullable(false).build())
+ .add_slot(TSlotDescriptorBuilder()
+ .type(PrimitiveType::TYPE_STRING)
+ .set_isMaterialized(false)
+ .build());
+ tuple_builder.build(&builder);
+
+ auto t_table = builder.desc_tbl();
+
+ ObjectPool pool;
+ DescriptorTbl* tbl;
+ auto st = DescriptorTbl::create(&pool, t_table, &tbl);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+
+ auto slots = tbl->get_tuple_descriptor(0)->slots();
+
+ std::vector<SlotDescriptor> slot_descs;
+ for (auto* slot : slots) {
+ slot_descs.push_back(*slot);
+ }
+
+ vectorized::Block block(slot_descs, 1);
+ ASSERT_EQ(block.columns(), 2);
+ ASSERT_EQ(block.get_by_position(0).type->get_type_id(),
vectorized::TypeIndex::Int32);
+ ASSERT_EQ(block.get_by_position(1).type->get_type_id(),
vectorized::TypeIndex::Nullable);
+
+ {
+ auto mutable_block =
+
vectorized::MutableBlock::create_unique(tbl->get_tuple_descs(), 10, false);
+ ASSERT_EQ(mutable_block->columns(), 2);
+ auto mutable_block2 = vectorized::MutableBlock::create_unique();
+ mutable_block->swap(*mutable_block2);
+ ASSERT_EQ(mutable_block->columns(), 0);
+ ASSERT_EQ(mutable_block2->columns(), 2);
+ }
+
+ {
+ auto mutable_block =
+
vectorized::MutableBlock::create_unique(tbl->get_tuple_descs(), 10, true);
+ ASSERT_EQ(mutable_block->columns(), 1);
+ auto mutable_block2 = vectorized::MutableBlock::create_unique();
+ mutable_block->swap(*mutable_block2);
+ ASSERT_EQ(mutable_block->columns(), 0);
+ ASSERT_EQ(mutable_block2->columns(), 1);
+ }
+}
+
+TEST(BlockTest, insert_erase) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+
+ auto column_with_name =
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({});
+ EXPECT_ANY_THROW(block.insert(2, column_with_name));
+ block.insert(0, column_with_name);
+
+ ASSERT_EQ(block.columns(), 2);
+ ASSERT_EQ(block.get_by_position(0).type->get_type_id(),
vectorized::TypeIndex::String);
+
+ EXPECT_ANY_THROW(block.insert(3, std::move(column_with_name)));
+
+ column_with_name =
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({});
+ block.insert(0, std::move(column_with_name));
+ ASSERT_EQ(block.columns(), 3);
+ ASSERT_EQ(block.get_by_position(0).type->get_type_id(),
vectorized::TypeIndex::Float64);
+
+ std::set<size_t> positions = {0, 2};
+ block.erase(positions);
+
+ ASSERT_EQ(block.columns(), 1);
+ ASSERT_EQ(block.get_by_position(0).type->get_type_id(),
vectorized::TypeIndex::String);
+
+ block.erase_tail(0);
+ ASSERT_EQ(block.columns(), 0);
+
+ EXPECT_ANY_THROW(block.erase("column"));
+ column_with_name =
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({});
+ block.insert(0, column_with_name);
+ EXPECT_NO_THROW(block.erase("column"));
+ ASSERT_EQ(block.columns(), 0);
+
+ EXPECT_ANY_THROW(block.safe_get_by_position(0));
+
+ ASSERT_EQ(block.try_get_by_name("column"), nullptr);
+ EXPECT_ANY_THROW(block.get_by_name("column"));
+ EXPECT_ANY_THROW(block.get_position_by_name("column"));
+ block.insert(0, column_with_name);
+
+ EXPECT_NO_THROW(auto item = block.get_by_name("column"));
+ ASSERT_NE(block.try_get_by_name("column"), nullptr);
+ EXPECT_EQ(block.get_position_by_name("column"), 0);
+
+ block.insert({nullptr, nullptr, BeConsts::BLOCK_TEMP_COLUMN_PREFIX});
+ EXPECT_NO_THROW(auto item =
block.get_by_name(BeConsts::BLOCK_TEMP_COLUMN_PREFIX));
+
+ block.erase_tmp_columns();
+ ASSERT_EQ(block.try_get_by_name(BeConsts::BLOCK_TEMP_COLUMN_PREFIX),
nullptr);
+
+ {
+ // test const block
+ const auto const_block = block;
+ EXPECT_EQ(const_block.try_get_by_name("column2"), nullptr);
+ EXPECT_ANY_THROW(const_block.get_by_name("column2"));
+ EXPECT_ANY_THROW(const_block.get_position_by_name("column2"));
+
+ EXPECT_NO_THROW(auto item = const_block.get_by_name("column"));
+ ASSERT_NE(const_block.try_get_by_name("column"), nullptr);
+ EXPECT_EQ(const_block.get_position_by_name("column"), 0);
+ }
+
+ block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+ block.insert({nullptr, std::make_shared<vectorized::DataTypeString>(),
"col1"});
+
+ block.insert({nullptr, std::make_shared<vectorized::DataTypeString>(),
"col2"});
+
+ vectorized::MutableBlock mutable_block(&block);
+ mutable_block.erase("col1");
+ ASSERT_EQ(mutable_block.columns(), 2);
+
+ EXPECT_ANY_THROW(mutable_block.erase("col1"));
+ ASSERT_EQ(mutable_block.columns(), 2);
+ mutable_block.erase("col2");
+ ASSERT_EQ(mutable_block.columns(), 1);
+}
+
+TEST(BlockTest, check_number_of_rows) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
+
+ block.insert({nullptr, nullptr, "null_col"});
+ EXPECT_NO_THROW(block.check_number_of_rows(true));
+ EXPECT_ANY_THROW(block.check_number_of_rows(false));
+
+ block.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"abc"}));
+ EXPECT_ANY_THROW(block.check_number_of_rows(true));
+
+ EXPECT_ANY_THROW(block.columns_bytes());
+
+ ASSERT_GT(block.allocated_bytes(), 0);
+}
+
+TEST(BlockTest, clear_column_mem_not_keep) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"abc", "efg", "hij"}));
+
+ std::vector<bool> column_keep_flags {false, true};
+
+ auto block2 = block;
+ block2.clear_column_mem_not_keep(column_keep_flags, false);
+ ASSERT_EQ(block2.get_by_position(0).column->size(), 0);
+ ASSERT_EQ(block2.get_by_position(1).column->size(), 3);
+
+ block.clear_column_mem_not_keep(column_keep_flags, true);
+ ASSERT_EQ(block.get_by_position(0).column->size(), 3);
+ ASSERT_EQ(block.get_by_position(1).column->size(), 3);
+}
+
+TEST(BlockTest, filter) {
+ {
+ // normal filter
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"abc", "efg", "hij"}));
+
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>(
+ {1, 1, 1}));
+ auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(block.rows(), 3);
+
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>(
+ {0, 1, 0}));
+ st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(block.rows(), 1);
+
+ block.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>({0}));
+ auto block2 = block;
+ st = vectorized::Block::filter_block(&block2, {0, 1}, 2, 2);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(block2.rows(), 0);
+ }
+
+ {
+ // nullable filter column
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"abc", "efg", "hij"}));
+
+
block.insert(vectorized::ColumnHelper::create_nullable_column_with_name<
+ vectorized::DataTypeUInt8>({1, 1, 1}, {0, 1, 0}));
+ auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(block.rows(), 2);
+ }
+
+ {
+ // const filter column
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"abc", "efg", "hij"}));
+
+
block.insert({vectorized::ColumnConst::create(vectorized::ColumnUInt8::create(1,
1), 3),
+ std::make_shared<vectorized::DataTypeUInt8>(),
"filter"});
+ auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(block.rows(), 3);
+
+
block.insert({vectorized::ColumnConst::create(vectorized::ColumnUInt8::create(1,
0), 3),
+ std::make_shared<vectorized::DataTypeUInt8>(),
"filter2"});
+ st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(block.rows(), 0);
+ }
+
+ {
+ // append_to_block_by_selector
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"abc", "efg", "hij"}));
+
+ vectorized::IColumn::Selector selector(2);
+ selector[0] = 0;
+ selector[1] = 2;
+
+ vectorized::MutableBlock mutable_block(block.clone_empty());
+ auto st = block.append_to_block_by_selector(&mutable_block, selector);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(mutable_block.rows(), 2);
+ ASSERT_EQ(mutable_block.mutable_columns()[0]->get_int(0), 1);
+ ASSERT_EQ(mutable_block.mutable_columns()[0]->get_int(1), 3);
+ }
+}
+
+TEST(BlockTest, add_rows) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"abc", "efg", "hij"}));
+
+ auto block2 =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({4});
+ block2.insert(
+
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"lmn"}));
+
+ vectorized::MutableBlock mutable_block(&block);
+ mutable_block.add_row(&block2, 0);
+ ASSERT_EQ(mutable_block.rows(), 4);
+
+ vectorized::MutableBlock mutable_block2(&block2);
+ auto st = mutable_block2.add_rows(&block, {0, 2});
+ ASSERT_TRUE(st.ok()) << st.to_string();
+
+ ASSERT_EQ(mutable_block2.rows(), 3);
+}
+
+TEST(BlockTest, others) {
+ auto block =
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
+
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
+ {"abc", "efg", "hij"}));
+
+ block.shrink_char_type_column_suffix_zero({1, 2});
+
+ SipHash hash;
+ block.update_hash(hash);
+
+ auto hash_value = hash.get64();
+ ASSERT_EQ(hash_value, 15311230698310402164ULL);
+
+ auto column =
vectorized::ColumnHelper::create_column<vectorized::DataTypeString>(
+ {"lmn", "opq", "rst"});
+
+ block.replace_by_position(1, std::move(column));
+ SipHash hash2;
+ block.update_hash(hash2);
+ hash_value = hash2.get64();
+ ASSERT_EQ(hash_value, 16980601693179295243ULL);
+
+ std::vector<int> result_column_ids = {0, 1};
+ block.shuffle_columns(result_column_ids);
+ ASSERT_EQ(block.get_by_position(0).type->get_type_id(),
vectorized::TypeIndex::Int32);
+ ASSERT_EQ(block.get_by_position(1).type->get_type_id(),
vectorized::TypeIndex::String);
+
+ result_column_ids = {1, 0};
+ block.shuffle_columns(result_column_ids);
+ ASSERT_EQ(block.get_by_position(1).type->get_type_id(),
vectorized::TypeIndex::Int32);
+ ASSERT_EQ(block.get_by_position(0).type->get_type_id(),
vectorized::TypeIndex::String);
+
+ result_column_ids = {1};
+ block.shuffle_columns(result_column_ids);
+ ASSERT_EQ(block.get_by_position(0).type->get_type_id(),
vectorized::TypeIndex::Int32);
+ ASSERT_EQ(block.columns(), 1);
+
+ vectorized::MutableBlock mutable_block(&block);
+ auto dumped = mutable_block.dump_data();
+ ASSERT_GT(dumped.size(), 0) << "Dumped data size: " << dumped.size();
+
+ mutable_block.clear_column_data();
+ ASSERT_EQ(mutable_block.get_column_by_position(0)->size(), 0);
+ ASSERT_TRUE(mutable_block.has("column"));
+ ASSERT_EQ(mutable_block.get_position_by_name("column"), 0);
+
+ auto dumped_names = mutable_block.dump_names();
+ ASSERT_TRUE(dumped_names.find("column") != std::string::npos);
+
+ block.clear_names();
+ dumped_names = block.dump_names();
+ ASSERT_TRUE(dumped_names.empty()) << "Dumped names: " << dumped_names;
+}
+
} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]