This is an automated email from the ASF dual-hosted git repository.
zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new af55267b0ee [BE](ut) add be ut for column complex test case (#48166)
af55267b0ee is described below
commit af55267b0ee68eb6a3f8bf0086a871f2c6273aa1
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Feb 25 13:19:52 2025 +0800
[BE](ut) add be ut for column complex test case (#48166)
### What problem does this PR solve?
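Add BE unit tests for ColumnComplex (bitmap, HLL, quantile state) and ColumnFixedLengthObject, and adjust ColumnFixedLengthObject::filter so the result is sized from the rows actually kept.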
---
be/src/vec/columns/column_complex.h | 2 +-
be/src/vec/columns/column_fixed_length_object.h | 11 +-
.../columns/column_fixed_length_object_test.cpp | 168 ++++++++
be/test/vec/core/column_complex_test.cpp | 477 +++++++++++++++++++++
4 files changed, 654 insertions(+), 4 deletions(-)
diff --git a/be/src/vec/columns/column_complex.h
b/be/src/vec/columns/column_complex.h
index ded4f96dc72..d439ed9e91c 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -87,7 +87,7 @@ public:
if (UNLIKELY(num == 0)) {
return;
}
-
+ // offsets must contain num + 1 entries, because offsets[i + 1] is read for every i < num
for (size_t i = 0; i != num; ++i) {
insert_binary_data(data + offsets[i], offsets[i + 1] - offsets[i]);
}
diff --git a/be/src/vec/columns/column_fixed_length_object.h
b/be/src/vec/columns/column_fixed_length_object.h
index 0fd5b35e8d9..ad7cb07d635 100644
--- a/be/src/vec/columns/column_fixed_length_object.h
+++ b/be/src/vec/columns/column_fixed_length_object.h
@@ -187,14 +187,19 @@ public:
ColumnPtr filter(const IColumn::Filter& filter, ssize_t result_size_hint)
const override { // Builds a new column containing only the rows i for which filter[i] is non-zero.
column_match_filter_size(size(), filter.size());
auto res = create(_item_size);
- res->resize(result_size_hint);
-
- for (size_t i = 0, pos = 0; i < filter.size(); i++) {
+ size_t column_size = size();
+ if (result_size_hint > 0) { // the hint is only used to pre-reserve capacity
+ res->reserve(result_size_hint);
+ }
+ res->resize(column_size); // one slot per source row, so the copy loop has room even if every row is kept (assumes filter.size() == size(), presumably checked by column_match_filter_size -- confirm)
+ size_t pos = 0; // rows copied so far; also the next write index in res
+ for (size_t i = 0; i < filter.size(); i++) {
if (filter[i]) {
memcpy(&res->_data[pos * _item_size], &_data[i * _item_size],
_item_size);
pos++;
}
}
+ res->resize(pos); // trim the result to the rows actually kept
return res;
}
diff --git a/be/test/vec/columns/column_fixed_length_object_test.cpp
b/be/test/vec/columns/column_fixed_length_object_test.cpp
index fd0a3687d65..69f1edb2897 100644
--- a/be/test/vec/columns/column_fixed_length_object_test.cpp
+++ b/be/test/vec/columns/column_fixed_length_object_test.cpp
@@ -20,6 +20,7 @@
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
#include <stddef.h>
#include <memory>
@@ -27,6 +28,7 @@
#include "gtest/gtest_pred_impl.h"
#include "vec/columns/column_vector.h"
#include "vec/common/sip_hash.h"
+#include "vec/common/string_ref.h"
namespace doris::vectorized {
@@ -85,4 +87,170 @@ TEST(ColumnFixedLenghtObjectTest, UpdateHashWithValue) {
EXPECT_EQ(hash1.get64(), hash2.get64());
}
+
+TEST(ColumnFixedLenghtObjectTest, GetDataAtTest) {
+    // Walks a ColumnFixedLengthObject with 8-byte items through its insert, copy, filter,
+    // permute, replicate and arena (de)serialization entry points, checking the row count
+    // and the stored int64 values after each step.
+
+    // Reads row `row` of `col` as an int64_t.
+    auto read_i64 = [](const auto& col, size_t row) {
+        return *reinterpret_cast<const int64_t*>(col->get_data_at(row).data);
+    };
+
+    auto col_a = ColumnFixedLengthObject::create(sizeof(int64_t));
+    EXPECT_EQ(sizeof(int64_t), col_a->item_size());
+    EXPECT_EQ(8, col_a->item_size());
+    col_a->set_item_size(8);
+    ASSERT_EQ(col_a->get_name(), "ColumnFixedLengthObject");
+    ASSERT_EQ(col_a->size(), 0);
+    ASSERT_EQ(col_a->get_data().size(), 0);
+    ASSERT_EQ(col_a->byte_size(), 0);
+    ASSERT_EQ(col_a->allocated_bytes(), 0);
+    std::cout << "1. test name item_size size success" << std::endl;
+
+    // One default row, then remove it again: size drops back, allocation is kept.
+    col_a->insert_default();
+    ASSERT_EQ(read_i64(col_a, 0), 0);
+    ASSERT_EQ(col_a->size(), 1);
+    ASSERT_EQ(col_a->byte_size(), 8);
+    ASSERT_EQ(col_a->allocated_bytes(), 64);
+    col_a->pop_back(1);
+    ASSERT_EQ(col_a->size(), 0);
+    ASSERT_EQ(col_a->byte_size(), 0);
+    ASSERT_EQ(col_a->allocated_bytes(), 64);
+    std::cout << "2. test byte_size allocated_bytes success" << std::endl;
+
+    // Write 11 and 22 straight into the backing buffer of a two-row column.
+    auto col_b = ColumnFixedLengthObject::create(sizeof(int64_t));
+    col_a->resize(2);
+    ASSERT_TRUE(col_a->has_enough_capacity(*col_b));
+    *reinterpret_cast<int64_t*>(col_a->get_data().data()) = 11;
+    *reinterpret_cast<int64_t*>(&(col_a->get_data()[col_a->item_size()])) = 22;
+    ASSERT_EQ(col_a->size(), 2);
+    ASSERT_EQ(col_a->byte_size(), 16);
+    ASSERT_EQ(col_a->allocated_bytes(), 64);
+    std::cout << "3. test has_enough_capacity and value data success" << std::endl;
+
+    // Row 0 travels through a Field, row 1 through insert_from.
+    Field first_field;
+    col_a->get(0, first_field);
+    col_b->insert(first_field);
+    ASSERT_EQ(col_b->size(), 1);
+    ASSERT_EQ((*col_a)[0], (*col_b)[0]);
+    col_b->insert_from(*col_a, 1);
+    ASSERT_EQ(col_b->size(), 2);
+    ASSERT_EQ((*col_a)[1], (*col_b)[1]);
+    //capacity and size is 32 16 16
+    ASSERT_TRUE(col_a->has_enough_capacity(*col_b));
+    std::cout << "4. test get/insert/insert_from/has_enough_capacity data success" << std::endl;
+
+    col_b->clear();
+    ASSERT_EQ(col_b->size(), 0);
+    col_b->insert_range_from(*col_a, 0, 2);
+    ASSERT_EQ(col_b->size(), 2);
+    ASSERT_EQ((*col_a)[0], (*col_b)[0]);
+    ASSERT_EQ((*col_a)[1], (*col_b)[1]);
+    ASSERT_EQ(read_i64(col_b, 0), 11);
+    ASSERT_EQ(read_i64(col_b, 1), 22);
+    std::cout << "5. test clear/insert_range_from data success" << std::endl;
+
+    // Append a third row (33) from raw bytes.
+    int64_t extra = 33;
+    col_b->insert_data(reinterpret_cast<const char*>(&extra), sizeof(extra));
+    ASSERT_EQ(col_b->size(), 3);
+    auto third_ref = col_b->get_data_at(2);
+    ASSERT_EQ(*reinterpret_cast<const int64_t*>(third_ref.data), 33);
+    std::cout << "6. test insert_data data success" << std::endl;
+
+    auto by_indices = ColumnFixedLengthObject::create(sizeof(int64_t));
+    std::vector<uint32_t> row_ids = {0, 1, 2};
+    by_indices->insert_indices_from(*col_b, row_ids.data(), row_ids.data() + row_ids.size());
+    ASSERT_EQ(by_indices->size(), 3);
+    ASSERT_EQ((*col_b)[0], (*by_indices)[0]);
+    ASSERT_EQ((*col_b)[1], (*by_indices)[1]);
+    ASSERT_EQ((*col_b)[2], (*by_indices)[2]);
+    ASSERT_EQ(read_i64(by_indices, 2), 33);
+    std::cout << "7. test insert_indices_from data success" << std::endl;
+
+    auto by_strings = ColumnFixedLengthObject::create(sizeof(int64_t));
+    std::vector<StringRef> refs;
+    for (int row = 0; row < 3; ++row) {
+        refs.push_back(col_b->get_data_at(row));
+    }
+    by_strings->insert_many_strings(refs.data(), 3);
+    ASSERT_EQ(by_strings->size(), 3);
+    ASSERT_EQ((*col_b)[0], (*by_strings)[0]);
+    ASSERT_EQ((*col_b)[1], (*by_strings)[1]);
+    ASSERT_EQ(read_i64(by_strings, 0), 11);
+    ASSERT_EQ(read_i64(by_strings, 1), 22);
+    // Row 2 is only checked on the source column; the checks on the copy stay disabled.
+    ASSERT_EQ(read_i64(col_b, 2), 33);
+    // ASSERT_EQ(read_i64(by_strings, 2), 33);
+    // ASSERT_EQ(col_b->get_data_at(2).to_string(), by_strings->get_data_at(2).to_string());
+    std::cout << "8. test insert_many_strings data success" << std::endl;
+
+    // Pack the three rows back to back; packed_offsets[i]..packed_offsets[i + 1] bounds row i.
+    auto by_binary = ColumnFixedLengthObject::create(sizeof(int64_t));
+    std::string packed;
+    std::vector<uint32_t> packed_offsets(4, 0);
+    for (int row = 0; row < 3; ++row) {
+        packed.append(refs[row]);
+        packed_offsets[row + 1] = packed_offsets[row] + refs[row].size;
+    }
+    by_binary->insert_many_continuous_binary_data(packed.data(), packed_offsets.data(), 3);
+    ASSERT_EQ(by_binary->size(), 3);
+    ASSERT_EQ((*col_b)[0], (*by_binary)[0]);
+    ASSERT_EQ((*col_b)[1], (*by_binary)[1]);
+    ASSERT_EQ(read_i64(by_binary, 0), 11);
+    ASSERT_EQ(read_i64(by_binary, 1), 22);
+    ASSERT_EQ(read_i64(by_binary, 2), 33);
+    std::cout << "9. test insert_many_continuous_binary_data data success" << std::endl;
+
+    // Overwrite row 0 of the copy with row 2 of the source: {11, 22, 33} -> {33, 22, 33}.
+    auto work_col = ColumnFixedLengthObject::create(sizeof(int64_t));
+    work_col->insert_range_from(*by_binary, 0, 3);
+    ASSERT_EQ(read_i64(work_col, 0), 11);
+    ASSERT_EQ(read_i64(work_col, 1), 22);
+    ASSERT_EQ(read_i64(work_col, 2), 33);
+    work_col->replace_column_data(*by_binary, 2, 0);
+    ASSERT_EQ(read_i64(work_col, 0), 33);
+    ASSERT_EQ(read_i64(work_col, 1), 22);
+    ASSERT_EQ(read_i64(work_col, 2), 33);
+    std::cout << "10. test replace_column_data data success" << std::endl;
+
+    // Keep only the middle row, via both filter overloads.
+    vectorized::IColumn::Filter keep_mask {0, 1, 0};
+    ASSERT_EQ(by_binary->clone()->filter(keep_mask), 1);
+    auto filtered = work_col->filter(keep_mask, 0);
+    ASSERT_EQ(filtered->size(), 1);
+    ASSERT_EQ(read_i64(filtered, 0), 22);
+    std::cout << "11. test filter data success" << std::endl;
+
+    IColumn::Permutation order {2, 1, 0};
+    auto permuted = work_col->permute(order, 3);
+    ASSERT_EQ(permuted->size(), 3);
+    ASSERT_EQ(read_i64(permuted, 0), 33);
+    ASSERT_EQ(read_i64(permuted, 1), 22);
+    ASSERT_EQ(read_i64(permuted, 2), 33);
+    std::cout << "12. test permute data success" << std::endl;
+
+    // Every source row appears twice; expected_src[i] is the source row behind result row i.
+    IColumn::Offsets rep_offsets {2, 4, 6};
+    std::vector<int> expected_src {0, 0, 1, 1, 2, 2};
+    auto replicated = work_col->replicate(rep_offsets);
+    ASSERT_EQ(replicated->size(), 6);
+    ASSERT_EQ(work_col->size(), 3);
+    for (int row = 0; row < 6; ++row) {
+        ASSERT_EQ((*work_col)[expected_src[row]], (*replicated)[row]);
+    }
+    std::cout << "13. test more val data value and replicate success" << std::endl;
+
+    // Serialize the three rows one after another; packed_key grows to cover all of them.
+    Arena arena;
+    const char* arena_begin = nullptr;
+    StringRef packed_key(arena_begin, 0);
+    for (int row = 0; row < 3; ++row) {
+        auto piece = work_col->serialize_value_into_arena(row, arena, arena_begin);
+        packed_key.data = piece.data - packed_key.size;
+        packed_key.size += piece.size;
+    }
+    ASSERT_EQ(packed_key.size, 24);
+    ASSERT_EQ(*reinterpret_cast<const int64_t*>(packed_key.data), 33);
+    std::cout << "14. test serialize_value_into_arena data success" << std::endl;
+
+    auto restored = ColumnFixedLengthObject::create(sizeof(int64_t));
+
+    const char* cursor = packed_key.data;
+    for (size_t row = 0; row < 3; ++row) {
+        cursor = restored->deserialize_and_insert_from_arena(cursor);
+    }
+    ASSERT_EQ(restored->size(), 3);
+    ASSERT_EQ(read_i64(restored, 0), 33);
+    ASSERT_EQ(read_i64(restored, 1), 22);
+    ASSERT_EQ(read_i64(restored, 2), 33);
+    std::cout << "15. test deserialize_and_insert_from_arena data success" << std::endl;
+}
} // namespace doris::vectorized
diff --git a/be/test/vec/core/column_complex_test.cpp
b/be/test/vec/core/column_complex_test.cpp
index a0fbcccdd15..5427fd482c4 100644
--- a/be/test/vec/core/column_complex_test.cpp
+++ b/be/test/vec/core/column_complex_test.cpp
@@ -23,13 +23,20 @@
#include <gtest/gtest.h>
#include <stddef.h>
+#include <cstdlib>
#include <memory>
#include <string>
+#include <tuple>
+#include <vector>
#include "agent/be_exec_version_manager.h"
#include "gtest/gtest_pred_impl.h"
#include "util/bitmap_value.h"
+#include "vec/columns/column.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/block.h"
#include "vec/core/field.h"
+#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_quantilestate.h"
@@ -56,6 +63,476 @@ TEST(ColumnComplexTest, DataTypeBitmapTest) {
std::make_shared<DataTypeBitMap>();
}
+TEST(ColumnComplexTest, GetDataAtTest) {
+    // Drives ColumnBitmap, ColumnHLL and ColumnQuantileState through the same sequence of
+    // column operations. The plain columns are filled via insert_value; the *_verify columns
+    // are filled via the operation under test and then compared against the plain ones.
+    auto column_bitmap = ColumnBitmap::create();
+    auto column_hll = ColumnHLL::create();
+    auto column_quantile_state = ColumnQuantileState::create();
+    ASSERT_EQ(column_bitmap->byte_size(), 0);
+
+    auto column_bitmap_verify = ColumnBitmap::create();
+    auto column_hll_verify = ColumnHLL::create();
+    auto column_quantile_state_verify = ColumnQuantileState::create();
+
+    column_bitmap->reserve(10);
+    // empty data value
+    column_bitmap->insert_value(BitmapValue::empty_bitmap());
+    column_hll->insert_value(HyperLogLog());
+    column_quantile_state->insert_value(QuantileState());
+
+    column_bitmap_verify->insert_default();
+    column_hll_verify->insert_default();
+    column_quantile_state_verify->insert_many_defaults(1);
+    ASSERT_EQ(column_bitmap_verify->byte_size(), 80);
+    ASSERT_EQ(column_hll_verify->byte_size(), 64);
+    ASSERT_EQ(column_quantile_state_verify->byte_size(), 64);
+    ASSERT_EQ(column_bitmap_verify->allocated_bytes(), 80);
+    ASSERT_EQ(column_hll_verify->allocated_bytes(), 64);
+    ASSERT_EQ(column_quantile_state_verify->allocated_bytes(), 64);
+    std::cout << "1. test byte_size/allocated_bytes empty success" << std::endl;
+
+    // column_bitmap reserved 10 slots but holds a single row, so it still has spare capacity;
+    // the other two columns were never reserved.
+    ASSERT_TRUE(column_bitmap->has_enough_capacity(*column_bitmap_verify));
+    ASSERT_FALSE(column_hll->has_enough_capacity(*column_hll_verify));
+    ASSERT_FALSE(column_quantile_state->has_enough_capacity(*column_quantile_state_verify));
+    std::cout << "2. test has_enough_capacity success" << std::endl;
+
+    ASSERT_EQ(column_bitmap->size(), 1);
+    ASSERT_EQ(column_bitmap_verify->size(), 1);
+    ASSERT_EQ(column_bitmap->get_data_at(0), column_bitmap_verify->get_data_at(0));
+    ASSERT_EQ(column_hll->get_data_at(0), column_hll_verify->get_data_at(0));
+    ASSERT_EQ(column_quantile_state->operator[](0), column_quantile_state_verify->operator[](0));
+    std::cout << "3. test insert_default/insert_value empty success" << std::endl;
+
+    // NOTE(review): std::srand seeds std::rand, while the generator below calls random().
+    // Whether this seed affects random() depends on the C library -- confirm the intent.
+    std::srand(std::time(nullptr));
+    // Produces one random draw, returned both as a UInt64 and as a scaled double.
+    auto lambda_function = []() -> std::tuple<UInt64, double> {
+        auto rand_val = random();
+        auto uint64_val = UInt64(rand_val % std::numeric_limits<UInt64>::max());
+        auto double_val = static_cast<double>(rand_val) / std::numeric_limits<int>::max();
+        return std::tuple {uint64_val, double_val};
+    };
+    UInt64 uint64_val = 0;
+    double double_val = 0;
+    std::tie(uint64_val, double_val) = lambda_function();
+    // single data value
+    auto bitmap = BitmapValue();
+    bitmap.add(uint64_val);
+    auto hll = HyperLogLog();
+    hll.update(uint64_val);
+    auto quantile_state = QuantileState();
+    quantile_state.add_value(double_val);
+
+    // insert random value to idx 1
+    column_bitmap->insert_value(bitmap);
+    column_hll->insert_value(hll);
+    column_quantile_state->insert_value(quantile_state);
+
+    auto& bitmap_verify_data = column_bitmap_verify->get_data();
+    auto& hll_verify_data = column_hll_verify->get_data();
+    auto& quantile_state_verify_data = column_quantile_state_verify->get_data();
+    // update verify data at idx 0
+    bitmap_verify_data[0].add(uint64_val);
+    hll_verify_data[0].update(uint64_val);
+    quantile_state_verify_data[0].add_value(double_val);
+    ASSERT_EQ(column_bitmap->size(), 2);
+    ASSERT_EQ(column_bitmap_verify->size(), 1);
+    ASSERT_EQ(column_bitmap->get_element(1).to_string(), bitmap_verify_data[0].to_string());
+    ASSERT_EQ(column_hll->get_element(1).to_string(),
+              column_hll_verify->get_element(0).to_string());
+    ASSERT_EQ(column_quantile_state->get_data_at(1), column_quantile_state_verify->get_data_at(0));
+    std::cout << "4. test insert/update/get_element data success" << std::endl;
+
+    // insert data from column idx 1
+    column_bitmap_verify->insert_from(*column_bitmap, 1);
+    column_hll_verify->insert_from(*column_hll, 1);
+    column_quantile_state_verify->insert_from(*column_quantile_state, 1);
+    ASSERT_EQ(column_bitmap->size(), 2);
+    ASSERT_EQ(column_bitmap_verify->size(), 2);
+    ASSERT_EQ(column_bitmap->get_element(1).to_string(),
+              column_bitmap_verify->get_element(1).to_string());
+    ASSERT_EQ(column_hll->get_element(1).to_string(),
+              column_hll_verify->get_element(1).to_string());
+    ASSERT_EQ(column_quantile_state->operator[](1), column_quantile_state_verify->operator[](1));
+    ASSERT_EQ(column_bitmap->operator[](1), column_bitmap_verify->operator[](1));
+    ASSERT_EQ(column_hll->operator[](1), column_hll_verify->operator[](1));
+    std::cout << "5. test insert_from data success" << std::endl;
+
+    Field field1, field2, field3;
+    column_bitmap->get(1, field1);
+    column_hll->get(1, field2);
+    column_quantile_state->get(1, field3);
+
+    //pop_back data from column idx 1
+    column_bitmap_verify->pop_back(1);
+    column_hll_verify->pop_back(1);
+    column_quantile_state_verify->pop_back(1);
+    //insert data from field
+    column_bitmap_verify->insert(field1);
+    column_hll_verify->insert(field2);
+    column_quantile_state_verify->insert(field3);
+
+    ASSERT_EQ(column_bitmap->size(), 2);
+    ASSERT_EQ(column_bitmap_verify->size(), 2);
+    ASSERT_EQ(column_bitmap->get_element(1).to_string(),
+              column_bitmap_verify->get_element(1).to_string());
+    ASSERT_EQ(column_hll->get_element(1).to_string(),
+              column_hll_verify->get_element(1).to_string());
+    ASSERT_EQ(column_quantile_state->operator[](1), column_quantile_state_verify->operator[](1));
+    ASSERT_EQ(column_bitmap->operator[](1), column_bitmap_verify->operator[](1));
+    ASSERT_EQ(column_hll->operator[](1), column_hll_verify->operator[](1));
+    std::cout << "6. test get/insert data and pop_back success" << std::endl;
+
+    std::tie(uint64_val, double_val) = lambda_function();
+    // two val in data value
+    bitmap.add(uint64_val);
+    hll.update(uint64_val);
+    quantile_state.add_value(double_val);
+    column_bitmap->insert_value(bitmap);
+    column_hll->insert_value(hll);
+    column_quantile_state->insert_value(quantile_state);
+
+    column_bitmap_verify->insert_data(column_bitmap->get_data_at(2).data,
+                                      column_bitmap->get_data_at(2).size);
+    column_hll_verify->insert_data(column_hll->get_data_at(2).data,
+                                   column_hll->get_data_at(2).size);
+    column_quantile_state_verify->insert_data(column_quantile_state->get_data_at(2).data,
+                                              column_quantile_state->get_data_at(2).size);
+    ASSERT_EQ(column_bitmap->size(), 3);
+    ASSERT_EQ(column_bitmap_verify->size(), 3);
+    ASSERT_EQ(column_bitmap->get_element(2).to_string(),
+              column_bitmap_verify->get_element(2).to_string());
+    ASSERT_EQ(column_hll->get_element(2).to_string(),
+              column_hll_verify->get_element(2).to_string());
+    ASSERT_EQ(column_quantile_state->operator[](2), column_quantile_state_verify->operator[](2));
+    ASSERT_EQ(column_bitmap->operator[](2), column_bitmap_verify->operator[](2));
+    ASSERT_EQ(column_hll->operator[](2), column_hll_verify->operator[](2));
+    std::cout << "7. test two val data value and insert_data success" << std::endl;
+
+    // more val in data value
+    for (int range = 1; range < 100; ++range) {
+        std::tie(uint64_val, double_val) = lambda_function();
+        bitmap.add(uint64_val);
+        hll.update(uint64_val);
+        quantile_state.add_value(double_val);
+    }
+    column_bitmap->insert_value(bitmap);
+    column_hll->insert_value(hll);
+    column_quantile_state->insert_value(quantile_state);
+
+    // Serialize each value into `buffer` and feed the bytes to insert_binary_data.
+    std::string buffer;
+    buffer.resize(bitmap.getSizeInBytes(), '0');
+    bitmap.write_to(const_cast<char*>(buffer.data()));
+    column_bitmap_verify->insert_binary_data(buffer.data(), buffer.size());
+
+    buffer.resize(hll.max_serialized_size(), '0');
+    size_t actual_size = hll.serialize((uint8_t*)buffer.data());
+    buffer.resize(actual_size);
+    column_hll_verify->insert_binary_data(buffer.data(), buffer.size());
+
+    buffer.resize(quantile_state.get_serialized_size());
+    quantile_state.serialize(const_cast<uint8_t*>(reinterpret_cast<uint8_t*>(buffer.data())));
+    column_quantile_state_verify->insert_binary_data(buffer.data(), buffer.size());
+
+    ASSERT_EQ(column_bitmap->size(), 4);
+    ASSERT_EQ(column_bitmap_verify->size(), 4);
+    ASSERT_EQ(column_bitmap->get_element(3).to_string(),
+              column_bitmap_verify->get_element(3).to_string());
+    ASSERT_EQ(column_hll->get_element(3).to_string(),
+              column_hll_verify->get_element(3).to_string());
+    ASSERT_EQ(column_quantile_state->operator[](3), column_quantile_state_verify->operator[](3));
+    ASSERT_EQ(column_bitmap->operator[](3), column_bitmap_verify->operator[](3));
+    ASSERT_EQ(column_hll->operator[](3), column_hll_verify->operator[](3));
+    std::cout << "8. test more val data value and insert_binary_data success" << std::endl;
+
+    column_bitmap_verify->clear();
+    column_hll_verify->clear();
+    column_quantile_state_verify->clear();
+    ASSERT_EQ(column_bitmap_verify->size(), 0);
+
+    // Serialize every row into its own std::string; the StringRefs point into those strings,
+    // so the *_buffers vectors must outlive the *_strings vectors.
+    std::vector<StringRef> bitmap_strings, hll_strings, quantile_state_strings;
+    auto rows = column_bitmap->size();
+    std::vector<std::string> bitmap_buffers(rows), hll_buffers(rows), quantile_state_buffers(rows);
+    for (int i = 0; i < column_bitmap->size(); ++i) {
+        auto row_bitmap = column_bitmap->get_element(i);
+        bitmap_buffers[i].resize(row_bitmap.getSizeInBytes(), '0');
+        row_bitmap.write_to(const_cast<char*>(bitmap_buffers[i].data()));
+        bitmap_strings.emplace_back(bitmap_buffers[i].data(), bitmap_buffers[i].size());
+
+        auto row_hll = column_hll->get_element(i);
+        hll_buffers[i].resize(row_hll.max_serialized_size(), '0');
+        size_t row_hll_size = row_hll.serialize((uint8_t*)hll_buffers[i].data());
+        hll_buffers[i].resize(row_hll_size);
+        hll_strings.emplace_back(hll_buffers[i].data(), hll_buffers[i].size());
+
+        auto row_quantile_state = column_quantile_state->get_element(i);
+        quantile_state_buffers[i].resize(row_quantile_state.get_serialized_size());
+        row_quantile_state.serialize(
+                const_cast<uint8_t*>(reinterpret_cast<uint8_t*>(quantile_state_buffers[i].data())));
+        quantile_state_strings.emplace_back(quantile_state_buffers[i].data(),
+                                            quantile_state_buffers[i].size());
+    }
+    column_bitmap_verify->insert_many_strings(bitmap_strings.data(), column_bitmap->size());
+    column_hll_verify->insert_many_strings(hll_strings.data(), column_hll->size());
+    column_quantile_state_verify->insert_many_strings(quantile_state_strings.data(),
+                                                      column_quantile_state->size());
+    ASSERT_EQ(rows, column_bitmap_verify->size());
+    ASSERT_EQ(rows, column_hll_verify->size());
+    ASSERT_EQ(rows, column_quantile_state_verify->size());
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_bitmap->get_element(i).to_string(),
+                  column_bitmap_verify->get_element(i).to_string());
+        ASSERT_EQ(column_hll->get_element(i).to_string(),
+                  column_hll_verify->get_element(i).to_string());
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_verify->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i));
+    }
+    std::cout << "9. test more val data value and insert_many_strings success" << std::endl;
+
+    auto column_bitmap_verify2 = column_bitmap_verify->clone_empty();
+    auto column_hll_verify2 = column_hll_verify->clone_resized(0);
+    auto column_quantile_state_verify2 = column_quantile_state_verify->clone_empty();
+    ASSERT_EQ(column_bitmap_verify2->size(), 0);
+    ASSERT_EQ(column_hll_verify2->size(), 0);
+    ASSERT_EQ(column_quantile_state_verify2->size(), 0);
+    std::string bitmap_buffer, hll_buffer, quantile_state_buffer;
+    // The offsets arrays hold rows + 1 entries: row i occupies [offsets[i], offsets[i + 1]).
+    std::vector<uint32_t> bitmap_offsets(rows + 1, 0), hll_offsets(rows + 1, 0),
+            quantile_state_offsets(rows + 1, 0);
+    for (int i = 0; i < rows; ++i) {
+        bitmap_buffer.append(bitmap_strings[i]);
+        hll_buffer.append(hll_buffers[i]);
+        quantile_state_buffer.append(quantile_state_buffers[i]);
+        bitmap_offsets[i + 1] = bitmap_offsets[i] + bitmap_strings[i].size;
+        hll_offsets[i + 1] = hll_offsets[i] + hll_strings[i].size;
+        quantile_state_offsets[i + 1] = quantile_state_offsets[i] + quantile_state_strings[i].size;
+    }
+    column_bitmap_verify2->insert_many_continuous_binary_data(bitmap_buffer.data(),
+                                                              bitmap_offsets.data(), rows);
+    column_hll_verify2->insert_many_continuous_binary_data(hll_buffer.data(), hll_offsets.data(),
+                                                           rows);
+    column_quantile_state_verify2->insert_many_continuous_binary_data(
+            quantile_state_buffer.data(), quantile_state_offsets.data(), rows);
+    ASSERT_EQ(rows, column_bitmap_verify2->size());
+    ASSERT_EQ(rows, column_hll_verify2->size());
+    ASSERT_EQ(rows, column_quantile_state_verify2->size());
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_verify2->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify2->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_verify2->operator[](i));
+    }
+    std::cout << "10. test more val data value and insert_many_continuous_binary_data success"
+              << std::endl;
+
+    column_bitmap_verify->resize(rows);
+    column_hll_verify->resize(rows);
+    column_quantile_state_verify->resize(rows);
+    column_bitmap_verify->clear();
+    column_hll_verify->clear();
+    column_quantile_state_verify->clear();
+    column_bitmap_verify->insert_range_from(*column_bitmap, 0, rows);
+    column_hll_verify->insert_range_from(*column_hll, 0, rows);
+    column_quantile_state_verify->insert_range_from(*column_quantile_state, 0, rows);
+    ASSERT_EQ(rows, column_bitmap_verify->size());
+    ASSERT_EQ(rows, column_hll_verify->size());
+    ASSERT_EQ(rows, column_quantile_state_verify->size());
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_verify->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i));
+    }
+    std::cout << "11. test more val data value and insert_range_from success" << std::endl;
+
+    column_bitmap_verify->clear();
+    column_hll_verify->clear();
+    column_quantile_state_verify->clear();
+    std::vector<uint32_t> indices;
+    for (int i = 0; i < rows; ++i) {
+        indices.push_back(i);
+    }
+    column_bitmap_verify->insert_indices_from(*column_bitmap, indices.data(),
+                                              indices.data() + rows);
+    column_hll_verify->insert_indices_from(*column_hll, indices.data(), indices.data() + rows);
+    column_quantile_state_verify->insert_indices_from(*column_quantile_state, indices.data(),
+                                                      indices.data() + rows);
+    ASSERT_EQ(rows, column_bitmap_verify->size());
+    ASSERT_EQ(rows, column_hll_verify->size());
+    ASSERT_EQ(rows, column_quantile_state_verify->size());
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_verify->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i));
+    }
+    std::cout << "12. test more val data value and insert_indices_from success" << std::endl;
+
+    // Append one default row to the source columns, then copy exactly that row (index `rows`).
+    column_bitmap->insert_default();
+    column_hll->insert_default();
+    column_quantile_state->insert_default();
+    column_bitmap_verify->insert_many_from(*column_bitmap, rows, 1);
+    column_hll_verify->insert_many_from(*column_hll, rows, 1);
+    column_quantile_state_verify->insert_many_from(*column_quantile_state, rows, 1);
+    rows = rows + 1;
+    ASSERT_EQ(rows, column_bitmap_verify->size());
+    ASSERT_EQ(rows, column_hll_verify->size());
+    ASSERT_EQ(rows, column_quantile_state_verify->size());
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_verify->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i));
+    }
+    std::cout << "13. test more val data value and insert_many_from success" << std::endl;
+
+    column_bitmap->replace_column_data(*column_bitmap_verify, 0, 4);
+    column_hll->replace_column_data(*column_hll_verify, 0, 4);
+    column_quantile_state->replace_column_data(*column_quantile_state_verify, 0, 4);
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_verify->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_verify->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_verify->operator[](i));
+    }
+    std::cout << "14. test more val data value and replace_column_data success" << std::endl;
+
+    vectorized::IColumn::Filter filter;
+    for (int i = 0; i < rows; i++) {
+        filter.push_back(i % 2);
+    }
+    // filter data 0 1 0 1 0
+    ASSERT_EQ(column_bitmap_verify->clone()->filter(filter), 2);
+    ASSERT_EQ(column_hll_verify->clone()->filter(filter), 2);
+    ASSERT_EQ(column_quantile_state_verify->clone()->filter(filter), 2);
+    auto column_filter_res_bitmap = column_bitmap->filter(filter, 0);
+    auto column_filter_res_hll = column_hll->filter(filter, 0);
+    auto column_filter_res_quantile_state = column_quantile_state->filter(filter, 0);
+
+    ASSERT_EQ(column_filter_res_bitmap->size(), 2);
+    ASSERT_EQ(column_filter_res_hll->size(), 2);
+    ASSERT_EQ(column_filter_res_quantile_state->size(), 2);
+    // Only odd source rows survive; j walks the filtered result.
+    for (int i = 0, j = 0; i < rows; ++i) {
+        if (i % 2) {
+            ASSERT_EQ(column_quantile_state->operator[](i),
+                      column_filter_res_quantile_state->operator[](j));
+            ASSERT_EQ(column_bitmap->operator[](i), column_filter_res_bitmap->operator[](j));
+            ASSERT_EQ(column_hll->operator[](i), column_filter_res_hll->operator[](j));
+            j++;
+        }
+    }
+
+    filter.clear();
+    filter.resize_fill(rows, 1);
+    // filter data 1 1 1 1 1
+    ASSERT_EQ(column_bitmap_verify->clone()->filter(filter), rows);
+    ASSERT_EQ(column_hll_verify->clone()->filter(filter), rows);
+    ASSERT_EQ(column_quantile_state_verify->clone()->filter(filter), rows);
+    column_filter_res_bitmap = column_bitmap->filter(filter, 0);
+    column_filter_res_hll = column_hll->filter(filter, 0);
+    column_filter_res_quantile_state = column_quantile_state->filter(filter, 0);
+    ASSERT_EQ(column_filter_res_bitmap->size(), rows);
+    ASSERT_EQ(column_filter_res_hll->size(), rows);
+    ASSERT_EQ(column_filter_res_quantile_state->size(), rows);
+    // Every row is kept, so result row i must equal source row i.
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_filter_res_quantile_state->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_filter_res_bitmap->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_filter_res_hll->operator[](i));
+    }
+
+    filter.clear();
+    filter.resize_fill(rows, 0);
+    // filter data 0 0 0 0 0
+    ASSERT_EQ(column_bitmap_verify->clone()->filter(filter), 0);
+    ASSERT_EQ(column_hll_verify->clone()->filter(filter), 0);
+    ASSERT_EQ(column_quantile_state_verify->clone()->filter(filter), 0);
+    column_filter_res_bitmap = column_bitmap->filter(filter, 0);
+    column_filter_res_hll = column_hll->filter(filter, 0);
+    column_filter_res_quantile_state = column_quantile_state->filter(filter, 0);
+    ASSERT_EQ(column_filter_res_bitmap->size(), 0);
+    ASSERT_EQ(column_filter_res_hll->size(), 0);
+    ASSERT_EQ(column_filter_res_quantile_state->size(), 0);
+    std::cout << "15. test more val data value and filter success" << std::endl;
+
+    // Reversed order: source row i must appear at result row 4 - i.
+    IColumn::Permutation perm {4, 3, 2, 1, 0};
+    auto column_bitmap_perm = column_bitmap->permute(perm, rows);
+    auto column_hll_perm = column_hll->permute(perm, rows);
+    auto column_quantile_state_perm = column_quantile_state->permute(perm, rows);
+    ASSERT_EQ(column_bitmap_perm->size(), 5);
+    ASSERT_EQ(column_hll_perm->size(), 5);
+    ASSERT_EQ(column_quantile_state_perm->size(), 5);
+    for (int i = 0, j = 4; i < rows; ++i, --j) {
+        ASSERT_EQ(column_quantile_state->operator[](i), column_quantile_state_perm->operator[](j));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_perm->operator[](j));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_perm->operator[](j));
+    }
+
+    // Identity order: the result must match the source row for row.
+    IColumn::Permutation perm2 {0, 1, 2, 3, 4};
+    auto column_bitmap_perm2 = column_bitmap->permute(perm2, rows);
+    auto column_hll_perm2 = column_hll->permute(perm2, rows);
+    auto column_quantile_state_perm2 = column_quantile_state->permute(perm2, rows);
+    ASSERT_EQ(column_bitmap_perm2->size(), 5);
+    ASSERT_EQ(column_hll_perm2->size(), 5);
+    ASSERT_EQ(column_quantile_state_perm2->size(), 5);
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_perm2->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_perm2->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_perm2->operator[](i));
+    }
+
+    // Arbitrary order. permute_idx is the inverse of perm3 (perm3[permute_idx[i]] == i),
+    // i.e. the result row at which source row i is expected.
+    IColumn::Permutation perm3 {4, 2, 0, 1, 3};
+    std::vector<int> permute_idx {2, 3, 1, 4, 0};
+    auto column_bitmap_perm3 = column_bitmap->permute(perm3, rows);
+    auto column_hll_perm3 = column_hll->permute(perm3, rows);
+    auto column_quantile_state_perm3 = column_quantile_state->permute(perm3, rows);
+    ASSERT_EQ(column_bitmap_perm3->size(), 5);
+    ASSERT_EQ(column_hll_perm3->size(), 5);
+    ASSERT_EQ(column_quantile_state_perm3->size(), 5);
+    for (int i = 0; i < rows; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_perm3->operator[](permute_idx[i]));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_perm3->operator[](permute_idx[i]));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_perm3->operator[](permute_idx[i]));
+    }
+    std::cout << "16. test more val data value and permute success" << std::endl;
+
+    // Each row replicated once: the result equals the source.
+    IColumn::Offsets offsets {1, 2, 3, 4, 5};
+    auto column_bitmap_replicate = column_bitmap->replicate(offsets);
+    auto column_hll_replicate = column_hll->replicate(offsets);
+    auto column_quantile_state_replicate = column_quantile_state->replicate(offsets);
+    ASSERT_EQ(column_bitmap_replicate->size(), 5);
+    ASSERT_EQ(column_hll_replicate->size(), 5);
+    ASSERT_EQ(column_quantile_state_replicate->size(), 5);
+    for (int i = 0; i < 5; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](i),
+                  column_quantile_state_replicate->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](i), column_bitmap_replicate->operator[](i));
+        ASSERT_EQ(column_hll->operator[](i), column_hll_replicate->operator[](i));
+    }
+
+    // Each row replicated twice; res_idx[i] is the source row behind result row i.
+    IColumn::Offsets offsets2 {2, 4, 6, 8, 10};
+    std::vector<int> res_idx {0, 0, 1, 1, 2, 2, 3, 3, 4, 4};
+    auto column_bitmap_replicate2 = column_bitmap->replicate(offsets2);
+    auto column_hll_replicate2 = column_hll->replicate(offsets2);
+    auto column_quantile_state_replicate2 = column_quantile_state->replicate(offsets2);
+    ASSERT_EQ(column_bitmap_replicate2->size(), 10);
+    ASSERT_EQ(column_hll_replicate2->size(), 10);
+    ASSERT_EQ(column_quantile_state_replicate2->size(), 10);
+    ASSERT_EQ(column_quantile_state->size(), 5);
+    for (int i = 0; i < 10; ++i) {
+        ASSERT_EQ(column_quantile_state->operator[](res_idx[i]),
+                  column_quantile_state_replicate2->operator[](i));
+        ASSERT_EQ(column_bitmap->operator[](res_idx[i]), column_bitmap_replicate2->operator[](i));
+        ASSERT_EQ(column_hll->operator[](res_idx[i]), column_hll_replicate2->operator[](i));
+    }
+    std::cout << "17. test more val data value and replicate success" << std::endl;
+}
+
class ColumnBitmapTest : public testing::Test {
public:
virtual void SetUp() override {}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]