This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new d846e4c6e98 branch-3.0: [fix](column_complex) wrong type of Field returned by ColumnComplex (#43718) d846e4c6e98 is described below commit d846e4c6e9842420c756a4bea1f151ce84f76bc3 Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Wed Nov 13 15:36:33 2024 +0800 branch-3.0: [fix](column_complex) wrong type of Field returned by ColumnComplex (#43718) Cherry-picked from #43515 Co-authored-by: Jerry Hu <mrh...@gmail.com> --- be/src/exec/es/es_scroll_parser.cpp | 2 +- be/src/vec/columns/column_complex.h | 5 +- be/src/vec/columns/column_fixed_length_object.h | 6 +- be/src/vec/columns/column_string.h | 4 +- be/src/vec/common/schema_util.cpp | 2 +- be/src/vec/core/field.cpp | 2 +- be/src/vec/core/field.h | 56 ++------------ .../vec/data_types/data_type_fixed_length_object.h | 2 +- be/src/vec/data_types/data_type_jsonb.h | 2 +- be/src/vec/data_types/data_type_object.h | 4 +- be/src/vec/data_types/data_type_string.cpp | 2 +- be/src/vec/data_types/data_type_string.h | 2 +- .../data_types/serde/data_type_object_serde.cpp | 5 +- be/src/vec/json/parse2column.cpp | 2 +- .../compaction/index_compaction_test.cpp | 4 +- .../index_compaction_with_deleted_term.cpp | 4 +- .../aggregate_functions/agg_min_max_by_test.cpp | 2 +- be/test/vec/columns/column_hash_func_test.cpp | 2 +- be/test/vec/columns/column_nullable_test.h | 2 +- be/test/vec/core/column_complex_test.cpp | 87 ++++++++++++++++++++-- be/test/vec/core/field_test.cpp | 2 +- .../data_types/serde/data_type_serde_pb_test.cpp | 6 +- .../data_types/serde/data_type_to_string_test.cpp | 10 +-- .../vec/function/function_array_element_test.cpp | 2 +- be/test/vec/function/function_array_index_test.cpp | 4 +- be/test/vec/function/function_array_size_test.cpp | 12 +-- .../vec/function/function_arrays_overlap_test.cpp | 6 +- .../function_compressed_materialization_test.cpp | 4 +- 
be/test/vec/function/function_string_test.cpp | 10 +-- be/test/vec/function/table_function_test.cpp | 4 +- be/test/vec/jsonb/serialize_test.cpp | 2 +- .../data/datatype_p0/complex_types/test_map.out | 3 + .../datatype_p0/complex_types/test_map.groovy | 35 +++++++++ 33 files changed, 188 insertions(+), 109 deletions(-) diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index f8dfbd0d85e..f745ac34e65 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -488,7 +488,7 @@ Status process_single_column(const rapidjson::Value& col, PrimitiveType sub_type bool pure_doc_value, vectorized::Array& array) { T val; RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); - array.push_back(val); + array.push_back(vectorized::Field(val)); return Status::OK(); } diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index feeb8f71b9d..31aa0bc12eb 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -20,6 +20,8 @@ #pragma once +#include <glog/logging.h> + #include <vector> #include "olap/hll.h" @@ -129,13 +131,14 @@ public: MutableColumnPtr clone_resized(size_t size) const override; void insert(const Field& x) override { + DCHECK_EQ(x.get_type(), Field::TypeToEnum<T>::value); const T& s = doris::vectorized::get<const T&>(x); data.push_back(s); } Field operator[](size_t n) const override { assert(n < size()); - return {reinterpret_cast<const char*>(&data[n]), sizeof(data[n])}; + return Field(data[n]); } void get(size_t n, Field& res) const override { diff --git a/be/src/vec/columns/column_fixed_length_object.h b/be/src/vec/columns/column_fixed_length_object.h index b83f11ff98a..1f92816ba04 100644 --- a/be/src/vec/columns/column_fixed_length_object.h +++ b/be/src/vec/columns/column_fixed_length_object.h @@ -105,11 +105,13 @@ public: } Field operator[](size_t n) const override { - return {_data.data() + n * _item_size, 
_item_size}; + return Field( + String(reinterpret_cast<const char*>(_data.data() + n * _item_size), _item_size)); } void get(size_t n, Field& res) const override { - res.assign_string(_data.data() + n * _item_size, _item_size); + res = Field( + String(reinterpret_cast<const char*>(_data.data() + n * _item_size), _item_size)); } StringRef get_data_at(size_t n) const override { diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index b441b81613b..8a073ef08cb 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -122,7 +122,7 @@ public: Field operator[](size_t n) const override { assert(n < size()); - return Field(&chars[offset_at(n)], size_at(n)); + return Field(String(reinterpret_cast<const char*>(&chars[offset_at(n)]), size_at(n))); } void get(size_t n, Field& res) const override { @@ -132,7 +132,7 @@ public: res = JsonbField(reinterpret_cast<const char*>(&chars[offset_at(n)]), size_at(n)); return; } - res.assign_string(&chars[offset_at(n)], size_at(n)); + res = Field(String(reinterpret_cast<const char*>(&chars[offset_at(n)]), size_at(n))); } StringRef get_data_at(size_t n) const override { diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 4545a383910..fd50af3e1fc 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -578,7 +578,7 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst) : std::make_shared<DataTypeJsonb>(); ColumnsWithTypeAndName arguments { {source, json_type, ""}, - {type_string->create_column_const(1, Field(jsonpath.data(), jsonpath.size())), + {type_string->create_column_const(1, Field(String(jsonpath.data(), jsonpath.size()))), type_string, ""}}; auto function = SimpleFunctionFactory::instance().get_function("jsonb_extract", arguments, json_type); diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp index 8cb07f27c7c..e652fc2dc9e 100644 --- 
a/be/src/vec/core/field.cpp +++ b/be/src/vec/core/field.cpp @@ -74,7 +74,7 @@ void read_binary(Array& x, BufferReadable& buf) { case Field::Types::String: { std::string value; doris::vectorized::read_string_binary(value, buf); - x.push_back(value); + x.push_back(Field(value)); break; } case Field::Types::JSONB: { diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 87459f19ce6..8113dc602fb 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -452,43 +452,20 @@ public: Field(Field&& rhs) { create(std::move(rhs)); } + // Make the constructor with a String parameter explicit to prevent accidentally creating a Field with the wrong string type. + // Other types don't require explicit construction to avoid extensive modifications. template <typename T> requires(!std::is_same_v<std::decay_t<T>, Field>) - Field(T&& rhs); - - /// Create a string inplace. - Field(const char* data, size_t size) { create(data, size); } - - Field(const unsigned char* data, size_t size) { create(data, size); } - - /// NOTE In case when field already has string type, more direct assign is possible. - void assign_string(const char* data, size_t size) { - destroy(); - create(data, size); - } - - void assign_string(const unsigned char* data, size_t size) { - destroy(); - create(data, size); - } - - void assign_jsonb(const char* data, size_t size) { - destroy(); - create_jsonb(data, size); - } - - void assign_jsonb(const unsigned char* data, size_t size) { - destroy(); - create_jsonb(data, size); - } + explicit(std::is_same_v<std::decay_t<T>, String>) Field(T&& rhs); Field& operator=(const Field& rhs) { if (this != &rhs) { if (which != rhs.which) { destroy(); create(rhs); - } else + } else { assign(rhs); /// This assigns string or vector without deallocation of existing buffer. 
+ } } return *this; } @@ -503,8 +480,9 @@ public: if (which != rhs.which) { destroy(); create(std::move(rhs)); - } else + } else { assign(std::move(rhs)); + } } return *this; } @@ -731,7 +709,6 @@ private: *ptr = std::forward<T>(x); } -private: void create(const Field& x) { dispatch([this](auto& value) { create_concrete(value); }, x); } @@ -748,25 +725,6 @@ private: dispatch([this](auto& value) { assign_concrete(std::move(value)); }, x); } - void create(const char* data, size_t size) { - new (&storage) String(data, size); - which = Types::String; - } - - void create(const unsigned char* data, size_t size) { - create(reinterpret_cast<const char*>(data), size); - } - - void create_jsonb(const char* data, size_t size) { - new (&storage) JsonbField(data, size); - which = Types::JSONB; - } - - void create_jsonb(const unsigned char* data, size_t size) { - new (&storage) JsonbField(reinterpret_cast<const char*>(data), size); - which = Types::JSONB; - } - ALWAYS_INLINE void destroy() { if (which < Types::MIN_NON_POD) { return; diff --git a/be/src/vec/data_types/data_type_fixed_length_object.h b/be/src/vec/data_types/data_type_fixed_length_object.h index cc3a74429d7..af923ddce18 100644 --- a/be/src/vec/data_types/data_type_fixed_length_object.h +++ b/be/src/vec/data_types/data_type_fixed_length_object.h @@ -60,7 +60,7 @@ public: return doris::FieldType::OLAP_FIELD_TYPE_NONE; } - Field get_default() const override { return String(); } + Field get_default() const override { return Field(String()); } [[noreturn]] Field get_field(const TExprNode& node) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, diff --git a/be/src/vec/data_types/data_type_jsonb.h b/be/src/vec/data_types/data_type_jsonb.h index 0577e5ed449..3d681e3ce79 100644 --- a/be/src/vec/data_types/data_type_jsonb.h +++ b/be/src/vec/data_types/data_type_jsonb.h @@ -78,7 +78,7 @@ public: DCHECK_EQ(node.node_type, TExprNodeType::JSON_LITERAL); DCHECK(node.__isset.json_literal); JsonBinaryValue 
value(node.json_literal.value); - return String(value.value(), value.size()); + return Field(String(value.value(), value.size())); } bool equals(const IDataType& rhs) const override; diff --git a/be/src/vec/data_types/data_type_object.h b/be/src/vec/data_types/data_type_object.h index 2959b3dc074..ec60cde9f92 100644 --- a/be/src/vec/data_types/data_type_object.h +++ b/be/src/vec/data_types/data_type_object.h @@ -81,10 +81,10 @@ public: Field get_field(const TExprNode& node) const override { if (node.__isset.string_literal) { - return node.string_literal.value; + return Field(node.string_literal.value); } if (node.node_type == TExprNodeType::NULL_LITERAL) { - return Field(); + return {}; } std::stringstream error_string; node.printTo(error_string); diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp index d2c2ae2c0b0..4ed51a00128 100644 --- a/be/src/vec/data_types/data_type_string.cpp +++ b/be/src/vec/data_types/data_type_string.cpp @@ -64,7 +64,7 @@ Status DataTypeString::from_string(ReadBuffer& rb, IColumn* column) const { } Field DataTypeString::get_default() const { - return String(); + return Field(String()); } MutableColumnPtr DataTypeString::create_column() const { diff --git a/be/src/vec/data_types/data_type_string.h b/be/src/vec/data_types/data_type_string.h index abac6bc4b04..dd937168611 100644 --- a/be/src/vec/data_types/data_type_string.h +++ b/be/src/vec/data_types/data_type_string.h @@ -75,7 +75,7 @@ public: Field get_field(const TExprNode& node) const override { DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL); DCHECK(node.__isset.string_literal); - return node.string_literal.value; + return Field(node.string_literal.value); } bool equals(const IDataType& rhs) const override; diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp index 530646e9b4d..383add39354 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.cpp 
+++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp @@ -26,6 +26,7 @@ #include "vec/common/assert_cast.h" #include "vec/common/schema_util.h" #include "vec/core/field.h" +#include "vec/core/types.h" #ifdef __AVX2__ #include "util/jsonb_parser_simd.h" @@ -117,11 +118,11 @@ void DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV Field field; if (arg->isBinary()) { const auto* blob = static_cast<const JsonbBlobVal*>(arg); - field.assign_jsonb(blob->getBlob(), blob->getBlobLen()); + field = JsonbField(blob->getBlob(), blob->getBlobLen()); } else if (arg->isString()) { // not a valid jsonb type, insert as string const auto* str = static_cast<const JsonbStringVal*>(arg); - field.assign_string(str->getBlob(), str->getBlobLen()); + field = Field(String(str->getBlob(), str->getBlobLen())); } else { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid jsonb type"); } diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp index aa5fc5eb8ed..ba18083a95c 100644 --- a/be/src/vec/json/parse2column.cpp +++ b/be/src/vec/json/parse2column.cpp @@ -149,7 +149,7 @@ void parse_json_to_variant(IColumn& column, const char* src, size_t length, } // Treat as string PathInData root_path; - Field field(src, length); + Field field(String(src, length)); result = ParseResult {{root_path}, {field}}; } auto& [paths, values] = *result; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp index 5e3370847e9..aed83201a63 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp @@ -289,8 +289,8 @@ TEST_F(IndexCompactionTest, write_index_test) { auto columns = block.mutate_columns(); for (const auto& row : data[i]) { vectorized::Field key = Int32(row.key); - vectorized::Field v1 = 
row.word; - vectorized::Field v2 = row.url; + vectorized::Field v1(row.word); + vectorized::Field v2(row.url); vectorized::Field v3 = Int32(row.num); columns[0]->insert(key); columns[1]->insert(v1); diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp index 321d43fa872..8b5d403fca4 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp @@ -582,8 +582,8 @@ TEST_F(IndexCompactionDeleteTest, delete_index_test) { auto columns = block.mutate_columns(); for (const auto& row : data[i]) { vectorized::Field key = Int32(row.key); - vectorized::Field v1 = row.word; - vectorized::Field v2 = row.url; + vectorized::Field v1(row.word); + vectorized::Field v2(row.url); vectorized::Field v3 = Int32(row.num); columns[0]->insert(key); columns[1]->insert(v1); diff --git a/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp b/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp index 137f4fc70b1..b1a3e9ed483 100644 --- a/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp +++ b/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp @@ -71,7 +71,7 @@ TEST_P(AggMinMaxByTest, min_max_by_test) { min_pair.first = str_val; min_pair.second = i; } - column_vector_key_str->insert(cast_to_nearest_field_type(str_val)); + column_vector_key_str->insert(Field(cast_to_nearest_field_type(str_val))); } // Prepare test function and parameters. 
diff --git a/be/test/vec/columns/column_hash_func_test.cpp b/be/test/vec/columns/column_hash_func_test.cpp index 7b2d5f2dddd..c49f1e0a578 100644 --- a/be/test/vec/columns/column_hash_func_test.cpp +++ b/be/test/vec/columns/column_hash_func_test.cpp @@ -242,7 +242,7 @@ TEST(HashFuncTest, StructTypeTestWithSepcificValueCrcHash) { Tuple t; t.push_back(Int64(1)); - t.push_back(String("hello")); + t.push_back(Field(String("hello"))); DataTypePtr a = std::make_shared<DataTypeStruct>(dataTypes); std::cout << a->get_name() << std::endl; diff --git a/be/test/vec/columns/column_nullable_test.h b/be/test/vec/columns/column_nullable_test.h index 0f90a25c9b5..f371ff13fb2 100644 --- a/be/test/vec/columns/column_nullable_test.h +++ b/be/test/vec/columns/column_nullable_test.h @@ -83,7 +83,7 @@ inline MutableColumnPtr create_nested_column(size_t input_rows_count) { if constexpr (std::is_integral_v<T>) { column->insert(rand() % std::numeric_limits<T>::max()); } else if constexpr (std::is_same_v<T, String>) { - column->insert(generate_random_string(rand() % 512)); + column->insert(Field(generate_random_string(rand() % 512))); } else if constexpr (std::is_same_v<T, Decimal64>) { column->insert(Int64(rand() % std::numeric_limits<Int64>::max())); } else { diff --git a/be/test/vec/core/column_complex_test.cpp b/be/test/vec/core/column_complex_test.cpp index 589a705e072..a0fbcccdd15 100644 --- a/be/test/vec/core/column_complex_test.cpp +++ b/be/test/vec/core/column_complex_test.cpp @@ -17,8 +17,10 @@ #include "vec/columns/column_complex.h" +#include <glog/logging.h> #include <gtest/gtest-message.h> #include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> #include <stddef.h> #include <memory> @@ -26,6 +28,8 @@ #include "agent/be_exec_version_manager.h" #include "gtest/gtest_pred_impl.h" +#include "util/bitmap_value.h" +#include "vec/core/field.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_quantilestate.h" @@ 
-72,12 +76,12 @@ public: } void check_serialize_and_deserialize(MutableColumnPtr& col) { - auto column = assert_cast<ColumnBitmap*>(col.get()); + auto* column = assert_cast<ColumnBitmap*>(col.get()); auto size = _bitmap_type.get_uncompressed_serialized_bytes( *column, BeExecVersionManager::get_newest_version()); std::unique_ptr<char[]> buf = std::make_unique<char[]>(size); - auto result = _bitmap_type.serialize(*column, buf.get(), - BeExecVersionManager::get_newest_version()); + auto* result = _bitmap_type.serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); ASSERT_EQ(result, buf.get() + size); auto column2 = _bitmap_type.create_column(); @@ -85,6 +89,19 @@ public: check_bitmap_column(*column, *column2.get()); } + void check_field_type(MutableColumnPtr& col) { + auto& column = assert_cast<ColumnBitmap&>(*col.get()); + auto dst_column = ColumnBitmap::create(); + const auto rows = column.size(); + for (size_t i = 0; i != rows; ++i) { + auto field = column[i]; + ASSERT_EQ(field.get_type(), Field::Types::Bitmap); + dst_column->insert(field); + } + + check_bitmap_column(column, *dst_column); + } + private: DataTypeBitMap _bitmap_type; }; @@ -94,7 +111,7 @@ public: virtual void SetUp() override {} virtual void TearDown() override {} - void check_bitmap_column(const IColumn& l, const IColumn& r) { + void check_quantile_state_column(const IColumn& l, const IColumn& r) { ASSERT_EQ(l.size(), r.size()); const auto& l_col = assert_cast<const ColumnQuantileState&>(l); const auto& r_col = assert_cast<const ColumnQuantileState&>(r); @@ -117,7 +134,20 @@ public: auto column2 = _quantile_state_type.create_column(); _quantile_state_type.deserialize(buf.get(), &column2, BeExecVersionManager::get_newest_version()); - check_bitmap_column(*column, *column2.get()); + check_quantile_state_column(*column, *column2.get()); + } + + void check_field_type(MutableColumnPtr& col) { + auto& column = assert_cast<ColumnQuantileState&>(*col.get()); + auto dst_column = 
ColumnQuantileState::create(); + const auto rows = column.size(); + for (size_t i = 0; i != rows; ++i) { + auto field = column[i]; + ASSERT_EQ(field.get_type(), Field::Types::QuantileState); + dst_column->insert(field); + } + + check_quantile_state_column(column, *dst_column); } private: @@ -153,6 +183,38 @@ TEST_F(ColumnBitmapTest, ColumnBitmapReadWrite) { EXPECT_TRUE(bitmap.contains(1000000)); } +TEST_F(ColumnBitmapTest, OperatorValidate) { + auto column = _bitmap_type.create_column(); + + // empty column + check_serialize_and_deserialize(column); + + // bitmap with lots of rows + const size_t row_size = 128; + auto& data = assert_cast<ColumnBitmap&>(*column.get()).get_data(); + data.reserve(row_size); + + for (size_t i = 0; i != row_size; ++i) { + BitmapValue bitmap_value; + for (size_t j = 0; j <= i; ++j) { + bitmap_value.add(j); + } + data.emplace_back(std::move(bitmap_value)); + } + + auto& bitmap_column = assert_cast<ColumnBitmap&>(*column.get()); + for (size_t i = 0; i != row_size; ++i) { + auto field = bitmap_column[i]; + ASSERT_EQ(field.get_type(), Field::Types::Bitmap); + const auto& bitmap = vectorized::get<BitmapValue&>(field); + + ASSERT_EQ(bitmap.cardinality(), i + 1); + for (size_t j = 0; j <= i; ++j) { + ASSERT_TRUE(bitmap.contains(j)); + } + } +} + TEST_F(ColumnQuantileStateTest, ColumnQuantileStateReadWrite) { auto column = _quantile_state_type.create_column(); // empty column @@ -180,4 +242,19 @@ TEST_F(ColumnQuantileStateTest, ColumnQuantileStateReadWrite) { check_serialize_and_deserialize(column); } +TEST_F(ColumnQuantileStateTest, OperatorValidate) { + auto column = _quantile_state_type.create_column(); + + // empty column + check_serialize_and_deserialize(column); + + // bitmap with lots of rows + const size_t row_size = 20000; + auto& data = assert_cast<ColumnQuantileState&>(*column.get()).get_data(); + data.resize(row_size); + check_serialize_and_deserialize(column); + + check_field_type(column); +} + } // namespace doris::vectorized diff 
--git a/be/test/vec/core/field_test.cpp b/be/test/vec/core/field_test.cpp index a3542735c50..71d26ea4979 100644 --- a/be/test/vec/core/field_test.cpp +++ b/be/test/vec/core/field_test.cpp @@ -39,7 +39,7 @@ TEST(VFieldTest, field_string) { ASSERT_EQ(f.get<String>(), "Hello, world (4)"); f = Array {Field {String {"Hello, world (5)"}}}; ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (5)"); - f = Array {String {"Hello, world (6)"}}; + f = Array {Field(String {"Hello, world (6)"})}; ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (6)"); } diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp index b64ddee1d2c..852614e84c5 100644 --- a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp @@ -583,10 +583,10 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestStruct) { DataTypePtr m = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()); DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m}); Tuple t1, t2; - t1.push_back(String("amory cute")); + t1.push_back(Field(String("amory cute"))); t1.push_back(__int128_t(37)); t1.push_back(true); - t2.push_back("null"); + t2.push_back(Field("null")); t2.push_back(__int128_t(26)); t2.push_back(false); MutableColumnPtr struct_column = st->create_column(); @@ -614,7 +614,7 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestStruct2) { DataTypePtr m = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()); DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m}); Tuple t1, t2; - t1.push_back(String("amory cute")); + t1.push_back(Field(String("amory cute"))); t1.push_back(37); t1.push_back(true); t2.push_back("null"); diff --git a/be/test/vec/data_types/serde/data_type_to_string_test.cpp b/be/test/vec/data_types/serde/data_type_to_string_test.cpp index fe2e05d10a1..d605e73ced3 100644 --- 
a/be/test/vec/data_types/serde/data_type_to_string_test.cpp +++ b/be/test/vec/data_types/serde/data_type_to_string_test.cpp @@ -45,9 +45,9 @@ TEST(ToStringMethodTest, DataTypeToStringTest) { a1.push_back(Null()); a1.push_back(UInt64(12345678)); a1.push_back(UInt64(0)); - a2.push_back(String("hello amory")); - a2.push_back("NULL"); - a2.push_back(String("cute amory")); + a2.push_back(Field(String("hello amory"))); + a2.push_back(Field("NULL")); + a2.push_back(Field(String("cute amory"))); a2.push_back(Null()); Map m; m.push_back(a1); @@ -55,11 +55,11 @@ TEST(ToStringMethodTest, DataTypeToStringTest) { Tuple t; t.push_back(Int128(12345454342)); - t.push_back(String("amory cute")); + t.push_back(Field(String("amory cute"))); t.push_back(UInt64(0)); cases.field_values = {UInt64(12), - String(" hello amory , cute amory "), + Field(String(" hello amory , cute amory ")), DecimalField<Decimal32>(-12345678, 0), a1, a2, diff --git a/be/test/vec/function/function_array_element_test.cpp b/be/test/vec/function/function_array_element_test.cpp index 16ce28f5259..bf25ea4386c 100644 --- a/be/test/vec/function/function_array_element_test.cpp +++ b/be/test/vec/function/function_array_element_test.cpp @@ -148,7 +148,7 @@ TEST(function_array_element_test, element_at) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Int32}; - Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)}; + Array vec = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec, 1}, std::string("abc")}, {{vec, 2}, std::string("")}, {{vec, 10}, Null()}, diff --git a/be/test/vec/function/function_array_index_test.cpp b/be/test/vec/function/function_array_index_test.cpp index 24bd5797869..1a037818b10 100644 --- a/be/test/vec/function/function_array_index_test.cpp +++ b/be/test/vec/function/function_array_index_test.cpp @@ -152,7 +152,7 @@ TEST(function_array_index_test, array_contains) { { InputTypeSet input_types = {TypeIndex::Array, 
TypeIndex::String, TypeIndex::String}; - Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)}; + Array vec = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec, std::string("abc")}, UInt8(1)}, {{vec, std::string("aaa")}, UInt8(0)}, {{vec, std::string("")}, UInt8(1)}, @@ -252,7 +252,7 @@ TEST(function_array_index_test, array_position) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::String}; - Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)}; + Array vec = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec, std::string("abc")}, Int64(1)}, {{vec, std::string("aaa")}, Int64(0)}, {{vec, std::string("")}, Int64(2)}, diff --git a/be/test/vec/function/function_array_size_test.cpp b/be/test/vec/function/function_array_size_test.cpp index 3fa710f6844..c853a56930d 100644 --- a/be/test/vec/function/function_array_size_test.cpp +++ b/be/test/vec/function/function_array_size_test.cpp @@ -47,8 +47,8 @@ TEST(function_array_size_test, size) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)}; + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec1}, Int64(3)}, {{vec2}, Int64(3)}, {{Null()}, Null()}, @@ -76,8 +76,8 @@ TEST(function_array_size_test, cardinality) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)}; + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), Field(String("def", 3))}; 
DataSet data_set = {{{vec1}, Int64(3)}, {{vec2}, Int64(3)}, {{Null()}, Null()}, @@ -105,8 +105,8 @@ TEST(function_array_size_test, array_size) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)}; + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec1}, Int64(3)}, {{vec2}, Int64(3)}, {{Null()}, Null()}, diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp b/be/test/vec/function/function_arrays_overlap_test.cpp index 4a13d41b0a3..3297f5fc281 100644 --- a/be/test/vec/function/function_arrays_overlap_test.cpp +++ b/be/test/vec/function/function_arrays_overlap_test.cpp @@ -124,9 +124,9 @@ TEST(function_arrays_overlap_test, arrays_overlap) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec2 = {Field("abc", 3)}; - Array vec3 = {Field("", 0)}; + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3))}; + Array vec3 = {Field(String("", 0))}; DataSet data_set = {{{vec1, vec2}, UInt8(1)}, {{vec1, vec3}, UInt8(1)}, {{Null(), vec1}, Null()}, diff --git a/be/test/vec/function/function_compressed_materialization_test.cpp b/be/test/vec/function/function_compressed_materialization_test.cpp index 2553fc82fc7..432fbf78529 100644 --- a/be/test/vec/function/function_compressed_materialization_test.cpp +++ b/be/test/vec/function/function_compressed_materialization_test.cpp @@ -111,7 +111,7 @@ void encode_and_decode(size_t len_of_varchar, std::string function_name) { continue; } else { std::string random_bytes = generate_random_len_and_random_bytes(m); - 
col_source_str_mutate->insert(Field(random_bytes.c_str(), random_bytes.size())); + col_source_str_mutate->insert(Field(random_bytes)); } } @@ -185,7 +185,7 @@ TEST(CompressedMaterializationTest, abnormal_test) { for (size_t i = 0; i < input_rows_count; ++i) { std::string random_bytes = generate_random_bytes(16); - col_source_str_mutate->insert(Field(random_bytes.c_str(), random_bytes.size())); + col_source_str_mutate->insert(Field(random_bytes)); } auto col_source_str = std::move(col_source_str_mutate); diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp index 5d1d6fb9d8b..f4381505276 100644 --- a/be/test/vec/function/function_string_test.cpp +++ b/be/test/vec/function/function_string_test.cpp @@ -1417,11 +1417,11 @@ TEST(function_string_test, function_concat_ws_test) { { BaseInputTypeSet input_types = {TypeIndex::String, TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("", 0), Field("", 0), Field("", 0)}; - Array vec2 = {Field("123", 3), Field("456", 3), Field("789", 3)}; - Array vec3 = {Field("", 0), Field("?", 1), Field("", 0)}; - Array vec4 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec5 = {Field("abc", 3), Field("def", 3), Field("ghi", 3)}; + Array vec1 = {Field(String("", 0)), Field(String("", 0)), Field(String("", 0))}; + Array vec2 = {Field(String("123", 3)), Field(String("456", 3)), Field(String("789", 3))}; + Array vec3 = {Field(String("", 0)), Field(String("?", 1)), Field(String("", 0))}; + Array vec4 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec5 = {Field(String("abc", 3)), Field(String("def", 3)), Field(String("ghi", 3))}; DataSet data_set = {{{std::string("-"), vec1}, std::string("--")}, {{std::string(""), vec2}, std::string("123456789")}, {{std::string("-"), vec3}, std::string("-?-")}, diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp index a5c49dbdba9..43d37f6bf73 100644 
--- a/be/test/vec/function/table_function_test.cpp +++ b/be/test/vec/function/table_function_test.cpp @@ -97,7 +97,7 @@ TEST_F(TableFunctionTest, vexplode_outer) { // explode_outer(Array<String>) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec = {std::string("abc"), std::string(""), std::string("def")}; + Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))}; InputDataSet input_set = {{Null()}, {Array()}, {vec}}; InputTypeSet output_types = {TypeIndex::String}; @@ -144,7 +144,7 @@ TEST_F(TableFunctionTest, vexplode) { // explode(Array<String>) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec = {std::string("abc"), std::string(""), std::string("def")}; + Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))}; InputDataSet input_set = {{Null()}, {Array()}, {vec}}; InputTypeSet output_types = {TypeIndex::String}; diff --git a/be/test/vec/jsonb/serialize_test.cpp b/be/test/vec/jsonb/serialize_test.cpp index 3845c689e1e..86244c6ca34 100644 --- a/be/test/vec/jsonb/serialize_test.cpp +++ b/be/test/vec/jsonb/serialize_test.cpp @@ -277,7 +277,7 @@ TEST(BlockSerializeTest, Struct) { DataTypePtr m = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()); DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m}); Tuple t1, t2; - t1.push_back(String("amory cute")); + t1.push_back(Field(String("amory cute"))); t1.push_back(__int128_t(37)); t1.push_back(true); t2.push_back("null"); diff --git a/regression-test/data/datatype_p0/complex_types/test_map.out b/regression-test/data/datatype_p0/complex_types/test_map.out index 4ac971fb3a1..03c9853b8e8 100644 --- a/regression-test/data/datatype_p0/complex_types/test_map.out +++ b/regression-test/data/datatype_p0/complex_types/test_map.out @@ -14,3 +14,6 @@ 6 3 {"key3":"value3", "key33":"value33", "key3333":"value333"} 6 3 7 4 {"key4":"value4", "key44":"value44", 
"key444":"value444", "key4444":"value4444"} \N \N +-- !sql2 -- +3 true true true + diff --git a/regression-test/suites/datatype_p0/complex_types/test_map.groovy b/regression-test/suites/datatype_p0/complex_types/test_map.groovy index 4dd0272f517..b985ef61008 100644 --- a/regression-test/suites/datatype_p0/complex_types/test_map.groovy +++ b/regression-test/suites/datatype_p0/complex_types/test_map.groovy @@ -51,4 +51,39 @@ suite("test_map") { qt_sql """ select * from test_map_table left join test_map_table_right on test_map_table.k1 = test_map_table_right.value order by 1,2,4,5; """ + + sql "DROP TABLE IF EXISTS `task_map_agg_with_bitmap`" + sql """ + CREATE TABLE `task_map_agg_with_bitmap` ( + `cache_key` varchar(65533) NOT NULL, + `result_cnt` int NULL COMMENT '人群包人数' + ) ENGINE = OLAP duplicate KEY(`cache_key`) COMMENT 'OLAP' DISTRIBUTED BY HASH(`cache_key`) BUCKETS 1 PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql 'insert into `task_map_agg_with_bitmap` values ("aa",null);' + sql 'insert into `task_map_agg_with_bitmap` values ("bb",null);' + sql 'insert into `task_map_agg_with_bitmap` values ("bb",1);' + sql 'insert into `task_map_agg_with_bitmap` values ("bb",2);' + sql 'insert into `task_map_agg_with_bitmap` values ("bb",3);' + + qt_sql2 """ + select bitmap_count(id_map['2024-11-03']) cnt, + bitmap_contains(id_map['2024-11-03'], 1) c1, + bitmap_contains(id_map['2024-11-03'], 2) c2, + bitmap_contains(id_map['2024-11-03'], 3) c3 + from ( + select + map_agg(tag_logymd, result) id_map + from + ( + select + '2024-11-03' tag_logymd, + bitmap_agg(result_cnt) result + from + `task_map_agg_with_bitmap` + ) t1 + ) t2; + """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org