This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new d846e4c6e98 branch-3.0: [fix](column_complex) wrong type of Field returned by ColumnComplex (#43718)
d846e4c6e98 is described below

commit d846e4c6e9842420c756a4bea1f151ce84f76bc3
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Nov 13 15:36:33 2024 +0800

    branch-3.0: [fix](column_complex) wrong type of Field returned by ColumnComplex (#43718)
    
    Cherry-picked from #43515
    
    Co-authored-by: Jerry Hu <mrh...@gmail.com>
---
 be/src/exec/es/es_scroll_parser.cpp                |  2 +-
 be/src/vec/columns/column_complex.h                |  5 +-
 be/src/vec/columns/column_fixed_length_object.h    |  6 +-
 be/src/vec/columns/column_string.h                 |  4 +-
 be/src/vec/common/schema_util.cpp                  |  2 +-
 be/src/vec/core/field.cpp                          |  2 +-
 be/src/vec/core/field.h                            | 56 ++------------
 .../vec/data_types/data_type_fixed_length_object.h |  2 +-
 be/src/vec/data_types/data_type_jsonb.h            |  2 +-
 be/src/vec/data_types/data_type_object.h           |  4 +-
 be/src/vec/data_types/data_type_string.cpp         |  2 +-
 be/src/vec/data_types/data_type_string.h           |  2 +-
 .../data_types/serde/data_type_object_serde.cpp    |  5 +-
 be/src/vec/json/parse2column.cpp                   |  2 +-
 .../compaction/index_compaction_test.cpp           |  4 +-
 .../index_compaction_with_deleted_term.cpp         |  4 +-
 .../aggregate_functions/agg_min_max_by_test.cpp    |  2 +-
 be/test/vec/columns/column_hash_func_test.cpp      |  2 +-
 be/test/vec/columns/column_nullable_test.h         |  2 +-
 be/test/vec/core/column_complex_test.cpp           | 87 ++++++++++++++++++++--
 be/test/vec/core/field_test.cpp                    |  2 +-
 .../data_types/serde/data_type_serde_pb_test.cpp   |  6 +-
 .../data_types/serde/data_type_to_string_test.cpp  | 10 +--
 .../vec/function/function_array_element_test.cpp   |  2 +-
 be/test/vec/function/function_array_index_test.cpp |  4 +-
 be/test/vec/function/function_array_size_test.cpp  | 12 +--
 .../vec/function/function_arrays_overlap_test.cpp  |  6 +-
 .../function_compressed_materialization_test.cpp   |  4 +-
 be/test/vec/function/function_string_test.cpp      | 10 +--
 be/test/vec/function/table_function_test.cpp       |  4 +-
 be/test/vec/jsonb/serialize_test.cpp               |  2 +-
 .../data/datatype_p0/complex_types/test_map.out    |  3 +
 .../datatype_p0/complex_types/test_map.groovy      | 35 +++++++++
 33 files changed, 188 insertions(+), 109 deletions(-)

diff --git a/be/src/exec/es/es_scroll_parser.cpp 
b/be/src/exec/es/es_scroll_parser.cpp
index f8dfbd0d85e..f745ac34e65 100644
--- a/be/src/exec/es/es_scroll_parser.cpp
+++ b/be/src/exec/es/es_scroll_parser.cpp
@@ -488,7 +488,7 @@ Status process_single_column(const rapidjson::Value& col, 
PrimitiveType sub_type
                              bool pure_doc_value, vectorized::Array& array) {
     T val;
     RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
-    array.push_back(val);
+    array.push_back(vectorized::Field(val));
     return Status::OK();
 }
 
diff --git a/be/src/vec/columns/column_complex.h 
b/be/src/vec/columns/column_complex.h
index feeb8f71b9d..31aa0bc12eb 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -20,6 +20,8 @@
 
 #pragma once
 
+#include <glog/logging.h>
+
 #include <vector>
 
 #include "olap/hll.h"
@@ -129,13 +131,14 @@ public:
     MutableColumnPtr clone_resized(size_t size) const override;
 
     void insert(const Field& x) override {
+        DCHECK_EQ(x.get_type(), Field::TypeToEnum<T>::value);
         const T& s = doris::vectorized::get<const T&>(x);
         data.push_back(s);
     }
 
     Field operator[](size_t n) const override {
         assert(n < size());
-        return {reinterpret_cast<const char*>(&data[n]), sizeof(data[n])};
+        return Field(data[n]);
     }
 
     void get(size_t n, Field& res) const override {
diff --git a/be/src/vec/columns/column_fixed_length_object.h 
b/be/src/vec/columns/column_fixed_length_object.h
index b83f11ff98a..1f92816ba04 100644
--- a/be/src/vec/columns/column_fixed_length_object.h
+++ b/be/src/vec/columns/column_fixed_length_object.h
@@ -105,11 +105,13 @@ public:
     }
 
     Field operator[](size_t n) const override {
-        return {_data.data() + n * _item_size, _item_size};
+        return Field(
+                String(reinterpret_cast<const char*>(_data.data() + n * 
_item_size), _item_size));
     }
 
     void get(size_t n, Field& res) const override {
-        res.assign_string(_data.data() + n * _item_size, _item_size);
+        res = Field(
+                String(reinterpret_cast<const char*>(_data.data() + n * 
_item_size), _item_size));
     }
 
     StringRef get_data_at(size_t n) const override {
diff --git a/be/src/vec/columns/column_string.h 
b/be/src/vec/columns/column_string.h
index b441b81613b..8a073ef08cb 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -122,7 +122,7 @@ public:
 
     Field operator[](size_t n) const override {
         assert(n < size());
-        return Field(&chars[offset_at(n)], size_at(n));
+        return Field(String(reinterpret_cast<const 
char*>(&chars[offset_at(n)]), size_at(n)));
     }
 
     void get(size_t n, Field& res) const override {
@@ -132,7 +132,7 @@ public:
             res = JsonbField(reinterpret_cast<const 
char*>(&chars[offset_at(n)]), size_at(n));
             return;
         }
-        res.assign_string(&chars[offset_at(n)], size_at(n));
+        res = Field(String(reinterpret_cast<const 
char*>(&chars[offset_at(n)]), size_at(n)));
     }
 
     StringRef get_data_at(size_t n) const override {
diff --git a/be/src/vec/common/schema_util.cpp 
b/be/src/vec/common/schema_util.cpp
index 4545a383910..fd50af3e1fc 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -578,7 +578,7 @@ Status extract(ColumnPtr source, const PathInData& path, 
MutableColumnPtr& dst)
                                  : std::make_shared<DataTypeJsonb>();
     ColumnsWithTypeAndName arguments {
             {source, json_type, ""},
-            {type_string->create_column_const(1, Field(jsonpath.data(), 
jsonpath.size())),
+            {type_string->create_column_const(1, Field(String(jsonpath.data(), 
jsonpath.size()))),
              type_string, ""}};
     auto function =
             SimpleFunctionFactory::instance().get_function("jsonb_extract", 
arguments, json_type);
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp
index 8cb07f27c7c..e652fc2dc9e 100644
--- a/be/src/vec/core/field.cpp
+++ b/be/src/vec/core/field.cpp
@@ -74,7 +74,7 @@ void read_binary(Array& x, BufferReadable& buf) {
         case Field::Types::String: {
             std::string value;
             doris::vectorized::read_string_binary(value, buf);
-            x.push_back(value);
+            x.push_back(Field(value));
             break;
         }
         case Field::Types::JSONB: {
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
index 87459f19ce6..8113dc602fb 100644
--- a/be/src/vec/core/field.h
+++ b/be/src/vec/core/field.h
@@ -452,43 +452,20 @@ public:
 
     Field(Field&& rhs) { create(std::move(rhs)); }
 
+    // Make the constructor with a String parameter explicit to prevent accidentally creating a Field with the wrong string type.
+    // Other types don't require explicit construction to avoid extensive modifications.
     template <typename T>
         requires(!std::is_same_v<std::decay_t<T>, Field>)
-    Field(T&& rhs);
-
-    /// Create a string inplace.
-    Field(const char* data, size_t size) { create(data, size); }
-
-    Field(const unsigned char* data, size_t size) { create(data, size); }
-
-    /// NOTE In case when field already has string type, more direct assign is 
possible.
-    void assign_string(const char* data, size_t size) {
-        destroy();
-        create(data, size);
-    }
-
-    void assign_string(const unsigned char* data, size_t size) {
-        destroy();
-        create(data, size);
-    }
-
-    void assign_jsonb(const char* data, size_t size) {
-        destroy();
-        create_jsonb(data, size);
-    }
-
-    void assign_jsonb(const unsigned char* data, size_t size) {
-        destroy();
-        create_jsonb(data, size);
-    }
+    explicit(std::is_same_v<std::decay_t<T>, String>) Field(T&& rhs);
 
     Field& operator=(const Field& rhs) {
         if (this != &rhs) {
             if (which != rhs.which) {
                 destroy();
                 create(rhs);
-            } else
+            } else {
                 assign(rhs); /// This assigns string or vector without deallocation of existing buffer.
+            }
         }
         return *this;
     }
@@ -503,8 +480,9 @@ public:
             if (which != rhs.which) {
                 destroy();
                 create(std::move(rhs));
-            } else
+            } else {
                 assign(std::move(rhs));
+            }
         }
         return *this;
     }
@@ -731,7 +709,6 @@ private:
         *ptr = std::forward<T>(x);
     }
 
-private:
     void create(const Field& x) {
         dispatch([this](auto& value) { create_concrete(value); }, x);
     }
@@ -748,25 +725,6 @@ private:
         dispatch([this](auto& value) { assign_concrete(std::move(value)); }, 
x);
     }
 
-    void create(const char* data, size_t size) {
-        new (&storage) String(data, size);
-        which = Types::String;
-    }
-
-    void create(const unsigned char* data, size_t size) {
-        create(reinterpret_cast<const char*>(data), size);
-    }
-
-    void create_jsonb(const char* data, size_t size) {
-        new (&storage) JsonbField(data, size);
-        which = Types::JSONB;
-    }
-
-    void create_jsonb(const unsigned char* data, size_t size) {
-        new (&storage) JsonbField(reinterpret_cast<const char*>(data), size);
-        which = Types::JSONB;
-    }
-
     ALWAYS_INLINE void destroy() {
         if (which < Types::MIN_NON_POD) {
             return;
diff --git a/be/src/vec/data_types/data_type_fixed_length_object.h 
b/be/src/vec/data_types/data_type_fixed_length_object.h
index cc3a74429d7..af923ddce18 100644
--- a/be/src/vec/data_types/data_type_fixed_length_object.h
+++ b/be/src/vec/data_types/data_type_fixed_length_object.h
@@ -60,7 +60,7 @@ public:
         return doris::FieldType::OLAP_FIELD_TYPE_NONE;
     }
 
-    Field get_default() const override { return String(); }
+    Field get_default() const override { return Field(String()); }
 
     [[noreturn]] Field get_field(const TExprNode& node) const override {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
diff --git a/be/src/vec/data_types/data_type_jsonb.h 
b/be/src/vec/data_types/data_type_jsonb.h
index 0577e5ed449..3d681e3ce79 100644
--- a/be/src/vec/data_types/data_type_jsonb.h
+++ b/be/src/vec/data_types/data_type_jsonb.h
@@ -78,7 +78,7 @@ public:
         DCHECK_EQ(node.node_type, TExprNodeType::JSON_LITERAL);
         DCHECK(node.__isset.json_literal);
         JsonBinaryValue value(node.json_literal.value);
-        return String(value.value(), value.size());
+        return Field(String(value.value(), value.size()));
     }
 
     bool equals(const IDataType& rhs) const override;
diff --git a/be/src/vec/data_types/data_type_object.h 
b/be/src/vec/data_types/data_type_object.h
index 2959b3dc074..ec60cde9f92 100644
--- a/be/src/vec/data_types/data_type_object.h
+++ b/be/src/vec/data_types/data_type_object.h
@@ -81,10 +81,10 @@ public:
 
     Field get_field(const TExprNode& node) const override {
         if (node.__isset.string_literal) {
-            return node.string_literal.value;
+            return Field(node.string_literal.value);
         }
         if (node.node_type == TExprNodeType::NULL_LITERAL) {
-            return Field();
+            return {};
         }
         std::stringstream error_string;
         node.printTo(error_string);
diff --git a/be/src/vec/data_types/data_type_string.cpp 
b/be/src/vec/data_types/data_type_string.cpp
index d2c2ae2c0b0..4ed51a00128 100644
--- a/be/src/vec/data_types/data_type_string.cpp
+++ b/be/src/vec/data_types/data_type_string.cpp
@@ -64,7 +64,7 @@ Status DataTypeString::from_string(ReadBuffer& rb, IColumn* 
column) const {
 }
 
 Field DataTypeString::get_default() const {
-    return String();
+    return Field(String());
 }
 
 MutableColumnPtr DataTypeString::create_column() const {
diff --git a/be/src/vec/data_types/data_type_string.h 
b/be/src/vec/data_types/data_type_string.h
index abac6bc4b04..dd937168611 100644
--- a/be/src/vec/data_types/data_type_string.h
+++ b/be/src/vec/data_types/data_type_string.h
@@ -75,7 +75,7 @@ public:
     Field get_field(const TExprNode& node) const override {
         DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL);
         DCHECK(node.__isset.string_literal);
-        return node.string_literal.value;
+        return Field(node.string_literal.value);
     }
 
     bool equals(const IDataType& rhs) const override;
diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp 
b/be/src/vec/data_types/serde/data_type_object_serde.cpp
index 530646e9b4d..383add39354 100644
--- a/be/src/vec/data_types/serde/data_type_object_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp
@@ -26,6 +26,7 @@
 #include "vec/common/assert_cast.h"
 #include "vec/common/schema_util.h"
 #include "vec/core/field.h"
+#include "vec/core/types.h"
 
 #ifdef __AVX2__
 #include "util/jsonb_parser_simd.h"
@@ -117,11 +118,11 @@ void 
DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV
     Field field;
     if (arg->isBinary()) {
         const auto* blob = static_cast<const JsonbBlobVal*>(arg);
-        field.assign_jsonb(blob->getBlob(), blob->getBlobLen());
+        field = JsonbField(blob->getBlob(), blob->getBlobLen());
     } else if (arg->isString()) {
         // not a valid jsonb type, insert as string
         const auto* str = static_cast<const JsonbStringVal*>(arg);
-        field.assign_string(str->getBlob(), str->getBlobLen());
+        field = Field(String(str->getBlob(), str->getBlobLen()));
     } else {
         throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid jsonb 
type");
     }
diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp
index aa5fc5eb8ed..ba18083a95c 100644
--- a/be/src/vec/json/parse2column.cpp
+++ b/be/src/vec/json/parse2column.cpp
@@ -149,7 +149,7 @@ void parse_json_to_variant(IColumn& column, const char* 
src, size_t length,
         }
         // Treat as string
         PathInData root_path;
-        Field field(src, length);
+        Field field(String(src, length));
         result = ParseResult {{root_path}, {field}};
     }
     auto& [paths, values] = *result;
diff --git 
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
 
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
index 5e3370847e9..aed83201a63 100644
--- 
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
+++ 
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
@@ -289,8 +289,8 @@ TEST_F(IndexCompactionTest, write_index_test) {
         auto columns = block.mutate_columns();
         for (const auto& row : data[i]) {
             vectorized::Field key = Int32(row.key);
-            vectorized::Field v1 = row.word;
-            vectorized::Field v2 = row.url;
+            vectorized::Field v1(row.word);
+            vectorized::Field v2(row.url);
             vectorized::Field v3 = Int32(row.num);
             columns[0]->insert(key);
             columns[1]->insert(v1);
diff --git 
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
 
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
index 321d43fa872..8b5d403fca4 100644
--- 
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
+++ 
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
@@ -582,8 +582,8 @@ TEST_F(IndexCompactionDeleteTest, delete_index_test) {
         auto columns = block.mutate_columns();
         for (const auto& row : data[i]) {
             vectorized::Field key = Int32(row.key);
-            vectorized::Field v1 = row.word;
-            vectorized::Field v2 = row.url;
+            vectorized::Field v1(row.word);
+            vectorized::Field v2(row.url);
             vectorized::Field v3 = Int32(row.num);
             columns[0]->insert(key);
             columns[1]->insert(v1);
diff --git a/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp 
b/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp
index 137f4fc70b1..b1a3e9ed483 100644
--- a/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp
+++ b/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp
@@ -71,7 +71,7 @@ TEST_P(AggMinMaxByTest, min_max_by_test) {
             min_pair.first = str_val;
             min_pair.second = i;
         }
-        column_vector_key_str->insert(cast_to_nearest_field_type(str_val));
+        
column_vector_key_str->insert(Field(cast_to_nearest_field_type(str_val)));
     }
 
     // Prepare test function and parameters.
diff --git a/be/test/vec/columns/column_hash_func_test.cpp 
b/be/test/vec/columns/column_hash_func_test.cpp
index 7b2d5f2dddd..c49f1e0a578 100644
--- a/be/test/vec/columns/column_hash_func_test.cpp
+++ b/be/test/vec/columns/column_hash_func_test.cpp
@@ -242,7 +242,7 @@ TEST(HashFuncTest, StructTypeTestWithSepcificValueCrcHash) {
 
     Tuple t;
     t.push_back(Int64(1));
-    t.push_back(String("hello"));
+    t.push_back(Field(String("hello")));
 
     DataTypePtr a = std::make_shared<DataTypeStruct>(dataTypes);
     std::cout << a->get_name() << std::endl;
diff --git a/be/test/vec/columns/column_nullable_test.h 
b/be/test/vec/columns/column_nullable_test.h
index 0f90a25c9b5..f371ff13fb2 100644
--- a/be/test/vec/columns/column_nullable_test.h
+++ b/be/test/vec/columns/column_nullable_test.h
@@ -83,7 +83,7 @@ inline MutableColumnPtr create_nested_column(size_t 
input_rows_count) {
         if constexpr (std::is_integral_v<T>) {
             column->insert(rand() % std::numeric_limits<T>::max());
         } else if constexpr (std::is_same_v<T, String>) {
-            column->insert(generate_random_string(rand() % 512));
+            column->insert(Field(generate_random_string(rand() % 512)));
         } else if constexpr (std::is_same_v<T, Decimal64>) {
             column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
         } else {
diff --git a/be/test/vec/core/column_complex_test.cpp 
b/be/test/vec/core/column_complex_test.cpp
index 589a705e072..a0fbcccdd15 100644
--- a/be/test/vec/core/column_complex_test.cpp
+++ b/be/test/vec/core/column_complex_test.cpp
@@ -17,8 +17,10 @@
 
 #include "vec/columns/column_complex.h"
 
+#include <glog/logging.h>
 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
 #include <stddef.h>
 
 #include <memory>
@@ -26,6 +28,8 @@
 
 #include "agent/be_exec_version_manager.h"
 #include "gtest/gtest_pred_impl.h"
+#include "util/bitmap_value.h"
+#include "vec/core/field.h"
 #include "vec/data_types/data_type.h"
 #include "vec/data_types/data_type_bitmap.h"
 #include "vec/data_types/data_type_quantilestate.h"
@@ -72,12 +76,12 @@ public:
     }
 
     void check_serialize_and_deserialize(MutableColumnPtr& col) {
-        auto column = assert_cast<ColumnBitmap*>(col.get());
+        auto* column = assert_cast<ColumnBitmap*>(col.get());
         auto size = _bitmap_type.get_uncompressed_serialized_bytes(
                 *column, BeExecVersionManager::get_newest_version());
         std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
-        auto result = _bitmap_type.serialize(*column, buf.get(),
-                                             
BeExecVersionManager::get_newest_version());
+        auto* result = _bitmap_type.serialize(*column, buf.get(),
+                                              
BeExecVersionManager::get_newest_version());
         ASSERT_EQ(result, buf.get() + size);
 
         auto column2 = _bitmap_type.create_column();
@@ -85,6 +89,19 @@ public:
         check_bitmap_column(*column, *column2.get());
     }
 
+    void check_field_type(MutableColumnPtr& col) {
+        auto& column = assert_cast<ColumnBitmap&>(*col.get());
+        auto dst_column = ColumnBitmap::create();
+        const auto rows = column.size();
+        for (size_t i = 0; i != rows; ++i) {
+            auto field = column[i];
+            ASSERT_EQ(field.get_type(), Field::Types::Bitmap);
+            dst_column->insert(field);
+        }
+
+        check_bitmap_column(column, *dst_column);
+    }
+
 private:
     DataTypeBitMap _bitmap_type;
 };
@@ -94,7 +111,7 @@ public:
     virtual void SetUp() override {}
     virtual void TearDown() override {}
 
-    void check_bitmap_column(const IColumn& l, const IColumn& r) {
+    void check_quantile_state_column(const IColumn& l, const IColumn& r) {
         ASSERT_EQ(l.size(), r.size());
         const auto& l_col = assert_cast<const ColumnQuantileState&>(l);
         const auto& r_col = assert_cast<const ColumnQuantileState&>(r);
@@ -117,7 +134,20 @@ public:
         auto column2 = _quantile_state_type.create_column();
         _quantile_state_type.deserialize(buf.get(), &column2,
                                          
BeExecVersionManager::get_newest_version());
-        check_bitmap_column(*column, *column2.get());
+        check_quantile_state_column(*column, *column2.get());
+    }
+
+    void check_field_type(MutableColumnPtr& col) {
+        auto& column = assert_cast<ColumnQuantileState&>(*col.get());
+        auto dst_column = ColumnQuantileState::create();
+        const auto rows = column.size();
+        for (size_t i = 0; i != rows; ++i) {
+            auto field = column[i];
+            ASSERT_EQ(field.get_type(), Field::Types::QuantileState);
+            dst_column->insert(field);
+        }
+
+        check_quantile_state_column(column, *dst_column);
     }
 
 private:
@@ -153,6 +183,38 @@ TEST_F(ColumnBitmapTest, ColumnBitmapReadWrite) {
     EXPECT_TRUE(bitmap.contains(1000000));
 }
 
+TEST_F(ColumnBitmapTest, OperatorValidate) {
+    auto column = _bitmap_type.create_column();
+
+    // empty column
+    check_serialize_and_deserialize(column);
+
+    // bitmap with lots of rows
+    const size_t row_size = 128;
+    auto& data = assert_cast<ColumnBitmap&>(*column.get()).get_data();
+    data.reserve(row_size);
+
+    for (size_t i = 0; i != row_size; ++i) {
+        BitmapValue bitmap_value;
+        for (size_t j = 0; j <= i; ++j) {
+            bitmap_value.add(j);
+        }
+        data.emplace_back(std::move(bitmap_value));
+    }
+
+    auto& bitmap_column = assert_cast<ColumnBitmap&>(*column.get());
+    for (size_t i = 0; i != row_size; ++i) {
+        auto field = bitmap_column[i];
+        ASSERT_EQ(field.get_type(), Field::Types::Bitmap);
+        const auto& bitmap = vectorized::get<BitmapValue&>(field);
+
+        ASSERT_EQ(bitmap.cardinality(), i + 1);
+        for (size_t j = 0; j <= i; ++j) {
+            ASSERT_TRUE(bitmap.contains(j));
+        }
+    }
+}
+
 TEST_F(ColumnQuantileStateTest, ColumnQuantileStateReadWrite) {
     auto column = _quantile_state_type.create_column();
     // empty column
@@ -180,4 +242,19 @@ TEST_F(ColumnQuantileStateTest, 
ColumnQuantileStateReadWrite) {
     check_serialize_and_deserialize(column);
 }
 
+TEST_F(ColumnQuantileStateTest, OperatorValidate) {
+    auto column = _quantile_state_type.create_column();
+
+    // empty column
+    check_serialize_and_deserialize(column);
+
+    // bitmap with lots of rows
+    const size_t row_size = 20000;
+    auto& data = assert_cast<ColumnQuantileState&>(*column.get()).get_data();
+    data.resize(row_size);
+    check_serialize_and_deserialize(column);
+
+    check_field_type(column);
+}
+
 } // namespace doris::vectorized
diff --git a/be/test/vec/core/field_test.cpp b/be/test/vec/core/field_test.cpp
index a3542735c50..71d26ea4979 100644
--- a/be/test/vec/core/field_test.cpp
+++ b/be/test/vec/core/field_test.cpp
@@ -39,7 +39,7 @@ TEST(VFieldTest, field_string) {
     ASSERT_EQ(f.get<String>(), "Hello, world (4)");
     f = Array {Field {String {"Hello, world (5)"}}};
     ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (5)");
-    f = Array {String {"Hello, world (6)"}};
+    f = Array {Field(String {"Hello, world (6)"})};
     ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (6)");
 }
 
diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp 
b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
index b64ddee1d2c..852614e84c5 100644
--- a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
@@ -583,10 +583,10 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestStruct) {
     DataTypePtr m = 
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
     DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> 
{s, d, m});
     Tuple t1, t2;
-    t1.push_back(String("amory cute"));
+    t1.push_back(Field(String("amory cute")));
     t1.push_back(__int128_t(37));
     t1.push_back(true);
-    t2.push_back("null");
+    t2.push_back(Field("null"));
     t2.push_back(__int128_t(26));
     t2.push_back(false);
     MutableColumnPtr struct_column = st->create_column();
@@ -614,7 +614,7 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestStruct2) {
     DataTypePtr m = 
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
     DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> 
{s, d, m});
     Tuple t1, t2;
-    t1.push_back(String("amory cute"));
+    t1.push_back(Field(String("amory cute")));
     t1.push_back(37);
     t1.push_back(true);
     t2.push_back("null");
diff --git a/be/test/vec/data_types/serde/data_type_to_string_test.cpp 
b/be/test/vec/data_types/serde/data_type_to_string_test.cpp
index fe2e05d10a1..d605e73ced3 100644
--- a/be/test/vec/data_types/serde/data_type_to_string_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_to_string_test.cpp
@@ -45,9 +45,9 @@ TEST(ToStringMethodTest, DataTypeToStringTest) {
     a1.push_back(Null());
     a1.push_back(UInt64(12345678));
     a1.push_back(UInt64(0));
-    a2.push_back(String("hello amory"));
-    a2.push_back("NULL");
-    a2.push_back(String("cute amory"));
+    a2.push_back(Field(String("hello amory")));
+    a2.push_back(Field("NULL"));
+    a2.push_back(Field(String("cute amory")));
     a2.push_back(Null());
     Map m;
     m.push_back(a1);
@@ -55,11 +55,11 @@ TEST(ToStringMethodTest, DataTypeToStringTest) {
 
     Tuple t;
     t.push_back(Int128(12345454342));
-    t.push_back(String("amory cute"));
+    t.push_back(Field(String("amory cute")));
     t.push_back(UInt64(0));
 
     cases.field_values = {UInt64(12),
-                          String(" hello amory , cute amory "),
+                          Field(String(" hello amory , cute amory ")),
                           DecimalField<Decimal32>(-12345678, 0),
                           a1,
                           a2,
diff --git a/be/test/vec/function/function_array_element_test.cpp 
b/be/test/vec/function/function_array_element_test.cpp
index 16ce28f5259..bf25ea4386c 100644
--- a/be/test/vec/function/function_array_element_test.cpp
+++ b/be/test/vec/function/function_array_element_test.cpp
@@ -148,7 +148,7 @@ TEST(function_array_element_test, element_at) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, 
TypeIndex::Int32};
 
-        Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)};
+        Array vec = {Field(String("abc", 3)), Field(String("", 0)), 
Field(String("def", 3))};
         DataSet data_set = {{{vec, 1}, std::string("abc")},
                             {{vec, 2}, std::string("")},
                             {{vec, 10}, Null()},
diff --git a/be/test/vec/function/function_array_index_test.cpp 
b/be/test/vec/function/function_array_index_test.cpp
index 24bd5797869..1a037818b10 100644
--- a/be/test/vec/function/function_array_index_test.cpp
+++ b/be/test/vec/function/function_array_index_test.cpp
@@ -152,7 +152,7 @@ TEST(function_array_index_test, array_contains) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, 
TypeIndex::String};
 
-        Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)};
+        Array vec = {Field(String("abc", 3)), Field(String("", 0)), 
Field(String("def", 3))};
         DataSet data_set = {{{vec, std::string("abc")}, UInt8(1)},
                             {{vec, std::string("aaa")}, UInt8(0)},
                             {{vec, std::string("")}, UInt8(1)},
@@ -252,7 +252,7 @@ TEST(function_array_index_test, array_position) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, 
TypeIndex::String};
 
-        Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)};
+        Array vec = {Field(String("abc", 3)), Field(String("", 0)), 
Field(String("def", 3))};
         DataSet data_set = {{{vec, std::string("abc")}, Int64(1)},
                             {{vec, std::string("aaa")}, Int64(0)},
                             {{vec, std::string("")}, Int64(2)},
diff --git a/be/test/vec/function/function_array_size_test.cpp 
b/be/test/vec/function/function_array_size_test.cpp
index 3fa710f6844..c853a56930d 100644
--- a/be/test/vec/function/function_array_size_test.cpp
+++ b/be/test/vec/function/function_array_size_test.cpp
@@ -47,8 +47,8 @@ TEST(function_array_size_test, size) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
 
-        Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
-        Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)};
+        Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), 
Field(String("def", 3))};
+        Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), 
Field(String("def", 3))};
         DataSet data_set = {{{vec1}, Int64(3)},
                             {{vec2}, Int64(3)},
                             {{Null()}, Null()},
@@ -76,8 +76,8 @@ TEST(function_array_size_test, cardinality) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
 
-        Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
-        Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)};
+        Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), 
Field(String("def", 3))};
+        Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), 
Field(String("def", 3))};
         DataSet data_set = {{{vec1}, Int64(3)},
                             {{vec2}, Int64(3)},
                             {{Null()}, Null()},
@@ -105,8 +105,8 @@ TEST(function_array_size_test, array_size) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
 
-        Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
-        Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)};
+        Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), 
Field(String("def", 3))};
+        Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), 
Field(String("def", 3))};
         DataSet data_set = {{{vec1}, Int64(3)},
                             {{vec2}, Int64(3)},
                             {{Null()}, Null()},
diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp 
b/be/test/vec/function/function_arrays_overlap_test.cpp
index 4a13d41b0a3..3297f5fc281 100644
--- a/be/test/vec/function/function_arrays_overlap_test.cpp
+++ b/be/test/vec/function/function_arrays_overlap_test.cpp
@@ -124,9 +124,9 @@ TEST(function_arrays_overlap_test, arrays_overlap) {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Array,
                                     TypeIndex::String};
 
-        Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
-        Array vec2 = {Field("abc", 3)};
-        Array vec3 = {Field("", 0)};
+        Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))};
+        Array vec2 = {Field(String("abc", 3))};
+        Array vec3 = {Field(String("", 0))};
         DataSet data_set = {{{vec1, vec2}, UInt8(1)},
                             {{vec1, vec3}, UInt8(1)},
                             {{Null(), vec1}, Null()},
diff --git a/be/test/vec/function/function_compressed_materialization_test.cpp b/be/test/vec/function/function_compressed_materialization_test.cpp
index 2553fc82fc7..432fbf78529 100644
--- a/be/test/vec/function/function_compressed_materialization_test.cpp
+++ b/be/test/vec/function/function_compressed_materialization_test.cpp
@@ -111,7 +111,7 @@ void encode_and_decode(size_t len_of_varchar, std::string function_name) {
                 continue;
             } else {
                 std::string random_bytes = generate_random_len_and_random_bytes(m);
-                col_source_str_mutate->insert(Field(random_bytes.c_str(), random_bytes.size()));
+                col_source_str_mutate->insert(Field(random_bytes));
             }
         }
 
@@ -185,7 +185,7 @@ TEST(CompressedMaterializationTest, abnormal_test) {
 
     for (size_t i = 0; i < input_rows_count; ++i) {
         std::string random_bytes = generate_random_bytes(16);
-        col_source_str_mutate->insert(Field(random_bytes.c_str(), random_bytes.size()));
+        col_source_str_mutate->insert(Field(random_bytes));
     }
 
     auto col_source_str = std::move(col_source_str_mutate);
diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp
index 5d1d6fb9d8b..f4381505276 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -1417,11 +1417,11 @@ TEST(function_string_test, function_concat_ws_test) {
     {
         BaseInputTypeSet input_types = {TypeIndex::String, TypeIndex::Array, TypeIndex::String};
 
-        Array vec1 = {Field("", 0), Field("", 0), Field("", 0)};
-        Array vec2 = {Field("123", 3), Field("456", 3), Field("789", 3)};
-        Array vec3 = {Field("", 0), Field("?", 1), Field("", 0)};
-        Array vec4 = {Field("abc", 3), Field("", 0), Field("def", 3)};
-        Array vec5 = {Field("abc", 3), Field("def", 3), Field("ghi", 3)};
+        Array vec1 = {Field(String("", 0)), Field(String("", 0)), Field(String("", 0))};
+        Array vec2 = {Field(String("123", 3)), Field(String("456", 3)), Field(String("789", 3))};
+        Array vec3 = {Field(String("", 0)), Field(String("?", 1)), Field(String("", 0))};
+        Array vec4 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))};
+        Array vec5 = {Field(String("abc", 3)), Field(String("def", 3)), Field(String("ghi", 3))};
         DataSet data_set = {{{std::string("-"), vec1}, std::string("--")},
                             {{std::string(""), vec2}, std::string("123456789")},
                             {{std::string("-"), vec3}, std::string("-?-")},
diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp
index a5c49dbdba9..43d37f6bf73 100644
--- a/be/test/vec/function/table_function_test.cpp
+++ b/be/test/vec/function/table_function_test.cpp
@@ -97,7 +97,7 @@ TEST_F(TableFunctionTest, vexplode_outer) {
     // explode_outer(Array<String>)
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
-        Array vec = {std::string("abc"), std::string(""), std::string("def")};
+        Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))};
         InputDataSet input_set = {{Null()}, {Array()}, {vec}};
 
         InputTypeSet output_types = {TypeIndex::String};
@@ -144,7 +144,7 @@ TEST_F(TableFunctionTest, vexplode) {
     // explode(Array<String>)
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
-        Array vec = {std::string("abc"), std::string(""), std::string("def")};
+        Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))};
         InputDataSet input_set = {{Null()}, {Array()}, {vec}};
 
         InputTypeSet output_types = {TypeIndex::String};
diff --git a/be/test/vec/jsonb/serialize_test.cpp b/be/test/vec/jsonb/serialize_test.cpp
index 3845c689e1e..86244c6ca34 100644
--- a/be/test/vec/jsonb/serialize_test.cpp
+++ b/be/test/vec/jsonb/serialize_test.cpp
@@ -277,7 +277,7 @@ TEST(BlockSerializeTest, Struct) {
         DataTypePtr m = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
         DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m});
         Tuple t1, t2;
-        t1.push_back(String("amory cute"));
+        t1.push_back(Field(String("amory cute")));
         t1.push_back(__int128_t(37));
         t1.push_back(true);
         t2.push_back("null");
diff --git a/regression-test/data/datatype_p0/complex_types/test_map.out b/regression-test/data/datatype_p0/complex_types/test_map.out
index 4ac971fb3a1..03c9853b8e8 100644
--- a/regression-test/data/datatype_p0/complex_types/test_map.out
+++ b/regression-test/data/datatype_p0/complex_types/test_map.out
@@ -14,3 +14,6 @@
 6      3       {"key3":"value3", "key33":"value33", "key3333":"value333"}      
6       3
 7      4       {"key4":"value4", "key44":"value44", "key444":"value444", 
"key4444":"value4444"}        \N      \N
 
+-- !sql2 --
+3      true    true    true
+
diff --git a/regression-test/suites/datatype_p0/complex_types/test_map.groovy b/regression-test/suites/datatype_p0/complex_types/test_map.groovy
index 4dd0272f517..b985ef61008 100644
--- a/regression-test/suites/datatype_p0/complex_types/test_map.groovy
+++ b/regression-test/suites/datatype_p0/complex_types/test_map.groovy
@@ -51,4 +51,39 @@ suite("test_map") {
     qt_sql """
         select * from test_map_table left join test_map_table_right on test_map_table.k1 = test_map_table_right.value order by 1,2,4,5;
     """
+
+    sql "DROP TABLE IF EXISTS `task_map_agg_with_bitmap`"
+    sql """
+        CREATE TABLE `task_map_agg_with_bitmap` (
+            `cache_key` varchar(65533) NOT NULL,
+            `result_cnt` int NULL COMMENT '人群包人数'
+        ) ENGINE = OLAP duplicate KEY(`cache_key`) COMMENT 'OLAP' DISTRIBUTED BY HASH(`cache_key`) BUCKETS 1 PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql 'insert into `task_map_agg_with_bitmap` values ("aa",null);'
+    sql 'insert into `task_map_agg_with_bitmap` values ("bb",null);'
+    sql 'insert into `task_map_agg_with_bitmap` values ("bb",1);'
+    sql 'insert into `task_map_agg_with_bitmap` values ("bb",2);'
+    sql 'insert into `task_map_agg_with_bitmap` values ("bb",3);'
+
+    qt_sql2 """
+        select bitmap_count(id_map['2024-11-03']) cnt,
+            bitmap_contains(id_map['2024-11-03'], 1) c1,
+            bitmap_contains(id_map['2024-11-03'], 2) c2,
+            bitmap_contains(id_map['2024-11-03'], 3) c3
+        from (
+            select
+                map_agg(tag_logymd, result) id_map
+            from
+                (
+                    select
+                        '2024-11-03' tag_logymd,
+                        bitmap_agg(result_cnt) result
+                    from
+                        `task_map_agg_with_bitmap`
+                ) t1
+        ) t2;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to