This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 6d0b67a76e45ebcc5c31e738a76416ec49ae5701
Author: zclllhhjj <zhaochan...@selectdb.com>
AuthorDate: Sat Sep 14 10:22:04 2024 +0800

    [Fix](Column) refactor ColumnNullable to provide flags safety (#40769)
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    In this pr we move null_map and relative flags to an individual base
    class to constrain its visibility.
    please reed the comment carefully
    
    test in master:
    ```
    xxx/column_nullable_test.cpp:187: Failure
    Value of: null_dst->has_null()
      Actual: false
    Expected: true
    
    Error occurred in case0
    [  FAILED  ] ColumnNullableTest.PredicateTest (0 ms)
    [ RUN      ] ColumnNullableTest.HashTest
    [       OK ] ColumnNullableTest.HashTest (0 ms)
    [----------] 3 tests from ColumnNullableTest (0 ms total)
    
    [----------] Global test environment tear-down
    [==========] 3 tests from 1 test suite ran. (1 ms total)
    [  PASSED  ] 2 tests.
    [  FAILED  ] 1 test, listed below:
    [  FAILED  ] ColumnNullableTest.PredicateTest
    ```
    
    and success after this pr
    
    ---------
    
    Co-authored-by: Jerry Hu <mrh...@gmail.com>
---
 be/src/vec/columns/column_nullable.cpp             |  85 +++++-------
 be/src/vec/columns/column_nullable.h               | 153 ++++++++++++++-------
 .../column_nullable_seriazlization_test.cpp        |  75 +---------
 be/test/vec/columns/column_nullable_test.cpp       | 112 +++++++++++++++
 be/test/vec/columns/column_nullable_test.h         | 105 ++++++++++++++
 be/test/vec/columns/column_resize_test.cpp         |   1 -
 be/test/vec/function/function_test_util.h          |   8 ++
 .../correctness/test_column_nullable_cache.out     |   6 +
 .../correctness/test_column_nullable_cache.groovy  |  57 ++++++++
 9 files changed, 426 insertions(+), 176 deletions(-)

diff --git a/be/src/vec/columns/column_nullable.cpp 
b/be/src/vec/columns/column_nullable.cpp
index 483ed5ca6cd..dbee5a2025a 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -31,18 +31,19 @@
 namespace doris::vectorized {
 
 ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, 
MutableColumnPtr&& null_map_)
-        : nested_column(std::move(nested_column_)), 
null_map(std::move(null_map_)) {
+        : NullMapProvider(std::move(null_map_)), 
nested_column(std::move(nested_column_)) {
     /// ColumnNullable cannot have constant nested column. But constant 
argument could be passed. Materialize it.
     nested_column = get_nested_column().convert_to_full_column_if_const();
 
     // after convert const column to full column, it may be a nullable column
     if (nested_column->is_nullable()) {
-        assert_cast<ColumnNullable&>(*nested_column).apply_null_map((const 
ColumnUInt8&)*null_map);
-        null_map = 
assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr();
+        assert_cast<ColumnNullable&>(*nested_column)
+                .apply_null_map(static_cast<const 
ColumnUInt8&>(get_null_map_column()));
+        
reset_null_map(assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr());
         nested_column = 
assert_cast<ColumnNullable&>(*nested_column).get_nested_column_ptr();
     }
 
-    if (is_column_const(*null_map)) {
+    if (is_column_const(get_null_map_column())) [[unlikely]] {
         throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                                "ColumnNullable cannot have constant null map");
         __builtin_unreachable();
@@ -69,7 +70,7 @@ void ColumnNullable::update_xxHash_with_value(size_t start, 
size_t end, uint64_t
         nested_column->update_xxHash_with_value(start, end, hash, nullptr);
     } else {
         const auto* __restrict real_null_data =
-                assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
+                assert_cast<const 
ColumnUInt8&>(get_null_map_column()).get_data().data();
         for (int i = start; i < end; ++i) {
             if (real_null_data[i] != 0) {
                 hash = HashUtil::xxHash64NullWithSeed(hash);
@@ -85,7 +86,7 @@ void ColumnNullable::update_crc_with_value(size_t start, 
size_t end, uint32_t& h
         nested_column->update_crc_with_value(start, end, hash, nullptr);
     } else {
         const auto* __restrict real_null_data =
-                assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
+                assert_cast<const 
ColumnUInt8&>(get_null_map_column()).get_data().data();
         for (int i = start; i < end; ++i) {
             if (real_null_data[i] != 0) {
                 hash = HashUtil::zlib_crc_hash_null(hash);
@@ -110,7 +111,7 @@ void ColumnNullable::update_crcs_with_value(uint32_t* 
__restrict hashes, doris::
     auto s = rows;
     DCHECK(s == size());
     const auto* __restrict real_null_data =
-            assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
+            assert_cast<const 
ColumnUInt8&>(get_null_map_column()).get_data().data();
     if (!has_null()) {
         nested_column->update_crcs_with_value(hashes, type, rows, offset, 
nullptr);
     } else {
@@ -128,7 +129,7 @@ void ColumnNullable::update_hashes_with_value(uint64_t* 
__restrict hashes,
     DCHECK(null_data == nullptr);
     auto s = size();
     const auto* __restrict real_null_data =
-            assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
+            assert_cast<const 
ColumnUInt8&>(get_null_map_column()).get_data().data();
     if (!has_null()) {
         nested_column->update_hashes_with_value(hashes, nullptr);
     } else {
@@ -183,24 +184,24 @@ StringRef ColumnNullable::get_data_at(size_t n) const {
 void ColumnNullable::insert_data(const char* pos, size_t length) {
     if (pos == nullptr) {
         get_nested_column().insert_default();
-        _get_null_map_data().push_back(1);
+        get_null_map_data().push_back(1);
         _has_null = true;
+        _need_update_has_null = false;
     } else {
         get_nested_column().insert_data(pos, length);
-        _get_null_map_data().push_back(0);
+        _push_false_to_nullmap(1);
     }
 }
 
 void ColumnNullable::insert_many_strings(const StringRef* strings, size_t num) 
{
-    auto& null_map_data = _get_null_map_data();
     for (size_t i = 0; i != num; ++i) {
         if (strings[i].data == nullptr) {
             nested_column->insert_default();
-            null_map_data.push_back(1);
+            get_null_map_data().push_back(1);
             _has_null = true;
         } else {
             nested_column->insert_data(strings[i].data, strings[i].size);
-            null_map_data.push_back(0);
+            _push_false_to_nullmap(1);
         }
     }
 }
@@ -227,13 +228,14 @@ const char* 
ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
     UInt8 val = *reinterpret_cast<const UInt8*>(pos);
     pos += sizeof(val);
 
-    _get_null_map_data().push_back(val);
+    get_null_map_data().push_back(val);
 
     if (val == 0) {
         pos = get_nested_column().deserialize_and_insert_from_arena(pos);
     } else {
         get_nested_column().insert_default();
         _has_null = true;
+        _need_update_has_null = false;
     }
 
     return pos;
@@ -251,7 +253,7 @@ void ColumnNullable::serialize_vec(std::vector<StringRef>& 
keys, size_t num_rows
 }
 
 void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const 
size_t num_rows) {
-    auto& arr = _get_null_map_data();
+    auto& arr = get_null_map_data();
     const size_t old_size = arr.size();
     arr.resize(old_size + num_rows);
 
@@ -274,21 +276,15 @@ void 
ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t
 void ColumnNullable::insert_range_from_ignore_overflow(const 
doris::vectorized::IColumn& src,
                                                        size_t start, size_t 
length) {
     const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
-    _get_null_map_column().insert_range_from(*nullable_col.null_map, start, 
length);
+    
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), 
start, length);
     
get_nested_column().insert_range_from_ignore_overflow(*nullable_col.nested_column,
 start,
                                                           length);
-    const auto& src_null_map_data = nullable_col.get_null_map_data();
-    _has_null = has_null();
-    _has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 
1);
 }
 
 void ColumnNullable::insert_range_from(const IColumn& src, size_t start, 
size_t length) {
     const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
-    _get_null_map_column().insert_range_from(*nullable_col.null_map, start, 
length);
+    
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), 
start, length);
     get_nested_column().insert_range_from(*nullable_col.nested_column, start, 
length);
-    const auto& src_null_map_data = nullable_col.get_null_map_data();
-    _has_null = has_null();
-    _has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 
1);
 }
 
 void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* 
indices_begin,
@@ -296,9 +292,8 @@ void ColumnNullable::insert_indices_from(const IColumn& 
src, const uint32_t* ind
     const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
     get_nested_column().insert_indices_from(src_concrete.get_nested_column(), 
indices_begin,
                                             indices_end);
-    
_get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), 
indices_begin,
-                                               indices_end);
-    _need_update_has_null = true;
+    
get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), 
indices_begin,
+                                              indices_end);
 }
 
 void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
@@ -307,17 +302,18 @@ void 
ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
     const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
     get_nested_column().insert_indices_from(src_concrete.get_nested_column(), 
indices_begin,
                                             indices_end);
-    _get_null_map_column().insert_many_defaults(indices_end - indices_begin);
+    _push_false_to_nullmap(indices_end - indices_begin);
 }
 
 void ColumnNullable::insert(const Field& x) {
     if (x.is_null()) {
         get_nested_column().insert_default();
-        _get_null_map_data().push_back(1);
+        get_null_map_data().push_back(1);
         _has_null = true;
+        _need_update_has_null = false;
     } else {
         get_nested_column().insert(x);
-        _get_null_map_data().push_back(0);
+        _push_false_to_nullmap(1);
     }
 }
 
@@ -325,19 +321,18 @@ void ColumnNullable::insert_from(const IColumn& src, 
size_t n) {
     const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
     get_nested_column().insert_from(src_concrete.get_nested_column(), n);
     auto is_null = src_concrete.get_null_map_data()[n];
-    _has_null |= is_null;
-    _get_null_map_data().push_back(is_null);
+    get_null_map_data().push_back(is_null);
 }
 
 void ColumnNullable::insert_from_not_nullable(const IColumn& src, size_t n) {
     get_nested_column().insert_from(src, n);
-    _get_null_map_data().push_back(0);
+    _push_false_to_nullmap(1);
 }
 
 void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t 
start,
                                                     size_t length) {
     get_nested_column().insert_range_from(src, start, length);
-    _get_null_map_data().resize_fill(_get_null_map_data().size() + length, 0);
+    _push_false_to_nullmap(length);
 }
 
 void ColumnNullable::insert_many_from_not_nullable(const IColumn& src, size_t 
position,
@@ -366,15 +361,14 @@ size_t ColumnNullable::filter(const Filter& filter) {
 }
 
 Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t 
sel_size, IColumn* col_ptr) {
-    const auto* nullable_col_ptr = reinterpret_cast<const 
ColumnNullable*>(col_ptr);
+    auto* nullable_col_ptr = assert_cast<ColumnNullable*>(col_ptr);
     ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
-    ColumnPtr null_map_ptr = nullable_col_ptr->null_map;
+
+    /// `get_null_map_data` will set `_need_update_has_null` to true
+    auto& res_nullmap = nullable_col_ptr->get_null_map_data();
+
     RETURN_IF_ERROR(get_nested_column().filter_by_selector(
             sel, sel_size, 
const_cast<doris::vectorized::IColumn*>(nest_col_ptr.get())));
-    //insert cur nullmap into result nullmap which is empty
-    auto& res_nullmap = reinterpret_cast<vectorized::ColumnVector<UInt8>*>(
-                                
const_cast<doris::vectorized::IColumn*>(null_map_ptr.get()))
-                                ->get_data();
     DCHECK(res_nullmap.empty());
     res_nullmap.resize(sel_size);
     auto& cur_nullmap = get_null_map_column().get_data();
@@ -522,15 +516,10 @@ void ColumnNullable::get_permutation(bool reverse, size_t 
limit, int null_direct
         }
     }
 }
-//
-//void ColumnNullable::gather(ColumnGathererStream & gatherer)
-//{
-//    gatherer.gather(*this);
-//}
 
 void ColumnNullable::reserve(size_t n) {
     get_nested_column().reserve(n);
-    _get_null_map_data().reserve(n);
+    get_null_map_data(false).reserve(n);
 }
 
 void ColumnNullable::resize(size_t n) {
@@ -582,7 +571,7 @@ void ColumnNullable::apply_null_map(const ColumnNullable& 
other) {
 }
 
 void ColumnNullable::check_consistency() const {
-    if (null_map->size() != get_nested_column().size()) {
+    if (get_null_map_column().size() != get_nested_column().size()) {
         throw Exception(ErrorCode::INTERNAL_ERROR,
                         "Sizes of nested column and null map of Nullable 
column are not equal");
     }
@@ -596,8 +585,8 @@ void ColumnNullable::sort_column(const ColumnSorter* 
sorter, EqualFlags& flags,
 }
 
 void ColumnNullable::_update_has_null() {
-    const UInt8* null_pos = _get_null_map_data().data();
-    _has_null = simd::contain_byte(null_pos, _get_null_map_data().size(), 1);
+    const UInt8* null_pos = get_null_map_data().data();
+    _has_null = simd::contain_byte(null_pos, get_null_map_data().size(), 1);
     _need_update_has_null = false;
 }
 
diff --git a/be/src/vec/columns/column_nullable.h 
b/be/src/vec/columns/column_nullable.h
index 5425242aad7..7772b6e80ad 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -21,7 +21,6 @@
 #pragma once
 
 #include <functional>
-#include <ostream>
 #include <string>
 #include <type_traits>
 #include <utility>
@@ -50,6 +49,69 @@ class ColumnSorter;
 using NullMap = ColumnUInt8::Container;
 using ConstNullMapPtr = const NullMap*;
 
+/// use this to avoid directly access null_map forgetting modify 
_need_update_has_null. see more in inner comments
+class NullMapProvider {
+public:
+    NullMapProvider() = default;
+    NullMapProvider(MutableColumnPtr&& null_map) : 
_null_map(std::move(null_map)) {}
+    void reset_null_map(MutableColumnPtr&& null_map) { _null_map = 
std::move(null_map); }
+
+    // return the column that represents the byte map. if want use null_map, 
just call this.
+    const ColumnPtr& get_null_map_column_ptr() const { return _null_map; }
+    // for functions getting nullmap, we assume it will modify it. so set 
`_need_update_has_null` to true. if you know it wouldn't,
+    // call with arg false. but for the ops which will set _has_null 
themselves, call `update_has_null()`
+    MutableColumnPtr get_null_map_column_ptr(bool may_change = true) {
+        if (may_change) {
+            _need_update_has_null = true;
+        }
+        return _null_map->assume_mutable();
+    }
+    ColumnUInt8::WrappedPtr& get_null_map(bool may_change = true) {
+        if (may_change) {
+            _need_update_has_null = true;
+        }
+        return _null_map;
+    }
+
+    ColumnUInt8& get_null_map_column(bool may_change = true) {
+        if (may_change) {
+            _need_update_has_null = true;
+        }
+        return assert_cast<ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*_null_map);
+    }
+    const ColumnUInt8& get_null_map_column() const {
+        return assert_cast<const ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*_null_map);
+    }
+
+    NullMap& get_null_map_data(bool may_change = true) {
+        return get_null_map_column(may_change).get_data();
+    }
+    const NullMap& get_null_map_data() const { return 
get_null_map_column().get_data(); }
+
+    void clear_null_map() { 
assert_cast<ColumnUInt8*>(_null_map.get())->clear(); }
+
+    void update_has_null(bool new_value) {
+        _has_null = new_value;
+        _need_update_has_null = false;
+    }
+
+protected:
+    /**
+    * Here we have three variables which serve for `has_null()` judgement. If 
we have known the nullity of object, no need
+    *  to check through the `null_map` to get the answer until the next time 
we modify it. Here `_has_null` is just the answer
+    *  we cached. `_need_update_has_null` indicates there's modification or 
not since we got `_has_null()` last time. So in 
+    *  `_has_null()` we can check the two vars to know if there's need to 
update `has_null` or not.
+    * If you just want QUERY BUT NOT MODIFY, make sure the caller is const. 
There will be no perf overhead for const overload.
+    *  Otherwise, this class, as the base class, will make it no possible to 
directly visit `null_map` forgetting to change the
+    *  protected flags. Just call the interface is ok.
+    */
+    bool _need_update_has_null = true;
+    bool _has_null = true;
+
+private:
+    IColumn::WrappedPtr _null_map;
+};
+
 /// Class that specifies nullable columns. A nullable column represents
 /// a column, which may have any type, provided with the possibility of
 /// storing NULL values. For this purpose, a ColumnNullable object stores
@@ -59,7 +121,7 @@ using ConstNullMapPtr = const NullMap*;
 /// over a bitmap because columns are usually stored on disk as compressed
 /// files. In this regard, using a bitmap instead of a byte map would
 /// greatly complicate the implementation with little to no benefits.
-class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
+class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public 
NullMapProvider {
 private:
     friend class COWHelper<IColumn, ColumnNullable>;
 
@@ -89,10 +151,11 @@ public:
     std::string get_name() const override { return "Nullable(" + 
nested_column->get_name() + ")"; }
     MutableColumnPtr clone_resized(size_t size) const override;
     size_t size() const override {
-        return assert_cast<const ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map).size();
+        return assert_cast<const ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(get_null_map_column())
+                .size();
     }
     PURE bool is_null_at(size_t n) const override {
-        return assert_cast<const ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map)
+        return assert_cast<const ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(get_null_map_column())
                        .get_data()[n] != 0;
     }
     Field operator[](size_t n) const override;
@@ -141,8 +204,13 @@ public:
         assert_cast<ColumnType*>(nested_column.get())
                 ->insert_from(src_concrete.get_nested_column(), n);
         auto is_null = src_concrete.get_null_map_data()[n];
-        _has_null |= is_null;
-        _get_null_map_data().push_back(is_null);
+        if (is_null) {
+            get_null_map_data().push_back(1);
+            _has_null = true;
+            _need_update_has_null = false;
+        } else {
+            _push_false_to_nullmap(1);
+        }
     }
 
     void insert_from_not_nullable(const IColumn& src, size_t n);
@@ -150,19 +218,19 @@ public:
     void insert_many_from_not_nullable(const IColumn& src, size_t position, 
size_t length);
 
     void insert_many_fix_len_data(const char* pos, size_t num) override {
-        _get_null_map_column().insert_many_vals(0, num);
+        _push_false_to_nullmap(num);
         get_nested_column().insert_many_fix_len_data(pos, num);
     }
 
     void insert_many_raw_data(const char* pos, size_t num) override {
         DCHECK(pos);
-        _get_null_map_column().insert_many_vals(0, num);
+        _push_false_to_nullmap(num);
         get_nested_column().insert_many_raw_data(pos, num);
     }
 
     void insert_many_dict_data(const int32_t* data_array, size_t start_index, 
const StringRef* dict,
                                size_t data_num, uint32_t dict_num) override {
-        _get_null_map_column().insert_many_vals(0, data_num);
+        _push_false_to_nullmap(data_num);
         get_nested_column().insert_many_dict_data(data_array, start_index, 
dict, data_num,
                                                   dict_num);
     }
@@ -172,38 +240,40 @@ public:
         if (UNLIKELY(num == 0)) {
             return;
         }
-        _get_null_map_column().insert_many_vals(0, num);
+        _push_false_to_nullmap(num);
         get_nested_column().insert_many_continuous_binary_data(data, offsets, 
num);
     }
 
     void insert_many_binary_data(char* data_array, uint32_t* len_array,
                                  uint32_t* start_offset_array, size_t num) 
override {
-        _get_null_map_column().insert_many_vals(0, num);
+        _push_false_to_nullmap(num);
         get_nested_column().insert_many_binary_data(data_array, len_array, 
start_offset_array, num);
     }
 
     void insert_default() override {
         get_nested_column().insert_default();
-        _get_null_map_data().push_back(1);
+        get_null_map_data().push_back(1);
         _has_null = true;
+        _need_update_has_null = false;
     }
 
     void insert_many_defaults(size_t length) override {
         get_nested_column().insert_many_defaults(length);
-        _get_null_map_data().resize_fill(_get_null_map_data().size() + length, 
1);
+        get_null_map_data().resize_fill(get_null_map_data().size() + length, 
1);
         _has_null = true;
+        _need_update_has_null = false;
     }
 
     void insert_not_null_elements(size_t num) {
         get_nested_column().insert_many_defaults(num);
-        _get_null_map_column().insert_many_vals(0, num);
-        _has_null = false;
+        _push_false_to_nullmap(num);
     }
 
     void insert_null_elements(int num) {
         get_nested_column().insert_many_defaults(num);
-        _get_null_map_column().insert_many_vals(1, num);
+        get_null_map_column().insert_many_vals(1, num);
         _has_null = true;
+        _need_update_has_null = false;
     }
 
     void pop_back(size_t n) override;
@@ -255,7 +325,7 @@ public:
 
     void for_each_subcolumn(ColumnCallback callback) override {
         callback(nested_column);
-        callback(null_map);
+        callback(get_null_map());
     }
 
     bool structure_equals(const IColumn& rhs) const override {
@@ -281,11 +351,13 @@ public:
     bool is_fixed_and_contiguous() const override { return false; }
 
     bool is_exclusive() const override {
-        return IColumn::is_exclusive() && nested_column->is_exclusive() && 
null_map->is_exclusive();
+        return IColumn::is_exclusive() && nested_column->is_exclusive() &&
+               get_null_map_column().is_exclusive();
     }
 
     size_t size_of_value_if_fixed() const override {
-        return null_map->size_of_value_if_fixed() + 
nested_column->size_of_value_if_fixed();
+        return get_null_map_column().size_of_value_if_fixed() +
+               nested_column->size_of_value_if_fixed();
     }
 
     bool only_null() const override { return size() == 1 && is_null_at(0); }
@@ -301,32 +373,12 @@ public:
 
     MutableColumnPtr get_nested_column_ptr() { return 
nested_column->assume_mutable(); }
 
-    /// Return the column that represents the byte map.
-    const ColumnPtr& get_null_map_column_ptr() const { return null_map; }
-
-    MutableColumnPtr get_null_map_column_ptr() {
-        _need_update_has_null = true;
-        return null_map->assume_mutable();
-    }
-
-    ColumnUInt8& get_null_map_column() {
-        _need_update_has_null = true;
-        return assert_cast<ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map);
-    }
-    const ColumnUInt8& get_null_map_column() const {
-        return assert_cast<const ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map);
-    }
-
     void clear() override {
-        null_map->clear();
+        clear_null_map();
         nested_column->clear();
         _has_null = false;
     }
 
-    NullMap& get_null_map_data() { return get_null_map_column().get_data(); }
-
-    const NullMap& get_null_map_data() const { return 
get_null_map_column().get_data(); }
-
     /// Apply the null byte map of a specified nullable column onto the
     /// null byte map of the current column by performing an element-wise OR
     /// between both byte maps. This method is used to determine the null byte
@@ -352,7 +404,8 @@ public:
         DCHECK(size() > self_row);
         const auto& nullable_rhs =
                 assert_cast<const ColumnNullable&, 
TypeCheckOnRelease::DISABLE>(rhs);
-        null_map->replace_column_data(*nullable_rhs.null_map, row, self_row);
+        
get_null_map_column().replace_column_data(nullable_rhs.get_null_map_column(), 
row,
+                                                  self_row);
 
         if (!nullable_rhs.is_null_at(row)) {
             nested_column->replace_column_data(*nullable_rhs.nested_column, 
row, self_row);
@@ -413,21 +466,15 @@ public:
     }
 
 private:
-    // the two functions will not update `_need_update_has_null`
-    ColumnUInt8& _get_null_map_column() {
-        return assert_cast<ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map);
-    }
-    NullMap& _get_null_map_data() { return _get_null_map_column().get_data(); }
-
-    WrappedPtr nested_column;
-    WrappedPtr null_map;
-
-    bool _need_update_has_null = true;
-    bool _has_null = true;
-
     void _update_has_null();
+
     template <bool negative>
     void apply_null_map_impl(const ColumnUInt8& map);
+
+    // push not null value wouldn't change the nullity. no need to update 
_has_null
+    void _push_false_to_nullmap(size_t num) { 
get_null_map_column(false).insert_many_vals(0, num); }
+
+    WrappedPtr nested_column;
 };
 
 ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable = false);
diff --git a/be/test/vec/columns/column_nullable_seriazlization_test.cpp 
b/be/test/vec/columns/column_nullable_seriazlization_test.cpp
index 635916328ba..70702369cd3 100644
--- a/be/test/vec/columns/column_nullable_seriazlization_test.cpp
+++ b/be/test/vec/columns/column_nullable_seriazlization_test.cpp
@@ -20,92 +20,19 @@
 
 #include <cstddef>
 #include <cstdlib>
-#include <type_traits>
 
+#include "column_nullable_test.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
 #include "vec/columns/columns_number.h"
 #include "vec/common/arena.h"
 #include "vec/common/string_ref.h"
-#include "vec/core/field.h"
 #include "vec/core/types.h"
-#include "vec/data_types/data_type.h"
-#include "vec/data_types/data_type_date_time.h"
-#include "vec/data_types/data_type_string.h"
 
 using namespace doris;
 using namespace doris::vectorized;
 
-static std::string generate_random_string(size_t max_length) {
-    std::srand(std::time(nullptr)); // use current time as seed for random 
generator
-
-    if (max_length == 0) {
-        return "";
-    }
-
-    auto randbyte = []() -> char {
-        // generate a random byte, in range [0x00, 0xFF]
-        return static_cast<char>(rand() % 256);
-    };
-
-    std::string str(max_length, 0);
-    std::generate_n(str.begin(), max_length, randbyte);
-
-    return str;
-}
-
-static MutableColumnPtr create_null_map(size_t input_rows_count, bool all_null 
= false,
-                                        bool all_not_null = false) {
-    std::srand(std::time(nullptr)); // use current time as seed for random 
generator
-    auto null_map = ColumnUInt8::create();
-    for (size_t i = 0; i < input_rows_count; ++i) {
-        if (all_null) {
-            null_map->insert(1);
-        } else if (all_not_null) {
-            null_map->insert(0);
-        } else {
-            null_map->insert(rand() % 2);
-        }
-    }
-    return null_map;
-}
-
-template <typename T>
-static MutableColumnPtr create_nested_column(size_t input_rows_count) {
-    MutableColumnPtr column;
-    if constexpr (std::is_integral_v<T>) {
-        column = ColumnVector<T>::create();
-    } else if constexpr (std::is_same_v<T, String>) {
-        column = ColumnString::create();
-    } else if constexpr (std::is_same_v<T, Decimal64>) {
-        column = ColumnDecimal64::create(0, 6);
-    }
-
-    for (size_t i = 0; i < input_rows_count; ++i) {
-        if constexpr (std::is_integral_v<T>) {
-            column->insert(rand() % std::numeric_limits<T>::max());
-        } else if constexpr (std::is_same_v<T, String>) {
-            column->insert(generate_random_string(rand() % 512));
-        } else if constexpr (std::is_same_v<T, Decimal64>) {
-            column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
-        } else {
-            throw std::runtime_error("Unsupported type");
-        }
-    }
-
-    return column;
-}
-
-template <typename T>
-static ColumnNullable::MutablePtr create_column_nullable(size_t 
input_rows_count,
-                                                         bool all_null = false,
-                                                         bool all_not_null = 
false) {
-    auto null_map = create_null_map(input_rows_count, all_null, all_not_null);
-    auto nested_column = create_nested_column<T>(input_rows_count);
-    return ColumnNullable::create(std::move(nested_column), 
std::move(null_map));
-}
-
 TEST(ColumnNullableSerializationTest, column_nullable_column_vector) {
     const size_t input_rows_count = 4096 * 1000;
     ColumnNullable::Ptr column_nullable = 
create_column_nullable<Int64>(input_rows_count);
diff --git a/be/test/vec/columns/column_nullable_test.cpp 
b/be/test/vec/columns/column_nullable_test.cpp
new file mode 100644
index 00000000000..9e66d5bb302
--- /dev/null
+++ b/be/test/vec/columns/column_nullable_test.cpp
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_nullable.h"
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include "column_nullable_test.h"
+#include "common/status.h"
+#include "runtime/define_primitive_type.h"
+#include "vec/columns/columns_number.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+TEST(ColumnNullableTest, NullTest) {
+    ColumnNullable::MutablePtr null_col = create_column_nullable<Int64>(500, 
true);
+    EXPECT_TRUE(null_col->has_null());
+
+    ColumnNullable::MutablePtr dst_col =
+            ColumnNullable::create(create_nested_column<Int64>(10), 
ColumnUInt8::create(10));
+    EXPECT_FALSE(dst_col->has_null());
+
+    ColumnInt64::MutablePtr source_col = ColumnInt64::create();
+    source_col->insert_range_of_integer(0, 100);
+
+    dst_col->insert(Field());
+    EXPECT_TRUE(dst_col->has_null());
+    dst_col->clear();
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_many_from_not_nullable(*source_col, 0, 10);
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_from_not_nullable(*source_col, 5);
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_many_from_not_nullable(*source_col, 5, 5);
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_range_from_not_nullable(*source_col, 5, 5);
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_range_from(
+            *ColumnNullable::create(std::move(source_col), 
ColumnUInt8::create(10)), 5, 5);
+    EXPECT_FALSE(dst_col->has_null());
+
+    dst_col->clear();
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_null_elements(10);
+    EXPECT_TRUE(dst_col->has_null());
+
+    dst_col->clear();
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_from(*null_col, 100);
+    EXPECT_TRUE(dst_col->has_null());
+
+    auto tmp_col =
+            ColumnNullable::create(create_nested_column<Int64>(10), 
ColumnUInt8::create(10, 1));
+
+    dst_col->clear();
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_from(*tmp_col, 9);
+    EXPECT_TRUE(dst_col->has_null());
+
+    dst_col->clear();
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_range_from(*tmp_col, 0, 3);
+    EXPECT_TRUE(dst_col->has_null());
+
+    dst_col->clear();
+    EXPECT_FALSE(dst_col->has_null());
+    dst_col->insert_from(*tmp_col, 9);
+    EXPECT_TRUE(dst_col->has_null());
+}
+
+TEST(ColumnNullableTest, PredicateTest) {
+    auto nullable_pred =
+            ColumnNullable::create(PredicateColumnType<TYPE_DATE>::create(), 
ColumnUInt8::create());
+    nullable_pred->insert_many_defaults(3);
+    EXPECT_TRUE(nullable_pred->has_null());
+    nullable_pred->insert_null_elements(10);
+    EXPECT_TRUE(nullable_pred->has_null());
+
+    nullable_pred->clear();
+    EXPECT_FALSE(nullable_pred->has_null());
+    nullable_pred->insert_null_elements(10);
+    EXPECT_TRUE(nullable_pred->has_null()); // now it have 10 nulls
+
+    auto null_dst = ColumnNullable::create(ColumnDate::create(), 
ColumnUInt8::create());
+    EXPECT_FALSE(null_dst->has_null());
+
+    uint16_t selector[] = {5, 8}; // both null
+    EXPECT_EQ(nullable_pred->filter_by_selector(selector, 2, null_dst.get()), 
Status::OK());
+    // filter_by_selector must announce to update has_null to make below right.
+    EXPECT_TRUE(null_dst->has_null());
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/columns/column_nullable_test.h 
b/be/test/vec/columns/column_nullable_test.h
new file mode 100644
index 00000000000..0f90a25c9b5
--- /dev/null
+++ b/be/test/vec/columns/column_nullable_test.h
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock-more-matchers.h>
+#include <gtest/gtest.h>
+
+#include <cstddef>
+#include <cstdlib>
+#include <type_traits>
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_string.h"
+
+namespace doris::vectorized {
+
+inline std::string generate_random_string(size_t max_length) {
+    std::srand(std::time(nullptr)); // use current time as seed for random 
generator
+
+    if (max_length == 0) {
+        return "";
+    }
+
+    auto randbyte = []() -> char {
+        // generate a random byte, in range [0x00, 0xFF]
+        return static_cast<char>(rand() % 256);
+    };
+
+    std::string str(max_length, 0);
+    std::generate_n(str.begin(), max_length, randbyte);
+
+    return str;
+}
+
+inline MutableColumnPtr create_null_map(size_t input_rows_count, bool all_null 
= false,
+                                        bool all_not_null = false) {
+    std::srand(std::time(nullptr)); // use current time as seed for random 
generator
+    auto null_map = ColumnUInt8::create();
+    for (size_t i = 0; i < input_rows_count; ++i) {
+        if (all_null) {
+            null_map->insert(1);
+        } else if (all_not_null) {
+            null_map->insert(0);
+        } else {
+            null_map->insert(rand() % 2);
+        }
+    }
+    return null_map;
+}
+
+template <typename T>
+inline MutableColumnPtr create_nested_column(size_t input_rows_count) {
+    MutableColumnPtr column;
+    if constexpr (std::is_integral_v<T>) {
+        column = ColumnVector<T>::create();
+    } else if constexpr (std::is_same_v<T, String>) {
+        column = ColumnString::create();
+    } else if constexpr (std::is_same_v<T, Decimal64>) {
+        column = ColumnDecimal64::create(0, 6);
+    }
+
+    for (size_t i = 0; i < input_rows_count; ++i) {
+        if constexpr (std::is_integral_v<T>) {
+            column->insert(rand() % std::numeric_limits<T>::max());
+        } else if constexpr (std::is_same_v<T, String>) {
+            column->insert(generate_random_string(rand() % 512));
+        } else if constexpr (std::is_same_v<T, Decimal64>) {
+            column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
+        } else {
+            throw std::runtime_error("Unsupported type");
+        }
+    }
+
+    return column;
+}
+
+template <typename T>
+inline ColumnNullable::MutablePtr create_column_nullable(size_t 
input_rows_count,
+                                                         bool all_null = false,
+                                                         bool all_not_null = 
false) {
+    auto null_map = create_null_map(input_rows_count, all_null, all_not_null);
+    auto nested_column = create_nested_column<T>(input_rows_count);
+    return ColumnNullable::create(std::move(nested_column), 
std::move(null_map));
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/columns/column_resize_test.cpp 
b/be/test/vec/columns/column_resize_test.cpp
index dd6f78f3e8d..0b5f6ba183e 100644
--- a/be/test/vec/columns/column_resize_test.cpp
+++ b/be/test/vec/columns/column_resize_test.cpp
@@ -1,4 +1,3 @@
-
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
diff --git a/be/test/vec/function/function_test_util.h 
b/be/test/vec/function/function_test_util.h
index 62666d7c67c..e6ce662f64d 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -272,6 +272,14 @@ Status check_function(const std::string& func_name, const 
InputTypeSet& input_ty
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2;
     } else if (std::is_same_v<ReturnType, DateTimeV2>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2;
+    } else if (std::is_same_v<ReturnType, Decimal128V2>) {
+        fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMALV2;
+    } else if (std::is_same_v<ReturnType, Decimal32>) {
+        fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL32;
+    } else if (std::is_same_v<ReturnType, Decimal64>) {
+        fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL64;
+    } else if (std::is_same_v<ReturnType, Decimal128V3>) {
+        fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL128I;
     } else {
         fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE;
     }
diff --git a/regression-test/data/correctness/test_column_nullable_cache.out 
b/regression-test/data/correctness/test_column_nullable_cache.out
new file mode 100644
index 00000000000..eac491df212
--- /dev/null
+++ b/regression-test/data/correctness/test_column_nullable_cache.out
@@ -0,0 +1,6 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !test1 --
+
+-- !test2 --
+0
+
diff --git 
a/regression-test/suites/correctness/test_column_nullable_cache.groovy 
b/regression-test/suites/correctness/test_column_nullable_cache.groovy
new file mode 100644
index 00000000000..cf871c617aa
--- /dev/null
+++ b/regression-test/suites/correctness/test_column_nullable_cache.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_column_nullable_cache") {
+    sql """
+        drop table if exists test_column_nullable_cache;
+    """
+     sql """
+        CREATE TABLE `test_column_nullable_cache` (
+            `col_int_undef_signed2` int NULL,
+            `col_int_undef_signed` int NULL,
+            `col_int_undef_signed3` int NULL,
+            `col_int_undef_signed4` int NULL,
+            `pk` int NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`col_int_undef_signed2`)
+            DISTRIBUTED by RANDOM BUCKETS 10
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            );
+     """
+
+    sql """
+    insert into test_column_nullable_cache
+        
(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 
+        values 
(0,3,7164641,5,8),(1,null,3916062,5,6),(2,1,5533498,0,9),(3,7,2,null,7057679),(4,1,0,7,7),
+        
(5,null,4,2448564,1),(6,7531976,7324373,9,7),(7,3,1,1,3),(8,6,8131576,9,-1793807),(9,9,2,4214547,9),
+        
(10,-7299852,5,1,3),(11,7,3,-1036551,5),(12,-6108579,84823,4,1229534),(13,-1065629,5,4,null),(14,null,8072633,3328285,2),
+        
(15,2,7,6,6),(16,8,5,-4582103,1),(17,5,-4677722,-2379367,4),(18,-7807532,-6686732,0,5329341),
+        
(19,8,7,-4013246,-7013374),(20,0,2,9,2),(21,7,2383333,5,4),(22,5844611,2,2,0),(23,0,4756185,0,-5612039),
+        
(24,6,4878754,608172,0),(25,null,7858692,7,-6704206),(26,7,-1697597,6,9),(27,9,-7021349,3,-3094786),
+        
(28,2,2830915,null,8),(29,4133633,489212,5,9),(30,6,-3346211,3668768,2),(31,1,4862070,-5066405,0),(32,9,6,7,8),
+        
(33,2,null,4,2),(34,1,2893430,-3282825,5),(35,2,3,4,2),(36,4,-3418732,6,1263819),(37,5,4,-6342170,6),(99,9,2,8,null);
+    """
+
+    qt_test1 """
+        select * from test_column_nullable_cache where col_int_undef_signed3 
IS NULL and col_int_undef_signed3 = col_int_undef_signed3;
+    """
+
+    qt_test2 """
+        select count(*) from test_column_nullable_cache where 
col_int_undef_signed3 IS NULL and col_int_undef_signed3 = col_int_undef_signed3;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to