This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch refactor_rf
in repository https://gitbox.apache.org/repos/asf/doris.git

commit d9a559dc2f1c9af0e17520d9ec846afeacf49fba
Author: Gabriel <liwenqi...@selectdb.com>
AuthorDate: Mon Mar 3 12:30:13 2025 +0800

    [refactor](runtime filter) Refine null property in runtime filter (#48534)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
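    Problem Summary:
    
    Make the null-aware property of a filter fixed at construction time.
    `FilterBase` now receives `null_aware` through its constructor and keeps
    it in a `const` member, and `set_null_aware()` is removed.
    `set_contain_null()` now takes a `bool` and throws an INTERNAL_ERROR
    exception if the flag would be changed from true to false. The factory
    helpers (`create_set`, `create_string_value_set`, `create_minmax_filter`,
    `create_bloom_filter`) take a `null_aware` argument; the runtime filter
    wrapper passes `params->null_aware`, and the other callers shown in this
    diff pass an explicit constant.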
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
        https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/exprs/bitmapfilter_predicate.h              |   2 +-
 be/src/exprs/bloom_filter_func.h                   |  18 +--
 be/src/exprs/bloom_filter_func_adaptor.h           |   7 +-
 be/src/exprs/create_predicate_function.h           | 126 +++++++++++----------
 be/src/exprs/filter_base.h                         |  17 ++-
 be/src/exprs/hybrid_set.h                          |   9 +-
 be/src/exprs/minmax_predicate.h                    |   3 +-
 be/src/olap/in_list_predicate.h                    |   4 +-
 be/src/runtime_filter/runtime_filter_definitions.h |  22 +++-
 be/src/runtime_filter/runtime_filter_wrapper.cpp   |  21 ++--
 be/src/runtime_filter/runtime_filter_wrapper.h     |   2 +
 .../aggregate_function_group_array_intersect.h     |   7 +-
 be/src/vec/exec/format/orc/vorc_reader.cpp         |   2 +-
 .../exec/format/parquet/vparquet_group_reader.cpp  |   2 +-
 be/src/vec/exec/format/table/equality_delete.cpp   |   2 +-
 be/src/vec/functions/in.h                          |   9 +-
 be/test/olap/date_bloom_filter_test.cpp            |   8 +-
 .../runtime_filter/runtime_filter_wrapper_test.cpp |  64 +++++++++++
 18 files changed, 199 insertions(+), 126 deletions(-)

diff --git a/be/src/exprs/bitmapfilter_predicate.h 
b/be/src/exprs/bitmapfilter_predicate.h
index 98e839ac3a4..ba7c028d136 100644
--- a/be/src/exprs/bitmapfilter_predicate.h
+++ b/be/src/exprs/bitmapfilter_predicate.h
@@ -50,7 +50,7 @@ class BitmapFilterFunc : public BitmapFilterFuncBase {
 public:
     using CppType = typename PrimitiveTypeTraits<type>::CppType;
 
-    BitmapFilterFunc() : _bitmap_value(std::make_shared<BitmapValue>()) {}
+    BitmapFilterFunc(bool null_aware = false) : 
_bitmap_value(std::make_shared<BitmapValue>()) {}
 
     ~BitmapFilterFunc() override = default;
 
diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
index 9e92667a7a4..64123250e48 100644
--- a/be/src/exprs/bloom_filter_func.h
+++ b/be/src/exprs/bloom_filter_func.h
@@ -29,6 +29,7 @@ namespace doris {
 // Only Used In RuntimeFilter
 class BloomFilterFuncBase : public FilterBase {
 public:
+    BloomFilterFuncBase(bool null_aware) : FilterBase(null_aware) {}
     virtual ~BloomFilterFuncBase() = default;
 
     void init_params(const RuntimeFilterParams* params) {
@@ -37,7 +38,6 @@ public:
         _build_bf_by_runtime_size = params->build_bf_by_runtime_size;
         _runtime_bloom_filter_min_size = params->runtime_bloom_filter_min_size;
         _runtime_bloom_filter_max_size = params->runtime_bloom_filter_max_size;
-        _null_aware = params->null_aware;
         _bloom_filter_size_calculated_by_ndv = 
params->bloom_filter_size_calculated_by_ndv;
         _enable_fixed_len_to_uint32_v2 = params->enable_fixed_len_to_uint32_v2;
         _limit_length();
@@ -97,22 +97,16 @@ public:
                     "allocated bytes {}",
                     _bloom_filter_alloced, other->_bloom_filter_alloced);
         }
-        if (other->contain_null()) {
-            _bloom_filter->set_null_aware(true);
-            _bloom_filter->set_contain_null();
-        }
+        _bloom_filter->set_contain_null(other->contain_null());
         return _bloom_filter->merge(other->_bloom_filter.get());
     }
 
     Status assign(butil::IOBufAsZeroCopyInputStream* data, const size_t 
data_size,
                   bool contain_null) {
         if (_bloom_filter == nullptr) {
-            _null_aware = contain_null;
             _bloom_filter.reset(BloomFilterAdaptor::create(_null_aware));
         }
-        if (contain_null) {
-            _bloom_filter->set_contain_null();
-        }
+        _bloom_filter->set_contain_null(contain_null);
 
         _bloom_filter_alloced = data_size;
         return _bloom_filter->init(data, data_size);
@@ -173,16 +167,14 @@ protected:
 template <PrimitiveType type>
 class BloomFilterFunc final : public BloomFilterFuncBase {
 public:
+    BloomFilterFunc(bool null_aware) : BloomFilterFuncBase(null_aware) {}
     void insert_set(std::shared_ptr<HybridSetBase> set) override {
         if (_enable_fixed_len_to_uint32_v2) {
             OpV2::insert_set(*_bloom_filter, set);
         } else {
             Op::insert_set(*_bloom_filter, set);
         }
-        if (set->contain_null()) {
-            _bloom_filter->set_null_aware(true);
-            _bloom_filter->set_contain_null();
-        }
+        _bloom_filter->set_contain_null(set->contain_null());
     }
 
     void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start) 
override {
diff --git a/be/src/exprs/bloom_filter_func_adaptor.h 
b/be/src/exprs/bloom_filter_func_adaptor.h
index f1f2fe79fab..a4ff8c839f3 100644
--- a/be/src/exprs/bloom_filter_func_adaptor.h
+++ b/be/src/exprs/bloom_filter_func_adaptor.h
@@ -30,8 +30,7 @@ namespace doris {
 
 class BloomFilterAdaptor : public FilterBase {
 public:
-    BloomFilterAdaptor(bool null_aware) {
-        _null_aware = null_aware;
+    BloomFilterAdaptor(bool null_aware) : FilterBase(null_aware) {
         _bloom_filter = std::make_shared<doris::BlockBloomFilter>();
     }
 
@@ -103,7 +102,7 @@ struct CommonFindOp {
                 if (!nullmap[i]) {
                     
bloom_filter.add_element<fixed_len_to_uint32_method>(*(data + i));
                 } else {
-                    bloom_filter.set_contain_null();
+                    bloom_filter.set_contain_null(true);
                 }
             }
         } else {
@@ -166,7 +165,7 @@ struct StringFindOp : 
CommonFindOp<fixed_len_to_uint32_method, StringRef> {
                 if (nullmap == nullptr || !nullmap[i]) {
                     
bloom_filter.add_element<fixed_len_to_uint32_method>(col.get_data_at(i));
                 } else {
-                    bloom_filter.set_contain_null();
+                    bloom_filter.set_contain_null(true);
                 }
             }
         };
diff --git a/be/src/exprs/create_predicate_function.h 
b/be/src/exprs/create_predicate_function.h
index 7c702ad4ce6..bcb4ec49e75 100644
--- a/be/src/exprs/create_predicate_function.h
+++ b/be/src/exprs/create_predicate_function.h
@@ -35,10 +35,11 @@ class MinmaxFunctionTraits {
 public:
     using BasePtr = MinMaxFuncBase*;
     template <PrimitiveType type, size_t N>
-    static BasePtr get_function() {
+    static BasePtr get_function(bool null_aware) {
         using CppType = typename PrimitiveTypeTraits<type>::CppType;
         return new MinMaxNumFunc<
-                std::conditional_t<std::is_same_v<CppType, StringRef>, 
std::string, CppType>>();
+                std::conditional_t<std::is_same_v<CppType, StringRef>, 
std::string, CppType>>(
+                null_aware);
     }
 };
 
@@ -46,19 +47,19 @@ class HybridSetTraits {
 public:
     using BasePtr = HybridSetBase*;
     template <PrimitiveType type, size_t N>
-    static BasePtr get_function() {
+    static BasePtr get_function(bool null_aware) {
         using CppType = typename PrimitiveTypeTraits<type>::CppType;
         if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) {
             using Set = std::conditional_t<
                     std::is_same_v<CppType, StringRef>, StringSet<>,
                     HybridSet<type,
                               FixedContainer<typename 
PrimitiveTypeTraits<type>::CppType, N>>>;
-            return new Set();
+            return new Set(null_aware);
         } else {
             using Set = std::conditional_t<
                     std::is_same_v<CppType, StringRef>, StringSet<>,
                     HybridSet<type, DynamicContainer<typename 
PrimitiveTypeTraits<type>::CppType>>>;
-            return new Set();
+            return new Set(null_aware);
         }
     }
 };
@@ -67,8 +68,8 @@ class BloomFilterTraits {
 public:
     using BasePtr = BloomFilterFuncBase*;
     template <PrimitiveType type, size_t N>
-    static BasePtr get_function() {
-        return new BloomFilterFunc<type>();
+    static BasePtr get_function(bool null_aware) {
+        return new BloomFilterFunc<type>(null_aware);
     }
 };
 
@@ -76,8 +77,8 @@ class BitmapFilterTraits {
 public:
     using BasePtr = BitmapFilterFuncBase*;
     template <PrimitiveType type, size_t N>
-    static BasePtr get_function() {
-        return new BitmapFilterFunc<type>();
+    static BasePtr get_function(bool null_aware) {
+        return new BitmapFilterFunc<type>(null_aware);
     }
 };
 
@@ -85,8 +86,8 @@ template <class Traits>
 class PredicateFunctionCreator {
 public:
     template <PrimitiveType type, size_t N = 0>
-    static typename Traits::BasePtr create() {
-        return Traits::template get_function<type, N>();
+    static typename Traits::BasePtr create(bool null_aware) {
+        return Traits::template get_function<type, N>(null_aware);
     }
 };
 
@@ -113,19 +114,19 @@ public:
     M(TYPE_IPV6)
 
 template <class Traits, size_t N = 0>
-typename Traits::BasePtr create_predicate_function(PrimitiveType type) {
+typename Traits::BasePtr create_predicate_function(PrimitiveType type, bool 
null_aware) {
     using Creator = PredicateFunctionCreator<Traits>;
 
     switch (type) {
     case TYPE_BOOLEAN: {
-        return Creator::template create<TYPE_BOOLEAN, N>();
+        return Creator::template create<TYPE_BOOLEAN, N>(null_aware);
     }
     case TYPE_DECIMALV2: {
-        return Creator::template create<TYPE_DECIMALV2, N>();
+        return Creator::template create<TYPE_DECIMALV2, N>(null_aware);
     }
-#define M(NAME)                                     \
-    case NAME: {                                    \
-        return Creator::template create<NAME, N>(); \
+#define M(NAME)                                               \
+    case NAME: {                                              \
+        return Creator::template create<NAME, N>(null_aware); \
     }
         APPLY_FOR_PRIMTYPE(M)
 #undef M
@@ -142,13 +143,13 @@ typename Traits::BasePtr 
create_bitmap_predicate_function(PrimitiveType type) {
 
     switch (type) {
     case TYPE_TINYINT:
-        return Creator::template create<TYPE_TINYINT>();
+        return Creator::template create<TYPE_TINYINT>(false);
     case TYPE_SMALLINT:
-        return Creator::template create<TYPE_SMALLINT>();
+        return Creator::template create<TYPE_SMALLINT>(false);
     case TYPE_INT:
-        return Creator::template create<TYPE_INT>();
+        return Creator::template create<TYPE_INT>(false);
     case TYPE_BIGINT:
-        return Creator::template create<TYPE_BIGINT>();
+        return Creator::template create<TYPE_BIGINT>(false);
     default:
         throw Exception(ErrorCode::INTERNAL_ERROR,
                         "bitmap predicate with type " + type_to_string(type));
@@ -157,72 +158,72 @@ typename Traits::BasePtr 
create_bitmap_predicate_function(PrimitiveType type) {
     return nullptr;
 }
 
-inline auto create_minmax_filter(PrimitiveType type) {
-    return create_predicate_function<MinmaxFunctionTraits>(type);
+inline auto create_minmax_filter(PrimitiveType type, bool null_aware) {
+    return create_predicate_function<MinmaxFunctionTraits>(type, null_aware);
 }
 
 template <size_t N = 0>
-inline auto create_set(PrimitiveType type) {
-    return create_predicate_function<HybridSetTraits, N>(type);
+inline auto create_set(PrimitiveType type, bool null_aware) {
+    return create_predicate_function<HybridSetTraits, N>(type, null_aware);
 }
 
-inline auto create_set(PrimitiveType type, size_t size) {
+inline auto create_set(PrimitiveType type, size_t size, bool null_aware) {
     if (size == 0) {
-        return create_set<0>(type);
+        return create_set<0>(type, null_aware);
     } else if (size == 1) {
-        return create_set<1>(type);
+        return create_set<1>(type, null_aware);
     } else if (size == 2) {
-        return create_set<2>(type);
+        return create_set<2>(type, null_aware);
     } else if (size == 3) {
-        return create_set<3>(type);
+        return create_set<3>(type, null_aware);
     } else if (size == 4) {
-        return create_set<4>(type);
+        return create_set<4>(type, null_aware);
     } else if (size == 5) {
-        return create_set<5>(type);
+        return create_set<5>(type, null_aware);
     } else if (size == 6) {
-        return create_set<6>(type);
+        return create_set<6>(type, null_aware);
     } else if (size == 7) {
-        return create_set<7>(type);
+        return create_set<7>(type, null_aware);
     } else if (size == FIXED_CONTAINER_MAX_SIZE) {
-        return create_set<FIXED_CONTAINER_MAX_SIZE>(type);
+        return create_set<FIXED_CONTAINER_MAX_SIZE>(type, null_aware);
     } else {
-        return create_set(type);
+        return create_set(type, null_aware);
     }
 }
 
 template <size_t N = 0>
-inline HybridSetBase* create_string_value_set() {
+inline HybridSetBase* create_string_value_set(bool null_aware) {
     if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) {
-        return new StringValueSet<FixedContainer<StringRef, N>>();
+        return new StringValueSet<FixedContainer<StringRef, N>>(null_aware);
     } else {
-        return new StringValueSet();
+        return new StringValueSet(null_aware);
     }
 }
 
-inline HybridSetBase* create_string_value_set(size_t size) {
+inline HybridSetBase* create_string_value_set(size_t size, bool null_aware) {
     if (size == 1) {
-        return create_string_value_set<1>();
+        return create_string_value_set<1>(null_aware);
     } else if (size == 2) {
-        return create_string_value_set<2>();
+        return create_string_value_set<2>(null_aware);
     } else if (size == 3) {
-        return create_string_value_set<3>();
+        return create_string_value_set<3>(null_aware);
     } else if (size == 4) {
-        return create_string_value_set<4>();
+        return create_string_value_set<4>(null_aware);
     } else if (size == 5) {
-        return create_string_value_set<5>();
+        return create_string_value_set<5>(null_aware);
     } else if (size == 6) {
-        return create_string_value_set<6>();
+        return create_string_value_set<6>(null_aware);
     } else if (size == 7) {
-        return create_string_value_set<7>();
+        return create_string_value_set<7>(null_aware);
     } else if (size == FIXED_CONTAINER_MAX_SIZE) {
-        return create_string_value_set<FIXED_CONTAINER_MAX_SIZE>();
+        return create_string_value_set<FIXED_CONTAINER_MAX_SIZE>(null_aware);
     } else {
-        return create_string_value_set();
+        return create_string_value_set(null_aware);
     }
 }
 
-inline auto create_bloom_filter(PrimitiveType type) {
-    return create_predicate_function<BloomFilterTraits>(type);
+inline auto create_bloom_filter(PrimitiveType type, bool null_aware) {
+    return create_predicate_function<BloomFilterTraits>(type, null_aware);
 }
 
 inline auto create_bitmap_filter(PrimitiveType type) {
@@ -232,9 +233,9 @@ inline auto create_bitmap_filter(PrimitiveType type) {
 template <PrimitiveType PT>
 ColumnPredicate* create_olap_column_predicate(uint32_t column_id,
                                               const 
std::shared_ptr<BloomFilterFuncBase>& filter,
-                                              const TabletColumn*) {
+                                              const TabletColumn*, bool 
null_aware) {
     std::shared_ptr<BloomFilterFuncBase> filter_olap;
-    filter_olap.reset(create_bloom_filter(PT));
+    filter_olap.reset(create_bloom_filter(PT, null_aware));
     filter_olap->light_copy(filter.get());
     // create a new filter to match the input filter and PT. For example, 
filter may be varchar, but PT is char
     return new BloomFilterColumnPredicate<PT>(column_id, filter_olap);
@@ -243,7 +244,7 @@ ColumnPredicate* create_olap_column_predicate(uint32_t 
column_id,
 template <PrimitiveType PT>
 ColumnPredicate* create_olap_column_predicate(uint32_t column_id,
                                               const 
std::shared_ptr<BitmapFilterFuncBase>& filter,
-                                              const TabletColumn*) {
+                                              const TabletColumn*, bool) {
     if constexpr (PT == TYPE_TINYINT || PT == TYPE_SMALLINT || PT == TYPE_INT 
||
                   PT == TYPE_BIGINT) {
         return new BitmapFilterColumnPredicate<PT>(column_id, filter);
@@ -255,7 +256,7 @@ ColumnPredicate* create_olap_column_predicate(uint32_t 
column_id,
 template <PrimitiveType PT>
 ColumnPredicate* create_olap_column_predicate(uint32_t column_id,
                                               const 
std::shared_ptr<HybridSetBase>& filter,
-                                              const TabletColumn* column = 
nullptr) {
+                                              const TabletColumn* column, 
bool) {
     return create_in_list_predicate<PT, PredicateType::IN_LIST>(column_id, 
filter,
                                                                 
column->length());
 }
@@ -263,7 +264,7 @@ ColumnPredicate* create_olap_column_predicate(uint32_t 
column_id,
 template <PrimitiveType PT>
 ColumnPredicate* create_olap_column_predicate(uint32_t column_id,
                                               const 
std::shared_ptr<FunctionFilter>& filter,
-                                              const TabletColumn* column = 
nullptr) {
+                                              const TabletColumn* column, 
bool) {
     // currently only support like predicate
     if constexpr (PT == TYPE_CHAR) {
         return new LikeColumnPredicate<TYPE_CHAR>(filter->_opposite, 
column_id, filter->_fn_ctx,
@@ -277,19 +278,20 @@ ColumnPredicate* create_olap_column_predicate(uint32_t 
column_id,
 
 template <typename T>
 ColumnPredicate* create_column_predicate(uint32_t column_id, const 
std::shared_ptr<T>& filter,
-                                         FieldType type, const TabletColumn* 
column = nullptr) {
+                                         FieldType type, const TabletColumn* 
column,
+                                         bool null_aware = false) {
     switch (type) {
-#define M(NAME)                                                               \
-    case FieldType::OLAP_FIELD_##NAME: {                                      \
-        return create_olap_column_predicate<NAME>(column_id, filter, column); \
+#define M(NAME)                                                                
           \
+    case FieldType::OLAP_FIELD_##NAME: {                                       
           \
+        return create_olap_column_predicate<NAME>(column_id, filter, column, 
null_aware); \
     }
         APPLY_FOR_PRIMTYPE(M)
 #undef M
     case FieldType::OLAP_FIELD_TYPE_DECIMAL: {
-        return create_olap_column_predicate<TYPE_DECIMALV2>(column_id, filter, 
column);
+        return create_olap_column_predicate<TYPE_DECIMALV2>(column_id, filter, 
column, null_aware);
     }
     case FieldType::OLAP_FIELD_TYPE_BOOL: {
-        return create_olap_column_predicate<TYPE_BOOLEAN>(column_id, filter, 
column);
+        return create_olap_column_predicate<TYPE_BOOLEAN>(column_id, filter, 
column, null_aware);
     }
     default:
         return nullptr;
diff --git a/be/src/exprs/filter_base.h b/be/src/exprs/filter_base.h
index 2a563b4d64c..5534dc75589 100644
--- a/be/src/exprs/filter_base.h
+++ b/be/src/exprs/filter_base.h
@@ -17,19 +17,28 @@
 
 #pragma once
 
+#include "common/exception.h"
+
 namespace doris {
 
 class FilterBase {
 public:
+    FilterBase(bool null_aware) : _null_aware(null_aware) {}
     bool contain_null() const { return _null_aware && _contain_null; }
 
-    void set_contain_null() { _contain_null = true; }
-
-    void set_null_aware(bool null_aware) { _null_aware = null_aware; }
+    void set_contain_null(bool contain_null) {
+        if (_contain_null && !contain_null) {
+            throw Exception(ErrorCode::INTERNAL_ERROR,
+                            "contain_null cannot be changed from true to 
false");
+        }
+        _contain_null = contain_null;
+    }
 
 protected:
+    // Indicates whether a null datum exists to build this filter.
     bool _contain_null = false;
-    bool _null_aware = false;
+    // Indicates whether this filter is null-aware.
+    const bool _null_aware = false;
 };
 
 } // namespace doris
diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h
index c202226551d..1358c479dca 100644
--- a/be/src/exprs/hybrid_set.h
+++ b/be/src/exprs/hybrid_set.h
@@ -192,7 +192,7 @@ private:
 // TODO Maybe change void* parameter to template parameter better.
 class HybridSetBase : public FilterBase {
 public:
-    HybridSetBase() = default;
+    HybridSetBase(bool null_aware) : FilterBase(null_aware) {}
     virtual ~HybridSetBase() = default;
     virtual void insert(const void* data) = 0;
     // use in vectorize execute engine
@@ -252,8 +252,7 @@ public:
     using ElementType = typename ContainerType::ElementType;
     using ColumnType = _ColumnType;
 
-    HybridSet() = default;
-
+    HybridSet(bool null_aware) : HybridSetBase(null_aware) {}
     ~HybridSet() override = default;
 
     void insert(const void* data) override {
@@ -387,7 +386,7 @@ class StringSet : public HybridSetBase {
 public:
     using ContainerType = _ContainerType;
 
-    StringSet() = default;
+    StringSet(bool null_aware) : HybridSetBase(null_aware) {}
 
     ~StringSet() override = default;
 
@@ -556,7 +555,7 @@ class StringValueSet : public HybridSetBase {
 public:
     using ContainerType = _ContainerType;
 
-    StringValueSet() = default;
+    StringValueSet(bool null_aware) : HybridSetBase(null_aware) {}
 
     ~StringValueSet() override = default;
 
diff --git a/be/src/exprs/minmax_predicate.h b/be/src/exprs/minmax_predicate.h
index 73cc6d8eb7f..832411fd77c 100644
--- a/be/src/exprs/minmax_predicate.h
+++ b/be/src/exprs/minmax_predicate.h
@@ -29,6 +29,7 @@ namespace doris {
 // only used in Runtime Filter
 class MinMaxFuncBase : public FilterBase {
 public:
+    MinMaxFuncBase(bool null_aware) : FilterBase(null_aware) {}
     virtual void insert_fixed_len(const vectorized::ColumnPtr& column, size_t 
start) = 0;
     virtual void* get_max() = 0;
     virtual void* get_min() = 0;
@@ -44,7 +45,7 @@ public:
 template <class T, bool NeedMax = true, bool NeedMin = true>
 class MinMaxNumFunc : public MinMaxFuncBase {
 public:
-    MinMaxNumFunc() = default;
+    MinMaxNumFunc(bool null_aware) : MinMaxFuncBase(null_aware) {}
     ~MinMaxNumFunc() override = default;
 
     void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start) 
override {
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 08224df74e7..f5ed4e81da9 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -71,7 +71,7 @@ public:
             : ColumnPredicate(column_id, is_opposite),
               _min_value(type_limit<T>::max()),
               _max_value(type_limit<T>::min()) {
-        _values = std::make_shared<HybridSetType>();
+        _values = std::make_shared<HybridSetType>(false);
         for (const auto& condition : conditions) {
             T tmp;
             if constexpr (Type == TYPE_STRING || Type == TYPE_CHAR) {
@@ -95,7 +95,7 @@ public:
         CHECK(hybrid_set != nullptr);
 
         if constexpr (is_string_type(Type) || Type == TYPE_DECIMALV2 || 
is_date_type(Type)) {
-            _values = std::make_shared<HybridSetType>();
+            _values = std::make_shared<HybridSetType>(false);
             if constexpr (is_string_type(Type)) {
                 HybridSetBase::IteratorBase* iter = hybrid_set->begin();
                 while (iter->has_next()) {
diff --git a/be/src/runtime_filter/runtime_filter_definitions.h 
b/be/src/runtime_filter/runtime_filter_definitions.h
index 47dcbf567c0..ed732b99532 100644
--- a/be/src/runtime_filter/runtime_filter_definitions.h
+++ b/be/src/runtime_filter/runtime_filter_definitions.h
@@ -36,19 +36,29 @@ enum class RuntimeFilterType {
 };
 
 struct RuntimeFilterParams {
+    // Filter ID
+    int32_t filter_id {};
+    // Filter type
     RuntimeFilterType filter_type {};
+    // Data type of build column
     PrimitiveType column_return_type {};
-    int64_t bloom_filter_size {};
+    bool null_aware {};
+    bool enable_fixed_len_to_uint32_v2 {};
+
+    // The max limitation of in-set
     int32_t max_in_num {};
+    // The min size limitation of bloom filter
     int64_t runtime_bloom_filter_min_size {};
+    // The max size limitation of bloom filter
     int64_t runtime_bloom_filter_max_size {};
-    int32_t filter_id {};
-    bool bitmap_filter_not_in {};
+    // Size of bloom filter which is estimated by FE using NDV
+    int64_t bloom_filter_size {};
+    // Whether a runtime size is used to build bloom filter
     bool build_bf_by_runtime_size {};
-
+    // Whether an estimated size by NDV is used to build bloom filter
     bool bloom_filter_size_calculated_by_ndv {};
-    bool null_aware {};
-    bool enable_fixed_len_to_uint32_v2 {};
+
+    bool bitmap_filter_not_in {};
 };
 
 class MinMaxFuncBase;
diff --git a/be/src/runtime_filter/runtime_filter_wrapper.cpp 
b/be/src/runtime_filter/runtime_filter_wrapper.cpp
index f07934de6a7..99889ce2b72 100644
--- a/be/src/runtime_filter/runtime_filter_wrapper.cpp
+++ b/be/src/runtime_filter/runtime_filter_wrapper.cpp
@@ -27,30 +27,27 @@ RuntimeFilterWrapper::RuntimeFilterWrapper(const 
RuntimeFilterParams* params)
                                State::UNINITED, params->max_in_num) {
     switch (_filter_type) {
     case RuntimeFilterType::IN_FILTER: {
-        _hybrid_set.reset(create_set(_column_return_type));
-        _hybrid_set->set_null_aware(params->null_aware);
+        _hybrid_set.reset(create_set(_column_return_type, params->null_aware));
         return;
     }
     // Only use in nested loop join not need set null aware
     case RuntimeFilterType::MIN_FILTER:
     case RuntimeFilterType::MAX_FILTER: {
-        _minmax_func.reset(create_minmax_filter(_column_return_type));
+        _minmax_func.reset(create_minmax_filter(_column_return_type, 
params->null_aware));
         return;
     }
     case RuntimeFilterType::MINMAX_FILTER: {
-        _minmax_func.reset(create_minmax_filter(_column_return_type));
-        _minmax_func->set_null_aware(params->null_aware);
+        _minmax_func.reset(create_minmax_filter(_column_return_type, 
params->null_aware));
         return;
     }
     case RuntimeFilterType::BLOOM_FILTER: {
-        _bloom_filter_func.reset(create_bloom_filter(_column_return_type));
+        _bloom_filter_func.reset(create_bloom_filter(_column_return_type, 
params->null_aware));
         _bloom_filter_func->init_params(params);
         return;
     }
     case RuntimeFilterType::IN_OR_BLOOM_FILTER: {
-        _hybrid_set.reset(create_set(_column_return_type));
-        _hybrid_set->set_null_aware(params->null_aware);
-        _bloom_filter_func.reset(create_bloom_filter(_column_return_type));
+        _hybrid_set.reset(create_set(_column_return_type, params->null_aware));
+        _bloom_filter_func.reset(create_bloom_filter(_column_return_type, 
params->null_aware));
         _bloom_filter_func->init_params(params);
         return;
     }
@@ -231,7 +228,6 @@ Status RuntimeFilterWrapper::merge(const 
RuntimeFilterWrapper* other) {
 
 Status RuntimeFilterWrapper::_assign(const PInFilter& in_filter, bool 
contain_null) {
     if (contain_null) {
-        _hybrid_set->set_null_aware(true);
         _hybrid_set->insert((const void*)nullptr);
     }
 
@@ -417,10 +413,7 @@ Status RuntimeFilterWrapper::_assign(const PBloomFilter& 
bloom_filter,
 }
 
 Status RuntimeFilterWrapper::_assign(const PMinMaxFilter& minmax_filter, bool 
contain_null) {
-    if (contain_null) {
-        _minmax_func->set_null_aware(true);
-        _minmax_func->set_contain_null();
-    }
+    _minmax_func->set_contain_null(contain_null);
 
     switch (_column_return_type) {
     case TYPE_BOOLEAN: {
diff --git a/be/src/runtime_filter/runtime_filter_wrapper.h 
b/be/src/runtime_filter/runtime_filter_wrapper.h
index 98de97ea0c4..baf02e92e3c 100644
--- a/be/src/runtime_filter/runtime_filter_wrapper.h
+++ b/be/src/runtime_filter/runtime_filter_wrapper.h
@@ -40,7 +40,9 @@ public:
         DISABLED // This state indicates that the rf is deprecated, used in 
cases such as reach max_in_num / join spill / meet rpc error
     };
 
+    // Called by consumer / producer
     RuntimeFilterWrapper(const RuntimeFilterParams* params);
+    // Called by merger
     RuntimeFilterWrapper(PrimitiveType column_type, RuntimeFilterType type, 
uint32_t filter_id,
                          State state, int max_in_num = 0)
             : _column_return_type(column_type),
diff --git 
a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h 
b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h
index e925453d16c..aef6e7c809f 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h
@@ -77,7 +77,10 @@ class NullableNumericOrDateSet : public 
HybridSet<type_to_primitive_type<T>(),
                                                   DynamicContainer<typename 
PrimitiveTypeTraits<
                                                           
type_to_primitive_type<T>()>::CppType>> {
 public:
-    NullableNumericOrDateSet() { this->_null_aware = true; }
+    NullableNumericOrDateSet()
+            : HybridSet<type_to_primitive_type<T>(),
+                        DynamicContainer<typename PrimitiveTypeTraits<
+                                type_to_primitive_type<T>()>::CppType>>(true) 
{}
 
     void change_contain_null_value(bool target_value) { this->_contain_null = 
target_value; }
 };
@@ -322,7 +325,7 @@ public:
 /// Generic implementation, it uses serialized representation as object 
descriptor.
 class NullableStringSet : public StringValueSet<DynamicContainer<StringRef>> {
 public:
-    NullableStringSet() { this->_null_aware = true; }
+    NullableStringSet() : StringValueSet<DynamicContainer<StringRef>>(true) {}
 
     void change_contain_null_value(bool target_value) { this->_contain_null = 
target_value; }
 };
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 5a1b2ceaaf9..909a89116e8 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -2535,7 +2535,7 @@ Status OrcReader::_rewrite_dict_conjuncts(std::vector<int32_t>& dict_codes, int
             node.__set_is_nullable(false);
 
             std::shared_ptr<HybridSetBase> hybrid_set(
-                    create_set(PrimitiveType::TYPE_INT, dict_codes.size()));
+                    create_set(PrimitiveType::TYPE_INT, dict_codes.size(), false));
             for (int& dict_code : dict_codes) {
                 hybrid_set->insert(&dict_code);
             }
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 830d74acf8b..17697eaddab 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -976,7 +976,7 @@ Status RowGroupReader::_rewrite_dict_conjuncts(std::vector<int32_t>& dict_codes,
             node.__set_is_nullable(false);
 
             std::shared_ptr<HybridSetBase> hybrid_set(
-                    create_set(PrimitiveType::TYPE_INT, dict_codes.size()));
+                    create_set(PrimitiveType::TYPE_INT, dict_codes.size(), false));
             for (int j = 0; j < dict_codes.size(); ++j) {
                 hybrid_set->insert(&dict_codes[j]);
             }
diff --git a/be/src/vec/exec/format/table/equality_delete.cpp b/be/src/vec/exec/format/table/equality_delete.cpp
index bf31f81548c..6c9c6ddb412 100644
--- a/be/src/vec/exec/format/table/equality_delete.cpp
+++ b/be/src/vec/exec/format/table/equality_delete.cpp
@@ -38,7 +38,7 @@ Status SimpleEqualityDelete::_build_set() {
     auto& column_and_type = _delete_block->get_by_position(0);
     _delete_column_name = column_and_type.name;
     _delete_column_type = remove_nullable(column_and_type.type)->get_type_as_type_descriptor().type;
-    _hybrid_set.reset(create_set(_delete_column_type, _delete_block->rows()));
+    _hybrid_set.reset(create_set(_delete_column_type, _delete_block->rows(), false));
     _hybrid_set->insert_fixed_len(column_and_type.column, 0);
     return Status::OK();
 }
diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h
index 6f697ba7441..4a588353d58 100644
--- a/be/src/vec/functions/in.h
+++ b/be/src/vec/functions/in.h
@@ -109,17 +109,16 @@ public:
         context->set_function_state(scope, state);
         DCHECK(context->get_num_args() >= 1);
         if (context->get_arg_type(0)->type == PrimitiveType::TYPE_NULL) {
-            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0));
+            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
         } else if (context->get_arg_type(0)->type == PrimitiveType::TYPE_CHAR ||
                    context->get_arg_type(0)->type == PrimitiveType::TYPE_VARCHAR ||
                    context->get_arg_type(0)->type == PrimitiveType::TYPE_STRING) {
             // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
             state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
         } else {
-            state->hybrid_set.reset(
-                    create_set(context->get_arg_type(0)->type, get_size_with_out_null(context)));
+            state->hybrid_set.reset(create_set(context->get_arg_type(0)->type,
+                                               get_size_with_out_null(context), true));
         }
-        state->hybrid_set->set_null_aware(true);
 
         for (int i = 1; i < context->get_num_args(); ++i) {
             const auto& const_column_ptr = context->get_constant_col(i);
@@ -337,7 +336,7 @@ private:
                 }
             }
             std::unique_ptr<HybridSetBase> hybrid_set(
-                    create_set(context->get_arg_type(0)->type, set_datas.size()));
+                    create_set(context->get_arg_type(0)->type, set_datas.size(), true));
             for (auto& set_data : set_datas) {
                 hybrid_set->insert((void*)(set_data.data), set_data.size);
             }
diff --git a/be/test/olap/date_bloom_filter_test.cpp b/be/test/olap/date_bloom_filter_test.cpp
index cf869bf82e3..c24b1abb51c 100644
--- a/be/test/olap/date_bloom_filter_test.cpp
+++ b/be/test/olap/date_bloom_filter_test.cpp
@@ -241,7 +241,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) {
 
         // Test positive cases
         auto test_positive = [&](const std::vector<std::string>& values, bool result) {
-            auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>();
+            auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>(false);
             for (const auto& value : values) {
                 auto v = timestamp_from_date(value);
                 hybrid_set->insert(&v);
@@ -259,7 +259,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) {
         test_positive({"2024-11-09"}, true);
 
         auto test_negative = [&](const std::vector<std::string>& values, bool result) {
-            auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>();
+            auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>(false);
 
             for (const auto& value : values) {
                 auto v = timestamp_from_date(value);
@@ -291,7 +291,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) {
 
         // Test positive cases
         auto test_positive = [&](const std::vector<std::string>& values, bool result) {
-            auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>();
+            auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>(false);
             for (const auto& value : values) {
                 auto v = timestamp_from_datetime(value);
                 hybrid_set->insert(&v);
@@ -310,7 +310,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) {
 
         // Test negative cases
         auto test_negative = [&](const std::vector<std::string>& values, bool result) {
-            auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>();
+            auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>(false);
             for (const auto& value : values) {
                 auto v = timestamp_from_datetime(value);
                 hybrid_set->insert(&v);
diff --git a/be/test/runtime_filter/runtime_filter_wrapper_test.cpp b/be/test/runtime_filter/runtime_filter_wrapper_test.cpp
new file mode 100644
index 00000000000..963bbfc223b
--- /dev/null
+++ b/be/test/runtime_filter/runtime_filter_wrapper_test.cpp
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime_filter/runtime_filter_wrapper.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+namespace doris {
+
+class RuntimeFilterWrapperTest : public testing::Test {
+public:
+    RuntimeFilterWrapperTest() = default;
+    ~RuntimeFilterWrapperTest() override = default;
+    void SetUp() override {}
+    void TearDown() override {}
+};
+
+TEST_F(RuntimeFilterWrapperTest, basic) {
+    int32_t filter_id = 0;
+    RuntimeFilterType filter_type = RuntimeFilterType::IN_FILTER;
+    PrimitiveType column_return_type = PrimitiveType::TYPE_INT;
+    int32_t max_in_num = 0;
+    int64_t runtime_bloom_filter_min_size = 0;
+    int64_t runtime_bloom_filter_max_size = 0;
+    bool build_bf_by_runtime_size = true;
+    int64_t bloom_filter_size = 0;
+    bool bloom_filter_size_calculated_by_ndv = true;
+    bool null_aware = true;
+    bool enable_fixed_len_to_uint32_v2 = true;
+    bool bitmap_filter_not_in = false;
+
+    RuntimeFilterParams params;
+    params.filter_id = filter_id;
+    params.filter_type = filter_type;
+    params.column_return_type = column_return_type;
+    params.max_in_num = max_in_num;
+    params.runtime_bloom_filter_min_size = runtime_bloom_filter_min_size;
+    params.runtime_bloom_filter_max_size = runtime_bloom_filter_max_size;
+    params.build_bf_by_runtime_size = build_bf_by_runtime_size;
+    params.bloom_filter_size_calculated_by_ndv = bloom_filter_size_calculated_by_ndv;
+    params.bloom_filter_size = bloom_filter_size;
+    params.null_aware = null_aware;
+    params.enable_fixed_len_to_uint32_v2 = enable_fixed_len_to_uint32_v2;
+    params.bitmap_filter_not_in = bitmap_filter_not_in;
+
+    auto wrapper = std::make_shared<RuntimeFilterWrapper>(&params);
+}
+
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to