This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch refactor_rf in repository https://gitbox.apache.org/repos/asf/doris.git
commit d9a559dc2f1c9af0e17520d9ec846afeacf49fba Author: Gabriel <liwenqi...@selectdb.com> AuthorDate: Mon Mar 3 12:30:13 2025 +0800 [refactor](runtime filter) Refine null property in runtime filter (#48534) ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test <!-- At least one of them must be included. --> - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [ ] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [ ] No. - [ ] Yes. <!-- Add document PR link here. eg: https://github.com/apache/doris-website/pull/1214 --> ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into --> --- be/src/exprs/bitmapfilter_predicate.h | 2 +- be/src/exprs/bloom_filter_func.h | 18 +-- be/src/exprs/bloom_filter_func_adaptor.h | 7 +- be/src/exprs/create_predicate_function.h | 126 +++++++++++---------- be/src/exprs/filter_base.h | 17 ++- be/src/exprs/hybrid_set.h | 9 +- be/src/exprs/minmax_predicate.h | 3 +- be/src/olap/in_list_predicate.h | 4 +- be/src/runtime_filter/runtime_filter_definitions.h | 22 +++- be/src/runtime_filter/runtime_filter_wrapper.cpp | 21 ++-- be/src/runtime_filter/runtime_filter_wrapper.h | 2 + .../aggregate_function_group_array_intersect.h | 7 +- be/src/vec/exec/format/orc/vorc_reader.cpp | 2 +- .../exec/format/parquet/vparquet_group_reader.cpp | 2 +- be/src/vec/exec/format/table/equality_delete.cpp | 2 +- be/src/vec/functions/in.h | 9 +- 
be/test/olap/date_bloom_filter_test.cpp | 8 +- .../runtime_filter/runtime_filter_wrapper_test.cpp | 64 +++++++++++ 18 files changed, 199 insertions(+), 126 deletions(-) diff --git a/be/src/exprs/bitmapfilter_predicate.h b/be/src/exprs/bitmapfilter_predicate.h index 98e839ac3a4..ba7c028d136 100644 --- a/be/src/exprs/bitmapfilter_predicate.h +++ b/be/src/exprs/bitmapfilter_predicate.h @@ -50,7 +50,7 @@ class BitmapFilterFunc : public BitmapFilterFuncBase { public: using CppType = typename PrimitiveTypeTraits<type>::CppType; - BitmapFilterFunc() : _bitmap_value(std::make_shared<BitmapValue>()) {} + BitmapFilterFunc(bool null_aware = false) : _bitmap_value(std::make_shared<BitmapValue>()) {} ~BitmapFilterFunc() override = default; diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h index 9e92667a7a4..64123250e48 100644 --- a/be/src/exprs/bloom_filter_func.h +++ b/be/src/exprs/bloom_filter_func.h @@ -29,6 +29,7 @@ namespace doris { // Only Used In RuntimeFilter class BloomFilterFuncBase : public FilterBase { public: + BloomFilterFuncBase(bool null_aware) : FilterBase(null_aware) {} virtual ~BloomFilterFuncBase() = default; void init_params(const RuntimeFilterParams* params) { @@ -37,7 +38,6 @@ public: _build_bf_by_runtime_size = params->build_bf_by_runtime_size; _runtime_bloom_filter_min_size = params->runtime_bloom_filter_min_size; _runtime_bloom_filter_max_size = params->runtime_bloom_filter_max_size; - _null_aware = params->null_aware; _bloom_filter_size_calculated_by_ndv = params->bloom_filter_size_calculated_by_ndv; _enable_fixed_len_to_uint32_v2 = params->enable_fixed_len_to_uint32_v2; _limit_length(); @@ -97,22 +97,16 @@ public: "allocated bytes {}", _bloom_filter_alloced, other->_bloom_filter_alloced); } - if (other->contain_null()) { - _bloom_filter->set_null_aware(true); - _bloom_filter->set_contain_null(); - } + _bloom_filter->set_contain_null(other->contain_null()); return _bloom_filter->merge(other->_bloom_filter.get()); } 
Status assign(butil::IOBufAsZeroCopyInputStream* data, const size_t data_size, bool contain_null) { if (_bloom_filter == nullptr) { - _null_aware = contain_null; _bloom_filter.reset(BloomFilterAdaptor::create(_null_aware)); } - if (contain_null) { - _bloom_filter->set_contain_null(); - } + _bloom_filter->set_contain_null(contain_null); _bloom_filter_alloced = data_size; return _bloom_filter->init(data, data_size); @@ -173,16 +167,14 @@ protected: template <PrimitiveType type> class BloomFilterFunc final : public BloomFilterFuncBase { public: + BloomFilterFunc(bool null_aware) : BloomFilterFuncBase(null_aware) {} void insert_set(std::shared_ptr<HybridSetBase> set) override { if (_enable_fixed_len_to_uint32_v2) { OpV2::insert_set(*_bloom_filter, set); } else { Op::insert_set(*_bloom_filter, set); } - if (set->contain_null()) { - _bloom_filter->set_null_aware(true); - _bloom_filter->set_contain_null(); - } + _bloom_filter->set_contain_null(set->contain_null()); } void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start) override { diff --git a/be/src/exprs/bloom_filter_func_adaptor.h b/be/src/exprs/bloom_filter_func_adaptor.h index f1f2fe79fab..a4ff8c839f3 100644 --- a/be/src/exprs/bloom_filter_func_adaptor.h +++ b/be/src/exprs/bloom_filter_func_adaptor.h @@ -30,8 +30,7 @@ namespace doris { class BloomFilterAdaptor : public FilterBase { public: - BloomFilterAdaptor(bool null_aware) { - _null_aware = null_aware; + BloomFilterAdaptor(bool null_aware) : FilterBase(null_aware) { _bloom_filter = std::make_shared<doris::BlockBloomFilter>(); } @@ -103,7 +102,7 @@ struct CommonFindOp { if (!nullmap[i]) { bloom_filter.add_element<fixed_len_to_uint32_method>(*(data + i)); } else { - bloom_filter.set_contain_null(); + bloom_filter.set_contain_null(true); } } } else { @@ -166,7 +165,7 @@ struct StringFindOp : CommonFindOp<fixed_len_to_uint32_method, StringRef> { if (nullmap == nullptr || !nullmap[i]) { 
bloom_filter.add_element<fixed_len_to_uint32_method>(col.get_data_at(i)); } else { - bloom_filter.set_contain_null(); + bloom_filter.set_contain_null(true); } } }; diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h index 7c702ad4ce6..bcb4ec49e75 100644 --- a/be/src/exprs/create_predicate_function.h +++ b/be/src/exprs/create_predicate_function.h @@ -35,10 +35,11 @@ class MinmaxFunctionTraits { public: using BasePtr = MinMaxFuncBase*; template <PrimitiveType type, size_t N> - static BasePtr get_function() { + static BasePtr get_function(bool null_aware) { using CppType = typename PrimitiveTypeTraits<type>::CppType; return new MinMaxNumFunc< - std::conditional_t<std::is_same_v<CppType, StringRef>, std::string, CppType>>(); + std::conditional_t<std::is_same_v<CppType, StringRef>, std::string, CppType>>( + null_aware); } }; @@ -46,19 +47,19 @@ class HybridSetTraits { public: using BasePtr = HybridSetBase*; template <PrimitiveType type, size_t N> - static BasePtr get_function() { + static BasePtr get_function(bool null_aware) { using CppType = typename PrimitiveTypeTraits<type>::CppType; if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) { using Set = std::conditional_t< std::is_same_v<CppType, StringRef>, StringSet<>, HybridSet<type, FixedContainer<typename PrimitiveTypeTraits<type>::CppType, N>>>; - return new Set(); + return new Set(null_aware); } else { using Set = std::conditional_t< std::is_same_v<CppType, StringRef>, StringSet<>, HybridSet<type, DynamicContainer<typename PrimitiveTypeTraits<type>::CppType>>>; - return new Set(); + return new Set(null_aware); } } }; @@ -67,8 +68,8 @@ class BloomFilterTraits { public: using BasePtr = BloomFilterFuncBase*; template <PrimitiveType type, size_t N> - static BasePtr get_function() { - return new BloomFilterFunc<type>(); + static BasePtr get_function(bool null_aware) { + return new BloomFilterFunc<type>(null_aware); } }; @@ -76,8 +77,8 @@ class BitmapFilterTraits { public: 
using BasePtr = BitmapFilterFuncBase*; template <PrimitiveType type, size_t N> - static BasePtr get_function() { - return new BitmapFilterFunc<type>(); + static BasePtr get_function(bool null_aware) { + return new BitmapFilterFunc<type>(null_aware); } }; @@ -85,8 +86,8 @@ template <class Traits> class PredicateFunctionCreator { public: template <PrimitiveType type, size_t N = 0> - static typename Traits::BasePtr create() { - return Traits::template get_function<type, N>(); + static typename Traits::BasePtr create(bool null_aware) { + return Traits::template get_function<type, N>(null_aware); } }; @@ -113,19 +114,19 @@ public: M(TYPE_IPV6) template <class Traits, size_t N = 0> -typename Traits::BasePtr create_predicate_function(PrimitiveType type) { +typename Traits::BasePtr create_predicate_function(PrimitiveType type, bool null_aware) { using Creator = PredicateFunctionCreator<Traits>; switch (type) { case TYPE_BOOLEAN: { - return Creator::template create<TYPE_BOOLEAN, N>(); + return Creator::template create<TYPE_BOOLEAN, N>(null_aware); } case TYPE_DECIMALV2: { - return Creator::template create<TYPE_DECIMALV2, N>(); + return Creator::template create<TYPE_DECIMALV2, N>(null_aware); } -#define M(NAME) \ - case NAME: { \ - return Creator::template create<NAME, N>(); \ +#define M(NAME) \ + case NAME: { \ + return Creator::template create<NAME, N>(null_aware); \ } APPLY_FOR_PRIMTYPE(M) #undef M @@ -142,13 +143,13 @@ typename Traits::BasePtr create_bitmap_predicate_function(PrimitiveType type) { switch (type) { case TYPE_TINYINT: - return Creator::template create<TYPE_TINYINT>(); + return Creator::template create<TYPE_TINYINT>(false); case TYPE_SMALLINT: - return Creator::template create<TYPE_SMALLINT>(); + return Creator::template create<TYPE_SMALLINT>(false); case TYPE_INT: - return Creator::template create<TYPE_INT>(); + return Creator::template create<TYPE_INT>(false); case TYPE_BIGINT: - return Creator::template create<TYPE_BIGINT>(); + return Creator::template 
create<TYPE_BIGINT>(false); default: throw Exception(ErrorCode::INTERNAL_ERROR, "bitmap predicate with type " + type_to_string(type)); @@ -157,72 +158,72 @@ typename Traits::BasePtr create_bitmap_predicate_function(PrimitiveType type) { return nullptr; } -inline auto create_minmax_filter(PrimitiveType type) { - return create_predicate_function<MinmaxFunctionTraits>(type); +inline auto create_minmax_filter(PrimitiveType type, bool null_aware) { + return create_predicate_function<MinmaxFunctionTraits>(type, null_aware); } template <size_t N = 0> -inline auto create_set(PrimitiveType type) { - return create_predicate_function<HybridSetTraits, N>(type); +inline auto create_set(PrimitiveType type, bool null_aware) { + return create_predicate_function<HybridSetTraits, N>(type, null_aware); } -inline auto create_set(PrimitiveType type, size_t size) { +inline auto create_set(PrimitiveType type, size_t size, bool null_aware) { if (size == 0) { - return create_set<0>(type); + return create_set<0>(type, null_aware); } else if (size == 1) { - return create_set<1>(type); + return create_set<1>(type, null_aware); } else if (size == 2) { - return create_set<2>(type); + return create_set<2>(type, null_aware); } else if (size == 3) { - return create_set<3>(type); + return create_set<3>(type, null_aware); } else if (size == 4) { - return create_set<4>(type); + return create_set<4>(type, null_aware); } else if (size == 5) { - return create_set<5>(type); + return create_set<5>(type, null_aware); } else if (size == 6) { - return create_set<6>(type); + return create_set<6>(type, null_aware); } else if (size == 7) { - return create_set<7>(type); + return create_set<7>(type, null_aware); } else if (size == FIXED_CONTAINER_MAX_SIZE) { - return create_set<FIXED_CONTAINER_MAX_SIZE>(type); + return create_set<FIXED_CONTAINER_MAX_SIZE>(type, null_aware); } else { - return create_set(type); + return create_set(type, null_aware); } } template <size_t N = 0> -inline HybridSetBase* 
create_string_value_set() { +inline HybridSetBase* create_string_value_set(bool null_aware) { if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) { - return new StringValueSet<FixedContainer<StringRef, N>>(); + return new StringValueSet<FixedContainer<StringRef, N>>(null_aware); } else { - return new StringValueSet(); + return new StringValueSet(null_aware); } } -inline HybridSetBase* create_string_value_set(size_t size) { +inline HybridSetBase* create_string_value_set(size_t size, bool null_aware) { if (size == 1) { - return create_string_value_set<1>(); + return create_string_value_set<1>(null_aware); } else if (size == 2) { - return create_string_value_set<2>(); + return create_string_value_set<2>(null_aware); } else if (size == 3) { - return create_string_value_set<3>(); + return create_string_value_set<3>(null_aware); } else if (size == 4) { - return create_string_value_set<4>(); + return create_string_value_set<4>(null_aware); } else if (size == 5) { - return create_string_value_set<5>(); + return create_string_value_set<5>(null_aware); } else if (size == 6) { - return create_string_value_set<6>(); + return create_string_value_set<6>(null_aware); } else if (size == 7) { - return create_string_value_set<7>(); + return create_string_value_set<7>(null_aware); } else if (size == FIXED_CONTAINER_MAX_SIZE) { - return create_string_value_set<FIXED_CONTAINER_MAX_SIZE>(); + return create_string_value_set<FIXED_CONTAINER_MAX_SIZE>(null_aware); } else { - return create_string_value_set(); + return create_string_value_set(null_aware); } } -inline auto create_bloom_filter(PrimitiveType type) { - return create_predicate_function<BloomFilterTraits>(type); +inline auto create_bloom_filter(PrimitiveType type, bool null_aware) { + return create_predicate_function<BloomFilterTraits>(type, null_aware); } inline auto create_bitmap_filter(PrimitiveType type) { @@ -232,9 +233,9 @@ inline auto create_bitmap_filter(PrimitiveType type) { template <PrimitiveType PT> ColumnPredicate* 
create_olap_column_predicate(uint32_t column_id, const std::shared_ptr<BloomFilterFuncBase>& filter, - const TabletColumn*) { + const TabletColumn*, bool null_aware) { std::shared_ptr<BloomFilterFuncBase> filter_olap; - filter_olap.reset(create_bloom_filter(PT)); + filter_olap.reset(create_bloom_filter(PT, null_aware)); filter_olap->light_copy(filter.get()); // create a new filter to match the input filter and PT. For example, filter may be varchar, but PT is char return new BloomFilterColumnPredicate<PT>(column_id, filter_olap); @@ -243,7 +244,7 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, template <PrimitiveType PT> ColumnPredicate* create_olap_column_predicate(uint32_t column_id, const std::shared_ptr<BitmapFilterFuncBase>& filter, - const TabletColumn*) { + const TabletColumn*, bool) { if constexpr (PT == TYPE_TINYINT || PT == TYPE_SMALLINT || PT == TYPE_INT || PT == TYPE_BIGINT) { return new BitmapFilterColumnPredicate<PT>(column_id, filter); @@ -255,7 +256,7 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, template <PrimitiveType PT> ColumnPredicate* create_olap_column_predicate(uint32_t column_id, const std::shared_ptr<HybridSetBase>& filter, - const TabletColumn* column = nullptr) { + const TabletColumn* column, bool) { return create_in_list_predicate<PT, PredicateType::IN_LIST>(column_id, filter, column->length()); } @@ -263,7 +264,7 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, template <PrimitiveType PT> ColumnPredicate* create_olap_column_predicate(uint32_t column_id, const std::shared_ptr<FunctionFilter>& filter, - const TabletColumn* column = nullptr) { + const TabletColumn* column, bool) { // currently only support like predicate if constexpr (PT == TYPE_CHAR) { return new LikeColumnPredicate<TYPE_CHAR>(filter->_opposite, column_id, filter->_fn_ctx, @@ -277,19 +278,20 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, template <typename T> ColumnPredicate* 
create_column_predicate(uint32_t column_id, const std::shared_ptr<T>& filter, - FieldType type, const TabletColumn* column = nullptr) { + FieldType type, const TabletColumn* column, + bool null_aware = false) { switch (type) { -#define M(NAME) \ - case FieldType::OLAP_FIELD_##NAME: { \ - return create_olap_column_predicate<NAME>(column_id, filter, column); \ +#define M(NAME) \ + case FieldType::OLAP_FIELD_##NAME: { \ + return create_olap_column_predicate<NAME>(column_id, filter, column, null_aware); \ } APPLY_FOR_PRIMTYPE(M) #undef M case FieldType::OLAP_FIELD_TYPE_DECIMAL: { - return create_olap_column_predicate<TYPE_DECIMALV2>(column_id, filter, column); + return create_olap_column_predicate<TYPE_DECIMALV2>(column_id, filter, column, null_aware); } case FieldType::OLAP_FIELD_TYPE_BOOL: { - return create_olap_column_predicate<TYPE_BOOLEAN>(column_id, filter, column); + return create_olap_column_predicate<TYPE_BOOLEAN>(column_id, filter, column, null_aware); } default: return nullptr; diff --git a/be/src/exprs/filter_base.h b/be/src/exprs/filter_base.h index 2a563b4d64c..5534dc75589 100644 --- a/be/src/exprs/filter_base.h +++ b/be/src/exprs/filter_base.h @@ -17,19 +17,28 @@ #pragma once +#include "common/exception.h" + namespace doris { class FilterBase { public: + FilterBase(bool null_aware) : _null_aware(null_aware) {} bool contain_null() const { return _null_aware && _contain_null; } - void set_contain_null() { _contain_null = true; } - - void set_null_aware(bool null_aware) { _null_aware = null_aware; } + void set_contain_null(bool contain_null) { + if (_contain_null && !contain_null) { + throw Exception(ErrorCode::INTERNAL_ERROR, + "contain_null cannot be changed from true to false"); + } + _contain_null = contain_null; + } protected: + // Indicates whether a null datum exists to build this filter. bool _contain_null = false; - bool _null_aware = false; + // Indicates whether this filter is null-aware. 
+ const bool _null_aware = false; }; } // namespace doris diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h index c202226551d..1358c479dca 100644 --- a/be/src/exprs/hybrid_set.h +++ b/be/src/exprs/hybrid_set.h @@ -192,7 +192,7 @@ private: // TODO Maybe change void* parameter to template parameter better. class HybridSetBase : public FilterBase { public: - HybridSetBase() = default; + HybridSetBase(bool null_aware) : FilterBase(null_aware) {} virtual ~HybridSetBase() = default; virtual void insert(const void* data) = 0; // use in vectorize execute engine @@ -252,8 +252,7 @@ public: using ElementType = typename ContainerType::ElementType; using ColumnType = _ColumnType; - HybridSet() = default; - + HybridSet(bool null_aware) : HybridSetBase(null_aware) {} ~HybridSet() override = default; void insert(const void* data) override { @@ -387,7 +386,7 @@ class StringSet : public HybridSetBase { public: using ContainerType = _ContainerType; - StringSet() = default; + StringSet(bool null_aware) : HybridSetBase(null_aware) {} ~StringSet() override = default; @@ -556,7 +555,7 @@ class StringValueSet : public HybridSetBase { public: using ContainerType = _ContainerType; - StringValueSet() = default; + StringValueSet(bool null_aware) : HybridSetBase(null_aware) {} ~StringValueSet() override = default; diff --git a/be/src/exprs/minmax_predicate.h b/be/src/exprs/minmax_predicate.h index 73cc6d8eb7f..832411fd77c 100644 --- a/be/src/exprs/minmax_predicate.h +++ b/be/src/exprs/minmax_predicate.h @@ -29,6 +29,7 @@ namespace doris { // only used in Runtime Filter class MinMaxFuncBase : public FilterBase { public: + MinMaxFuncBase(bool null_aware) : FilterBase(null_aware) {} virtual void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start) = 0; virtual void* get_max() = 0; virtual void* get_min() = 0; @@ -44,7 +45,7 @@ public: template <class T, bool NeedMax = true, bool NeedMin = true> class MinMaxNumFunc : public MinMaxFuncBase { public: - 
MinMaxNumFunc() = default; + MinMaxNumFunc(bool null_aware) : MinMaxFuncBase(null_aware) {} ~MinMaxNumFunc() override = default; void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start) override { diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 08224df74e7..f5ed4e81da9 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -71,7 +71,7 @@ public: : ColumnPredicate(column_id, is_opposite), _min_value(type_limit<T>::max()), _max_value(type_limit<T>::min()) { - _values = std::make_shared<HybridSetType>(); + _values = std::make_shared<HybridSetType>(false); for (const auto& condition : conditions) { T tmp; if constexpr (Type == TYPE_STRING || Type == TYPE_CHAR) { @@ -95,7 +95,7 @@ public: CHECK(hybrid_set != nullptr); if constexpr (is_string_type(Type) || Type == TYPE_DECIMALV2 || is_date_type(Type)) { - _values = std::make_shared<HybridSetType>(); + _values = std::make_shared<HybridSetType>(false); if constexpr (is_string_type(Type)) { HybridSetBase::IteratorBase* iter = hybrid_set->begin(); while (iter->has_next()) { diff --git a/be/src/runtime_filter/runtime_filter_definitions.h b/be/src/runtime_filter/runtime_filter_definitions.h index 47dcbf567c0..ed732b99532 100644 --- a/be/src/runtime_filter/runtime_filter_definitions.h +++ b/be/src/runtime_filter/runtime_filter_definitions.h @@ -36,19 +36,29 @@ enum class RuntimeFilterType { }; struct RuntimeFilterParams { + // Filter ID + int32_t filter_id {}; + // Filter type RuntimeFilterType filter_type {}; + // Data type of build column PrimitiveType column_return_type {}; - int64_t bloom_filter_size {}; + bool null_aware {}; + bool enable_fixed_len_to_uint32_v2 {}; + + // The max limitation of in-set int32_t max_in_num {}; + // The min size limitation of bloom filter int64_t runtime_bloom_filter_min_size {}; + // The max size limitation of bloom filter int64_t runtime_bloom_filter_max_size {}; - int32_t filter_id {}; - bool 
bitmap_filter_not_in {}; + // Size of bloom filter which is estimated by FE using NDV + int64_t bloom_filter_size {}; + // Whether a runtime size is used to build bloom filter bool build_bf_by_runtime_size {}; - + // Whether an estimated size by NDV is used to build bloom filter bool bloom_filter_size_calculated_by_ndv {}; - bool null_aware {}; - bool enable_fixed_len_to_uint32_v2 {}; + + bool bitmap_filter_not_in {}; }; class MinMaxFuncBase; diff --git a/be/src/runtime_filter/runtime_filter_wrapper.cpp b/be/src/runtime_filter/runtime_filter_wrapper.cpp index f07934de6a7..99889ce2b72 100644 --- a/be/src/runtime_filter/runtime_filter_wrapper.cpp +++ b/be/src/runtime_filter/runtime_filter_wrapper.cpp @@ -27,30 +27,27 @@ RuntimeFilterWrapper::RuntimeFilterWrapper(const RuntimeFilterParams* params) State::UNINITED, params->max_in_num) { switch (_filter_type) { case RuntimeFilterType::IN_FILTER: { - _hybrid_set.reset(create_set(_column_return_type)); - _hybrid_set->set_null_aware(params->null_aware); + _hybrid_set.reset(create_set(_column_return_type, params->null_aware)); return; } // Only use in nested loop join not need set null aware case RuntimeFilterType::MIN_FILTER: case RuntimeFilterType::MAX_FILTER: { - _minmax_func.reset(create_minmax_filter(_column_return_type)); + _minmax_func.reset(create_minmax_filter(_column_return_type, params->null_aware)); return; } case RuntimeFilterType::MINMAX_FILTER: { - _minmax_func.reset(create_minmax_filter(_column_return_type)); - _minmax_func->set_null_aware(params->null_aware); + _minmax_func.reset(create_minmax_filter(_column_return_type, params->null_aware)); return; } case RuntimeFilterType::BLOOM_FILTER: { - _bloom_filter_func.reset(create_bloom_filter(_column_return_type)); + _bloom_filter_func.reset(create_bloom_filter(_column_return_type, params->null_aware)); _bloom_filter_func->init_params(params); return; } case RuntimeFilterType::IN_OR_BLOOM_FILTER: { - _hybrid_set.reset(create_set(_column_return_type)); - 
_hybrid_set->set_null_aware(params->null_aware); - _bloom_filter_func.reset(create_bloom_filter(_column_return_type)); + _hybrid_set.reset(create_set(_column_return_type, params->null_aware)); + _bloom_filter_func.reset(create_bloom_filter(_column_return_type, params->null_aware)); _bloom_filter_func->init_params(params); return; } @@ -231,7 +228,6 @@ Status RuntimeFilterWrapper::merge(const RuntimeFilterWrapper* other) { Status RuntimeFilterWrapper::_assign(const PInFilter& in_filter, bool contain_null) { if (contain_null) { - _hybrid_set->set_null_aware(true); _hybrid_set->insert((const void*)nullptr); } @@ -417,10 +413,7 @@ Status RuntimeFilterWrapper::_assign(const PBloomFilter& bloom_filter, } Status RuntimeFilterWrapper::_assign(const PMinMaxFilter& minmax_filter, bool contain_null) { - if (contain_null) { - _minmax_func->set_null_aware(true); - _minmax_func->set_contain_null(); - } + _minmax_func->set_contain_null(contain_null); switch (_column_return_type) { case TYPE_BOOLEAN: { diff --git a/be/src/runtime_filter/runtime_filter_wrapper.h b/be/src/runtime_filter/runtime_filter_wrapper.h index 98de97ea0c4..baf02e92e3c 100644 --- a/be/src/runtime_filter/runtime_filter_wrapper.h +++ b/be/src/runtime_filter/runtime_filter_wrapper.h @@ -40,7 +40,9 @@ public: DISABLED // This state indicates that the rf is deprecated, used in cases such as reach max_in_num / join spill / meet rpc error }; + // Called by consumer / producer RuntimeFilterWrapper(const RuntimeFilterParams* params); + // Called by merger RuntimeFilterWrapper(PrimitiveType column_type, RuntimeFilterType type, uint32_t filter_id, State state, int max_in_num = 0) : _column_return_type(column_type), diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h index e925453d16c..aef6e7c809f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h +++ 
b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h @@ -77,7 +77,10 @@ class NullableNumericOrDateSet : public HybridSet<type_to_primitive_type<T>(), DynamicContainer<typename PrimitiveTypeTraits< type_to_primitive_type<T>()>::CppType>> { public: - NullableNumericOrDateSet() { this->_null_aware = true; } + NullableNumericOrDateSet() + : HybridSet<type_to_primitive_type<T>(), + DynamicContainer<typename PrimitiveTypeTraits< + type_to_primitive_type<T>()>::CppType>>(true) {} void change_contain_null_value(bool target_value) { this->_contain_null = target_value; } }; @@ -322,7 +325,7 @@ public: /// Generic implementation, it uses serialized representation as object descriptor. class NullableStringSet : public StringValueSet<DynamicContainer<StringRef>> { public: - NullableStringSet() { this->_null_aware = true; } + NullableStringSet() : StringValueSet<DynamicContainer<StringRef>>(true) {} void change_contain_null_value(bool target_value) { this->_contain_null = target_value; } }; diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 5a1b2ceaaf9..909a89116e8 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -2535,7 +2535,7 @@ Status OrcReader::_rewrite_dict_conjuncts(std::vector<int32_t>& dict_codes, int node.__set_is_nullable(false); std::shared_ptr<HybridSetBase> hybrid_set( - create_set(PrimitiveType::TYPE_INT, dict_codes.size())); + create_set(PrimitiveType::TYPE_INT, dict_codes.size(), false)); for (int& dict_code : dict_codes) { hybrid_set->insert(&dict_code); } diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 830d74acf8b..17697eaddab 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -976,7 +976,7 @@ Status 
RowGroupReader::_rewrite_dict_conjuncts(std::vector<int32_t>& dict_codes, node.__set_is_nullable(false); std::shared_ptr<HybridSetBase> hybrid_set( - create_set(PrimitiveType::TYPE_INT, dict_codes.size())); + create_set(PrimitiveType::TYPE_INT, dict_codes.size(), false)); for (int j = 0; j < dict_codes.size(); ++j) { hybrid_set->insert(&dict_codes[j]); } diff --git a/be/src/vec/exec/format/table/equality_delete.cpp b/be/src/vec/exec/format/table/equality_delete.cpp index bf31f81548c..6c9c6ddb412 100644 --- a/be/src/vec/exec/format/table/equality_delete.cpp +++ b/be/src/vec/exec/format/table/equality_delete.cpp @@ -38,7 +38,7 @@ Status SimpleEqualityDelete::_build_set() { auto& column_and_type = _delete_block->get_by_position(0); _delete_column_name = column_and_type.name; _delete_column_type = remove_nullable(column_and_type.type)->get_type_as_type_descriptor().type; - _hybrid_set.reset(create_set(_delete_column_type, _delete_block->rows())); + _hybrid_set.reset(create_set(_delete_column_type, _delete_block->rows(), false)); _hybrid_set->insert_fixed_len(column_and_type.column, 0); return Status::OK(); } diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index 6f697ba7441..4a588353d58 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -109,17 +109,16 @@ public: context->set_function_state(scope, state); DCHECK(context->get_num_args() >= 1); if (context->get_arg_type(0)->type == PrimitiveType::TYPE_NULL) { - state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0)); + state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true)); } else if (context->get_arg_type(0)->type == PrimitiveType::TYPE_CHAR || context->get_arg_type(0)->type == PrimitiveType::TYPE_VARCHAR || context->get_arg_type(0)->type == PrimitiveType::TYPE_STRING) { // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context))); } else { - 
state->hybrid_set.reset( - create_set(context->get_arg_type(0)->type, get_size_with_out_null(context))); + state->hybrid_set.reset(create_set(context->get_arg_type(0)->type, + get_size_with_out_null(context), true)); } - state->hybrid_set->set_null_aware(true); for (int i = 1; i < context->get_num_args(); ++i) { const auto& const_column_ptr = context->get_constant_col(i); @@ -337,7 +336,7 @@ private: } } std::unique_ptr<HybridSetBase> hybrid_set( - create_set(context->get_arg_type(0)->type, set_datas.size())); + create_set(context->get_arg_type(0)->type, set_datas.size(), true)); for (auto& set_data : set_datas) { hybrid_set->insert((void*)(set_data.data), set_data.size); } diff --git a/be/test/olap/date_bloom_filter_test.cpp b/be/test/olap/date_bloom_filter_test.cpp index cf869bf82e3..c24b1abb51c 100644 --- a/be/test/olap/date_bloom_filter_test.cpp +++ b/be/test/olap/date_bloom_filter_test.cpp @@ -241,7 +241,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { // Test positive cases auto test_positive = [&](const std::vector<std::string>& values, bool result) { - auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>(); + auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>(false); for (const auto& value : values) { auto v = timestamp_from_date(value); hybrid_set->insert(&v); @@ -259,7 +259,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { test_positive({"2024-11-09"}, true); auto test_negative = [&](const std::vector<std::string>& values, bool result) { - auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>(); + auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>(false); for (const auto& value : values) { auto v = timestamp_from_date(value); @@ -291,7 +291,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { // Test positive cases auto test_positive = [&](const std::vector<std::string>& values, bool result) { - auto hybrid_set = 
std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>(); + auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>(false); for (const auto& value : values) { auto v = timestamp_from_datetime(value); hybrid_set->insert(&v); @@ -310,7 +310,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { // Test negative cases auto test_negative = [&](const std::vector<std::string>& values, bool result) { - auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>(); + auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>(false); for (const auto& value : values) { auto v = timestamp_from_datetime(value); hybrid_set->insert(&v); diff --git a/be/test/runtime_filter/runtime_filter_wrapper_test.cpp b/be/test/runtime_filter/runtime_filter_wrapper_test.cpp new file mode 100644 index 00000000000..963bbfc223b --- /dev/null +++ b/be/test/runtime_filter/runtime_filter_wrapper_test.cpp @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "runtime_filter/runtime_filter_wrapper.h" + +#include <glog/logging.h> +#include <gtest/gtest.h> + +namespace doris { + +class RuntimeFilterWrapperTest : public testing::Test { +public: + RuntimeFilterWrapperTest() = default; + ~RuntimeFilterWrapperTest() override = default; + void SetUp() override {} + void TearDown() override {} +}; + +TEST_F(RuntimeFilterWrapperTest, basic) { + int32_t filter_id = 0; + RuntimeFilterType filter_type = RuntimeFilterType::IN_FILTER; + PrimitiveType column_return_type = PrimitiveType::TYPE_INT; + int32_t max_in_num = 0; + int64_t runtime_bloom_filter_min_size = 0; + int64_t runtime_bloom_filter_max_size = 0; + bool build_bf_by_runtime_size = true; + int64_t bloom_filter_size = 0; + bool bloom_filter_size_calculated_by_ndv = true; + bool null_aware = true; + bool enable_fixed_len_to_uint32_v2 = true; + bool bitmap_filter_not_in = false; + + RuntimeFilterParams params; + params.filter_id = filter_id; + params.filter_type = filter_type; + params.column_return_type = column_return_type; + params.max_in_num = max_in_num; + params.runtime_bloom_filter_min_size = runtime_bloom_filter_min_size; + params.runtime_bloom_filter_max_size = runtime_bloom_filter_max_size; + params.build_bf_by_runtime_size = build_bf_by_runtime_size; + params.bloom_filter_size_calculated_by_ndv = bloom_filter_size_calculated_by_ndv; + params.bloom_filter_size = bloom_filter_size; + params.null_aware = null_aware; + params.enable_fixed_len_to_uint32_v2 = enable_fixed_len_to_uint32_v2; + params.bitmap_filter_not_in = bitmap_filter_not_in; + + auto wrapper = std::make_shared<RuntimeFilterWrapper>(&params); +} + +} // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org