This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 44e979e43b [Vectorized][Function] add orthogonal bitmap agg functions (#10126) 44e979e43b is described below commit 44e979e43b0ebdb961ce64782dfdc17613f43ed5 Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Fri Jun 17 08:48:41 2022 +0800 [Vectorized][Function] add orthogonal bitmap agg functions (#10126) * [Vectorized][Function] add orthogonal bitmap agg functions save some file about orthogonal bitmap function add some file to rebase update functions file * refactor union_count function refactor orthogonal union count functions * remove bool is_variadic --- be/src/exprs/bitmap_function.cpp | 256 ++------------------- be/src/runtime/string_value.h | 1 + be/src/util/bitmap_intersect.h | 245 ++++++++++++++++++++ be/src/vec/CMakeLists.txt | 1 + .../aggregate_function_orthogonal_bitmap.cpp | 99 ++++++++ .../aggregate_function_orthogonal_bitmap.h | 247 ++++++++++++++++++++ .../aggregate_function_simple_factory.cpp | 2 + be/test/exprs/bitmap_function_test.cpp | 7 +- docs/.vuepress/sidebar/en/docs.js | 1 + docs/.vuepress/sidebar/zh-CN/docs.js | 1 + .../bitmap-functions/intersect_count.md | 57 +++++ .../bitmap-functions/intersect_count.md | 56 +++++ .../apache/doris/analysis/FunctionCallExpr.java | 11 +- .../apache/doris/catalog/AggregateFunction.java | 7 +- .../java/org/apache/doris/catalog/FunctionSet.java | 21 ++ 15 files changed, 772 insertions(+), 240 deletions(-) diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp index 5e38ab8f79..e45f7244b9 100644 --- a/be/src/exprs/bitmap_function.cpp +++ b/be/src/exprs/bitmap_function.cpp @@ -20,141 +20,12 @@ #include "exprs/anyval_util.h" #include "gutil/strings/numbers.h" #include "gutil/strings/split.h" +#include "util/bitmap_intersect.h" #include "util/bitmap_value.h" #include "util/string_parser.hpp" namespace doris { -namespace detail { - -const int DATETIME_PACKED_TIME_BYTE_SIZE = 8; -const 
int DATETIME_TYPE_BYTE_SIZE = 4; - -const int DECIMAL_BYTE_SIZE = 16; - -// get_val start -template <typename ValType, typename T> -T get_val(const ValType& x) { - DCHECK(!x.is_null); - return x.val; -} - -template <> -StringValue get_val(const StringVal& x) { - DCHECK(!x.is_null); - return StringValue::from_string_val(x); -} - -template <> -DateTimeValue get_val(const DateTimeVal& x) { - return DateTimeValue::from_datetime_val(x); -} - -template <> -DecimalV2Value get_val(const DecimalV2Val& x) { - return DecimalV2Value::from_decimal_val(x); -} -// get_val end - -// serialize_size start -template <typename T> -int32_t serialize_size(const T& v) { - return sizeof(T); -} - -template <> -int32_t serialize_size(const DateTimeValue& v) { - return DATETIME_PACKED_TIME_BYTE_SIZE + DATETIME_TYPE_BYTE_SIZE; -} - -template <> -int32_t serialize_size(const DecimalV2Value& v) { - return DECIMAL_BYTE_SIZE; -} - -template <> -int32_t serialize_size(const StringValue& v) { - return v.len + 4; -} -// serialize_size end - -// write_to start -template <typename T> -char* write_to(const T& v, char* dest) { - size_t type_size = sizeof(T); - memcpy(dest, &v, type_size); - dest += type_size; - return dest; -} - -template <> -char* write_to(const DateTimeValue& v, char* dest) { - DateTimeVal value; - v.to_datetime_val(&value); - *(int64_t*)dest = value.packed_time; - dest += DATETIME_PACKED_TIME_BYTE_SIZE; - *(int*)dest = value.type; - dest += DATETIME_TYPE_BYTE_SIZE; - return dest; -} - -template <> -char* write_to(const DecimalV2Value& v, char* dest) { - __int128 value = v.value(); - memcpy(dest, &value, DECIMAL_BYTE_SIZE); - dest += DECIMAL_BYTE_SIZE; - return dest; -} - -template <> -char* write_to(const StringValue& v, char* dest) { - *(int32_t*)dest = v.len; - dest += 4; - memcpy(dest, v.ptr, v.len); - dest += v.len; - return dest; -} -// write_to end - -// read_from start -template <typename T> -void read_from(const char** src, T* result) { - size_t type_size = sizeof(T); - 
memcpy(result, *src, type_size); - *src += type_size; -} - -template <> -void read_from(const char** src, DateTimeValue* result) { - DateTimeVal value; - value.is_null = false; - value.packed_time = *(int64_t*)(*src); - *src += DATETIME_PACKED_TIME_BYTE_SIZE; - value.type = *(int*)(*src); - *src += DATETIME_TYPE_BYTE_SIZE; - *result = DateTimeValue::from_datetime_val(value); - ; -} - -template <> -void read_from(const char** src, DecimalV2Value* result) { - __int128 v = 0; - memcpy(&v, *src, DECIMAL_BYTE_SIZE); - *src += DECIMAL_BYTE_SIZE; - *result = DecimalV2Value(v); -} - -template <> -void read_from(const char** src, StringValue* result) { - int32_t length = *(int32_t*)(*src); - *src += 4; - *result = StringValue((char*)*src, length); - *src += length; -} -// read_from end - -} // namespace detail - static StringVal serialize(FunctionContext* ctx, BitmapValue* value) { if (!value) { BitmapValue empty_bitmap; @@ -168,98 +39,6 @@ static StringVal serialize(FunctionContext* ctx, BitmapValue* value) { } } -// Calculate the intersection of two or more bitmaps -// Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...) 
-// Example: intersect_count(user_id, event, 'A', 'B', 'C'), meaning find the intersect count of user_id in all A/B/C 3 bitmaps -// Todo(kks) Use Array type instead of variable arguments -template <typename T> -struct BitmapIntersect { -public: - BitmapIntersect() {} - - explicit BitmapIntersect(const char* src) { deserialize(src); } - - void add_key(const T key) { - BitmapValue empty_bitmap; - _bitmaps[key] = empty_bitmap; - } - - void update(const T& key, const BitmapValue& bitmap) { - if (_bitmaps.find(key) != _bitmaps.end()) { - _bitmaps[key] |= bitmap; - } - } - - void merge(const BitmapIntersect& other) { - for (auto& kv : other._bitmaps) { - if (_bitmaps.find(kv.first) != _bitmaps.end()) { - _bitmaps[kv.first] |= kv.second; - } else { - _bitmaps[kv.first] = kv.second; - } - } - } - - // intersection - BitmapValue intersect() const { - BitmapValue result; - auto it = _bitmaps.begin(); - result |= it->second; - it++; - for (; it != _bitmaps.end(); it++) { - result &= it->second; - } - return result; - } - - // calculate the intersection for _bitmaps's bitmap values - int64_t intersect_count() const { - if (_bitmaps.empty()) { - return 0; - } - return intersect().cardinality(); - } - - // the serialize size - size_t size() { - size_t size = 4; - for (auto& kv : _bitmaps) { - size += detail::serialize_size(kv.first); - ; - size += kv.second.getSizeInBytes(); - } - return size; - } - - //must call size() first - void serialize(char* dest) { - char* writer = dest; - *(int32_t*)writer = _bitmaps.size(); - writer += 4; - for (auto& kv : _bitmaps) { - writer = detail::write_to(kv.first, writer); - kv.second.write(writer); - writer += kv.second.getSizeInBytes(); - } - } - - void deserialize(const char* src) { - const char* reader = src; - int32_t bitmaps_size = *(int32_t*)reader; - reader += 4; - for (int32_t i = 0; i < bitmaps_size; i++) { - T key; - detail::read_from(&reader, &key); - BitmapValue bitmap(reader); - reader += bitmap.getSizeInBytes(); - _bitmaps[key] = 
bitmap; - } - } - -private: - std::map<T, BitmapValue> _bitmaps; -}; - void BitmapFunctions::init() {} void BitmapFunctions::bitmap_init(FunctionContext* ctx, StringVal* dst) { @@ -403,7 +182,7 @@ StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const StringVa return result; } -// This is a init function for intersect_count not for bitmap_intersect. +// This is a init function for intersect_count not for bitmap_intersect, not for _orthogonal_bitmap_intersect(bitmap,t,t) template <typename T, typename ValType> void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; @@ -414,12 +193,14 @@ void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst for (int i = 2; i < ctx->get_num_constant_args(); ++i) { DCHECK(ctx->is_arg_constant(i)); ValType* arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(i)); - intersect->add_key(detail::get_val<ValType, T>(*arg)); + intersect->add_key(detail::Helper::get_val<ValType, T>(*arg)); } dst->ptr = (uint8_t*)intersect; } +// This is a update function for intersect_count/ORTHOGONAL_BITMAP_INTERSECT_COUNT/ORTHOGONAL_BITMAP_INTERSECT(bitmap,t,t) +// not for bitmap_intersect(Bitmap) template <typename T, typename ValType> void BitmapFunctions::bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, const ValType& key, int num_key, const ValType* keys, @@ -427,13 +208,14 @@ void BitmapFunctions::bitmap_intersect_update(FunctionContext* ctx, const String auto* dst_bitmap = reinterpret_cast<BitmapIntersect<T>*>(dst->ptr); // zero size means the src input is a agg object if (src.len == 0) { - dst_bitmap->update(detail::get_val<ValType, T>(key), + dst_bitmap->update(detail::Helper::get_val<ValType, T>(key), *reinterpret_cast<BitmapValue*>(src.ptr)); } else { - dst_bitmap->update(detail::get_val<ValType, T>(key), BitmapValue((char*)src.ptr)); + dst_bitmap->update(detail::Helper::get_val<ValType, T>(key), BitmapValue((char*)src.ptr)); } } 
+//only for intersect_count(bitmap,t,t) template <typename T> void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src, const StringVal* dst) { @@ -441,6 +223,7 @@ void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, const StringV dst_bitmap->merge(BitmapIntersect<T>((char*)src.ptr)); } +//only for intersect_count(bitmap,t,t) template <typename T> StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, const StringVal& src) { auto* src_bitmap = reinterpret_cast<BitmapIntersect<T>*>(src.ptr); @@ -450,6 +233,7 @@ StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, cons return result; } +//only for intersect_count(bitmap,t,t) template <typename T> BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src) { auto* src_bitmap = reinterpret_cast<BitmapIntersect<T>*>(src.ptr); @@ -928,13 +712,15 @@ StringVal BitmapFunctions::bitmap_subset_limit(FunctionContext* ctx, const Strin return serialize(ctx, &ret_bitmap); } - +// init ORTHOGONAL_BITMAP_UNION_COUNT(bitmap) +// update bitmap_union() void BitmapFunctions::orthogonal_bitmap_union_count_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; dst->len = sizeof(BitmapValue); dst->ptr = (uint8_t*)new BitmapValue(); } +// serialize for ORTHOGONAL_BITMAP_UNION_COUNT(bitmap) StringVal BitmapFunctions::orthogonal_bitmap_count_serialize(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { @@ -950,7 +736,7 @@ StringVal BitmapFunctions::orthogonal_bitmap_count_serialize(FunctionContext* ct return result; } -// This is a init function for bitmap_intersect. +// This is a init function for orthogonal_bitmap_intersect(bitmap,t,t). 
template <typename T, typename ValType> void BitmapFunctions::orthogonal_bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) { // constant args start from index 2 @@ -961,7 +747,7 @@ void BitmapFunctions::orthogonal_bitmap_intersect_init(FunctionContext* ctx, Str for (int i = 2; i < ctx->get_num_constant_args(); ++i) { ValType* arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(i)); - intersect->add_key(detail::get_val<ValType, T>(*arg)); + intersect->add_key(detail::Helper::get_val<ValType, T>(*arg)); } dst->ptr = (uint8_t*)intersect; @@ -972,7 +758,7 @@ void BitmapFunctions::orthogonal_bitmap_intersect_init(FunctionContext* ctx, Str } } -// This is a init function for intersect_count. +// This is a init function for orthogonal_bitmap_intersect_count(bitmap,t,t). template <typename T, typename ValType> void BitmapFunctions::orthogonal_bitmap_intersect_count_init(FunctionContext* ctx, StringVal* dst) { if (ctx->get_num_constant_args() > 1) { @@ -983,7 +769,7 @@ void BitmapFunctions::orthogonal_bitmap_intersect_count_init(FunctionContext* ct // constant args start from index 2 for (int i = 2; i < ctx->get_num_constant_args(); ++i) { ValType* arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(i)); - intersect->add_key(detail::get_val<ValType, T>(*arg)); + intersect->add_key(detail::Helper::get_val<ValType, T>(*arg)); } dst->ptr = (uint8_t*)intersect; @@ -995,6 +781,9 @@ void BitmapFunctions::orthogonal_bitmap_intersect_count_init(FunctionContext* ct } } +// This is a serialize function for orthogonal_bitmap_intersect(bitmap,t,t). 
+// merge is the simple bitmap_union() function LINE(80); +// finalize is the bitmap_serialize() function LINE(173) template <typename T> StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize(FunctionContext* ctx, const StringVal& src) { @@ -1014,6 +803,8 @@ BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize(FunctionContext* return result; } +// This is a merge function for orthogonal_bitmap_intersect_count(bitmap,t,t). +// and merge for ORTHOGONAL_BITMAP_UNION_COUNT(bitmap) void BitmapFunctions::orthogonal_bitmap_count_merge(FunctionContext* context, const StringVal& src, StringVal* dst) { if (dst->len != sizeof(int64_t)) { @@ -1027,6 +818,8 @@ void BitmapFunctions::orthogonal_bitmap_count_merge(FunctionContext* context, co *(int64_t*)dst->ptr += *(int64_t*)src.ptr; } +// This is a finalize function for orthogonal_bitmap_intersect_count(bitmap,t,t). +// finalize for ORTHOGONAL_BITMAP_UNION_COUNT(bitmap) BigIntVal BitmapFunctions::orthogonal_bitmap_count_finalize(FunctionContext* context, const StringVal& src) { auto* pval = reinterpret_cast<int64_t*>(src.ptr); @@ -1035,6 +828,7 @@ BigIntVal BitmapFunctions::orthogonal_bitmap_count_finalize(FunctionContext* con return result; } +// This is a serialize function for orthogonal_bitmap_intersect_count(bitmap,t,t). template <typename T> StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize(FunctionContext* ctx, const StringVal& src) { diff --git a/be/src/runtime/string_value.h b/be/src/runtime/string_value.h index f15b26571e..13b3852a5d 100644 --- a/be/src/runtime/string_value.h +++ b/be/src/runtime/string_value.h @@ -90,6 +90,7 @@ struct StringValue { StringValue(char* ptr, int len) : ptr(ptr), len(len) {} StringValue(const char* ptr, int len) : ptr(const_cast<char*>(ptr)), len(len) {} StringValue() : ptr(nullptr), len(0) {} + StringValue(const StringRef& str) : ptr(const_cast<char*>(str.data)), len(str.size) {} /// Construct a StringValue from 's'. 
's' must be valid for as long as /// this object is valid. diff --git a/be/src/util/bitmap_intersect.h b/be/src/util/bitmap_intersect.h new file mode 100644 index 0000000000..dcda6ae5a5 --- /dev/null +++ b/be/src/util/bitmap_intersect.h @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+#pragma once +#include "runtime/string_value.h" +#include "udf/udf.h" +#include "util/bitmap_value.h" + +namespace doris { + +namespace detail { +class Helper { +public: + static const int DATETIME_PACKED_TIME_BYTE_SIZE = 8; + static const int DATETIME_TYPE_BYTE_SIZE = 4; + static const int DECIMAL_BYTE_SIZE = 16; + + // get_val start + template <typename ValType, typename T> + static T get_val(const ValType& x) { + DCHECK(!x.is_null); + return x.val; + } + + // serialize_size start + template <typename T> + static int32_t serialize_size(const T& v) { + return sizeof(T); + } + + // write_to start + template <typename T> + static char* write_to(const T& v, char* dest) { + size_t type_size = sizeof(T); + memcpy(dest, &v, type_size); + dest += type_size; + return dest; + } + + // read_from start + template <typename T> + static void read_from(const char** src, T* result) { + size_t type_size = sizeof(T); + memcpy(result, *src, type_size); + *src += type_size; + } +}; + +template <> +inline StringValue Helper::get_val<StringVal>(const StringVal& x) { + DCHECK(!x.is_null); + return StringValue::from_string_val(x); +} + +template <> +inline DateTimeValue Helper::get_val<DateTimeVal>(const DateTimeVal& x) { + return DateTimeValue::from_datetime_val(x); +} + +template <> +inline DecimalV2Value Helper::get_val<DecimalV2Val>(const DecimalV2Val& x) { + return DecimalV2Value::from_decimal_val(x); +} +// get_val end + +template <> +inline char* Helper::write_to<DateTimeValue>(const DateTimeValue& v, char* dest) { + DateTimeVal value; + v.to_datetime_val(&value); + *(int64_t*)dest = value.packed_time; + dest += DATETIME_PACKED_TIME_BYTE_SIZE; + *(int*)dest = value.type; + dest += DATETIME_TYPE_BYTE_SIZE; + return dest; +} + +template <> +inline char* Helper::write_to<DecimalV2Value>(const DecimalV2Value& v, char* dest) { + __int128 value = v.value(); + memcpy(dest, &value, DECIMAL_BYTE_SIZE); + dest += DECIMAL_BYTE_SIZE; + return dest; +} + +template <> +inline char* 
Helper::write_to<StringValue>(const StringValue& v, char* dest) { + *(int32_t*)dest = v.len; + dest += 4; + memcpy(dest, v.ptr, v.len); + dest += v.len; + return dest; +} +// write_to end + +template <> +inline int32_t Helper::serialize_size<DateTimeValue>(const DateTimeValue& v) { + return Helper::DATETIME_PACKED_TIME_BYTE_SIZE + Helper::DATETIME_TYPE_BYTE_SIZE; +} + +template <> +inline int32_t Helper::serialize_size<DecimalV2Value>(const DecimalV2Value& v) { + return Helper::DECIMAL_BYTE_SIZE; +} + +template <> +inline int32_t Helper::serialize_size<StringValue>(const StringValue& v) { + return v.len + 4; +} +// serialize_size end + +template <> +inline void Helper::read_from<DateTimeValue>(const char** src, DateTimeValue* result) { + DateTimeVal value; + value.is_null = false; + value.packed_time = *(int64_t*)(*src); + *src += DATETIME_PACKED_TIME_BYTE_SIZE; + value.type = *(int*)(*src); + *src += DATETIME_TYPE_BYTE_SIZE; + *result = DateTimeValue::from_datetime_val(value); +} + +template <> +inline void Helper::read_from<DecimalV2Value>(const char** src, DecimalV2Value* result) { + __int128 v = 0; + memcpy(&v, *src, DECIMAL_BYTE_SIZE); + *src += DECIMAL_BYTE_SIZE; + *result = DecimalV2Value(v); +} + +template <> +inline void Helper::read_from<StringValue>(const char** src, StringValue* result) { + int32_t length = *(int32_t*)(*src); + *src += 4; + *result = StringValue((char*)*src, length); + *src += length; +} +// read_from end + +} // namespace detail + +// Calculate the intersection of two or more bitmaps +// Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...) 
+// Example: intersect_count(user_id, event, 'A', 'B', 'C'), meaning find the intersect count of user_id in all A/B/C 3 bitmaps +// Todo(kks) Use Array type instead of variable arguments +template <typename T> +struct BitmapIntersect { +public: + BitmapIntersect() = default; + + explicit BitmapIntersect(const char* src) { deserialize(src); } + + void add_key(const T key) { + BitmapValue empty_bitmap; + _bitmaps[key] = empty_bitmap; + } + + void update(const T& key, const BitmapValue& bitmap) { + if (_bitmaps.find(key) != _bitmaps.end()) { + _bitmaps[key] |= bitmap; + } + } + + void merge(const BitmapIntersect& other) { + for (auto& kv : other._bitmaps) { + if (_bitmaps.find(kv.first) != _bitmaps.end()) { + _bitmaps[kv.first] |= kv.second; + } else { + _bitmaps[kv.first] = kv.second; + } + } + } + + // intersection + BitmapValue intersect() const { + BitmapValue result; + auto it = _bitmaps.begin(); + result |= it->second; + it++; + for (; it != _bitmaps.end(); it++) { + result &= it->second; + } + return result; + } + + // calculate the intersection for _bitmaps's bitmap values + int64_t intersect_count() const { + if (_bitmaps.empty()) { + return 0; + } + return intersect().cardinality(); + } + + // the serialize size + size_t size() { + size_t size = 4; + for (auto& kv : _bitmaps) { + size += detail::Helper::serialize_size(kv.first); + size += kv.second.getSizeInBytes(); + } + return size; + } + + //must call size() first + void serialize(char* dest) { + char* writer = dest; + *(int32_t*)writer = _bitmaps.size(); + writer += 4; + for (auto& kv : _bitmaps) { + writer = detail::Helper::write_to(kv.first, writer); + kv.second.write(writer); + writer += kv.second.getSizeInBytes(); + } + } + + void deserialize(const char* src) { + const char* reader = src; + int32_t bitmaps_size = *(int32_t*)reader; + reader += 4; + for (int32_t i = 0; i < bitmaps_size; i++) { + T key; + detail::Helper::read_from(&reader, &key); + BitmapValue bitmap(reader); + reader += 
bitmap.getSizeInBytes(); + _bitmaps[key] = bitmap; + } + } + +private: + std::map<T, BitmapValue> _bitmaps; +}; + +} // namespace doris diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 5c4d5c7b36..fc50adca0a 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -40,6 +40,7 @@ set(VEC_FILES aggregate_functions/aggregate_function_percentile_approx.cpp aggregate_functions/aggregate_function_simple_factory.cpp aggregate_functions/aggregate_function_java_udaf.h + aggregate_functions/aggregate_function_orthogonal_bitmap.cpp columns/collator.cpp columns/column.cpp columns/column_array.cpp diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp new file mode 100644 index 0000000000..470a6c8388 --- /dev/null +++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h" + +#include <memory> + +#include "vec/aggregate_functions/aggregate_function_simple_factory.h" +#include "vec/aggregate_functions/helpers.h" +#include "vec/data_types/data_type_string.h" + +namespace doris::vectorized { + +template <template <typename> class Impl> +AggregateFunctionPtr create_aggregate_function_orthogonal(const std::string& name, + const DataTypes& argument_types, + const Array& params, + const bool result_is_nullable) { + if (argument_types.empty()) { + LOG(WARNING) << "Incorrect number of arguments for aggregate function " << name; + return nullptr; + } else if (argument_types.size() == 1) { + // only used at AGGREGATE (merge finalize) for variadic function + // and for orthogonal_bitmap_union_count function + return std::make_shared<AggFunctionOrthBitmapFunc<Impl<StringValue>>>(argument_types); + } else { + const IDataType& argument_type = *argument_types[1]; + AggregateFunctionPtr res(create_with_numeric_type<AggFunctionOrthBitmapFunc, Impl>( + argument_type, argument_types)); + + WhichDataType which(argument_type); + + if (res) { + return res; + } else if (which.is_string_or_fixed_string()) { + return std::make_shared<AggFunctionOrthBitmapFunc<Impl<StringValue>>>(argument_types); + } + LOG(WARNING) << "Incorrect Type " << argument_type.get_name() + << " of arguments for aggregate function " << name; + return nullptr; + } +} + +AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_intersect( + const std::string& name, const DataTypes& argument_types, const Array& parameters, + bool result_is_nullable) { + return create_aggregate_function_orthogonal<AggOrthBitMapIntersect>( + name, argument_types, parameters, result_is_nullable); +} + +AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_intersect_count( + const std::string& name, const DataTypes& argument_types, const Array& parameters, + bool result_is_nullable) { + return 
create_aggregate_function_orthogonal<AggOrthBitMapIntersectCount>( + name, argument_types, parameters, result_is_nullable); +} + +AggregateFunctionPtr create_aggregate_function_intersect_count(const std::string& name, + const DataTypes& argument_types, + const Array& parameters, + bool result_is_nullable) { + return create_aggregate_function_orthogonal<AggIntersectCount>(name, argument_types, parameters, + result_is_nullable); +} + +AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_union_count( + const std::string& name, const DataTypes& argument_types, const Array& parameters, + const bool result_is_nullable) { + return create_aggregate_function_orthogonal<OrthBitmapUnionCountData>( + name, argument_types, parameters, result_is_nullable); +} + +void register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactory& factory) { + factory.register_function("orthogonal_bitmap_intersect", + create_aggregate_function_orthogonal_bitmap_intersect); + + factory.register_function("orthogonal_bitmap_intersect_count", + create_aggregate_function_orthogonal_bitmap_intersect_count); + + factory.register_function("orthogonal_bitmap_union_count", + create_aggregate_function_orthogonal_bitmap_union_count); + + factory.register_function("intersect_count", create_aggregate_function_intersect_count); +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h new file mode 100644 index 0000000000..4f1fb69ec4 --- /dev/null +++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h @@ -0,0 +1,247 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "exprs/bitmap_function.h" +#include "util/bitmap_intersect.h" +#include "util/bitmap_value.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/columns/column_complex.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_vector.h" +#include "vec/common/assert_cast.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_bitmap.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/io/io_helper.h" + +namespace doris::vectorized { + +template <typename T> +struct AggOrthBitmapBaseData { +public: + using ColVecData = std::conditional_t<IsNumber<T>, ColumnVector<T>, ColumnString>; + + void add(const IColumn** columns, size_t row_num) { + const auto& bitmap_col = static_cast<const ColumnBitmap&>(*columns[0]); + const auto& data_col = static_cast<const ColVecData&>(*columns[1]); + const auto& bitmap_value = bitmap_col.get_element(row_num); + + if constexpr (IsNumber<T>) { + bitmap.update(data_col.get_element(row_num), bitmap_value); + } else { + bitmap.update(StringValue(data_col.get_data_at(row_num)), bitmap_value); + } + } + + void init_add_key(const IColumn** columns, size_t row_num, int argument_size) { + if (first_init) { + DCHECK(argument_size > 1); + for (int idx = 2; idx < argument_size; ++idx) { + const auto& col = static_cast<const 
ColVecData&>(*columns[idx]); + if constexpr (IsNumber<T>) { + bitmap.add_key(col.get_element(row_num)); + } else { + bitmap.add_key(StringValue(col.get_data_at(row_num))); + } + } + first_init = false; + } + } + +protected: + doris::BitmapIntersect<T> bitmap; + bool first_init = true; +}; + +template <typename T> +struct AggOrthBitMapIntersect : public AggOrthBitmapBaseData<T> { +public: + static constexpr auto name = "orthogonal_bitmap_intersect"; + + static DataTypePtr get_return_type() { return std::make_shared<DataTypeBitMap>(); } + + void merge(const AggOrthBitMapIntersect& rhs) { + if (rhs.first_init) { + return; + } + result |= rhs.result; + } + + void write(BufferWritable& buf) { + write_binary(AggOrthBitmapBaseData<T>::first_init, buf); + result = AggOrthBitmapBaseData<T>::bitmap.intersect(); + DataTypeBitMap::serialize_as_stream(result, buf); + } + + void read(BufferReadable& buf) { + read_binary(AggOrthBitmapBaseData<T>::first_init, buf); + DataTypeBitMap::deserialize_as_stream(result, buf); + } + + void get(IColumn& to) const { + auto& column = static_cast<ColumnBitmap&>(to); + column.get_data().emplace_back(result); + } + +private: + BitmapValue result; +}; + +template <typename T> +struct AggIntersectCount : public AggOrthBitmapBaseData<T> { +public: + static constexpr auto name = "intersect_count"; + + static DataTypePtr get_return_type() { return std::make_shared<DataTypeInt64>(); } + + void merge(const AggIntersectCount& rhs) { + if (rhs.first_init) { + return; + } + AggOrthBitmapBaseData<T>::bitmap.merge(rhs.bitmap); + } + + void write(BufferWritable& buf) { + write_binary(AggOrthBitmapBaseData<T>::first_init, buf); + std::string data; + data.resize(AggOrthBitmapBaseData<T>::bitmap.size()); + AggOrthBitmapBaseData<T>::bitmap.serialize(data.data()); + write_binary(data, buf); + } + + void read(BufferReadable& buf) { + read_binary(AggOrthBitmapBaseData<T>::first_init, buf); + std::string data; + read_binary(data, buf); + 
AggOrthBitmapBaseData<T>::bitmap.deserialize(data.data()); + } + + void get(IColumn& to) const { + auto& column = static_cast<ColumnVector<Int64>&>(to); + column.get_data().emplace_back(AggOrthBitmapBaseData<T>::bitmap.intersect_count()); + } +}; + +template <typename T> +struct AggOrthBitMapIntersectCount : public AggOrthBitmapBaseData<T> { +public: + static constexpr auto name = "orthogonal_bitmap_intersect_count"; + + static DataTypePtr get_return_type() { return std::make_shared<DataTypeInt64>(); } + + void merge(const AggOrthBitMapIntersectCount& rhs) { + if (rhs.first_init) { + return; + } + result += rhs.result; + } + + void write(BufferWritable& buf) { + write_binary(AggOrthBitmapBaseData<T>::first_init, buf); + result = AggOrthBitmapBaseData<T>::bitmap.intersect_count(); + write_binary(result, buf); + } + + void read(BufferReadable& buf) { + read_binary(AggOrthBitmapBaseData<T>::first_init, buf); + read_binary(result, buf); + } + + void get(IColumn& to) const { + auto& column = static_cast<ColumnVector<Int64>&>(to); + column.get_data().emplace_back(result); + } + +private: + Int64 result = 0; +}; + +template <typename T> +struct OrthBitmapUnionCountData { + static constexpr auto name = "orthogonal_bitmap_union_count"; + + static DataTypePtr get_return_type() { return std::make_shared<DataTypeInt64>(); } + // Here no need doing anything, so only given an function declaration + void init_add_key(const IColumn** columns, size_t row_num, int argument_size) {} + + void add(const IColumn** columns, size_t row_num) { + const auto& column = static_cast<const ColumnBitmap&>(*columns[0]); + value |= column.get_data()[row_num]; + } + void merge(const OrthBitmapUnionCountData& rhs) { result += rhs.result; } + + void write(BufferWritable& buf) { + result = value.cardinality(); + write_binary(result, buf); + } + + void read(BufferReadable& buf) { read_binary(result, buf); } + + void get(IColumn& to) const { + auto& column = static_cast<ColumnVector<Int64>&>(to); + 
column.get_data().emplace_back(result ? result : value.cardinality()); + } + +private: + BitmapValue value; + int64_t result = 0; +}; + +template <typename Impl> +class AggFunctionOrthBitmapFunc final + : public IAggregateFunctionDataHelper<Impl, AggFunctionOrthBitmapFunc<Impl>> { +public: + String get_name() const override { return Impl::name; } + + AggFunctionOrthBitmapFunc(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper<Impl, AggFunctionOrthBitmapFunc<Impl>>(argument_types_, + {}), + _argument_size(argument_types_.size()) {} + + DataTypePtr get_return_type() const override { return Impl::get_return_type(); } + + void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num, + Arena*) const override { + this->data(place).init_add_key(columns, row_num, _argument_size); + this->data(place).add(columns, row_num); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, + Arena*) const override { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { + this->data(const_cast<AggregateDataPtr>(place)).write(buf); + } + + void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, + Arena*) const override { + this->data(place).read(buf); + } + + void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { + this->data(place).get(to); + } + +private: + int _argument_size; +}; +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp index 0afa9b97d3..badf756f8b 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp @@ -46,6 +46,7 @@ void register_aggregate_function_group_concat(AggregateFunctionSimpleFactory& fa void 
register_aggregate_function_percentile(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_percentile_approx(AggregateFunctionSimpleFactory& factory); +void register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactory& factory); AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() { static std::once_flag oc; static AggregateFunctionSimpleFactory instance; @@ -68,6 +69,7 @@ AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() { register_aggregate_function_percentile(instance); register_aggregate_function_percentile_approx(instance); register_aggregate_function_window_funnel(instance); + register_aggregate_function_orthogonal_bitmap(instance); // if you only register function with no nullable, and wants to add nullable automatically, you should place function above this line register_aggregate_function_combinator_null(instance); diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp index ddf8a52ea3..78116bcbed 100644 --- a/be/test/exprs/bitmap_function_test.cpp +++ b/be/test/exprs/bitmap_function_test.cpp @@ -27,6 +27,7 @@ #include "exprs/aggregate_functions.h" #include "exprs/anyval_util.h" #include "testutil/function_utils.h" +#include "util/bitmap_intersect.h" #include "util/bitmap_value.h" #include "util/logging.h" @@ -266,10 +267,10 @@ void test_bitmap_intersect(FunctionContext* ctx, ValType key1, ValType key2) { BitmapIntersect<ValueType> intersect2; for (size_t i = 2; i < const_vals.size(); i++) { ValType* arg = reinterpret_cast<ValType*>(const_vals[i]); - intersect2.add_key(detail::get_val<ValType, ValueType>(*arg)); + intersect2.add_key(detail::Helper::get_val<ValType, ValueType>(*arg)); } - intersect2.update(detail::get_val<ValType, ValueType>(key1), bitmap1); - intersect2.update(detail::get_val<ValType, ValueType>(key2), bitmap2); + 
intersect2.update(detail::Helper::get_val<ValType, ValueType>(key1), bitmap1); + intersect2.update(detail::Helper::get_val<ValType, ValueType>(key2), bitmap2); StringVal expected = convert_bitmap_intersect_to_string(ctx, intersect2); EXPECT_EQ(expected, intersect1); diff --git a/docs/.vuepress/sidebar/en/docs.js b/docs/.vuepress/sidebar/en/docs.js index db754625b0..f7ad6c58df 100644 --- a/docs/.vuepress/sidebar/en/docs.js +++ b/docs/.vuepress/sidebar/en/docs.js @@ -442,6 +442,7 @@ module.exports = [ "bitmap_xor", "to_bitmap", "bitmap_max", + "intersect_count", "orthogonal_bitmap_intersect", "orthogonal_bitmap_intersect_count", "orthogonal_bitmap_union_count", diff --git a/docs/.vuepress/sidebar/zh-CN/docs.js b/docs/.vuepress/sidebar/zh-CN/docs.js index 5d18239c19..6fe177719f 100644 --- a/docs/.vuepress/sidebar/zh-CN/docs.js +++ b/docs/.vuepress/sidebar/zh-CN/docs.js @@ -442,6 +442,7 @@ module.exports = [ "bitmap_xor", "to_bitmap", "bitmap_max", + "intersect_count", "orthogonal_bitmap_intersect", "orthogonal_bitmap_intersect_count", "orthogonal_bitmap_union_count", diff --git a/docs/en/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md new file mode 100644 index 0000000000..938865d3ac --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md @@ -0,0 +1,57 @@ +--- +{ +"title": "intersect_count", +"language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## intersect_count +### description +#### Syntax + +`BIGINT INTERSECT_COUNT(bitmap_column, column_to_filter, filter_values)` +Calculates the number of elements in the intersection of two or more bitmaps +Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...) +Example: intersect_count(user_id, event, 'A', 'B', 'C') returns the number of user_id values that appear in all three bitmaps selected by events A, B and C + +### example + +``` +MySQL [test_query_qa]> select dt,bitmap_to_string(user_id) from pv_bitmap where dt in (3,4); ++------+-----------------------------+ +| dt | bitmap_to_string(`user_id`) | ++------+-----------------------------+ +| 4 | 1,2,3 | +| 3 | 1,2,3,4,5 | ++------+-----------------------------+ +2 rows in set (0.012 sec) + +MySQL [test_query_qa]> select intersect_count(user_id,dt,3,4) from pv_bitmap; ++----------------------------------------+ +| intersect_count(`user_id`, `dt`, 3, 4) | ++----------------------------------------+ +| 3 | ++----------------------------------------+ +1 row in set (0.014 sec) +``` + +### keywords + + INTERSECT_COUNT,BITMAP diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md new file mode 100644 index 0000000000..41f58f6da0 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md @@ -0,0 +1,56 @@ +--- +{ +"title": "intersect_count", +"language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements.
See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## intersect_count +### description +#### Syntax + +`BIGINT INTERSECT_COUNT(bitmap_column, column_to_filter, filter_values)` +聚合函数,求bitmap交集大小的函数, 不要求数据分布正交 +第一个参数是Bitmap列,第二个参数是用来过滤的维度列,第三个参数是变长参数,含义是过滤维度列的不同取值 + +### example + +``` +MySQL [test_query_qa]> select dt,bitmap_to_string(user_id) from pv_bitmap where dt in (3,4); ++------+-----------------------------+ +| dt | bitmap_to_string(`user_id`) | ++------+-----------------------------+ +| 4 | 1,2,3 | +| 3 | 1,2,3,4,5 | ++------+-----------------------------+ +2 rows in set (0.012 sec) + +MySQL [test_query_qa]> select intersect_count(user_id,dt,3,4) from pv_bitmap; ++----------------------------------------+ +| intersect_count(`user_id`, `dt`, 3, 4) | ++----------------------------------------+ +| 3 | ++----------------------------------------+ +1 row in set (0.014 sec) +``` + +### keywords + + INTERSECT_COUNT,BITMAP diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 594bd50c09..a9ca2504a1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -559,20 +559,23 @@ public class FunctionCallExpr extends Expr { throw new
AnalysisException("BITMAP_UNION_INT params only support TINYINT or SMALLINT or INT"); } - if (fnName.getFunction().equalsIgnoreCase(FunctionSet.INTERSECT_COUNT)) { + if (fnName.getFunction().equalsIgnoreCase(FunctionSet.INTERSECT_COUNT) || fnName.getFunction() + .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT) || fnName.getFunction() + .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT_COUNT)) { if (children.size() <= 2) { - throw new AnalysisException("intersect_count(bitmap_column, column_to_filter, filter_values) " + throw new AnalysisException(fnName + "(bitmap_column, column_to_filter, filter_values) " + "function requires at least three parameters"); } Type inputType = getChild(0).getType(); if (!inputType.isBitmapType()) { - throw new AnalysisException("intersect_count function first argument should be of BITMAP type, but was " + inputType); + throw new AnalysisException( + fnName + "function first argument should be of BITMAP type, but was " + inputType); } for (int i = 2; i < children.size(); i++) { if (!getChild(i).isConstant()) { - throw new AnalysisException("intersect_count function filter_values arg must be constant"); + throw new AnalysisException(fnName + " function filter_values arg must be constant"); } } return; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java index 98d0488614..efa1c0c2f1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java @@ -46,8 +46,11 @@ public class AggregateFunction extends Function { private static final Logger LOG = LogManager.getLogger(AggregateFunction.class); - public static ImmutableSet<String> NOT_NULLABLE_AGGREGATE_FUNCTION_NAME_SET = - ImmutableSet.of("row_number", "rank", "dense_rank", "multi_distinct_count", "multi_distinct_sum", "hll_union_agg", "hll_union", "bitmap_union", 
"bitmap_intersect", FunctionSet.COUNT, "approx_count_distinct", "ndv", FunctionSet.BITMAP_UNION_INT, FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize", FunctionSet.WINDOW_FUNNEL); + public static ImmutableSet<String> NOT_NULLABLE_AGGREGATE_FUNCTION_NAME_SET = ImmutableSet.of("row_number", "rank", + "dense_rank", "multi_distinct_count", "multi_distinct_sum", "hll_union_agg", "hll_union", "bitmap_union", + "bitmap_intersect", "orthogonal_bitmap_intersect", "orthogonal_bitmap_intersect_count", "intersect_count", + "orthogonal_bitmap_union_count", FunctionSet.COUNT, "approx_count_distinct", "ndv", + FunctionSet.BITMAP_UNION_INT, FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize", FunctionSet.WINDOW_FUNNEL); public static ImmutableSet<String> ALWAYS_NULLABLE_AGGREGATE_FUNCTION_NAME_SET = ImmutableSet.of("stddev_samp", "variance_samp", "var_samp", "percentile_approx"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 88808abe89..5b263589c3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1684,6 +1684,14 @@ public class FunctionSet<T> { BITMAP_INTERSECT_FINALIZE_SYMBOL.get(t), true, false, true)); + // VEC_INTERSECT_COUNT + addBuiltin( + AggregateFunction.createBuiltin(INTERSECT_COUNT, Lists.newArrayList(Type.BITMAP, t, t), Type.BIGINT, + Type.VARCHAR, true, BITMAP_INTERSECT_INIT_SYMBOL.get(t), + BITMAP_INTERSECT_UPDATE_SYMBOL.get(t), BITMAP_INTERSECT_MERGE_SYMBOL.get(t), + BITMAP_INTERSECT_SERIALIZE_SYMBOL.get(t), null, null, + BITMAP_INTERSECT_FINALIZE_SYMBOL.get(t), true, false, true, true)); + // HLL_UNION_AGG addBuiltin(AggregateFunction.createBuiltin("hll_union_agg", Lists.newArrayList(t), Type.BIGINT, Type.VARCHAR, @@ -2042,6 +2050,15 @@ public class FunctionSet<T> { "", 
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_count_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE", true, false, true)); + + //vec ORTHOGONAL_BITMAP_INTERSECT and ORTHOGONAL_BITMAP_INTERSECT_COUNT + addBuiltin( + AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_INTERSECT, Lists.newArrayList(Type.BITMAP, t, t), + Type.BITMAP, Type.BITMAP, true, "", "", "", "", "", "", "", true, false, true, true)); + + addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_INTERSECT_COUNT, + Lists.newArrayList(Type.BITMAP, t, t), Type.BIGINT, Type.BITMAP, true, "", "", "", "", "", "", "", + true, false, true, true)); } // bitmap addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION, Lists.newArrayList(Type.BITMAP), @@ -2100,6 +2117,10 @@ public class FunctionSet<T> { null, "_ZN5doris15BitmapFunctions32orthogonal_bitmap_count_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE", true, true, true)); + // ORTHOGONAL_BITMAP_UNION_COUNT vectorized + addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_UNION_COUNT, Lists.newArrayList(Type.BITMAP), + Type.BIGINT, Type.BITMAP, "", "", "", "", null, null, "", true, true, true, true)); + // TODO(ml): supply function symbol addBuiltin(AggregateFunction.createBuiltin(BITMAP_INTERSECT, Lists.newArrayList(Type.BITMAP), Type.BITMAP, Type.VARCHAR, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org