This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.0 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 061e344cff0d0f6f4bc815713163057ac35fc7f0 Author: Pxl <952130...@qq.com> AuthorDate: Thu Feb 24 11:06:27 2022 +0800 [Bug][Vectorized] fix bitmap_min(empty) not return null (#8190) --- be/src/util/bitmap_value.h | 101 ++++++++++----------- .../vec/functions/function_always_not_nullable.h | 3 - be/src/vec/functions/function_bitmap.cpp | 51 +---------- be/src/vec/functions/function_bitmap_min_or_max.h | 100 ++++++++++++++++++++ be/src/vec/functions/function_convert_tz.h | 3 - .../function_date_or_datetime_computation.cpp | 6 +- be/src/vec/utils/template_helpers.hpp | 3 - gensrc/script/doris_builtins_functions.py | 4 +- 8 files changed, 157 insertions(+), 114 deletions(-) diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 1ac8a9d..0dd6031 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1567,7 +1567,7 @@ public: return true; } - doris_udf::BigIntVal minimum() { + doris_udf::BigIntVal minimum() const { switch (_type) { case SINGLE: return doris_udf::BigIntVal(_sv); @@ -1612,7 +1612,7 @@ public: return ss.str(); } - doris_udf::BigIntVal maximum() { + doris_udf::BigIntVal maximum() const { switch (_type) { case SINGLE: return doris_udf::BigIntVal(_sv); @@ -1691,7 +1691,7 @@ public: } return count; } - + void clear() { _type = EMPTY; _bitmap.clear(); @@ -1736,31 +1736,28 @@ private: // BitmapValueIterator end = bitmap_value.end(); // for (; iter != end(); ++iter) { // uint64_t v = *iter; -// ... do something with "v" ... +// ... do something with "v" ... // } class BitmapValueIterator { public: - BitmapValueIterator(const BitmapValue& bitmap, bool end = false) - : _bitmap(bitmap), _end(end) { - + BitmapValueIterator(const BitmapValue& bitmap, bool end = false) : _bitmap(bitmap), _end(end) { switch (_bitmap._type) { - case BitmapValue::BitmapDataType::EMPTY: - _end = true; - break; - case BitmapValue::BitmapDataType::SINGLE: - _sv = _bitmap._sv; - break; - case BitmapValue::BitmapDataType::BITMAP: - _iter = new detail::Roaring64MapSetBitForwardIterator(_bitmap._bitmap, _end); - break; - default: - CHECK(false) << _bitmap._type; + case BitmapValue::BitmapDataType::EMPTY: + _end = true; + break; + case BitmapValue::BitmapDataType::SINGLE: + _sv = _bitmap._sv; + break; + case BitmapValue::BitmapDataType::BITMAP: + _iter = new detail::Roaring64MapSetBitForwardIterator(_bitmap._bitmap, _end); + break; + default: + CHECK(false) << _bitmap._type; } } BitmapValueIterator(const BitmapValueIterator& other) - : _bitmap(other._bitmap), _iter(other._iter), _sv(other._sv), _end(other._end) { - } + : _bitmap(other._bitmap), _iter(other._iter), _sv(other._sv), _end(other._end) {} ~BitmapValueIterator() { if (_iter != nullptr) { @@ -1772,12 +1769,12 @@ public: uint64_t operator*() const { CHECK(!_end) << "should not get value of end iterator"; switch (_bitmap._type) { - case BitmapValue::BitmapDataType::SINGLE: - return _sv; - case BitmapValue::BitmapDataType::BITMAP: - return *(*_iter); - default: - CHECK(false) << _bitmap._type; + case BitmapValue::BitmapDataType::SINGLE: + return _sv; + case BitmapValue::BitmapDataType::BITMAP: + return *(*_iter); + default: + CHECK(false) << _bitmap._type; } return 0; } @@ -1785,14 +1782,14 @@ public: BitmapValueIterator& operator++() { // ++i, must returned inc. value CHECK(!_end) << "should not forward when iterator ends"; switch (_bitmap._type) { - case BitmapValue::BitmapDataType::SINGLE: - _end = true; - break; - case BitmapValue::BitmapDataType::BITMAP: - ++(*_iter); - break; - default: - CHECK(false) << _bitmap._type; + case BitmapValue::BitmapDataType::SINGLE: + _end = true; + break; + case BitmapValue::BitmapDataType::BITMAP: + ++(*_iter); + break; + default: + CHECK(false) << _bitmap._type; } return *this; } @@ -1801,14 +1798,14 @@ public: CHECK(!_end) << "should not forward when iterator ends"; BitmapValueIterator orig(*this); switch (_bitmap._type) { - case BitmapValue::BitmapDataType::SINGLE: - _end = true; - break; - case BitmapValue::BitmapDataType::BITMAP: - ++(*_iter); - break; - default: - CHECK(false) << _bitmap._type; + case BitmapValue::BitmapDataType::SINGLE: + _end = true; + break; + case BitmapValue::BitmapDataType::BITMAP: + ++(*_iter); + break; + default: + CHECK(false) << _bitmap._type; } return orig; } @@ -1817,21 +1814,19 @@ public: if (_end && other._end) return true; switch (_bitmap._type) { - case BitmapValue::BitmapDataType::EMPTY: - return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY; - case BitmapValue::BitmapDataType::SINGLE: - return _sv == other._sv; - case BitmapValue::BitmapDataType::BITMAP: - return *_iter == *(other._iter); - default: - CHECK(false) << _bitmap._type; + case BitmapValue::BitmapDataType::EMPTY: + return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY; + case BitmapValue::BitmapDataType::SINGLE: + return _sv == other._sv; + case BitmapValue::BitmapDataType::BITMAP: + return *_iter == *(other._iter); + default: + CHECK(false) << _bitmap._type; } return false; } - bool operator!=(const BitmapValueIterator& other) const { - return !(*this == other); - } + bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); } private: const BitmapValue& _bitmap; diff --git a/be/src/vec/functions/function_always_not_nullable.h b/be/src/vec/functions/function_always_not_nullable.h index baafda6..76b2b2a 100644 --- a/be/src/vec/functions/function_always_not_nullable.h +++ b/be/src/vec/functions/function_always_not_nullable.h @@ -14,9 +14,6 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionStringOrArrayToT.h -// and modified by Doris #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 5bdbfd8..1fe1146 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -21,6 +21,7 @@ #include "gutil/strings/numbers.h" #include "gutil/strings/split.h" #include "util/string_parser.hpp" +#include "vec/functions/function_bitmap_min_or_max.h" #include "vec/functions/function_const.h" #include "vec/functions/function_string.h" #include "vec/functions/function_totype.h" @@ -285,50 +286,6 @@ struct BitmapHasAll { } }; -struct NameBitmapMin { - static constexpr auto name = "bitmap_min"; -}; - -struct BitmapMin { - using ReturnType = DataTypeInt64; - static constexpr auto TYPE_INDEX = TypeIndex::BitMap; - using Type = DataTypeBitMap::FieldType; - using ReturnColumnType = ColumnVector<Int64>; - using ReturnColumnContainer = ColumnVector<Int64>::Container; - - static Status vector(const std::vector<BitmapValue>& data, ReturnColumnContainer& res) { - size_t size = data.size(); - res.reserve(size); - for (size_t i = 0; i < size; ++i) { - auto min = const_cast<std::vector<BitmapValue>&>(data)[i].minimum(); - res.push_back(min.val); - } - return Status::OK(); - } -}; - -struct NameBitmapMax { - static constexpr auto name = "bitmap_max"; -}; - -struct BitmapMax { - using ReturnType = DataTypeInt64; - static constexpr auto TYPE_INDEX = TypeIndex::BitMap; - using Type = DataTypeBitMap::FieldType; - using ReturnColumnType = ColumnVector<Int64>; - using ReturnColumnContainer = ColumnVector<Int64>::Container; - - static Status vector(const std::vector<BitmapValue>& data, ReturnColumnContainer& res) { - size_t size = data.size(); - res.reserve(size); - for (size_t i = 0; i < size; ++i) { - auto max = const_cast<std::vector<BitmapValue>&>(data)[i].maximum(); - res.push_back(max.val); - } - return Status::OK(); - } -}; - struct NameBitmapToString { static constexpr auto name = "bitmap_to_string"; }; @@ -475,8 +432,10 @@ using FunctionToBitmap = FunctionUnaryToType<ToBitmapImpl, NameToBitmap>; using FunctionBitmapFromString = FunctionUnaryToType<BitmapFromString, NameBitmapFromString>; using FunctionBitmapHash = FunctionUnaryToType<BitmapHash, NameBitmapHash>; using FunctionBitmapCount = FunctionUnaryToType<BitmapCount, NameBitmapCount>; -using FunctionBitmapMin = FunctionUnaryToType<BitmapMin, NameBitmapMin>; -using FunctionBitmapMax = FunctionUnaryToType<BitmapMax, NameBitmapMax>; + +using FunctionBitmapMin = FunctionBitmapSingle<FunctionBitmapMinImpl>; +using FunctionBitmapMax = FunctionBitmapSingle<FunctionBitmapMaxImpl>; + using FunctionBitmapToString = FunctionUnaryToType<BitmapToString, NameBitmapToString>; using FunctionBitmapNot = FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapNot, NameBitmapNot>; diff --git a/be/src/vec/functions/function_bitmap_min_or_max.h b/be/src/vec/functions/function_bitmap_min_or_max.h new file mode 100644 index 0000000..10ab59b --- /dev/null +++ b/be/src/vec/functions/function_bitmap_min_or_max.h @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "util/bitmap_value.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" + +namespace doris::vectorized { + +struct FunctionBitmapMinImpl { + static constexpr auto name = "bitmap_min"; + static Int64 calculate(const BitmapValue& value) { return value.minimum().val; } +}; + +struct FunctionBitmapMaxImpl { + static constexpr auto name = "bitmap_max"; + static Int64 calculate(const BitmapValue& value) { return value.maximum().val; } +}; + +template <typename Impl> +class FunctionBitmapSingle : public IFunction { +public: + static constexpr auto name = Impl::name; + + static FunctionPtr create() { return std::make_shared<FunctionBitmapSingle>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeInt64>()); + } + + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + auto result_column = ColumnInt64::create(); + auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); + + ColumnPtr argument_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_column)) { + // Danger: Here must dispose the null map data first! Because + // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem + // of column nullable mem of null map + VectorizedUtils::update_null_map(result_null_map_column->get_data(), + nullable->get_null_map_data()); + argument_column = nullable->get_nested_column_ptr(); + } + + execute_straight(assert_cast<const ColumnBitmap*>(argument_column.get()), + assert_cast<ColumnInt64*>(result_column.get()), + assert_cast<ColumnUInt8*>(result_null_map_column.get())->get_data(), + input_rows_count); + + block.get_by_position(result).column = + ColumnNullable::create(std::move(result_column), std::move(result_null_map_column)); + return Status::OK(); + } + +private: + void execute_straight(const ColumnBitmap* date_column, ColumnInt64* result_column, + NullMap& result_null_map, size_t input_rows_count) { + for (size_t i = 0; i < input_rows_count; i++) { + if (result_null_map[i]) { + result_column->insert_default(); + continue; + } + + BitmapValue value = date_column->get_element(i); + if (!value.cardinality()) { + result_null_map[i] = true; + result_column->insert_default(); + continue; + } + + result_column->insert(Impl::calculate(value)); + } + } +}; + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index 6666729..7af61b3 100644 --- a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -14,9 +14,6 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionStringOrArrayToT.h -// and modified by Doris #include "vec/columns/columns_number.h" #include "vec/common/string_ref.h" diff --git a/be/src/vec/functions/function_date_or_datetime_computation.cpp b/be/src/vec/functions/function_date_or_datetime_computation.cpp index 8074573..7469dba 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.cpp +++ b/be/src/vec/functions/function_date_or_datetime_computation.cpp @@ -102,9 +102,8 @@ void register_function_date_time_computation(SimpleFunctionFactory& factory) { factory.register_function<FunctionAddMinutes>(); factory.register_function<FunctionAddHours>(); factory.register_function<FunctionAddDays>(); - factory.register_alias("days_add", "add_dates"); factory.register_alias("days_add", "date_add"); - factory.register_alias("days_add", "add_date"); + factory.register_alias("days_add", "adddate"); factory.register_function<FunctionAddWeeks>(); factory.register_function<FunctionAddMonths>(); factory.register_function<FunctionAddYears>(); @@ -114,9 +113,8 @@ void register_function_date_time_computation(SimpleFunctionFactory& factory) { factory.register_function<FunctionSubMinutes>(); factory.register_function<FunctionSubHours>(); factory.register_function<FunctionSubDays>(); - factory.register_alias("days_sub", "subdate"); factory.register_alias("days_sub", "date_sub"); - factory.register_alias("days_sub", "sub_date"); + factory.register_alias("days_sub", "subdate"); factory.register_function<FunctionSubMonths>(); factory.register_function<FunctionSubYears>(); factory.register_function<FunctionSubQuarters>(); diff --git a/be/src/vec/utils/template_helpers.hpp b/be/src/vec/utils/template_helpers.hpp index 11fc7eb..10aac23 100644 --- a/be/src/vec/utils/template_helpers.hpp +++ b/be/src/vec/utils/template_helpers.hpp @@ -14,9 +14,6 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/Helpers.h -// and modified by Doris #pragma once diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index a75a8cd..8e3cec7 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1209,10 +1209,10 @@ visible_functions = [ '', '', 'vec', ''], [['bitmap_min'], 'BIGINT', ['BITMAP'], '_ZN5doris15BitmapFunctions10bitmap_minEPN9doris_udf15FunctionContextERKNS1_9StringValE', - '', '', 'vec', ''], + '', '', 'vec', 'ALWAYS_NULLABLE'], [['bitmap_max'], 'BIGINT', ['BITMAP'], '_ZN5doris15BitmapFunctions10bitmap_maxEPN9doris_udf15FunctionContextERKNS1_9StringValE', - '', '', 'vec', ''], + '', '', 'vec', 'ALWAYS_NULLABLE'], [['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], '_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'], --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org