This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch vectorized in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit ebdc0bac985b488fda6f1a29d1c09ceedfe5315a Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Thu Jan 6 19:33:25 2022 +0800 [Function][Vec] add function coalesce (#7632) --- be/src/vec/CMakeLists.txt | 1 + be/src/vec/functions/function_coalesce.cpp | 143 +++++++++++++++++++++ be/src/vec/functions/simple_function_factory.h | 2 + docs/.vuepress/sidebar/en.js | 1 + docs/.vuepress/sidebar/zh-CN.js | 1 + .../sql-functions/string-functions/coalesce.md | 62 +++++++++ .../sql-functions/string-functions/coalesce.md | 63 +++++++++ gensrc/script/doris_builtins_functions.py | 28 ++-- 8 files changed, 287 insertions(+), 14 deletions(-) diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 71efde5..01c69eb 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -133,6 +133,7 @@ set(VEC_FILES functions/function_ifnull.cpp functions/nullif.cpp functions/random.cpp + functions/function_coalesce.cpp functions/function_date_or_datetime_computation.cpp functions/function_date_or_datetime_to_string.cpp functions/function_datetime_string_to_string.cpp diff --git a/be/src/vec/functions/function_coalesce.cpp b/be/src/vec/functions/function_coalesce.cpp new file mode 100644 index 0000000..65d544c --- /dev/null +++ b/be/src/vec/functions/function_coalesce.cpp @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "udf/udf.h" +#include "vec/data_types/data_type_nothing.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/get_least_supertype.h" +#include "vec/functions/function_helpers.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" + +namespace doris::vectorized { +class FunctionCoalesce : public IFunction { +public: + static constexpr auto name = "coalesce"; + + static FunctionPtr create() { return std::make_shared<FunctionCoalesce>(); } + + String get_name() const override { return name; } + + bool use_default_implementation_for_constants() const override { return false; } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool is_variadic() const override { return true; } + + size_t get_number_of_arguments() const override { return 0; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + for (const auto& arg : arguments) { + if (!arg->is_nullable()) { + return arg; + } + } + return arguments[0]; + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + DCHECK_GE(arguments.size(), 1); + ColumnNumbers filtered_args; + filtered_args.reserve(arguments.size()); + for (const auto& arg : arguments) { + const auto& type = block.get_by_position(arg).type; + if (type->only_null()) { + continue; + } + filtered_args.push_back(arg); + if (!type->is_nullable()) { + break; + } + } + + size_t remaining_rows = input_rows_count; + size_t 
argument_size = filtered_args.size(); + std::vector<int> record_idx(input_rows_count, -1); //used to save column idx + MutableColumnPtr result_column; + + DataTypePtr type = block.get_by_position(result).type; + if (!type->is_nullable()) { + result_column = type->create_column(); + } else { + result_column = remove_nullable(type)->create_column(); + } + + result_column->reserve(input_rows_count); + auto return_type = std::make_shared<DataTypeUInt8>(); + auto null_map = ColumnUInt8::create(input_rows_count, 1); + auto& null_map_data = null_map->get_data(); + ColumnPtr argument_columns[argument_size]; + + for (size_t i = 0; i < argument_size; ++i) { + block.get_by_position(filtered_args[i]).column = + block.get_by_position(filtered_args[i]) + .column->convert_to_full_column_if_const(); + argument_columns[i] = block.get_by_position(filtered_args[i]).column; + if (auto* nullable = check_and_get_column<const ColumnNullable>(*argument_columns[i])) { + argument_columns[i] = nullable->get_nested_column_ptr(); + } + } + + for (size_t i = 0; i < argument_size && remaining_rows; ++i) { + const ColumnsWithTypeAndName is_not_null_col {block.get_by_position(filtered_args[i])}; + Block temporary_block(is_not_null_col); + temporary_block.insert(ColumnWithTypeAndName {nullptr, return_type, ""}); + auto func_is_not_null = SimpleFunctionFactory::instance().get_function("is_not_null_pred", is_not_null_col, return_type); + func_is_not_null->execute(context, temporary_block, {0}, 1, input_rows_count); + + auto res_column = std::move(*temporary_block.get_by_position(1).column->convert_to_full_column_if_const()).mutate(); + auto& res_map = assert_cast<ColumnVector<UInt8>*>(res_column.get())->get_data(); + auto* res = res_map.data(); + + //TODO: if need to imporve performance in the feature, here it's could SIMD + for (size_t j = 0; j < input_rows_count && remaining_rows; ++j) { + if (res[j] && null_map_data[j]) { + null_map_data[j] = 0; + remaining_rows--; + record_idx[j] = i; + } + } + } 
+ //TODO: According to the record results, fill in result one by one, + //that's fill in result use different methods for different types, + //because now the string types does not support random position writing, + //But other type could, so could check one column, and fill the not null value in result column, + //and then next column, this method may result in higher CPU cache + for (int row = 0; row < input_rows_count; ++row) { + if (record_idx[row] == -1) { + result_column->insert_default(); + } else { + result_column->insert_from(*argument_columns[record_idx[row]].get(), row); + } + } + + if (type->is_nullable()) { + block.replace_by_position(result, ColumnNullable::create(std::move(result_column), std::move(null_map))); + } else { + block.replace_by_position(result, std::move(result_column)); + } + + return Status::OK(); + } +}; + +void register_function_coalesce(SimpleFunctionFactory& factory) { + factory.register_function<FunctionCoalesce>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index c016d35..1d064bb 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -64,6 +64,7 @@ void register_function_function_ifnull(SimpleFunctionFactory& factory); void register_function_like(SimpleFunctionFactory& factory); void register_function_regexp(SimpleFunctionFactory& factory); void register_function_random(SimpleFunctionFactory& factory); +void register_function_coalesce(SimpleFunctionFactory& factory); class SimpleFunctionFactory { using Creator = std::function<FunctionBuilderPtr()>; using FunctionCreators = std::unordered_map<std::string, Creator>; @@ -176,6 +177,7 @@ public: register_function_like(instance); register_function_regexp(instance); register_function_random(instance); + register_function_coalesce(instance); }); return instance; } diff --git a/docs/.vuepress/sidebar/en.js 
b/docs/.vuepress/sidebar/en.js index 4a12a28..4adaa79 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -330,6 +330,7 @@ module.exports = [ "ascii", "bit_length", "char_length", + "coalesce", "concat", "concat_ws", "ends_with", diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index b47618a..fdd2e01 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -334,6 +334,7 @@ module.exports = [ "ascii", "bit_length", "char_length", + "coalesce", "concat", "concat_ws", "ends_with", diff --git a/docs/en/sql-reference/sql-functions/string-functions/coalesce.md b/docs/en/sql-reference/sql-functions/string-functions/coalesce.md new file mode 100644 index 0000000..23fef73 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/string-functions/coalesce.md @@ -0,0 +1,62 @@ +--- +{ + "title": "coalesce", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# coalesce +## Description +### Syntax + +`VARCHAR coalesce(VARCHAR, ...)` +`...` +`INT coalesce(INT, ...)` + +` coalesce ` function will return the first not null value. 
If all of the values are null, it returns null. + +## example + +``` +MySQL> select coalesce(1,null,2); ++----------------------+ +| coalesce(1, NULL, 2) | ++----------------------+ +| 1 | ++----------------------+ + +MySQL> select coalesce(null,"asd",1); ++--------------------------+ +| coalesce(NULL, 'asd', 1) | ++--------------------------+ +| asd | ++--------------------------+ + +MySQL> select coalesce(null,null,null); ++----------------------------+ +| coalesce(NULL, NULL, NULL) | ++----------------------------+ +| NULL | ++----------------------------+ +``` +## keyword +coalesce diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/coalesce.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/coalesce.md new file mode 100644 index 0000000..0ea6c2a --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/coalesce.md @@ -0,0 +1,63 @@ +--- +{ + "title": "coalesce", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. 
+--> + +# coalesce +## description +### Syntax + +`VARCHAR coalesce(VARCHAR, ...)` +`...` +`INT coalesce(INT, ...)` + +`coalesce`函数会返回第一个非NULL的值,若全部为NULL,则返回NULL + +## example + +``` +MySQL> select coalesce(1,null,2); ++----------------------+ +| coalesce(1, NULL, 2) | ++----------------------+ +| 1 | ++----------------------+ + +MySQL> select coalesce(null,"asd",1); ++--------------------------+ +| coalesce(NULL, 'asd', 1) | ++--------------------------+ +| asd | ++--------------------------+ + +MySQL> select coalesce(null,null,null); ++----------------------------+ +| coalesce(NULL, NULL, NULL) | ++----------------------------+ +| NULL | ++----------------------------+ + +``` +## keyword +coalesce diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 5027627..51ecc89 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -805,21 +805,21 @@ visible_functions = [ [['ifnull'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], '', '', '', 'vec', 'CUSTOM'], [['ifnull'], 'STRING', ['STRING', 'STRING'], '', '', '', 'vec', 'CUSTOM'], - [['coalesce'], 'BOOLEAN', ['BOOLEAN', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'TINYINT', ['TINYINT', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'SMALLINT', ['SMALLINT', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'INT', ['INT', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'BIGINT', ['BIGINT', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'LARGEINT', ['LARGEINT', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'FLOAT', ['FLOAT', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'DOUBLE', ['DOUBLE', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'DATETIME', ['DATETIME', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'DATE', ['DATE', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'DECIMALV2', ['DECIMALV2', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'BITMAP', ['BITMAP', '...'], '', '', 
'', '', 'CUSTOM'], + [['coalesce'], 'BOOLEAN', ['BOOLEAN', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'TINYINT', ['TINYINT', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'SMALLINT', ['SMALLINT', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'INT', ['INT', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'BIGINT', ['BIGINT', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'LARGEINT', ['LARGEINT', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'FLOAT', ['FLOAT', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'DOUBLE', ['DOUBLE', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'DATETIME', ['DATETIME', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'DATE', ['DATE', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'DECIMALV2', ['DECIMALV2', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'BITMAP', ['BITMAP', '...'], '', '', '', 'vec', 'CUSTOM'], # The priority of varchar should be lower than decimal in IS_SUPERTYPE_OF mode. - [['coalesce'], 'VARCHAR', ['VARCHAR', '...'], '', '', '', '', 'CUSTOM'], - [['coalesce'], 'STRING', ['STRING', '...'], '', '', '', '', 'CUSTOM'], + [['coalesce'], 'VARCHAR', ['VARCHAR', '...'], '', '', '', 'vec', 'CUSTOM'], + [['coalesce'], 'STRING', ['STRING', '...'], '', '', '', 'vec', 'CUSTOM'], [['esquery'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'], '_ZN5doris11ESFunctions5matchEPN' --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org