This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch vectorized
in repository

commit f3ce1cba69aa27f8ae0af63451bab41d5048c367
Author: HappenLee <>
AuthorDate: Wed Jan 12 13:20:48 2022 +0800

    [Vectorized][Enhancement] use SIMD to speed up coalesce and is_not_null 
function (#7722)
    Co-authored-by: lihaopeng <>
 be/src/vec/functions/function_coalesce.cpp     | 210 ++++++++++++++++++++-----
 be/src/vec/functions/is_not_null.cpp           |   4 +-
 be/src/vec/functions/simple_function_factory.h |   5 +-
 be/test/vec/function/function_string_test.cpp  |  42 +++++
 4 files changed, 214 insertions(+), 47 deletions(-)

diff --git a/be/src/vec/functions/function_coalesce.cpp 
index 65d544c..99b6110 100644
--- a/be/src/vec/functions/function_coalesce.cpp
+++ b/be/src/vec/functions/function_coalesce.cpp
@@ -28,6 +28,8 @@ class FunctionCoalesce : public IFunction {
     static constexpr auto name = "coalesce";
+    mutable FunctionBasePtr func_is_not_null;
     static FunctionPtr create() { return std::make_shared<FunctionCoalesce>(); 
     String get_name() const override { return name; }
@@ -41,47 +43,70 @@ public:
     size_t get_number_of_arguments() const override { return 0; }
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        DataTypePtr res;
         for (const auto& arg : arguments) {
             if (!arg->is_nullable()) {
-                return arg;
+                res = arg;
+                break;
-        return arguments[0];
+        res = res ? res : arguments[0];
+        const ColumnsWithTypeAndName is_not_null_col{
+                {nullptr, make_nullable(res), ""}
+        };
+        func_is_not_null = SimpleFunctionFactory::instance().
+                get_function("is_not_null_pred", is_not_null_col, 
+        return res;
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) override {
         DCHECK_GE(arguments.size(), 1);
+        DataTypePtr result_type = block.get_by_position(result).type;
         ColumnNumbers filtered_args;
-        for (const auto& arg : arguments) {
-            const auto& type = block.get_by_position(arg).type;
-            if (type->only_null()) {
-                continue;
-            }
-            filtered_args.push_back(arg);
-            if (!type->is_nullable()) {
-                break;
+        for (size_t i = 0; i < arguments.size(); ++i) {
+            const auto& arg_type = block.get_by_position(arguments[i]).type;
+            filtered_args.push_back(arguments[i]);
+            if (!arg_type->is_nullable()) {
+                if (i == 0) { // if the first column is not nullable, return it directly
+                    block.get_by_position(result).column = 
+                    return Status::OK();
+                } else {
+                    break;
+                }
         size_t remaining_rows = input_rows_count;
         size_t argument_size = filtered_args.size();
-        std::vector<int> record_idx(input_rows_count, -1); //used to save 
column idx
+        std::vector<uint32_t> record_idx(input_rows_count, 0); //used to save 
column idx: for each row, the index of the argument column that supplies its result
+        std::vector<uint8_t> filled_flags(input_rows_count, 0); //used to save 
filled flags, which record whether each row has already been filled with data
         MutableColumnPtr result_column;
+        if (!result_type->is_nullable()) {
+            result_column = result_type->create_column();
+        } else {
+            result_column = remove_nullable(result_type)->create_column();
+        }
-        DataTypePtr type = block.get_by_position(result).type;
-        if (!type->is_nullable()) {
-            result_column = type->create_column();
+        // String columns currently do not support writing at a random row position,
+        // so the result data is filled in two different ways: one for string 
types, and one for all the remaining types
+        bool is_string_result = result_column->is_column_string();
+        if (is_string_result) {
+            result_column->reserve(input_rows_count);
         } else {
-            result_column = remove_nullable(type)->create_column();
+            result_column->resize(input_rows_count);
-        result_column->reserve(input_rows_count);
         auto return_type = std::make_shared<DataTypeUInt8>();
-        auto null_map = ColumnUInt8::create(input_rows_count, 1);
-        auto& null_map_data = null_map->get_data();
-        ColumnPtr argument_columns[argument_size];
+        auto null_map = ColumnUInt8::create(input_rows_count, 1);  //if 
null_map_data==1, the current row should be null
+        auto* __restrict null_map_data = null_map->get_data().data();
+        ColumnPtr argument_columns[argument_size]; //use to save nested_column 
if is nullable column
         for (size_t i = 0; i < argument_size; ++i) {
             block.get_by_position(filtered_args[i]).column =
@@ -93,40 +118,69 @@ public:
+        Block temporary_block {
+            ColumnsWithTypeAndName {
+                    block.get_by_position(filtered_args[0]),
+                    {nullptr, std::make_shared<DataTypeUInt8>(), ""}
+            }
+        };
         for (size_t i = 0; i < argument_size && remaining_rows; ++i) {
-            const ColumnsWithTypeAndName is_not_null_col 
-            Block temporary_block(is_not_null_col);
-            temporary_block.insert(ColumnWithTypeAndName {nullptr, 
return_type, ""});
-            auto func_is_not_null = 
is_not_null_col, return_type);
+            temporary_block.get_by_position(0).column = 
             func_is_not_null->execute(context, temporary_block, {0}, 1, 
-            auto res_column = 
+            auto res_column = 
             auto& res_map = 
-            auto* res =;
-            //TODO: if need to imporve performance in the feature, here it's 
could SIMD
-            for (size_t j = 0; j < input_rows_count && remaining_rows; ++j) {
-                if (res[j] && null_map_data[j]) {
-                    null_map_data[j] = 0;
-                    remaining_rows--;
-                    record_idx[j] = i;
+            auto* __restrict res =;
+            // This loop is branch-free so that the compiler can auto-vectorize it (SIMD)
+            // true: res[j]==1 && null_map_data[j]==1, false: others
+            // if true: remaining_rows--; record_idx[j]=column_idx; 
null_map_data[j]=0, so the current row could fill result
+            for (size_t j = 0; j < input_rows_count; ++j) {
+                remaining_rows -= (res[j] & null_map_data[j]);
+                record_idx[j] += (res[j] & null_map_data[j]) * i;
+                null_map_data[j] -= (res[j] & null_map_data[j]);
+            }
+            if (remaining_rows == 0) {
+                //check whether all result data from the same column
+                size_t is_same_column_count = 0;
+                const auto data = record_idx[0];
+                for (size_t row = 0; row < input_rows_count; ++row) {
+                    is_same_column_count += (record_idx[row] == data);
+                }
+                if (is_same_column_count == input_rows_count) {
+                    if (result_type->is_nullable()) {
+                        block.get_by_position(result).column = 
make_nullable(argument_columns[i], false);
+                    } else {
+                        block.get_by_position(result).column = 
+                    }
+                    return Status::OK();
+            if (!is_string_result) {
+                //if not string type, could check one column firstly,
+                //and then fill the not null value in result column,
+                //this method may result in a higher CPU cache hit rate
+                filled_result_column(result_type, result_column, 
argument_columns[i], null_map_data,
+                           , input_rows_count);
+            }
-        //TODO: According to the record results, fill in result one by one, 
-        //that's fill in result use different methods for different types,
-        //because now the string types does not support random position 
-        //But other type could, so could check one column, and fill the not 
null value in result column,
-        //and then next column, this method may result in higher CPU cache
-        for (int row = 0; row < input_rows_count; ++row) {
-            if (record_idx[row] == -1) {
-                result_column->insert_default();
-            } else {
result_column->insert_from(*argument_columns[record_idx[row]].get(), row);
+        if (is_string_result) {
+            //if string type, fill in the result row by row according to the 
recorded column indexes
+            for (size_t row = 0; row < input_rows_count; ++row) {
+                if (null_map_data[row]) { //should be null
+                    result_column->insert_default();
+                } else {
result_column->insert_from(*argument_columns[record_idx[row]].get(), row);
+                }
-        if (type->is_nullable()) {
+        if (result_type->is_nullable()) {
ColumnNullable::create(std::move(result_column), std::move(null_map)));
         } else {
             block.replace_by_position(result, std::move(result_column));
@@ -134,6 +188,76 @@ public:
         return Status::OK();
+    template <typename ColumnType>
+    Status insert_result_data(MutableColumnPtr& result_column, ColumnPtr& 
+                              const UInt8* __restrict null_map_data, UInt8* 
__restrict filled_flag,
+                              const size_t input_rows_count) {
+        auto* __restrict result_raw_data =
+        auto* __restrict column_raw_data =
+                reinterpret_cast<const 
+        // This loop is also branch-free so that the compiler can auto-vectorize it (SIMD)
+        // true: null_map_data[row]==0 && filled_flag[row]==0
+        // if true, the current row's data is written into the result column
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            result_raw_data[row] += (!(null_map_data[row] | filled_flag[row])) 
* column_raw_data[row];
+            filled_flag[row] += (!(null_map_data[row] | filled_flag[row]));
+        }
+        return Status::OK();
+    }
+    //TODO: this function is same as case when, should be replaced by macro
+    Status filled_result_column(const DataTypePtr& data_type, 
MutableColumnPtr& result_column,
+                                ColumnPtr& argument_column, UInt8* __restrict 
+                                UInt8* __restrict filled_flag, const size_t 
input_rows_count) {
+        WhichDataType which(data_type->is_nullable()
+                                    ? reinterpret_cast<const 
+                                              ->get_nested_type()
+                                    : data_type);
+        if (which.is_uint8()) {
+            return insert_result_data<ColumnUInt8>(result_column, 
argument_column, null_map_data,
+                                                   filled_flag, 
+        } else if (which.is_int16()) {
+            return insert_result_data<ColumnInt16>(result_column, 
argument_column, null_map_data,
+                                                   filled_flag, 
+        } else if (which.is_uint32()) {
+            return insert_result_data<ColumnUInt32>(result_column, 
argument_column, null_map_data,
+                                                    filled_flag, 
+        } else if (which.is_uint64()) {
+            return insert_result_data<ColumnUInt64>(result_column, 
argument_column, null_map_data,
+                                                    filled_flag, 
+        } else if (which.is_int8()) {
+            return insert_result_data<ColumnInt8>(result_column, 
argument_column, null_map_data,
+                                                  filled_flag, 
+        } else if (which.is_int16()) {
+            return insert_result_data<ColumnInt16>(result_column, 
argument_column, null_map_data,
+                                                   filled_flag, 
+        } else if (which.is_int32()) {
+            return insert_result_data<ColumnInt32>(result_column, 
argument_column, null_map_data,
+                                                   filled_flag, 
+        } else if (which.is_int64()) {
+            return insert_result_data<ColumnInt64>(result_column, 
argument_column, null_map_data,
+                                                   filled_flag, 
+        } else if (which.is_date_or_datetime()) {
+            return insert_result_data<ColumnVector<DateTime>>(
+                    result_column, argument_column, null_map_data, 
filled_flag, input_rows_count);
+        } else if (which.is_float32()) {
+            return insert_result_data<ColumnFloat32>(result_column, 
argument_column, null_map_data,
+                                                     filled_flag, 
+        } else if (which.is_float64()) {
+            return insert_result_data<ColumnFloat64>(result_column, 
argument_column, null_map_data,
+                                                     filled_flag, 
+        } else if (which.is_decimal()) {
+            return insert_result_data<ColumnDecimal<Decimal128>>(
+                    result_column, argument_column, null_map_data, 
filled_flag, input_rows_count);
+        } else {
+            return Status::NotSupported(fmt::format("Unexpected type {} of 
argument of function {}",
+                                                    data_type->get_name(), 
+        }
+    }
 void register_function_coalesce(SimpleFunctionFactory& factory) {
diff --git a/be/src/vec/functions/is_not_null.cpp 
index 1bfeaa6..346dac1 100644
--- a/be/src/vec/functions/is_not_null.cpp
+++ b/be/src/vec/functions/is_not_null.cpp
@@ -52,8 +52,8 @@ public:
         if (auto* nullable = 
check_and_get_column<ColumnNullable>(*elem.column)) {
             /// Return the negated null map.
             auto res_column = ColumnUInt8::create(input_rows_count);
-            const auto& src_data = nullable->get_null_map_data();
-            auto& res_data = assert_cast<ColumnUInt8&>(*res_column).get_data();
+            const auto* __restrict src_data = 
+            auto* __restrict res_data = 
             for (size_t i = 0; i < input_rows_count; ++i) {
                 res_data[i] = !src_data[i];
diff --git a/be/src/vec/functions/simple_function_factory.h 
index 1d064bb..d757920 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -65,10 +65,11 @@ void register_function_like(SimpleFunctionFactory& factory);
 void register_function_regexp(SimpleFunctionFactory& factory);
 void register_function_random(SimpleFunctionFactory& factory);
 void register_function_coalesce(SimpleFunctionFactory& factory);
 class SimpleFunctionFactory {
     using Creator = std::function<FunctionBuilderPtr()>;
-    using FunctionCreators = std::unordered_map<std::string, Creator>;
-    using FunctionIsVariadic = std::unordered_set<std::string>;
+    using FunctionCreators = phmap::flat_hash_map<std::string, Creator>;
+    using FunctionIsVariadic = phmap::flat_hash_set<std::string>;
     void register_function(const std::string& name, Creator ptr) {
diff --git a/be/test/vec/function/function_string_test.cpp 
index bd2f9a7..9f1f5d7 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -681,6 +681,48 @@ TEST(function_string_test, function_unhex_test) {
     vectorized::check_function<vectorized::DataTypeString, true>(func_name, 
input_types, data_set);
+TEST(function_string_test, function_coalesce_test) {
+    std::string func_name = "coalesce";
+    {
+        std::vector<std::any> input_types = {vectorized::TypeIndex::Int32,
+                                             vectorized::TypeIndex::Int32,
+                                             vectorized::TypeIndex::Int32};
+        DataSet data_set = {{{Null(), Null(), (int32_t)1}, {(int32_t)1}},
+                            {{Null(), Null(), (int32_t)2}, {(int32_t)2}},
+                            {{Null(), Null(), (int32_t)3}, {(int32_t)3}},
+                            {{Null(), Null(), (int32_t)4}, {(int32_t)4}}};
+        vectorized::check_function<vectorized::DataTypeInt32, true>(func_name, 
+                                                                    data_set);
+    }
+    {
+        std::vector<std::any> input_types = {vectorized::TypeIndex::String,
+                                             vectorized::TypeIndex::String,
+                                             vectorized::TypeIndex::Int32};
+        DataSet data_set = {
+                {{std::string("qwer"), Null(), (int32_t)1}, 
+                {{std::string("asdf"), Null(), (int32_t)2}, 
+                {{std::string("zxcv"), Null(), (int32_t)3}, 
+                {{std::string("vbnm"), Null(), (int32_t)4}, 
+        };
+        vectorized::check_function<vectorized::DataTypeString, 
true>(func_name, input_types,
+                                                                     data_set);
+    }
+    {
+        std::vector<std::any> input_types = {vectorized::TypeIndex::String,
+                                             vectorized::TypeIndex::String,
+                                             vectorized::TypeIndex::String};
+        DataSet data_set = {
+                {{Null(), std::string("abc"), std::string("hij")}, 
+                {{Null(), std::string("def"), std::string("klm")}, 
+                {{Null(), std::string(""), std::string("xyz")}, 
+                {{Null(), Null(), std::string("uvw")}, {std::string("uvw")}}};
+        vectorized::check_function<vectorized::DataTypeString, 
true>(func_name, input_types,
+                                                                     data_set);
+    }
 } // namespace doris
 int main(int argc, char** argv) {

To unsubscribe, e-mail:
For additional commands, e-mail:

Reply via email to