[incubator-doris] 03/06: [Function] Refactor the function code of log (#8199)

morningman Thu, 24 Feb 2022 07:45:45 -0800

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.0
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


commit d3b9bb20ff63b2bcb5a12501374ad363284408ce
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Thu Feb 24 11:06:58 2022 +0800

    [Function] Refactor the function code of log (#8199)
    
    1. Support returning null when the input is invalid
    2. Delete the useless code in vec functions
    
    Co-authored-by: lihaopeng <lihaop...@baidu.com>
---
 be/src/exprs/math_functions.cpp                    |  12 +-
 be/src/vec/functions/function_binary_arithmetic.h  |   7 +
 .../vec/functions/function_math_binary_float64.h   | 248 ---------------------
 be/src/vec/functions/function_math_unary.h         |   1 +
 ..._unary.h => function_math_unary_to_null_type.h} |  87 ++------
 be/src/vec/functions/math.cpp                      | 131 ++++++-----
 be/src/vec/functions/modulo.cpp                    |   2 +-
 be/src/vec/io/io_helper.h                          |   1 +
 be/test/vec/function/function_math_test.cpp        |  11 +-
 be/test/vec/function/function_test_util.h          |   1 -
 gensrc/script/doris_builtins_functions.py          |   8 +-
 11 files changed, 127 insertions(+), 382 deletions(-)

diff --git a/be/src/exprs/math_functions.cpp b/be/src/exprs/math_functions.cpp
index 708fc42..13fe610 100644
--- a/be/src/exprs/math_functions.cpp
+++ b/be/src/exprs/math_functions.cpp
@@ -161,6 +161,12 @@ SmallIntVal MathFunctions::abs(FunctionContext* ctx, const 
doris_udf::TinyIntVal
     return SmallIntVal(std::abs(int16_t(val.val)));
 }
 
+#define LOG_MATH_FN(NAME, RET_TYPE, INPUT_TYPE, FN)                           \
+    RET_TYPE MathFunctions::NAME(FunctionContext* ctx, const INPUT_TYPE& v) { \
+        if (v.is_null || v.val <= 0) return RET_TYPE::null();                 \
+        return RET_TYPE(FN(v.val));                                           \
+    }
+
 // Generates a UDF that always calls FN() on the input val and returns it.
 #define ONE_ARG_MATH_FN(NAME, RET_TYPE, INPUT_TYPE, FN)                       \
     RET_TYPE MathFunctions::NAME(FunctionContext* ctx, const INPUT_TYPE& v) { \
@@ -179,9 +185,9 @@ ONE_ARG_MATH_FN(atan, DoubleVal, DoubleVal, std::atan);
 ONE_ARG_MATH_FN(sqrt, DoubleVal, DoubleVal, std::sqrt);
 ONE_ARG_MATH_FN(ceil, BigIntVal, DoubleVal, std::ceil);
 ONE_ARG_MATH_FN(floor, BigIntVal, DoubleVal, std::floor);
-ONE_ARG_MATH_FN(ln, DoubleVal, DoubleVal, std::log);
-ONE_ARG_MATH_FN(log10, DoubleVal, DoubleVal, std::log10);
 ONE_ARG_MATH_FN(exp, DoubleVal, DoubleVal, std::exp);
+LOG_MATH_FN(ln, DoubleVal, DoubleVal, std::log);
+LOG_MATH_FN(log10, DoubleVal, DoubleVal, std::log10);
 
 TinyIntVal MathFunctions::sign(FunctionContext* ctx, const DoubleVal& v) {
     if (v.is_null) {
@@ -227,7 +233,7 @@ DoubleVal MathFunctions::truncate(FunctionContext* ctx, 
const DoubleVal& v, cons
 }
 
 DoubleVal MathFunctions::log2(FunctionContext* ctx, const DoubleVal& v) {
-    if (v.is_null) {
+    if (v.is_null || v.val <= 0.0) {
         return DoubleVal::null();
     }
     return DoubleVal(std::log(v.val) / std::log(2.0));
diff --git a/be/src/vec/functions/function_binary_arithmetic.h 
b/be/src/vec/functions/function_binary_arithmetic.h
index f987f90..da1cabb 100644
--- a/be/src/vec/functions/function_binary_arithmetic.h
+++ b/be/src/vec/functions/function_binary_arithmetic.h
@@ -461,6 +461,8 @@ template <template <typename, typename> class Op, typename 
Name,
           bool CanBeExecutedOnDefaultArguments = true>
 class FunctionBinaryArithmetic : public IFunction {
     bool check_decimal_overflow = true;
+    static constexpr bool has_variadic_argument =
+            
!std::is_void_v<decltype(has_variadic_argument_types(std::declval<Op<int,int>>()))>;
 
     template <typename F>
     static bool cast_type(const IDataType* type, F&& f) {
@@ -508,6 +510,11 @@ public:
 
     size_t get_number_of_arguments() const override { return 2; }
 
+    DataTypes get_variadic_argument_types_impl() const override {
+        if constexpr (has_variadic_argument) return Op<int, 
int>::get_variadic_argument_types();
+        return {};
+    }
+
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
         DataTypePtr type_res;
         bool valid = cast_both_types(
diff --git a/be/src/vec/functions/function_math_binary_float64.h 
b/be/src/vec/functions/function_math_binary_float64.h
deleted file mode 100644
index 8f41931..0000000
--- a/be/src/vec/functions/function_math_binary_float64.h
+++ /dev/null
@@ -1,248 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionMathBinaryFloat64.h
-// and modified by Doris
-
-#pragma once
-
-#include "vec/columns/column_const.h"
-#include "vec/columns/column_decimal.h"
-#include "vec/columns/columns_number.h"
-#include "vec/core/call_on_type_index.h"
-#include "vec/data_types/data_type_decimal.h"
-#include "vec/data_types/data_type_number.h"
-#include "vec/functions/function.h"
-#include "vec/functions/function_helpers.h"
-
-namespace doris::vectorized {
-
-template <typename Impl>
-class FunctionMathBinaryFloat64 : public IFunction {
-public:
-    static constexpr auto name = Impl::name;
-    static constexpr bool has_variadic_argument =
-            
!std::is_void_v<decltype(has_variadic_argument_types(std::declval<Impl>()))>;
-    static FunctionPtr create() { return 
std::make_shared<FunctionMathBinaryFloat64>(); }
-    static_assert(Impl::rows_per_iteration > 0, "Impl must process at least 
one row per iteration");
-
-    bool use_default_implementation_for_constants() const override { return 
true; }
-
-private:
-    String get_name() const override { return name; }
-
-    size_t get_number_of_arguments() const override { return 2; }
-
-    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
-        const auto check_argument_type = [this](const IDataType* arg) -> bool {
-            if (!is_native_number(arg)) {
-                LOG(ERROR) << "Illegal type " << arg->get_name() << " of 
argument of function "
-                           << get_name();
-                return false;
-            }
-            return true;
-        };
-
-        if (check_argument_type(arguments.front().get()) &&
-            check_argument_type(arguments.back().get())) {
-            return std::make_shared<DataTypeFloat64>();
-        } else {
-            return nullptr;
-        }
-    }
-
-    DataTypes get_variadic_argument_types_impl() const override {
-        if constexpr (has_variadic_argument) return 
Impl::get_variadic_argument_types();
-        return {};
-    }
-
-    template <typename LeftType, typename RightType>
-    bool execute_typed(Block& block, const size_t result, const ColumnConst* 
left_arg,
-                       const IColumn* right_arg) {
-        if (const auto right_arg_typed = 
check_and_get_column<ColumnVector<RightType>>(right_arg)) {
-            auto dst = ColumnVector<Float64>::create();
-
-            LeftType left_src_data[Impl::rows_per_iteration];
-            std::fill(std::begin(left_src_data), std::end(left_src_data),
-                      left_arg->template get_value<LeftType>());
-            const auto& right_src_data = right_arg_typed->get_data();
-            const auto src_size = right_src_data.size();
-            auto& dst_data = dst->get_data();
-            dst_data.resize(src_size);
-
-            const auto rows_remaining = src_size % Impl::rows_per_iteration;
-            const auto rows_size = src_size - rows_remaining;
-
-            for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
-                Impl::execute(left_src_data, &right_src_data[i], &dst_data[i]);
-
-            if (rows_remaining != 0) {
-                RightType right_src_remaining[Impl::rows_per_iteration];
-                memcpy(right_src_remaining, &right_src_data[rows_size],
-                       rows_remaining * sizeof(RightType));
-                memset(right_src_remaining + rows_remaining, 0,
-                       (Impl::rows_per_iteration - rows_remaining) * 
sizeof(RightType));
-                Float64 dst_remaining[Impl::rows_per_iteration];
-
-                Impl::execute(left_src_data, right_src_remaining, 
dst_remaining);
-
-                memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * 
sizeof(Float64));
-            }
-
-            block.replace_by_position(result, std::move(dst));
-            return true;
-        }
-
-        return false;
-    }
-
-    template <typename LeftType, typename RightType>
-    bool execute_typed(Block& block, const size_t result, const 
ColumnVector<LeftType>* left_arg,
-                       const IColumn* right_arg) {
-        if (const auto right_arg_typed = 
check_and_get_column<ColumnVector<RightType>>(right_arg)) {
-            auto dst = ColumnVector<Float64>::create();
-
-            const auto& left_src_data = left_arg->get_data();
-            const auto& right_src_data = right_arg_typed->get_data();
-            const auto src_size = left_src_data.size();
-            auto& dst_data = dst->get_data();
-            dst_data.resize(src_size);
-
-            const auto rows_remaining = src_size % Impl::rows_per_iteration;
-            const auto rows_size = src_size - rows_remaining;
-
-            for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
-                Impl::execute(&left_src_data[i], &right_src_data[i], 
&dst_data[i]);
-
-            if (rows_remaining != 0) {
-                LeftType left_src_remaining[Impl::rows_per_iteration];
-                memcpy(left_src_remaining, &left_src_data[rows_size],
-                       rows_remaining * sizeof(LeftType));
-                memset(left_src_remaining + rows_remaining, 0,
-                       (Impl::rows_per_iteration - rows_remaining) * 
sizeof(LeftType));
-                RightType right_src_remaining[Impl::rows_per_iteration];
-                memcpy(right_src_remaining, &right_src_data[rows_size],
-                       rows_remaining * sizeof(RightType));
-                memset(right_src_remaining + rows_remaining, 0,
-                       (Impl::rows_per_iteration - rows_remaining) * 
sizeof(RightType));
-                Float64 dst_remaining[Impl::rows_per_iteration];
-
-                Impl::execute(left_src_remaining, right_src_remaining, 
dst_remaining);
-
-                memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * 
sizeof(Float64));
-            }
-
-            block.replace_by_position(result, std::move(dst));
-            return true;
-        }
-        if (const auto right_arg_typed =
-                    
check_and_get_column_const<ColumnVector<RightType>>(right_arg)) {
-            auto dst = ColumnVector<Float64>::create();
-
-            const auto& left_src_data = left_arg->get_data();
-            RightType right_src_data[Impl::rows_per_iteration];
-            std::fill(std::begin(right_src_data), std::end(right_src_data),
-                      right_arg_typed->template get_value<RightType>());
-            const auto src_size = left_src_data.size();
-            auto& dst_data = dst->get_data();
-            dst_data.resize(src_size);
-
-            const auto rows_remaining = src_size % Impl::rows_per_iteration;
-            const auto rows_size = src_size - rows_remaining;
-
-            for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
-                Impl::execute(&left_src_data[i], right_src_data, &dst_data[i]);
-
-            if (rows_remaining != 0) {
-                LeftType left_src_remaining[Impl::rows_per_iteration];
-                memcpy(left_src_remaining, &left_src_data[rows_size],
-                       rows_remaining * sizeof(LeftType));
-                memset(left_src_remaining + rows_remaining, 0,
-                       (Impl::rows_per_iteration - rows_remaining) * 
sizeof(LeftType));
-                Float64 dst_remaining[Impl::rows_per_iteration];
-
-                Impl::execute(left_src_remaining, right_src_data, 
dst_remaining);
-
-                memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * 
sizeof(Float64));
-            }
-
-            block.replace_by_position(result, std::move(dst));
-            return true;
-        }
-
-        return false;
-    }
-
-    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
-                        size_t result, size_t /*input_rows_count*/) override {
-        const ColumnWithTypeAndName& col_left = 
block.get_by_position(arguments[0]);
-        const ColumnWithTypeAndName& col_right = 
block.get_by_position(arguments[1]);
-
-        auto call = [&](const auto& types) -> bool {
-            using Types = std::decay_t<decltype(types)>;
-            using LeftType = typename Types::LeftType;
-            using RightType = typename Types::RightType;
-            using ColVecLeft = ColumnVector<LeftType>;
-
-            const IColumn* left_arg = col_left.column.get();
-            const IColumn* right_arg = col_right.column.get();
-
-            if (const auto left_arg_typed = 
check_and_get_column<ColVecLeft>(left_arg)) {
-                if (execute_typed<LeftType, RightType>(block, result, 
left_arg_typed, right_arg)) {
-                    return true;
-                }
-                DCHECK(false) << "Illegal column " << right_arg->get_name()
-                              << " of second argument of function " << 
get_name();
-            }
-            if (const auto left_arg_typed = 
check_and_get_column_const<ColVecLeft>(left_arg)) {
-                if (execute_typed<LeftType, RightType>(block, result, 
left_arg_typed, right_arg)) {
-                    return true;
-                }
-
-                DCHECK(false) << "Illegal column " << right_arg->get_name()
-                              << " of second argument of function " << 
get_name();
-            }
-
-            return false;
-        };
-
-        TypeIndex left_index = col_left.type->get_type_id();
-        TypeIndex right_index = col_right.type->get_type_id();
-
-        if (!call_on_basic_types<true, true, false, false>(left_index, 
right_index, call)) {
-            return Status::InvalidArgument("Illegal column " + 
col_left.column->get_name() +
-                                           " of argument of function " + 
get_name());
-        }
-        return Status::OK();
-    }
-};
-
-template <typename Name, Float64(Function)(Float64, Float64)>
-struct BinaryFunctionPlain {
-    static constexpr auto name = Name::name;
-    static constexpr auto rows_per_iteration = 1;
-
-    template <typename T1, typename T2>
-    static void execute(const T1* src_left, const T2* src_right, Float64* dst) 
{
-        dst[0] = static_cast<Float64>(
-                Function(static_cast<Float64>(src_left[0]), 
static_cast<Float64>(src_right[0])));
-    }
-};
-
-#define BinaryFunctionVectorized BinaryFunctionPlain
-
-} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_math_unary.h 
b/be/src/vec/functions/function_math_unary.h
index f12936c..acd4deb 100644
--- a/be/src/vec/functions/function_math_unary.h
+++ b/be/src/vec/functions/function_math_unary.h
@@ -48,6 +48,7 @@ private:
         }
         return std::make_shared<typename Impl::Type>();
     }
+
     DataTypes get_variadic_argument_types_impl() const override {
         if constexpr (has_variadic_argument) return 
Impl::get_variadic_argument_types();
         return {};
diff --git a/be/src/vec/functions/function_math_unary.h 
b/be/src/vec/functions/function_math_unary_to_null_type.h
similarity index 57%
copy from be/src/vec/functions/function_math_unary.h
copy to be/src/vec/functions/function_math_unary_to_null_type.h
index f12936c..846f6d9 100644
--- a/be/src/vec/functions/function_math_unary.h
+++ b/be/src/vec/functions/function_math_unary_to_null_type.h
@@ -1,3 +1,4 @@
+
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
@@ -14,9 +15,6 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-// This file is copied from
-// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionMathUnary.h
-// and modified by Doris
 
 #pragma once
 
@@ -30,12 +28,10 @@
 namespace doris::vectorized {
 
 template <typename Impl>
-class FunctionMathUnary : public IFunction {
+class FunctionMathUnaryToNullType : public IFunction {
 public:
     static constexpr auto name = Impl::name;
-    static constexpr bool has_variadic_argument =
-            
!std::is_void_v<decltype(has_variadic_argument_types(std::declval<Impl>()))>;
-    static FunctionPtr create() { return 
std::make_shared<FunctionMathUnary>(); }
+    static FunctionPtr create() { return 
std::make_shared<FunctionMathUnaryToNullType>(); }
 
 private:
     String get_name() const override { return name; }
@@ -46,45 +42,13 @@ private:
         if (!is_number(arg)) {
             return nullptr;
         }
-        return std::make_shared<typename Impl::Type>();
-    }
-    DataTypes get_variadic_argument_types_impl() const override {
-        if constexpr (has_variadic_argument) return 
Impl::get_variadic_argument_types();
-        return {};
+        return make_nullable(std::make_shared<typename Impl::Type>());
     }
-    
+
     template <typename T, typename ReturnType>
-    static void execute_in_iterations(const T* src_data, ReturnType* dst_data, 
size_t size) {
-        if constexpr (Impl::rows_per_iteration == 0) {
-            /// Process all data as a whole and use FastOps implementation
-
-            /// If the argument is integer, convert to Float64 beforehand
-            if constexpr (!std::is_floating_point_v<T>) {
-                PODArray<Float64> tmp_vec(size);
-                for (size_t i = 0; i < size; ++i) tmp_vec[i] = src_data[i];
-
-                Impl::execute(tmp_vec.data(), size, dst_data);
-            } else {
-                Impl::execute(src_data, size, dst_data);
-            }
-        } else {
-            const size_t rows_remaining = size % Impl::rows_per_iteration;
-            const size_t rows_size = size - rows_remaining;
-
-            for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
-                Impl::execute(&src_data[i], &dst_data[i]);
-
-            if (rows_remaining != 0) {
-                T src_remaining[Impl::rows_per_iteration];
-                memcpy(src_remaining, &src_data[rows_size], rows_remaining * 
sizeof(T));
-                memset(src_remaining + rows_remaining, 0,
-                       (Impl::rows_per_iteration - rows_remaining) * 
sizeof(T));
-                ReturnType dst_remaining[Impl::rows_per_iteration];
-
-                Impl::execute(src_remaining, dst_remaining);
-
-                memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * 
sizeof(ReturnType));
-            }
+    static void execute_in_iterations(const T* src_data, ReturnType* dst_data, 
NullMap& null_map, size_t size) {
+        for (size_t i = 0; i < size; i++) {
+            Impl::execute(&src_data[i], &dst_data[i], null_map[i]);
         }
     }
 
@@ -97,9 +61,13 @@ private:
         auto& dst_data = dst->get_data();
         dst_data.resize(size);
 
-        execute_in_iterations(src_data.data(), dst_data.data(), size);
+        auto null_column = ColumnVector<UInt8>::create();
+        auto& null_map = null_column->get_data();
+        null_map.resize(size);
 
-        block.replace_by_position(result, std::move(dst));
+        execute_in_iterations(src_data.data(), dst_data.data(), null_map, 
size);
+
+        block.replace_by_position(result, 
ColumnNullable::create(std::move(dst), std::move(null_column)));
         return true;
     }
 
@@ -113,13 +81,17 @@ private:
         auto& dst_data = dst->get_data();
         dst_data.resize(size);
 
+        auto null_column = ColumnVector<UInt8>::create();
+        auto& null_map = null_column->get_data();
+        null_map.resize(size);
+
         for (size_t i = 0; i < size; ++i)
             dst_data[i] = convert_from_decimal<DataTypeDecimal<T>, 
DataTypeNumber<ReturnType>>(
                     src_data[i], scale);
 
-        execute_in_iterations(dst_data.data(), dst_data.data(), size);
+        execute_in_iterations(dst_data.data(), dst_data.data(), null_map, 
size);
 
-        block.replace_by_position(result, std::move(dst));
+        block.replace_by_position(result, 
ColumnNullable::create(std::move(dst), std::move(null_column)));
         return true;
     }
 
@@ -132,13 +104,11 @@ private:
         auto call = [&](const auto& types) -> bool {
             using Types = std::decay_t<decltype(types)>;
             using Type = typename Types::RightType;
-            using ReturnType = std::conditional_t<
-                    Impl::always_returns_float64, Float64, Int64>;
             using ColVecType = std::conditional_t<IsDecimalNumber<Type>, 
ColumnDecimal<Type>,
                                                   ColumnVector<Type>>;
 
             const auto col_vec = 
check_and_get_column<ColVecType>(col.column.get());
-            return execute<Type, ReturnType>(block, col_vec, result);
+            return execute<Type, typename Impl::RetType>(block, col_vec, 
result);
         };
 
         if (!call_on_basic_type<void, true, true, true, 
false>(col.type->get_type_id(), call)) {
@@ -149,19 +119,4 @@ private:
     }
 };
 
-template <typename Name, Float64(Function)(Float64), typename ReturnType = 
DataTypeFloat64>
-struct UnaryFunctionPlain {
-    using Type = ReturnType;
-    static constexpr auto name = Name::name;
-    static constexpr auto rows_per_iteration = 1;
-    static constexpr bool always_returns_float64 = std::is_same_v<Type, 
DataTypeFloat64>;
-
-    template <typename T, typename U>
-    static void execute(const T* src, U* dst) {
-        dst[0] = static_cast<Float64>(Function(static_cast<Float64>(src[0])));
-    }
-};
-
-#define UnaryFunctionVectorized UnaryFunctionPlain
-
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/math.cpp b/be/src/vec/functions/math.cpp
index 540c2c1..a265168 100644
--- a/be/src/vec/functions/math.cpp
+++ b/be/src/vec/functions/math.cpp
@@ -15,11 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "vec/common/field_visitors.h"
 #include "vec/data_types/number_traits.h"
 #include "vec/functions/function_const.h"
-#include "vec/functions/function_math_binary_float64.h"
+#include "vec/functions/function_binary_arithmetic.h"
+#include "vec/functions/function_binary_arithmetic_to_null_type.h"
 #include "vec/functions/function_math_unary.h"
+#include "vec/functions/function_math_unary_to_null_type.h"
 #include "vec/functions/function_string.h"
 #include "vec/functions/function_totype.h"
 #include "vec/functions/function_unary_arithmetic.h"
@@ -125,37 +126,41 @@ struct ExpName {
 };
 using FunctionExp = FunctionMathUnary<UnaryFunctionVectorized<ExpName, 
std::exp>>;
 
-struct LnName {
-    static constexpr auto name = "ln";
-};
-using FunctionLn = FunctionMathUnary<UnaryFunctionVectorized<LnName, 
std::log>>;
-
-struct Log2Name {
-    static constexpr auto name = "log2";
-};
-using FunctionLog2 = FunctionMathUnary<UnaryFunctionVectorized<Log2Name, 
std::log2>>;
-
-struct Log10Name {
-    static constexpr auto name = "log10";
-};
-using FunctionLog10 = FunctionMathUnary<UnaryFunctionVectorized<Log10Name, 
std::log10>>;
+#define LOG_FUNCTION_IMPL(CLASS, NAME, FUNC)                                   
 \
+struct CLASS##Impl {                                                           
 \
+    using Type = DataTypeFloat64;                                              
 \
+    using RetType = Float64;                                                   
 \
+    static constexpr auto name = #NAME;                                        
 \
+    template <typename T, typename U>                                          
 \
+    static void execute(const T* src, U* dst, UInt8& null_flag) {              
 \
+        null_flag = src[0] <= 0;                                               
 \
+        dst[0] = static_cast<U>(FUNC((double)src[0]));                         
 \
+    }                                                                          
 \
+};                                                                             
 \
+using Function##CLASS = FunctionMathUnaryToNullType<CLASS##Impl>;
+
+LOG_FUNCTION_IMPL(Log10, log10, std::log10);
+LOG_FUNCTION_IMPL(Log2, log2, std::log2);
+LOG_FUNCTION_IMPL(Ln, ln, std::log);
 
 struct LogName {
     static constexpr auto name = "log";
 };
 
-template <typename Name>
+template <typename A, typename B>
 struct LogImpl {
-    static constexpr auto name = LogName::name;
-    static constexpr auto rows_per_iteration = 1;
-
-    template <typename T1, typename T2>
-    static void execute(const T1* src_left, const T2* src_right, Float64* dst) 
{
-        dst[0] = 
static_cast<Float64>(std::log(static_cast<Float64>(src_right[0])) /
-                                      
std::log(static_cast<Float64>(src_left[0])));
+    using ResultType = Float64;
+    static const constexpr bool allow_decimal = false;
+
+    template <typename Result>
+    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
+        constexpr double EPSILON = 1e-9;
+        null_map[index] = a <= 0 || b <= 0 || std::fabs(a - 1.0) < EPSILON;
+        return static_cast<Float64>(std::log(static_cast<Float64>(b)) /
+                                      std::log(static_cast<Float64>(a)));
     }
 };
-using FunctionLog = FunctionMathBinaryFloat64<LogImpl<LogName>>;
+using FunctionLog = FunctionBinaryArithmeticToNullType<LogImpl, LogName>;
 
 struct CeilName {
     static constexpr auto name = "ceil";
@@ -301,28 +306,6 @@ struct FloorName {
 };
 using FunctionFloor = FunctionMathUnary<UnaryFunctionVectorized<FloorName, 
std::floor, DataTypeInt64>>;
 
-struct PowName {
-    static constexpr auto name = "pow";
-};
-using FunctionPow = 
FunctionMathBinaryFloat64<BinaryFunctionVectorized<PowName, std::pow>>;
-
-struct TruncateName {
-    static constexpr auto name = "truncate";
-};
-
-template <typename Name>
-struct TruncateImpl {
-    static constexpr auto rows_per_iteration = 1;
-    static constexpr auto name = TruncateName::name;
-
-    template <typename T1, typename T2>
-    static void execute(const T1* src_left, const T2* src_right, Float64* dst) 
{
-        dst[0] = static_cast<Float64>(my_double_round(
-                static_cast<Float64>(src_left[0]), 
static_cast<Int32>(src_right[0]), false, true));
-    }
-};
-using FunctionTruncate = FunctionMathBinaryFloat64<TruncateImpl<TruncateName>>;
-
 template <typename A>
 struct RadiansImpl {
     using ResultType = A;
@@ -412,26 +395,62 @@ struct RoundOneImpl {
 };
 using FunctionRoundOne = FunctionMathUnary<RoundOneImpl<RoundName>>;
 
+template <typename A, typename B>
+struct PowImpl {
+    using ResultType = double;
+    static const constexpr bool allow_decimal = false;
+
+    template <typename type>
+    static inline double apply(A a, B b) {
+        /// Next everywhere, static_cast - so that there is no wrong result in 
expressions of the form Int64 c = UInt32(a) * Int32(-1).
+        return std::pow((double)a, (double) b);
+    }
+};
+struct PowName {
+    static constexpr auto name = "pow";
+};
+using FunctionPow = FunctionBinaryArithmetic<PowImpl, PowName>;
+
+template <typename A, typename B>
+struct TruncateImpl {
+    using ResultType = double;
+    static const constexpr bool allow_decimal = false;
+
+    template <typename type>
+    static inline double apply(A a, B b) {
+        /// Next everywhere, static_cast - so that there is no wrong result in 
expressions of the form Int64 c = UInt32(a) * Int32(-1).
+        return static_cast<Float64>(my_double_round(
+                static_cast<Float64>(a), static_cast<Int32>(b), false, true));
+    }
+};
+struct TruncateName {
+    static constexpr auto name = "truncate";
+};
+using FunctionTruncate = FunctionBinaryArithmetic<TruncateImpl, TruncateName>;
+
 /// round(double,int32)-->double
 /// key_str:roundFloat64Int32
-template <typename Name>
+template <typename A, typename B>
 struct RoundTwoImpl {
-    static constexpr auto name = RoundName::name;
-    static constexpr auto rows_per_iteration = 1;
-    
+    using ResultType = double;
+    static const constexpr bool allow_decimal = false;
+
     static DataTypes get_variadic_argument_types() {
         return {std::make_shared<vectorized::DataTypeFloat64>(),
                 std::make_shared<vectorized::DataTypeInt32>()};
     }
 
-    template <typename T1, typename T2>
-    static void execute(const T1* src_left, const T2* src_right, Float64* dst) 
{
-        dst[0] = my_double_round(static_cast<Float64>(src_left[0]),
-                                 static_cast<Int32>(src_right[0]), false, 
false);
+    template <typename type>
+    static inline double apply(A a, B b) {
+        /// Next everywhere, static_cast - so that there is no wrong result in 
expressions of the form Int64 c = UInt32(a) * Int32(-1).
+        return static_cast<Float64>(my_double_round(
+                static_cast<Float64>(a), static_cast<Int32>(b), false, false));
     }
 };
-using FunctionRoundTwo = FunctionMathBinaryFloat64<RoundTwoImpl<RoundName>>;
+using FunctionRoundTwo = FunctionBinaryArithmetic<RoundTwoImpl, RoundName>;
 
+// TODO: math.cpp may take too long to compile in a single thread because it contains
+// so many functions. Split it up to speed up compilation in the future.
 void register_function_math(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionAcos>();
     factory.register_function<FunctionAsin>();
diff --git a/be/src/vec/functions/modulo.cpp b/be/src/vec/functions/modulo.cpp
index 3bd15c3..0e8bf49 100644
--- a/be/src/vec/functions/modulo.cpp
+++ b/be/src/vec/functions/modulo.cpp
@@ -38,7 +38,7 @@ struct ModuloImpl {
     template <typename Result = ResultType>
     static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
         if constexpr (std::is_floating_point_v<Result>) {
-            null_map[index] = 0;
+            null_map[index] = b == 0;
             return std::fmod((double)a, (double)b);
         } else {
             null_map[index] = b == 0;
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index bc09fe8..99253c9 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -258,6 +258,7 @@ bool read_datetime_text_impl(T& x, ReadBuffer& buf) {
     static_assert(std::is_same_v<Int64, T>);
     auto dv = binary_cast<Int64, VecDateTimeValue>(x);
     auto ans = dv.from_date_str(buf.position(), buf.count());
+    dv.to_datetime();
 
     // only to match the is_all_read() check to prevent return null
     buf.position() = buf.end();
diff --git a/be/test/vec/function/function_math_test.cpp 
b/be/test/vec/function/function_math_test.cpp
index a6b92c7..448cf84 100644
--- a/be/test/vec/function/function_math_test.cpp
+++ b/be/test/vec/function/function_math_test.cpp
@@ -144,9 +144,10 @@ TEST(MathFunctionTest, ln_test) {
 
     DataSet data_set = {{{1.0}, 0.0},
                         {{0.5}, -0.69314718055994529},
+                        {{-2.0}, Null()},
                         {{100.0}, 4.6051701859880918},
                         {{1000.0}, 6.9077552789821368}};
-
+    
     check_function<DataTypeFloat64, true>(func_name, input_types, data_set);
 }
 
@@ -158,7 +159,8 @@ TEST(MathFunctionTest, log2_test) {
     DataSet data_set = {{{1.0}, 0.0},
                         {{0.5}, -1.0},
                         {{100.0}, 6.6438561897747244},
-                        {{1000.0}, 9.965784284662087}};
+                        {{1000.0}, 9.965784284662087},
+                        {{-1.0}, Null()}};
 
     check_function<DataTypeFloat64, true>(func_name, input_types, data_set);
 }
@@ -169,7 +171,8 @@ TEST(MathFunctionTest, log10_test) {
     InputTypeSet input_types = {TypeIndex::Float64};
 
     DataSet data_set = {
-            {{1.0}, 0.0}, {{0.5}, -0.3010299956639812}, {{100.0}, 2.0}, 
{{1000.0}, 3.0}};
+            {{1.0}, 0.0}, {{0.5}, -0.3010299956639812},
+            {{100.0}, 2.0}, {{-1.0}, Null()}, {{1000.0}, 3.0}};
 
     check_function<DataTypeFloat64, true>(func_name, input_types, data_set);
 }
@@ -182,6 +185,8 @@ TEST(MathFunctionTest, log_test) {
     DataSet data_set = {{{10.0, 1.0}, 0.0},
                         {{10.0, 100.0}, 2.0},
                         {{0.1, 5.0}, -0.69897000433601886},
+                        {{-2.0, 5.0}, Null()},
+                        {{2.0, -5.0}, Null()},
                         {{2.0, 0.5}, -1.0}};
 
     check_function<DataTypeFloat64, true>(func_name, input_types, data_set);
diff --git a/be/test/vec/function/function_test_util.h 
b/be/test/vec/function/function_test_util.h
index c1dc3d1..247c309 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -86,7 +86,6 @@ void insert_column_to_block(std::list<ColumnPtr>& columns, 
ColumnsWithTypeAndNam
 // Null values are represented by Null()
 // The type of the constant column is represented as follows: Consted 
{TypeIndex::String}
 // A DataSet with a constant column can only have one row of data
-
 template <typename ReturnType, bool nullable = false>
 void check_function(const std::string& func_name, const std::vector<std::any>& 
input_types,
                     const DataSet& data_set) {
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index 8e3cec7..4d1f5d3 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -579,13 +579,13 @@ visible_functions = [
             '15FunctionContextERKNS1_9DoubleValERKNS1_6IntValE', '', '', 
'vec', ''],
 
     [['ln', 'dlog1'], 'DOUBLE', ['DOUBLE'],
-            
'_ZN5doris13MathFunctions2lnEPN9doris_udf15FunctionContextERKNS1_9DoubleValE', 
'', '', 'vec', ''],
+            
'_ZN5doris13MathFunctions2lnEPN9doris_udf15FunctionContextERKNS1_9DoubleValE', 
'', '', 'vec', 'ALWAYS_NULLABLE'],
     [['log'], 'DOUBLE', ['DOUBLE', 'DOUBLE'],
-            
'_ZN5doris13MathFunctions3logEPN9doris_udf15FunctionContextERKNS1_9DoubleValES6_',
 '', '', 'vec', ''],
+            
'_ZN5doris13MathFunctions3logEPN9doris_udf15FunctionContextERKNS1_9DoubleValES6_',
 '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['log2'], 'DOUBLE', ['DOUBLE'],
-            
'_ZN5doris13MathFunctions4log2EPN9doris_udf15FunctionContextERKNS1_9DoubleValE',
 '', '', 'vec', ''],
+            
'_ZN5doris13MathFunctions4log2EPN9doris_udf15FunctionContextERKNS1_9DoubleValE',
 '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['log10', 'dlog10'], 'DOUBLE', ['DOUBLE'],
-            
'_ZN5doris13MathFunctions5log10EPN9doris_udf15FunctionContextERKNS1_9DoubleValE',
 '', '', 'vec', ''],
+            
'_ZN5doris13MathFunctions5log10EPN9doris_udf15FunctionContextERKNS1_9DoubleValE',
 '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['exp', 'dexp'], 'DOUBLE', ['DOUBLE'],
             
'_ZN5doris13MathFunctions3expEPN9doris_udf15FunctionContextERKNS1_9DoubleValE', 
'', '', 'vec', ''],
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[incubator-doris] 03/06: [Function] Refactor the function code of log (#8199)

Reply via email to