[incubator-doris] branch master updated: [Bug][Optimize] Fix race condition problem and optimize do_money_format function (#6350)

caiconghui Fri, 06 Aug 2021 01:29:52 -0700

This is an automated email from the ASF dual-hosted git repository.

caiconghui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 2f5b06a  [Bug][Optimize] Fix race condition problem and optimize 
do_money_format function (#6350)
2f5b06a is described below

commit 2f5b06ae701e1274824c1768fd5bb1a739b6906c
Author: caiconghui <[email protected]>
AuthorDate: Fri Aug 6 16:29:34 2021 +0800

    [Bug][Optimize] Fix race condition problem and optimize do_money_format 
function (#6350)
    
    * [Bug][Optimize] Fix race condition problem and optimize do_money_format 
function
    
    Co-authored-by: caiconghui <[email protected]>
---
 be/src/exprs/string_functions.cpp       | 16 +++----
 be/src/exprs/string_functions.h         | 34 ++++++---------
 be/src/gutil/strings/numbers.cc         | 76 ++++++++++++++++++++++++---------
 be/src/gutil/strings/numbers.h          |  3 ++
 be/test/exprs/string_functions_test.cpp | 34 ++++++++++-----
 5 files changed, 101 insertions(+), 62 deletions(-)

diff --git a/be/src/exprs/string_functions.cpp 
b/be/src/exprs/string_functions.cpp
index 0ede46e..8822065 100644
--- a/be/src/exprs/string_functions.cpp
+++ b/be/src/exprs/string_functions.cpp
@@ -880,8 +880,10 @@ StringVal StringFunctions::money_format(FunctionContext* 
context, const DoubleVa
         return StringVal::null();
     }
 
-    double v_cent = MathFunctions::my_double_round(v.val, 2, false, false) * 
100;
-    return do_money_format(context, std::to_string(v_cent));
+    double v_cent = MathFunctions::my_double_round(v.val, 2, false, false);
+    bool negative = v_cent < 0;
+    int32_t frac_value = negative ? ((int64_t) (-v_cent * 100)) % 100 : 
((int64_t)(v_cent * 100)) % 100;
+    return do_money_format<int64_t, 26>(context, (int64_t) v_cent , 
frac_value);
 }
 
 StringVal StringFunctions::money_format(FunctionContext* context, const 
DecimalV2Val& v) {
@@ -891,25 +893,21 @@ StringVal StringFunctions::money_format(FunctionContext* 
context, const DecimalV
 
     DecimalV2Value rounded(0);
     DecimalV2Value::from_decimal_val(v).round(&rounded, 2, HALF_UP);
-    DecimalV2Value tmp(std::string_view("100"));
-    DecimalV2Value result = rounded * tmp;
-    return do_money_format(context, result.to_string());
+    return do_money_format<int64_t, 26>(context, rounded.int_value(), 
abs(rounded.frac_value()));
 }
 
 StringVal StringFunctions::money_format(FunctionContext* context, const 
BigIntVal& v) {
     if (v.is_null) {
         return StringVal::null();
     }
-
-    return do_money_format(context, fmt::format("{}00", v.val, "00"));
+    return do_money_format<int64_t, 26>(context, v.val);
 }
 
 StringVal StringFunctions::money_format(FunctionContext* context, const 
LargeIntVal& v) {
     if (v.is_null) {
         return StringVal::null();
     }
-
-    return do_money_format(context, fmt::format("{}00", v.val, "00"));
+    return do_money_format<__int128_t, 52>(context, v.val);
 }
 
 static int index_of(const uint8_t* source, int source_offset, int source_count,
diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h
index 05a548d..dbd8cee 100644
--- a/be/src/exprs/string_functions.h
+++ b/be/src/exprs/string_functions.h
@@ -26,6 +26,7 @@
 #include <string_view>
 
 #include "anyval_util.h"
+#include "gutil/strings/numbers.h"
 #include "runtime/string_search.hpp"
 #include "runtime/string_value.h"
 
@@ -148,28 +149,17 @@ public:
     static doris_udf::StringVal money_format(doris_udf::FunctionContext* 
context,
                                              const doris_udf::LargeIntVal& v);
 
-    struct CommaMoneypunct : std::moneypunct<char> {
-        pattern do_pos_format() const override { return {{none, sign, none, 
value}}; }
-        pattern do_neg_format() const override { return {{none, sign, none, 
value}}; }
-        int do_frac_digits() const override { return 2; }
-        char_type do_thousands_sep() const override { return ','; }
-        string_type do_grouping() const override { return "\003"; }
-        string_type do_negative_sign() const override { return "-"; }
-    };
-
-    static StringVal do_money_format(FunctionContext* context, const 
std::string& v) {
-        static std::locale comma_locale(std::locale(), new CommaMoneypunct());
-        static std::stringstream ss;
-        static bool ss_init = false;
-        if (UNLIKELY(!ss_init)) {
-            ss.imbue(comma_locale);
-            ss_init = true;
-        }
-        static std::string empty_string;
-        ss.str(empty_string);
-
-        ss << std::put_money(v);
-        return AnyValUtil::from_string_temp(context, ss.str());
+    template <typename T, size_t N> static StringVal 
do_money_format(FunctionContext* context, const T int_value,
+            const int32_t frac_value = 0) {
+        char local[N];
+        char* p = SimpleItoaWithCommas(int_value, local, sizeof(local));
+        int32_t string_val_len = local + sizeof(local) - p + 3;
+        StringVal result = StringVal::create_temp_string_val(context, 
string_val_len);
+        memcpy(result.ptr, p, string_val_len - 3);
+        *(result.ptr + string_val_len - 3) = '.';
+        *(result.ptr + string_val_len - 2) = '0' + (frac_value / 10);
+        *(result.ptr + string_val_len - 1) = '0' + (frac_value % 10);
+        return result;
     };
 
     static StringVal split_part(FunctionContext* context, const StringVal& 
content,
diff --git a/be/src/gutil/strings/numbers.cc b/be/src/gutil/strings/numbers.cc
index 83fd47a..65ae119 100644
--- a/be/src/gutil/strings/numbers.cc
+++ b/be/src/gutil/strings/numbers.cc
@@ -1335,7 +1335,39 @@ string SimpleItoaWithCommas(uint32 i) {
 string SimpleItoaWithCommas(int64 i) {
     // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
     char local[26];
+    char* p = SimpleItoaWithCommas(i, local, sizeof(local));
+    return string(p, local + sizeof(local));
+}
+
+// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
+// compile.
+string SimpleItoaWithCommas(uint64 i) {
+    // 20 digits and 6 commas are good for 64-bit or smaller ints.
+    // Longest is 18,446,744,073,709,551,615.
+    char local[26];
     char* p = local + sizeof(local);
+    *--p = '0' + i % 10; // this case deals with the number "0"
+    i /= 10;
+    while (i) {
+        *--p = '0' + i % 10;
+        i /= 10;
+        if (i == 0) break;
+
+        *--p = '0' + i % 10;
+        i /= 10;
+        if (i == 0) break;
+
+        *--p = ',';
+        *--p = '0' + i % 10;
+        i /= 10;
+        // For this unrolling, we check if i == 0 in the main while loop
+    }
+    return string(p, local + sizeof(local));
+}
+
+char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
+    // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
+    char* p = buffer + buffer_size;
     // Need to use uint64 instead of int64 to correctly handle
     // -9,223,372,036,854,775,808.
     uint64 n = i;
@@ -1357,35 +1389,37 @@ string SimpleItoaWithCommas(int64 i) {
         // For this unrolling, we check if n == 0 in the main while loop
     }
     if (i < 0) *--p = '-';
-    return string(p, local + sizeof(local));
+    return p;
 }
 
-// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
-// compile.
-string SimpleItoaWithCommas(uint64 i) {
-    // 20 digits and 6 commas are good for 64-bit or smaller ints.
-    // Longest is 18,446,744,073,709,551,615.
-    char local[26];
-    char* p = local + sizeof(local);
-    *--p = '0' + i % 10; // this case deals with the number "0"
-    i /= 10;
-    while (i) {
-        *--p = '0' + i % 10;
-        i /= 10;
-        if (i == 0) break;
+char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
+    // 39 digits, 12 commas, and sign are good for 128-bit or smaller ints.
+    char* p = buffer + buffer_size;
+    // Need to use uint128 instead of int128 to correctly handle
+    // -170,141,183,460,469,231,731,687,303,715,884,105,728.
+    __uint128_t n = i;
+    if (i < 0) n = 0 - n;
+    *--p = '0' + n % 10; // this case deals with the number "0"
+    n /= 10;
+    while (n) {
+        *--p = '0' + n % 10;
+        n /= 10;
+        if (n == 0) break;
 
-        *--p = '0' + i % 10;
-        i /= 10;
-        if (i == 0) break;
+        *--p = '0' + n % 10;
+        n /= 10;
+        if (n == 0) break;
 
         *--p = ',';
-        *--p = '0' + i % 10;
-        i /= 10;
-        // For this unrolling, we check if i == 0 in the main while loop
+        *--p = '0' + n % 10;
+        n /= 10;
+        // For this unrolling, we check if n == 0 in the main while loop
     }
-    return string(p, local + sizeof(local));
+    if (i < 0) *--p = '-';
+    return p;
 }
 
+
 // ----------------------------------------------------------------------
 // ItoaKMGT()
 //    Description: converts an integer to a string
diff --git a/be/src/gutil/strings/numbers.h b/be/src/gutil/strings/numbers.h
index 5f2acb2..931a1c2 100644
--- a/be/src/gutil/strings/numbers.h
+++ b/be/src/gutil/strings/numbers.h
@@ -461,6 +461,9 @@ string SimpleItoaWithCommas(uint32 i);
 string SimpleItoaWithCommas(int64 i);
 string SimpleItoaWithCommas(uint64 i);
 
+char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size);
+char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size);
+
 // ----------------------------------------------------------------------
 // ItoaKMGT()
 //    Description: converts an integer to a string
diff --git a/be/test/exprs/string_functions_test.cpp 
b/be/test/exprs/string_functions_test.cpp
index a5240c3..99532b6 100644
--- a/be/test/exprs/string_functions_test.cpp
+++ b/be/test/exprs/string_functions_test.cpp
@@ -18,9 +18,9 @@
 #include "exprs/string_functions.h"
 
 #include <gtest/gtest.h>
-
 #include <iostream>
 #include <string>
+#include <fmt/os.h>
 
 #include "exprs/anyval_util.h"
 #include "test_util/test_util.h"
@@ -44,13 +44,26 @@ private:
     FunctionContext* ctx;
 };
 
-TEST_F(StringFunctionsTest, do_money_format_bench) {
+TEST_F(StringFunctionsTest, do_money_format_for_bigint_bench) {
     doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
     StringVal expected =
             AnyValUtil::from_string_temp(context, 
std::string("9,223,372,036,854,775,807.00"));
+    BigIntVal bigIntVal(9223372036854775807);
     for (int i = 0; i < LOOP_LESS_OR_MORE(10, 10000000); i++) {
-        StringVal result =
-                StringFunctions::do_money_format(context, 
"922337203685477580700"); // cent
+        StringVal result = StringFunctions::money_format(context, bigIntVal);
+        ASSERT_EQ(expected, result);
+    }
+    delete context;
+}
+
+TEST_F(StringFunctionsTest, do_money_format_for_decimalv2_bench) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+    StringVal expected = AnyValUtil::from_string_temp(context, 
std::string("9,223,372,085.85"));
+    DecimalV2Value dv1(std::string("9223372085.8678"));
+    DecimalV2Val decimalV2Val;
+    dv1.to_decimal_val(&decimalV2Val);
+    for (int i = 0; i < LOOP_LESS_OR_MORE(10, 10000000); i++) {
+        StringVal result = StringFunctions::money_format(context, 
decimalV2Val);
         ASSERT_EQ(expected, result);
     }
     delete context;
@@ -75,16 +88,17 @@ TEST_F(StringFunctionsTest, money_format_bigint) {
 
 TEST_F(StringFunctionsTest, money_format_large_int) {
     doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
-
-    std::string str("170141183460469231731687303715884105727");
-    std::stringstream ss;
-    ss << str;
-    __int128 value;
-    ss >> value;
+    __int128 value = MAX_INT128;
     StringVal result = StringFunctions::money_format(context, 
doris_udf::LargeIntVal(value));
     StringVal expected = AnyValUtil::from_string_temp(
             context, 
std::string("170,141,183,460,469,231,731,687,303,715,884,105,727.00"));
     ASSERT_EQ(expected, result);
+
+    value = MIN_INT128;
+    result = StringFunctions::money_format(context, 
doris_udf::LargeIntVal(value));
+    expected = AnyValUtil::from_string_temp(
+                context, 
std::string("-170,141,183,460,469,231,731,687,303,715,884,105,728.00"));
+    ASSERT_EQ(expected, result);
     delete context;
 }
 

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[incubator-doris] branch master updated: [Bug][Optimize] Fix race condition problem and optimize do_money_format function (#6350)

Reply via email to