(doris) branch branch-2.1 updated: [fix](DECIMAL) error DECIMAL cat to BOOLEAN (#44326) (#46275)

yiguolei Thu, 02 Jan 2025 02:42:30 -0800

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 7206ca39eca [fix](DECIMAL) error DECIMAL cat to BOOLEAN (#44326) 
(#46275)
7206ca39eca is described below

commit 7206ca39eca8154ec2fd343ef8651523be769946
Author: Jerry Hu <[email protected]>
AuthorDate: Thu Jan 2 18:42:04 2025 +0800

    [fix](DECIMAL) error DECIMAL cat to BOOLEAN (#44326) (#46275)
    
    In the past, there were issues with converting `double` and `decimal` to
    `boolean`.
    For example, a `double` value like 0.13 would first be cast to `uint8`,
    resulting in 0.
    Then, it would be converted to `bool`, yielding 0 (incorrect, as the
    expected result is 1).
    
    Similarly, `decimal` values were directly cast to `uint8`, leading to
    non-0/1 values for `bool`.
    This issue arises because Doris internally uses `uint8` to represent
    `boolean`.
    
    before
    ```
    mysql> select cast(40.123 as BOOLEAN);
    +-------------------------+
    | cast(40.123 as BOOLEAN) |
    +-------------------------+
    |                      40 |
    +-------------------------+
    ```
    
    now
    ```
    mysql> select cast(40.123 as BOOLEAN);
    +-------------------------+
    | cast(40.123 as BOOLEAN) |
    +-------------------------+
    |                       1 |
    +-------------------------+
    ```
    
    ### What problem does this PR solve?
    
    pick #44326
    Related PR: #[44326](https://github.com/mrhhsg/doris/tree/pick_44326)
    
    Problem Summary:
---
 be/src/vec/data_types/data_type_decimal.h          | 19 +++++---
 be/src/vec/functions/function_cast.h               | 43 +++++++++++------
 .../correctness/test_cast_decimalv3_as_bool.out    | 17 +++++++
 .../correctness/test_cast_decimalv3_as_bool.groovy | 55 ++++++++++++++++++++++
 .../case_function/test_case_function_null.groovy   | 18 ++++---
 5 files changed, 124 insertions(+), 28 deletions(-)

diff --git a/be/src/vec/data_types/data_type_decimal.h 
b/be/src/vec/data_types/data_type_decimal.h
index d95750c9f25..4880b2ceb86 100644
--- a/be/src/vec/data_types/data_type_decimal.h
+++ b/be/src/vec/data_types/data_type_decimal.h
@@ -509,15 +509,20 @@ void convert_from_decimals(RealTo* dst, const RealFrom* 
src, UInt32 precicion_fr
     MaxFieldType multiplier = 
DataTypeDecimal<MaxFieldType>::get_scale_multiplier(scale_from);
     FromDataType from_data_type(precicion_from, scale_from);
     for (size_t i = 0; i < size; i++) {
-        auto tmp = static_cast<MaxFieldType>(src[i]).value / multiplier.value;
-        if constexpr (narrow_integral) {
-            if (tmp < min_result.value || tmp > max_result.value) {
-                
THROW_DECIMAL_CONVERT_OVERFLOW_EXCEPTION(from_data_type.to_string(src[i]),
-                                                         
from_data_type.get_name(),
-                                                         OrigToDataType 
{}.get_name());
+        // uint8_t now use as boolean in doris
+        if constexpr (std::is_same_v<RealTo, UInt8>) {
+            dst[i] = static_cast<MaxFieldType>(src[i]).value != 0;
+        } else {
+            auto tmp = static_cast<MaxFieldType>(src[i]).value / 
multiplier.value;
+            if constexpr (narrow_integral) {
+                if (tmp < min_result.value || tmp > max_result.value) {
+                    
THROW_DECIMAL_CONVERT_OVERFLOW_EXCEPTION(from_data_type.to_string(src[i]),
+                                                             
from_data_type.get_name(),
+                                                             OrigToDataType 
{}.get_name());
+                }
             }
+            dst[i] = tmp;
         }
-        dst[i] = tmp;
     }
 }
 
diff --git a/be/src/vec/functions/function_cast.h 
b/be/src/vec/functions/function_cast.h
index 68b1eb85f26..cf3ea7b0791 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -256,6 +256,21 @@ struct ConvertImpl {
     using FromFieldType = typename FromDataType::FieldType;
     using ToFieldType = typename ToDataType::FieldType;
 
+    // `static_cast_set` is introduced to wrap `static_cast` and handle 
special cases.
+    // Doris uses `uint8` to represent boolean values internally.
+    // Directly `static_cast` to `uint8` can result in non-0/1 values,
+    // To address this, `static_cast_set` performs an additional check:
+    //  For `uint8` types, it explicitly uses `static_cast<bool>` to ensure
+    //  the result is either 0 or 1.
+    static void static_cast_set(ToFieldType& to, const FromFieldType& from) {
+        // uint8_t now use as boolean in doris
+        if constexpr (std::is_same_v<uint8_t, ToFieldType>) {
+            to = static_cast<bool>(from);
+        } else {
+            to = static_cast<ToFieldType>(from);
+        }
+    }
+
     template <typename Additions = void*>
     static Status execute(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                           size_t result, size_t input_rows_count,
@@ -375,8 +390,9 @@ struct ConvertImpl {
                     } else if constexpr (IsDateTimeV2Type<ToDataType>) {
                         DataTypeDateTimeV2::cast_from_date(vec_from[i], 
vec_to[i]);
                     } else {
-                        vec_to[i] =
-                                reinterpret_cast<const 
VecDateTimeValue&>(vec_from[i]).to_int64();
+                        static_cast_set(
+                                vec_to[i],
+                                reinterpret_cast<const 
VecDateTimeValue&>(vec_from[i]).to_int64());
                     }
                 }
             } else if constexpr (IsTimeV2Type<FromDataType>) {
@@ -407,13 +423,16 @@ struct ConvertImpl {
                         }
                     } else {
                         if constexpr (IsDateTimeV2Type<FromDataType>) {
-                            vec_to[i] = reinterpret_cast<const 
DateV2Value<DateTimeV2ValueType>&>(
-                                                vec_from[i])
-                                                .to_int64();
+                            static_cast_set(
+                                    vec_to[i],
+                                    reinterpret_cast<const 
DateV2Value<DateTimeV2ValueType>&>(
+                                            vec_from[i])
+                                            .to_int64());
                         } else {
-                            vec_to[i] = reinterpret_cast<const 
DateV2Value<DateV2ValueType>&>(
-                                                vec_from[i])
-                                                .to_int64();
+                            static_cast_set(vec_to[i],
+                                            reinterpret_cast<const 
DateV2Value<DateV2ValueType>&>(
+                                                    vec_from[i])
+                                                    .to_int64());
                         }
                     }
                 }
@@ -435,16 +454,10 @@ struct ConvertImpl {
                     return Status::OK();
                 } else {
                     for (size_t i = 0; i < size; ++i) {
-                        vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
+                        static_cast_set(vec_to[i], vec_from[i]);
                     }
                 }
             }
-            // TODO: support boolean cast more reasonable
-            if constexpr (std::is_same_v<uint8_t, ToFieldType>) {
-                for (int i = 0; i < size; ++i) {
-                    vec_to[i] = static_cast<bool>(vec_to[i]);
-                }
-            }
 
             block.replace_by_position(result, std::move(col_to));
         } else {
diff --git a/regression-test/data/correctness/test_cast_decimalv3_as_bool.out 
b/regression-test/data/correctness/test_cast_decimalv3_as_bool.out
new file mode 100644
index 00000000000..4f41130b00b
--- /dev/null
+++ b/regression-test/data/correctness/test_cast_decimalv3_as_bool.out
@@ -0,0 +1,17 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select1 --
+0.000  13131.213132100 0E-16
+0.000  2131231.231000000       2.3323000E-9
+3.141  0E-9    123123.2131231231322130
+
+-- !select2 --
+false  true    false
+false  true    true
+true   false   true
+
+-- !select3 --
+true   1       true    false
+
+-- !select3 --
+true   1       true    false
+
diff --git 
a/regression-test/suites/correctness/test_cast_decimalv3_as_bool.groovy 
b/regression-test/suites/correctness/test_cast_decimalv3_as_bool.groovy
new file mode 100644
index 00000000000..768da493251
--- /dev/null
+++ b/regression-test/suites/correctness/test_cast_decimalv3_as_bool.groovy
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_cast_decimalv3_as_bool") {
+     sql """ DROP TABLE IF EXISTS cast_decimalv3_as_bool """
+     sql """
+        CREATE TABLE IF NOT EXISTS cast_decimalv3_as_bool (
+            `id` int(11) ,
+            `k1` decimalv3(9,3) ,
+            `k2` decimalv3(18,9) ,
+            `k3` decimalv3(38,16) ,
+        )
+        UNIQUE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 10
+        PROPERTIES (
+        "enable_unique_key_merge_on_write" = "true",
+        "replication_num" = "1"
+        );
+    """
+    sql """
+        set enable_nereids_planner=true,enable_fold_constant_by_be = false
+    """
+    sql """
+        INSERT INTO cast_decimalv3_as_bool VALUES
+        (1,0.00001,13131.2131321,0.000000000000000000),
+        (2,0.00000,2131231.231,0.0000000023323),
+        (3,3.141414,0.0000000000,123123.213123123132213);
+    """
+    qt_select1 """
+        select k1,k2,k3 from cast_decimalv3_as_bool order by id
+    """
+    qt_select2 """
+        select cast(k1 as boolean), cast(k2 as boolean) , cast(k3 as boolean) 
from cast_decimalv3_as_bool order by id
+    """ 
+    qt_select3"""
+        select cast(3.00001 as boolean),  cast(cast(3.00001 as  boolean) as 
int),cast(0.001 as boolean),cast(0.000 as boolean);
+    """
+        qt_select3"""
+        select cast(cast(3.00001 as double)as boolean),  
cast(cast(cast(3.00001 as double) as  boolean) as int),cast(cast(0.001 as 
double) as boolean),cast(cast(0.000 as double) as boolean);
+    """
+}
\ No newline at end of file
diff --git 
a/regression-test/suites/query_p0/sql_functions/case_function/test_case_function_null.groovy
 
b/regression-test/suites/query_p0/sql_functions/case_function/test_case_function_null.groovy
index 5138db6e73b..a91c86b5f48 100644
--- 
a/regression-test/suites/query_p0/sql_functions/case_function/test_case_function_null.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/case_function/test_case_function_null.groovy
@@ -185,10 +185,11 @@ suite("test_case_function_null", 
"query,p0,arrow_flight_sql") {
             c2,
             c1;
     """
-
+    // There is a behavior change. The 0.4cast boolean used to be 0 in the 
past, but now it has changed to 1.
+    // Therefore, we need to update the case accordingly.
     qt_sql_case1 """
         SELECT SUM(
-            CASE (((NULL BETWEEN NULL AND NULL)) and (CAST(0.4716 AS BOOLEAN)))
+            CASE (((NULL BETWEEN NULL AND NULL)) and (CAST(0.0 AS BOOLEAN)))
             WHEN ((CAST('-1530390546' AS VARCHAR)) LIKE ('-1678299490'))
             THEN (- (+ case_null2.c0))
             WHEN CASE (NULL IN (NULL))
@@ -197,9 +198,10 @@ suite("test_case_function_null", 
"query,p0,arrow_flight_sql") {
             END)
         FROM case_null2;
     """
-
+    // There is a behavior change. The 0.4cast boolean used to be 0 in the 
past, but now it has changed to 1.
+    // Therefore, we need to update the case accordingly.
     qt_sql_case2 """
-        SELECT SUM(CASE (((NULL BETWEEN NULL AND NULL)) and (CAST(0.4716 AS 
BOOLEAN)))
+        SELECT SUM(CASE (((NULL BETWEEN NULL AND NULL)) and (CAST(0.0 AS 
BOOLEAN)))
             WHEN ((CAST('-1530390546' AS VARCHAR)) LIKE ('-1678299490'))
             THEN (- (+ case_null2.c0))
             END)
@@ -209,9 +211,11 @@ suite("test_case_function_null", 
"query,p0,arrow_flight_sql") {
     sql "SET experimental_enable_nereids_planner=true"
     sql "SET enable_fallback_to_original_planner=false"
 
+    // There is a behavior change. The 0.4cast boolean used to be 0 in the 
past, but now it has changed to 1.
+    // Therefore, we need to update the case accordingly.
     qt_sql_case1 """
         SELECT SUM(
-            CASE (((NULL BETWEEN NULL AND NULL)) and (CAST(0.4716 AS BOOLEAN)))
+            CASE (((NULL BETWEEN NULL AND NULL)) and (CAST(0.0 AS BOOLEAN)))
             WHEN ((CAST('-1530390546' AS VARCHAR)) LIKE ('-1678299490'))
             THEN (- (+ case_null2.c0))
             WHEN CASE (NULL IN (NULL))
@@ -221,8 +225,10 @@ suite("test_case_function_null", 
"query,p0,arrow_flight_sql") {
         FROM case_null2;
     """
 
+    // There is a behavior change. The 0.4cast boolean used to be 0 in the 
past, but now it has changed to 1.
+    // Therefore, we need to update the case accordingly.
     qt_sql_case2 """
-        SELECT SUM(CASE (((NULL BETWEEN NULL AND NULL)) and (CAST(0.4716 AS 
BOOLEAN)))
+        SELECT SUM(CASE (((NULL BETWEEN NULL AND NULL)) and (CAST(0.0 AS 
BOOLEAN)))
             WHEN ((CAST('-1530390546' AS VARCHAR)) LIKE ('-1678299490'))
             THEN (- (+ case_null2.c0))
             END)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch branch-2.1 updated: [fix](DECIMAL) error DECIMAL cat to BOOLEAN (#44326) (#46275)

Reply via email to