This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 121c2002868 [fix](array_range) fix array_range func for large param 
which should return error (#38284)
121c2002868 is described below

commit 121c20028682e44aef875acd21acb00b5a1495e8
Author: amory <wangqian...@selectdb.com>
AuthorDate: Sat Jul 27 18:08:16 2024 +0800

    [fix](array_range) fix array_range func for large param which should return 
error (#38284)
    
    If array_range is called with a very large size, it can make the BE OOM,
    so we should reject such calls using the max_array_size_as_field limit.
---
 .../vec/functions/array/function_array_range.cpp   | 31 ++++++++++++++++------
 .../scalar_function/Array.groovy                   | 29 ++++++++++++++++++++
 .../array_functions/test_array_functions.groovy    | 22 +++++++++++++++
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/functions/array/function_array_range.cpp 
b/be/src/vec/functions/array/function_array_range.cpp
index a84b94d8cfc..1a5dd08aaac 100644
--- a/be/src/vec/functions/array/function_array_range.cpp
+++ b/be/src/vec/functions/array/function_array_range.cpp
@@ -168,9 +168,9 @@ struct RangeImplUtil {
         dest_nested_column->reserve(input_rows_count);
         dest_nested_null_map.reserve(input_rows_count);
 
-        vector(start_column->get_data(), end_column->get_data(), 
step_column->get_data(),
-               args_null_map->get_data(), nested_column->get_data(), 
dest_nested_null_map,
-               dest_offsets);
+        RETURN_IF_ERROR(vector(start_column->get_data(), 
end_column->get_data(),
+                               step_column->get_data(), 
args_null_map->get_data(),
+                               nested_column->get_data(), 
dest_nested_null_map, dest_offsets));
 
         block.get_by_position(result).column =
                 ColumnNullable::create(std::move(dest_array_column_ptr), 
std::move(args_null_map));
@@ -178,11 +178,12 @@ struct RangeImplUtil {
     }
 
 private:
-    static void vector(const PaddedPODArray<SourceDataType>& start,
-                       const PaddedPODArray<SourceDataType>& end, const 
PaddedPODArray<Int32>& step,
-                       NullMap& args_null_map, PaddedPODArray<SourceDataType>& 
nested_column,
-                       PaddedPODArray<UInt8>& dest_nested_null_map,
-                       ColumnArray::Offsets64& dest_offsets) {
+    static Status vector(const PaddedPODArray<SourceDataType>& start,
+                         const PaddedPODArray<SourceDataType>& end,
+                         const PaddedPODArray<Int32>& step, NullMap& 
args_null_map,
+                         PaddedPODArray<SourceDataType>& nested_column,
+                         PaddedPODArray<UInt8>& dest_nested_null_map,
+                         ColumnArray::Offsets64& dest_offsets) {
         int rows = start.size();
         for (auto row = 0; row < rows; ++row) {
             auto idx = start[row];
@@ -195,6 +196,13 @@ private:
                     dest_offsets.push_back(dest_offsets.back());
                     continue;
                 } else {
+                    if (idx < end_row && step_row > 0 &&
+                        ((static_cast<__int128_t>(end_row) - 
static_cast<__int128_t>(step_row) -
+                          1) / static_cast<__int128_t>(step_row) +
+                         1) > max_array_size_as_field) {
+                        return Status::InvalidArgument("Array size exceeds the 
limit {}",
+                                                       
max_array_size_as_field);
+                    }
                     int offset = dest_offsets.back();
                     while (idx < end[row]) {
                         nested_column.push_back(idx);
@@ -219,11 +227,17 @@ private:
                     using UNIT = 
std::conditional_t<std::is_same_v<TimeUnitOrVoid, void>,
                                                     
std::integral_constant<TimeUnit, TimeUnit::DAY>,
                                                     TimeUnitOrVoid>;
+                    int move = 0;
                     while (doris::datetime_diff<UNIT::value, 
DateTimeV2ValueType,
                                                 DateTimeV2ValueType>(idx, 
end_row) > 0) {
+                        if (move > max_array_size_as_field) {
+                            return Status::InvalidArgument("Array size exceeds 
the limit {}",
+                                                           
max_array_size_as_field);
+                        }
                         nested_column.push_back(idx);
                         dest_nested_null_map.push_back(0);
                         offset++;
+                        move++;
                         idx = doris::vectorized::date_time_add<
                                 UNIT::value, DateV2Value<DateTimeV2ValueType>,
                                 DateV2Value<DateTimeV2ValueType>, 
DateTimeV2>(idx, step_row,
@@ -233,6 +247,7 @@ private:
                 }
             }
         }
+        return Status::OK();
     }
 };
 
diff --git 
a/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy 
b/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy
index 5957ced51af..ef3813d6deb 100644
--- a/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy
+++ b/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy
@@ -623,6 +623,11 @@ suite("nereids_scalar_fn_Array") {
     order_qt_sql_array_range_two_param_notnull "select array_range(kint, 1000) 
from fn_test_not_nullable order by id"
     order_qt_sql_array_range_three_param "select array_range(kint, 10000, 
ktint) from fn_test order by id"
     order_qt_sql_array_range_three_param_notnull "select array_range(kint, 
10000, ktint) from fn_test_not_nullable order by id"
+    // make a large size of array element, expect to throw error
+    test  {
+        sql "select array_range(kint, 1000000000) from fn_test"
+        exception ('Array size exceeds the limit 1000000')
+    }
 
     // array_remove
     order_qt_sql_array_remove_Double "select array_remove(kadbl, kdbl) from 
fn_test"
@@ -1276,6 +1281,30 @@ suite("nereids_scalar_fn_Array") {
     qt_sequence_datetime_hour """select sequence(kdtmv2s1, date_add(kdtmv2s1, 
interval kint-3 hour), interval kint hour) from fn_test order by kdtmv2s1;"""
     qt_sequence_datetime_minute """select sequence(kdtmv2s1, 
date_add(kdtmv2s1, interval kint+1 minute), interval kint minute) from fn_test 
order by kdtmv2s1;"""
     qt_sequence_datetime_second """select sequence(kdtmv2s1, 
date_add(kdtmv2s1, interval kint second), interval kint-1 second) from fn_test 
order by kdtmv2s1;"""
+    // make large error size
+    test {
+        sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval 
kint+1000 year), interval kint hour)) from fn_test order by kdtmv2s1;"
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }
+    }
+
+    test {
+        sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval 
kint+10000 month), interval kint hour)) from fn_test order by kdtmv2s1;"
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }
+    }
+
+    test {
+        sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval 
kint+1000001 day), interval kint day)) from fn_test order by kdtmv2s1;"
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }
+    }
 
     // with array empty
     qt_array_empty_fe """select array()"""
diff --git 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
 
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
index ac12b1ffccb..96bca3eb4ff 100644
--- 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
@@ -421,4 +421,26 @@ suite("test_array_functions") {
     qt_const_select "select sequence(cast('2022-35-38 12:00:10' as 
datetimev2(0)), cast('2022-05-18 22:00:30' as datetimev2(0))); "
     qt_const_select "select sequence(1, 10, 0); "
     qt_const_select "select sequence(cast('2022-05-15 12:00:00' as 
datetimev2(0)), cast('2022-05-17 12:00:00' as datetimev2(0)), interval 0 day); "
+    // test large size of array
+    test {
+        sql """ select sequence(cast('2022-05-01 12:00:00' as datetimev2(0)), 
cast('2022-05-17 12:00:00' as datetimev2(0)), interval 10000000000 week); """
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }        
+    }
+    test {
+        sql """ select sequence(1, 10000000000); """
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }
+    }
+    test {
+        sql """ select sequence(1, 10000000000, 2); """
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to