This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 47c9346b84c branch-4.1: [fix](retention) Limit param count to 32 to 
avoid BE heap overflow #64521 (#64661)
47c9346b84c is described below

commit 47c9346b84c808966e6fc51e7fd294421dce8f69
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Jun 22 14:02:25 2026 +0800

    branch-4.1: [fix](retention) Limit param count to 32 to avoid BE heap 
overflow #64521 (#64661)
    
    Cherry-picked from #64521
    
    Co-authored-by: 924060929 <[email protected]>
---
 .../exprs/aggregate/aggregate_function_retention.h | 13 ++++++-
 be/test/exprs/aggregate/vec_retention_test.cpp     | 17 +++++++++
 .../trees/expressions/functions/agg/Retention.java | 11 ++++++
 .../test_aggregate_retention_param_limit.groovy    | 44 ++++++++++++++++++++++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/be/src/exprs/aggregate/aggregate_function_retention.h 
b/be/src/exprs/aggregate/aggregate_function_retention.h
index f36757406e7..0899f3a5e7a 100644
--- a/be/src/exprs/aggregate/aggregate_function_retention.h
+++ b/be/src/exprs/aggregate/aggregate_function_retention.h
@@ -27,6 +27,8 @@
 #include <boost/iterator/iterator_facade.hpp>
 #include <memory>
 
+#include "common/exception.h"
+#include "common/status.h"
 #include "core/assert_cast.h"
 #include "core/column/column.h"
 #include "core/column/column_array.h"
@@ -113,7 +115,16 @@ class AggregateFunctionRetention final
 public:
     AggregateFunctionRetention(const DataTypes& argument_types_)
             : IAggregateFunctionDataHelper<RetentionState, 
AggregateFunctionRetention>(
-                      argument_types_) {}
+                      argument_types_) {
+        // RetentionState only has room for MAX_EVENTS(32) events (fixed-size 
events[] array,
+        // plus an int64 serialized bitmap). More params would overflow 
events[] in add()/
+        // insert_result_into() and corrupt the heap, so reject it at 
construction time.
+        if (argument_types_.size() > RetentionState::MAX_EVENTS) {
+            throw Exception(ErrorCode::INVALID_ARGUMENT,
+                            "retention function can accept at most {} params, 
but got {}",
+                            RetentionState::MAX_EVENTS, 
argument_types_.size());
+        }
+    }
 
     String get_name() const override { return "retention"; }
 
diff --git a/be/test/exprs/aggregate/vec_retention_test.cpp 
b/be/test/exprs/aggregate/vec_retention_test.cpp
index ea22645fb32..cbecc2a9d24 100644
--- a/be/test/exprs/aggregate/vec_retention_test.cpp
+++ b/be/test/exprs/aggregate/vec_retention_test.cpp
@@ -22,6 +22,7 @@
 #include <memory>
 #include <ostream>
 
+#include "common/exception.h"
 #include "common/logging.h"
 #include "core/assert_cast.h"
 #include "core/column/column_array.h"
@@ -32,6 +33,7 @@
 #include "core/string_buffer.hpp"
 #include "core/types.h"
 #include "exprs/aggregate/aggregate_function.h"
+#include "exprs/aggregate/aggregate_function_retention.h"
 #include "exprs/aggregate/aggregate_function_simple_factory.h"
 #include "gtest/gtest_pred_impl.h"
 
@@ -286,4 +288,19 @@ TEST_F(VRetentionTest, testSerialize) {
     agg_function->destroy(place2);
     agg_function->destroy(place3);
 }
+
+TEST_F(VRetentionTest, testMaxEventsBoundary) {
+    AggregateFunctionSimpleFactory factory = 
AggregateFunctionSimpleFactory::instance();
+
+    // 32 boolean params is the maximum allowed (RetentionState::MAX_EVENTS) 
and must succeed.
+    DataTypes max_types(RetentionState::MAX_EVENTS, 
std::make_shared<DataTypeUInt8>());
+    auto fn = factory.get("retention", max_types, nullptr, false, -1);
+    EXPECT_NE(fn, nullptr);
+
+    // 33 boolean params overflow the fixed-size events[32] array; the 
function must be rejected
+    // at construction time instead of corrupting the heap.
+    DataTypes too_many_types(RetentionState::MAX_EVENTS + 1, 
std::make_shared<DataTypeUInt8>());
+    EXPECT_THROW({ factory.get("retention", too_many_types, nullptr, false, 
-1); },
+                 doris::Exception);
+}
 } // namespace doris
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Retention.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Retention.java
index 24c2801e0aa..87177f34b38 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Retention.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Retention.java
@@ -37,6 +37,13 @@ import java.util.List;
 public class Retention extends NullableAggregateFunction
         implements ExplicitlyCastableSignature {
 
+    // The BE side stores the retention state in a fixed-size array
+    // (RetentionState::MAX_EVENTS, uint8_t events[32]) and also serializes it 
into a single
+    // int64 bitmap, so at most 32 conditions can be represented. Passing more 
than 32 params
+    // overflows that array on BE and causes a heap out-of-bounds write/read 
(BE core).
+    // Keep this in sync with 
be/src/exprs/aggregate/aggregate_function_retention.h.
+    public static final int MAX_EVENTS = 32;
+
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             
FunctionSignature.ret(ArrayType.of(BooleanType.INSTANCE)).varArgs(BooleanType.INSTANCE)
     );
@@ -70,6 +77,10 @@ public class Retention extends NullableAggregateFunction
         if (this.children.isEmpty()) {
             throw new AnalysisException("The " + functionName + " function 
must have at least one param");
         }
+        if (children.size() > MAX_EVENTS) {
+            throw new AnalysisException("The " + functionName + " function can 
accept at most " + MAX_EVENTS
+                    + " params, but got " + children.size());
+        }
 
         for (int i = 0; i < children.size(); i++) {
             if (!getArgumentType(i).isBooleanType()) {
diff --git 
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_retention_param_limit.groovy
 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_retention_param_limit.groovy
new file mode 100644
index 00000000000..3fb3fbbc0e9
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_retention_param_limit.groovy
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// retention() stores its state in a fixed-size events[32] array on BE. 
Passing more than
+// 32 conditions used to overflow that array and core the BE. FE must reject
+// > 32 params with a clear error instead of sending the query to BE.
+suite("test_aggregate_retention_param_limit") {
+    sql "DROP TABLE IF EXISTS retention_param_limit_test"
+    sql """
+        CREATE TABLE retention_param_limit_test (
+            uid INT,
+            dt DATETIME
+        )
+        DUPLICATE KEY(uid)
+        DISTRIBUTED BY HASH(uid) BUCKETS 1
+        PROPERTIES ("replication_num" = "1")
+    """
+    sql """ INSERT INTO retention_param_limit_test VALUES (1, '2026-01-01'), 
(1, '2026-01-02'), (2, '2026-01-01') """
+
+    def conds = { int n -> (1..n).collect { "uid = ${it}" }.join(", ") }
+
+    // 32 conditions is the maximum allowed and must succeed.
+    sql """ SELECT uid, retention(${conds(32)}) FROM 
retention_param_limit_test GROUP BY uid ORDER BY uid """
+
+    // 33 conditions must be rejected by FE with a clear error (not a BE 
crash).
+    test {
+        sql """ SELECT uid, retention(${conds(33)}) FROM 
retention_param_limit_test GROUP BY uid """
+        exception "at most 32"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to