This is an automated email from the ASF dual-hosted git repository.

924060929 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fdaa0b1d7b3 [fix](retention) Limit param count to 32 to avoid BE heap 
overflow (#64521)
fdaa0b1d7b3 is described below

commit fdaa0b1d7b3f3eadb9510b693bcf4927e361a638
Author: 924060929 <[email protected]>
AuthorDate: Mon Jun 22 10:40:46 2026 +0800

    [fix](retention) Limit param count to 32 to avoid BE heap overflow (#64521)
    
    Problem Summary:
    
    `retention()` keeps its aggregate state in a fixed-size
    `RetentionState::events[32]` array and serializes it into a single
    `int64` bitmap, so it can represent at most 32 events. However neither
    FE nor BE limited the number of arguments:
    
    - `AggregateFunctionRetention::add()` iterates over all argument columns
    and calls `RetentionState::set(i)` (`events[i] = 1`) without any bound
    check, so with more than 32 params it writes past the end of `events[]`
    (heap out-of-bounds write).
    - `RetentionState::insert_result_into()` reads `events[]` for the actual
    argument count, so it reads past the array as well.
    
    With a query calling `retention()` with 102 boolean arguments, this
    corrupted the heap and later crashed BE in the streaming aggregation
    output path:
    
    ```
    [E-3113]string column length is too large: total_length=4295003729, 
element_number=4064
    ...
    doris::vectorized::ColumnStr<unsigned int>::insert_many_strings -> memcpy   
(SIGSEGV)
    doris::pipeline::StreamingAggLocalState::_get_results_with_serialized_key
    ```
    
    This PR enforces the 32-event limit on both sides:
    
    - **FE** (`Retention.checkLegalityBeforeTypeCoercion`): reject `> 32`
    params with a clear `AnalysisException` at planning time.
    - **BE** (`AggregateFunctionRetention` constructor): throw
    `INVALID_ARGUMENT` when more than `RetentionState::MAX_EVENTS` argument
    types are given, as a backstop for any path that reaches BE (e.g. an
    older FE during a rolling upgrade, or a direct BE call).
    
    ### Release note
    
    Fix BE crash when `retention()` is called with more than 32 conditions;
    it now reports a clear error instead of corrupting memory.
---
 .../exprs/aggregate/aggregate_function_retention.h | 13 ++++++-
 be/test/exprs/aggregate/vec_retention_test.cpp     | 17 +++++++++
 .../trees/expressions/functions/agg/Retention.java | 11 ++++++
 .../test_aggregate_retention_param_limit.groovy    | 44 ++++++++++++++++++++++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/be/src/exprs/aggregate/aggregate_function_retention.h 
b/be/src/exprs/aggregate/aggregate_function_retention.h
index 189a31b441a..6132179eaf6 100644
--- a/be/src/exprs/aggregate/aggregate_function_retention.h
+++ b/be/src/exprs/aggregate/aggregate_function_retention.h
@@ -27,6 +27,8 @@
 #include <boost/iterator/iterator_facade.hpp>
 #include <memory>
 
+#include "common/exception.h"
+#include "common/status.h"
 #include "core/assert_cast.h"
 #include "core/column/column.h"
 #include "core/column/column_array.h"
@@ -112,7 +114,16 @@ class AggregateFunctionRetention final
 public:
     AggregateFunctionRetention(const DataTypes& argument_types_)
             : IAggregateFunctionDataHelper<RetentionState, 
AggregateFunctionRetention>(
-                      argument_types_) {}
+                      argument_types_) {
+        // RetentionState only has room for MAX_EVENTS(32) events (fixed-size 
events[] array,
+        // plus an int64 serialized bitmap). More params would overflow 
events[] in add()/
+        // insert_result_into() and corrupt the heap, so reject it at 
construction time.
+        if (argument_types_.size() > RetentionState::MAX_EVENTS) {
+            throw Exception(ErrorCode::INVALID_ARGUMENT,
+                            "retention function can accept at most {} params, 
but got {}",
+                            RetentionState::MAX_EVENTS, 
argument_types_.size());
+        }
+    }
 
     String get_name() const override { return "retention"; }
 
diff --git a/be/test/exprs/aggregate/vec_retention_test.cpp 
b/be/test/exprs/aggregate/vec_retention_test.cpp
index 21966fb7f98..ae3cb320c0f 100644
--- a/be/test/exprs/aggregate/vec_retention_test.cpp
+++ b/be/test/exprs/aggregate/vec_retention_test.cpp
@@ -22,6 +22,7 @@
 #include <memory>
 #include <ostream>
 
+#include "common/exception.h"
 #include "common/logging.h"
 #include "core/assert_cast.h"
 #include "core/column/column_array.h"
@@ -33,6 +34,7 @@
 #include "core/string_buffer.hpp"
 #include "core/types.h"
 #include "exprs/aggregate/aggregate_function.h"
+#include "exprs/aggregate/aggregate_function_retention.h"
 #include "exprs/aggregate/aggregate_function_simple_factory.h"
 #include "gtest/gtest_pred_impl.h"
 
@@ -293,4 +295,19 @@ TEST_F(VRetentionTest, testSerialize) {
     agg_function->destroy(place2);
     agg_function->destroy(place3);
 }
+
+TEST_F(VRetentionTest, testMaxEventsBoundary) {
+    AggregateFunctionSimpleFactory factory = 
AggregateFunctionSimpleFactory::instance();
+
+    // 32 boolean params is the maximum allowed (RetentionState::MAX_EVENTS) 
and must succeed.
+    DataTypes max_types(RetentionState::MAX_EVENTS, 
std::make_shared<DataTypeUInt8>());
+    auto fn = factory.get("retention", max_types, nullptr, false, -1);
+    EXPECT_NE(fn, nullptr);
+
+    // 33 boolean params overflow the fixed-size events[32] array; the 
function must be rejected
+    // at construction time instead of corrupting the heap.
+    DataTypes too_many_types(RetentionState::MAX_EVENTS + 1, 
std::make_shared<DataTypeUInt8>());
+    EXPECT_THROW({ factory.get("retention", too_many_types, nullptr, false, 
-1); },
+                 doris::Exception);
+}
 } // namespace doris
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Retention.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Retention.java
index 24c2801e0aa..87177f34b38 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Retention.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Retention.java
@@ -37,6 +37,13 @@ import java.util.List;
 public class Retention extends NullableAggregateFunction
         implements ExplicitlyCastableSignature {
 
+    // The BE side stores the retention state in a fixed-size array
+    // (RetentionState::MAX_EVENTS, uint8_t events[32]) and also serializes it 
into a single
+    // int64 bitmap, so at most 32 conditions can be represented. Passing more 
than 32 params
+    // overflows that array on BE and causes a heap out-of-bounds write/read 
(BE core).
+    // Keep this in sync with 
be/src/exprs/aggregate/aggregate_function_retention.h.
+    public static final int MAX_EVENTS = 32;
+
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             
FunctionSignature.ret(ArrayType.of(BooleanType.INSTANCE)).varArgs(BooleanType.INSTANCE)
     );
@@ -70,6 +77,10 @@ public class Retention extends NullableAggregateFunction
         if (this.children.isEmpty()) {
             throw new AnalysisException("The " + functionName + " function 
must have at least one param");
         }
+        if (children.size() > MAX_EVENTS) {
+            throw new AnalysisException("The " + functionName + " function can 
accept at most " + MAX_EVENTS
+                    + " params, but got " + children.size());
+        }
 
         for (int i = 0; i < children.size(); i++) {
             if (!getArgumentType(i).isBooleanType()) {
diff --git 
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_retention_param_limit.groovy
 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_retention_param_limit.groovy
new file mode 100644
index 00000000000..3fb3fbbc0e9
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_retention_param_limit.groovy
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// retention() stores its state in a fixed-size events[32] array on BE. 
Passing more than
+// 32 conditions used to overflow that array and core the BE. FE must reject
+// > 32 params with a clear error instead of sending the query to BE.
+suite("test_aggregate_retention_param_limit") {
+    sql "DROP TABLE IF EXISTS retention_param_limit_test"
+    sql """
+        CREATE TABLE retention_param_limit_test (
+            uid INT,
+            dt DATETIME
+        )
+        DUPLICATE KEY(uid)
+        DISTRIBUTED BY HASH(uid) BUCKETS 1
+        PROPERTIES ("replication_num" = "1")
+    """
+    sql """ INSERT INTO retention_param_limit_test VALUES (1, '2026-01-01'), 
(1, '2026-01-02'), (2, '2026-01-01') """
+
+    def conds = { int n -> (1..n).collect { "uid = ${it}" }.join(", ") }
+
+    // 32 conditions is the maximum allowed and must succeed.
+    sql """ SELECT uid, retention(${conds(32)}) FROM 
retention_param_limit_test GROUP BY uid ORDER BY uid """
+
+    // 33 conditions must be rejected by FE with a clear error (not a BE 
crash).
+    test {
+        sql """ SELECT uid, retention(${conds(33)}) FROM 
retention_param_limit_test GROUP BY uid """
+        exception "at most 32"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to