This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch tpc_preview6
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/tpc_preview6 by this push:
new f4d9e840461 [fix](column predicate)fix topn&bloom&bitmap filter
convert to column predicate error. (#60262)
f4d9e840461 is described below
commit f4d9e8404611f47c528b7a71882b92082b15844e
Author: daidai <[email protected]>
AuthorDate: Thu Jan 29 14:22:54 2026 +0800
[fix](column predicate)fix topn&bloom&bitmap filter convert to column
predicate error. (#60262)
### What problem does this PR solve?
Problem Summary:
1. fix #60197 :
A null pointer dereference (SIGSEGV) may occur in `RuntimePredicate::update` when the topn filter target is not a plain slot (e.g. `ORDER BY abs(col1) LIMIT x`).
```
*** Query id: 45488371394ee0-aac81c6593b2f02a ***
*** is nereids: 1 ***
*** tablet id: 0 ***
*** Aborted at 1769444368 (unix time) try "date -d @1769444368" if you are
using GNU date ***
*** Current BE git commitID: a0eb8c0c71 ***
*** SIGSEGV address not mapped to object (@0x18) received by PID 13103 (TID
13838 OR 0x7ff0423ee640) from PID 24; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int,
siginfo_t*, void*) at
/home/zcp/repo_center/doris_branch-4.0/doris/be/src/common/signal_handler.h:420
1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
2# JVM_handle_linux_signal in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
3# 0x00007FF250EE5520 in /lib/x86_64-linux-gnu/libc.so.6
4# doris::vectorized::RuntimePredicate::update(doris::vectorized::Field
const&) at
/home/zcp/repo_center/doris_branch-4.0/doris/be/src/runtime/runtime_predicate.cpp:219
5# doris::pipeline::SortSinkOperatorX::sink(doris::RuntimeState*,
doris::vectorized::Block*, bool) at
/home/zcp/repo_center/doris_branch-4.0/doris/be/src/pipeline/exec/sort_sink_operator.cpp:152
6# doris::pipeline::PipelineTask::execute(bool*) at
/home/zcp/repo_center/doris_branch-4.0/doris/be/src/pipeline/pipeline_task.cpp:598
```
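2. Fix: the file scan node no longer pushes down bloom filters and bitmap
filters for min-max filtering (`_should_push_down_bloom_filter` and
`_should_push_down_bitmap_filter` now return `PushDownType::UNACCEPTABLE`).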
---
be/src/pipeline/exec/file_scan_operator.h | 4 +-
be/src/runtime/runtime_predicate.cpp | 20 +++
.../hive/test_hive_topn_rf_null.out | 148 +++++++++++++++++++++
.../hive/test_hive_topn_rf_null.groovy | 56 +++++++-
4 files changed, 222 insertions(+), 6 deletions(-)
diff --git a/be/src/pipeline/exec/file_scan_operator.h
b/be/src/pipeline/exec/file_scan_operator.h
index 38e34e0a4bb..9c99df14874 100644
--- a/be/src/pipeline/exec/file_scan_operator.h
+++ b/be/src/pipeline/exec/file_scan_operator.h
@@ -61,7 +61,7 @@ public:
private:
friend class vectorized::FileScanner;
PushDownType _should_push_down_bloom_filter() const override {
- return PushDownType::PARTIAL_ACCEPTABLE;
+ return PushDownType::UNACCEPTABLE;
}
PushDownType _should_push_down_topn_filter() const override {
return PushDownType::PARTIAL_ACCEPTABLE;
@@ -73,7 +73,7 @@ private:
}
PushDownType _should_push_down_bitmap_filter() const override {
- return PushDownType::PARTIAL_ACCEPTABLE;
+ return PushDownType::UNACCEPTABLE;
}
PushDownType _should_push_down_is_null_predicate(
vectorized::VectorizedFnCall* fn_call) const override {
diff --git a/be/src/runtime/runtime_predicate.cpp
b/be/src/runtime/runtime_predicate.cpp
index 1cb13350d16..cd61775ef7c 100644
--- a/be/src/runtime/runtime_predicate.cpp
+++ b/be/src/runtime/runtime_predicate.cpp
@@ -63,6 +63,9 @@ Status RuntimePredicate::init_target(
}
std::unique_lock<std::shared_mutex> wlock(_rwlock);
check_target_node_id(target_node_id);
+ // For a non-slot target such as `order by abs(col1) limit x;`,
+ // a min-max filter cannot be used, so no predicate needs to be created.
+ // The topn value can still be used in VTopNPred.execute_column.
if (target_is_slot(target_node_id)) {
_contexts[target_node_id].col_name =
slot_id_to_slot_desc[get_texpr(target_node_id).nodes[0].slot_ref.slot_id]
@@ -213,6 +216,23 @@ Status RuntimePredicate::update(const Field& value) {
}
for (auto p : _contexts) {
auto ctx = p.second;
+ if (ctx.predicate == nullptr) {
+ // 1. `init_target` does not create a predicate for a non-slot target, e.g. `order by abs(col1) limit x;`.
+ // So there is no need to create a new `ColumnPredicate` here,
+ // but `_orderby_extrem` still needs to be updated for `VTopNPred.execute_column`.
+ // 2. This `RuntimePredicate` may be associated with multiple scan nodes.
+ // When the sort node updates it, some scan nodes may not have called `init_target` yet.
+ // Example:
+ // example:
+ //SELECT subq1.pk AS pk1 FROM (
+ // ( SELECT t1.pk FROM tb AS t1 )
+ // UNION ALL
+ // ( SELECT t1.pk FROM tb AS t1 ORDER BY t1.pk ))
+ // subq1
+ //WHERE subq1.pk <> (
+ // SELECT t1.pk FROM tb AS t1 ORDER BY t1.pk LIMIT 1
+ //) ORDER BY 1 LIMIT 1 ;
+ continue;
+ }
auto str_ref = _get_string_ref(_orderby_extrem, _type);
std::shared_ptr<ColumnPredicate> pred =
_pred_constructor(ctx.predicate->column_id(), ctx.col_name,
ctx.col_data_type,
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_topn_rf_null.out
b/regression-test/data/external_table_p0/hive/test_hive_topn_rf_null.out
index c6cea0d8e74..6af3271c0cf 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_topn_rf_null.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_topn_rf_null.out
@@ -243,6 +243,80 @@ Charlie
7 \N Grace
9 \N Ivan
+-- !sql_test_19 --
+1 100 Alice
+2 200 \N
+3 300 Charlie
+4 400 David
+5 \N \N
+
+-- !sql_test_20 --
+10 1000 Judy
+6 600 Frank
+7 \N Grace
+8 800 \N
+9 \N Ivan
+
+-- !sql_test_21 --
+1 100 Alice
+2 200 \N
+5 \N \N
+7 \N Grace
+9 \N Ivan
+
+-- !sql_test_22 --
+10 1000 Judy
+3 300 Charlie
+4 400 David
+6 600 Frank
+8 800 \N
+
+-- !sql_test_23 --
+1
+
+-- !sql_test_24 --
+1
+
+-- !sql_test_24 --
+1
+
+-- !sql_test_19 --
+1 100 Alice
+2 200 \N
+3 300 Charlie
+4 400 David
+5 \N \N
+
+-- !sql_test_20 --
+10 1000 Judy
+6 600 Frank
+7 \N Grace
+8 800 \N
+9 \N Ivan
+
+-- !sql_test_21 --
+1 100 Alice
+2 200 \N
+5 \N \N
+7 \N Grace
+9 \N Ivan
+
+-- !sql_test_22 --
+10 1000 Judy
+3 300 Charlie
+4 400 David
+6 600 Frank
+8 800 \N
+
+-- !sql_test_23 --
+1
+
+-- !sql_test_24 --
+1
+
+-- !sql_test_24 --
+1
+
-- !sql_test_1 --
1 100 Alice
2 200 \N
@@ -487,3 +561,77 @@ Charlie
7 \N Grace
9 \N Ivan
+-- !sql_test_19 --
+1 100 Alice
+2 200 \N
+3 300 Charlie
+4 400 David
+5 \N \N
+
+-- !sql_test_20 --
+10 1000 Judy
+6 600 Frank
+7 \N Grace
+8 800 \N
+9 \N Ivan
+
+-- !sql_test_21 --
+1 100 Alice
+2 200 \N
+5 \N \N
+7 \N Grace
+9 \N Ivan
+
+-- !sql_test_22 --
+10 1000 Judy
+3 300 Charlie
+4 400 David
+6 600 Frank
+8 800 \N
+
+-- !sql_test_23 --
+1
+
+-- !sql_test_24 --
+1
+
+-- !sql_test_24 --
+1
+
+-- !sql_test_19 --
+1 100 Alice
+2 200 \N
+3 300 Charlie
+4 400 David
+5 \N \N
+
+-- !sql_test_20 --
+10 1000 Judy
+6 600 Frank
+7 \N Grace
+8 800 \N
+9 \N Ivan
+
+-- !sql_test_21 --
+1 100 Alice
+2 200 \N
+5 \N \N
+7 \N Grace
+9 \N Ivan
+
+-- !sql_test_22 --
+10 1000 Judy
+3 300 Charlie
+4 400 David
+6 600 Frank
+8 800 \N
+
+-- !sql_test_23 --
+1
+
+-- !sql_test_24 --
+1
+
+-- !sql_test_24 --
+1
+
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_topn_rf_null.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_topn_rf_null.groovy
index c2d5960bb10..28abc87ae08 100644
---
a/regression-test/suites/external_table_p0/hive/test_hive_topn_rf_null.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_hive_topn_rf_null.groovy
@@ -44,10 +44,59 @@ suite("test_hive_topn_rf_null",
"p0,external,hive,external_docker,external_docke
}
}
+ def runTestTopRfPredicate = {
+
+ for (String table_name in ["test_topn_rf_null_orc",
"test_topn_rf_null_parquet"]) {
+ order_qt_sql_test_19 """
+ SELECT * FROM ${table_name} ORDER BY abs(id) ASC LIMIT 5;
+ """
+
+ order_qt_sql_test_20 """
+ SELECT * FROM ${table_name} ORDER BY abs(id) desc LIMIT 5;
+ """
+
+ order_qt_sql_test_21 """
+ SELECT * FROM ${table_name} ORDER BY abs(value) ASC LIMIT 5;
+ """
+
+ order_qt_sql_test_22 """
+ SELECT * FROM ${table_name} ORDER BY abs(value) desc LIMIT 5;
+ """
+ order_qt_sql_test_23 """
+ SELECT subq1.id AS pk1 FROM (
+ ( SELECT t1.id FROM ${table_name} AS t1 )
+ UNION ALL
+ ( SELECT t1.id FROM ${table_name} AS t1 ORDER BY t1.id ))
subq1
+ where
+ subq1.id <=> (SELECT t1.id FROM ${table_name} AS t1 ORDER
BY t1.id LIMIT 1) ORDER BY 1 LIMIT 1 ;
+ """
+
+
+
+ order_qt_sql_test_24 """
+ SELECT subq1.id AS pk1 FROM (
+ ( SELECT t1.id FROM ${table_name} AS t1 where abs(t1.id) < 10)
+ UNION ALL
+ ( SELECT t1.id FROM ${table_name} AS t1 ORDER BY t1.id limit
10)) subq1
+ where
+ subq1.id <=> (SELECT t1.id FROM ${table_name} AS t1 ORDER
BY t1.id LIMIT 1) ORDER BY 1 LIMIT 1 ;
+ """
+
+ order_qt_sql_test_24 """
+ SELECT subq1.id AS pk1 FROM (
+ ( SELECT t1.id FROM ${table_name} AS t1 where t1.id < 1000)
+ UNION ALL
+ ( SELECT t1.id FROM ${table_name} AS t1 ORDER BY t1.id desc
)) subq1
+ where
+ subq1.id <=> (SELECT t1.id FROM ${table_name} AS t1 ORDER
BY t1.id LIMIT 1) ORDER BY 1 LIMIT 1 ;
+ """
+ }
+ }
+
- for (String hivePrefix : ["hive3"]) {
+ for (String hivePrefix : ["hive2"]) {
String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
String catalog = "test_hive_topn_rf_null_${hivePrefix}"
@@ -66,13 +115,12 @@ suite("test_hive_topn_rf_null",
"p0,external,hive,external_docker,external_docke
sql """ set topn_filter_ratio=1"""
runTopnRfNullTest();
-
+ runTestTopRfPredicate();
sql """ set topn_filter_ratio=0 """
runTopnRfNullTest();
-
-
+ runTestTopRfPredicate();
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]