This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 47309d14c IMPALA-12204: Fix redundant codegen info added in subplan
profiles
47309d14c is described below
commit 47309d14ca6bd274dd72674e12092f6dd3e034f3
Author: stiga-huang <[email protected]>
AuthorDate: Mon Jun 12 13:18:47 2023 +0800
IMPALA-12204: Fix redundant codegen info added in subplan profiles
The SUBPLAN node will open its right child node many times in its
GetNext(), depending on how many rows generated from its left child. The
right child of a SUBPLAN node is a subtree of operators. They should not
add codegen info into profile in their Open() method since it will be
invoked repeatedly.
Currently, DataSink and UnionNode have such an issue. This patch fixes
them by adding the codegen info to profile in Close() instead of Open(),
just like what we did in IMPALA-11200.
Tests:
- Add e2e tests
Change-Id: I99a0a842df63a03c61024e2b77d5118ca63a2b2d
Reviewed-on: http://gerrit.cloudera.org:8080/20037
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Csaba Ringhofer <[email protected]>
---
be/src/exec/data-sink.cc | 6 ++--
be/src/exec/union-node.cc | 9 +++--
be/src/runtime/fragment-state.h | 2 +-
.../nested-types-subplan-single-node.test | 40 ++++++++++++++++++++++
.../QueryTest/union-const-scalar-expr-codegen.test | 6 ++--
5 files changed, 51 insertions(+), 12 deletions(-)
diff --git a/be/src/exec/data-sink.cc b/be/src/exec/data-sink.cc
index ed43ef4ce..790b7b214 100644
--- a/be/src/exec/data-sink.cc
+++ b/be/src/exec/data-sink.cc
@@ -165,9 +165,6 @@ Status DataSink::Prepare(RuntimeState* state, MemTracker*
parent_mem_tracker) {
Status DataSink::Open(RuntimeState* state) {
DCHECK_EQ(output_exprs_.size(), output_expr_evals_.size());
- for (const string& codegen_msg : sink_config_.codegen_status_msgs_) {
- profile_->AppendExecOption(codegen_msg);
- }
return ScalarExprEvaluator::Open(output_expr_evals_, state);
}
@@ -178,6 +175,9 @@ void DataSink::Close(RuntimeState* state) {
if (expr_results_pool_.get() != nullptr) expr_results_pool_->FreeAll();
if (expr_mem_tracker_ != nullptr) expr_mem_tracker_->Close();
if (mem_tracker_ != nullptr) mem_tracker_->Close();
+ for (const string& codegen_msg : sink_config_.codegen_status_msgs_) {
+ profile_->AppendExecOption(codegen_msg);
+ }
closed_ = true;
}
diff --git a/be/src/exec/union-node.cc b/be/src/exec/union-node.cc
index b454cdf5b..54d840a32 100644
--- a/be/src/exec/union-node.cc
+++ b/be/src/exec/union-node.cc
@@ -183,11 +183,6 @@ Status UnionNode::Open(RuntimeState* state) {
// Ensures that rows are available for clients to fetch after this Open() has
// succeeded.
if (!children_.empty()) RETURN_IF_ERROR(child(child_idx_)->Open(state));
-
- if (is_codegen_status_added_ && num_const_scalar_expr_to_be_codegened_ == 0
- && !const_exprs_lists_.empty()) {
- runtime_profile_->AppendExecOption("Codegen Disabled for const scalar
expressions");
- }
return Status::OK();
}
@@ -373,4 +368,8 @@ void UnionNode::Close(RuntimeState* state) {
ScalarExprEvaluator::Close(evals, state);
}
ExecNode::Close(state);
+ if (is_codegen_status_added_ && num_const_scalar_expr_to_be_codegened_ == 0
+ && !const_exprs_lists_.empty()) {
+ runtime_profile_->AppendExecOption("Codegen Disabled for const scalar
expressions");
+ }
}
diff --git a/be/src/runtime/fragment-state.h b/be/src/runtime/fragment-state.h
index 94ecf0579..7dac84e44 100644
--- a/be/src/runtime/fragment-state.h
+++ b/be/src/runtime/fragment-state.h
@@ -123,7 +123,7 @@ class FragmentState {
int64_t NumScalarExprNeedsCodegen() const { return
scalar_exprs_to_codegen_.size(); }
/// Check if codegen was disabled and if so, add a message to the runtime
profile.
- /// Call this only after expressions have been have been created.
+ /// Call this only after expressions have been created.
void CheckAndAddCodegenDisabledMessage(std::vector<std::string>&
codegen_status_msgs) {
if (CodegenDisabledByQueryOption()) {
codegen_status_msgs.emplace_back(
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan-single-node.test
b/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan-single-node.test
index 135825fa0..ece9c7007 100644
---
a/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan-single-node.test
+++
b/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan-single-node.test
@@ -36,3 +36,43 @@ BIGINT,STRING,BIGINT
---- RUNTIME_PROFILE
!row_regex: .*Codegen Enabled, Codegen Enabled.*
====
+---- QUERY
+# Regression test for IMPALA-12204
+select c1.c_custkey, v.o_orderkey from
+ customer c1, customer c2,
+ (select x.* from c1.c_orders x, c2.c_orders y
+ where x.o_orderkey = y.o_orderkey) v
+where c1.c_custkey = c2.c_custkey
+ and c1.c_custkey < 4;
+---- RESULTS
+1,579908
+1,4273923
+1,454791
+1,4808192
+1,3868359
+1,5133509
+2,1763205
+2,1842406
+2,1374019
+2,430243
+2,1071617
+2,3986496
+2,2992930
+---- TYPES
+BIGINT,BIGINT
+---- RUNTIME_PROFILE
+!row_regex: .*Build Side Codegen Enabled, Hash Table Construction Codegen
Enabled, Build Side Codegen Enabled, Hash Table Construction Codegen Enabled.*
+====
+---- QUERY
+# Regression test for IMPALA-12204
+select count(*) from customer c,
+ (select o_orderkey from c.c_orders
+ union all select 100) v
+where c_custkey < 4
+---- RESULTS
+16
+---- TYPES
+BIGINT
+---- RUNTIME_PROFILE
+!row_regex: .*Codegen Disabled for const scalar expressions, Codegen Disabled
for const scalar expressions.*
+====
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/union-const-scalar-expr-codegen.test
b/testdata/workloads/functional-query/queries/QueryTest/union-const-scalar-expr-codegen.test
index 732f0becf..e8398ebac 100644
---
a/testdata/workloads/functional-query/queries/QueryTest/union-const-scalar-expr-codegen.test
+++
b/testdata/workloads/functional-query/queries/QueryTest/union-const-scalar-expr-codegen.test
@@ -16,7 +16,7 @@ tinyint,tinyint,tinyint
#SORT_NODE
ExecOption: Codegen Enabled
#UNION_NODE
-ExecOption: Codegen Disabled for const scalar expressions, Codegen Enabled
+ExecOption: Codegen Enabled, Codegen Disabled for const scalar expressions
====
---- QUERY
# Test insert statement with values (translated into UNION with const
expressions).
@@ -29,7 +29,7 @@ insert into test_values_codegen(c1) values
(CAST(1+ceil(2.5)*3 as tinyint));
00:UNION
constant-operands=1
#UNION_NODE
-ExecOption: Codegen Disabled for const scalar expressions, Codegen Enabled
+ExecOption: Codegen Enabled, Codegen Disabled for const scalar expressions
====
---- QUERY
# Test insert statement with values having const scalar expressions.
@@ -43,7 +43,7 @@ insert into test_values_codegen values
00:UNION
constant-operands=2
#UNION_NODE
-ExecOption: Codegen Disabled for const scalar expressions, Codegen Enabled
+ExecOption: Codegen Enabled, Codegen Disabled for const scalar expressions
====
---- QUERY
# Test the result of above inserts with codegen disabled.