This is an automated email from the ASF dual-hosted git repository. wzhou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit a2b8aed2c2d4c1bb25ed9626a2b014b79ec741ad Author: Riza Suminto <[email protected]> AuthorDate: Wed Jan 10 10:16:02 2024 -0800 IMPALA-12702: Show reduced cardinality estimation in ExecSummary In the query profile, the cardinality reduction from IMPALA-12018 is highlighted in the Plan section, but is missing from the ExecSummary section. This patch changes the ExecSummary to show the reduced cardinality estimation if it is set. Testing: - Add TestObservability::test_reduced_cardinality_by_filter Change-Id: If1f51ce585a1cb66e518b725686ab3076ffa8168 Reviewed-on: http://gerrit.cloudera.org:8080/20879 Reviewed-by: Wenzhe Zhou <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../main/java/org/apache/impala/planner/PlanNode.java | 3 ++- .../queries/QueryTest/runtime_filters.test | 6 +++--- tests/query_test/test_observability.py | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java b/fe/src/main/java/org/apache/impala/planner/PlanNode.java index bdd2fbcb0..e14195f12 100644 --- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java @@ -498,7 +498,8 @@ abstract public class PlanNode extends TreeNode<PlanNode> { msg.limit = limit_; TExecStats estimatedStats = new TExecStats(); - estimatedStats.setCardinality(cardinality_); + estimatedStats.setCardinality( + filteredCardinality_ > -1 ? 
filteredCardinality_ : cardinality_); estimatedStats.setMemory_used(nodeResourceProfile_.getMemEstimateBytes()); msg.setLabel(getDisplayLabel()); msg.setLabel_detail(getDisplayLabelDetail()); diff --git a/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test b/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test index b06b39b3c..00c6c96a6 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test +++ b/testdata/workloads/functional-query/queries/QueryTest/runtime_filters.test @@ -27,7 +27,7 @@ on p.month = b.int_col and b.month = 1 and b.string_col = "1" ---- RUNTIME_PROFILE aggregation(SUM, Files rejected): 22 ---- RUNTIME_PROFILE: table_format=kudu -row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.* +row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+608.* ==== @@ -59,7 +59,7 @@ on p.month = b.int_col and b.month = 1 and b.string_col = "1" ---- RUNTIME_PROFILE aggregation(SUM, Files rejected): 22 ---- RUNTIME_PROFILE: table_format=kudu -row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.* +row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+608.* ==== @@ -321,7 +321,7 @@ with t1 as (select month x, bigint_col y from alltypes limit 7301), ---- RUNTIME_PROFILE aggregation(SUM, Files rejected): 22 ---- RUNTIME_PROFILE: table_format=kudu -row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+7.30K.* +row_regex: 00:SCAN KUDU.*s[ ]+620[ ]+1.82K.* ==== diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py index d82b3b0d7..597dea515 100644 --- a/tests/query_test/test_observability.py +++ b/tests/query_test/test_observability.py @@ -872,6 +872,24 @@ class TestObservability(ImpalaTestSuite): assert len(re.findall('Single node plan created:', runtime_profile, re.M)) == 2 assert len(re.findall('Distributed plan created:', runtime_profile, re.M)) == 2 + def test_reduced_cardinality_by_filter(self): + """IMPALA-12702: Check that ExecSummary shows the reduced cardinality estimation.""" + query_opts = {'compute_processing_cost': 
True} + query = """select STRAIGHT_JOIN count(*) from + (select l_orderkey from tpch_parquet.lineitem) a + join (select o_orderkey, o_custkey from tpch_parquet.orders) l1 + on a.l_orderkey = l1.o_orderkey + where l1.o_custkey < 1000""" + result = self.execute_query(query, query_opts) + scan = result.exec_summary[10] + assert scan['operator'] == '00:SCAN HDFS' + assert scan['num_rows'] == 39563 + assert scan['est_num_rows'] == 575771 + assert scan['detail'] == 'tpch_parquet.lineitem' + runtime_profile = result.runtime_profile + assert "cardinality=575.77K(filtered from 6.00M)" in runtime_profile + + class TestQueryStates(ImpalaTestSuite): """Test that the 'Query State' and 'Impala Query State' are set correctly in the runtime profile."""
