This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 9e05ffcaaf9ed67dd3310af674d107a484aef7fa Author: Jason Fehr <[email protected]> AuthorDate: Mon Nov 4 13:03:50 2024 -0800 IMPALA-13505: Fix NPE in Calcite Planner Fixes the NullPointerException occurring when using the Calcite planner with test_tpcds_queries.py::TestTpcdsDecimalV2Query::test_tpcds_q8. The NPE was thrown from the Planner where it generates the list of columns in the query for use in the profile and workload management. Testing was accomplished by manually running the impacted test and with a new custom cluster test that replicates the failing test. Change-Id: I4d282120e596fd39a569d1ce9b25024f4f174dd0 Reviewed-on: http://gerrit.cloudera.org:8080/22033 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 6 ++++ tests/custom_cluster/test_workload_mgmt_init.py | 2 +- .../test_workload_mgmt_sql_details.py | 35 ++++++++++++++++++++-- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 17392de7e..6a4f252e0 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -4693,6 +4693,12 @@ public class Analyzer { if (sourceExpr instanceof SlotRef) { SlotRef sourceSlotRef = (SlotRef) sourceExpr; if (sourceSlotRef.getDesc().getSourceExprs().size() == 0) { + // Ensure the resolved path is defined before using it. + if (sourceSlotRef.getResolvedPath() == null) { + return; // Note, since this statement is within a lambda function, this return + // skips to the next item in the stream forEach loop. + } + // First case - source expression represents an actual column. // Only record the field name and drop any complex type sub items. 
clause.getColumnList(globalState_) diff --git a/tests/custom_cluster/test_workload_mgmt_init.py b/tests/custom_cluster/test_workload_mgmt_init.py index 43f7c9239..6640d1dd0 100644 --- a/tests/custom_cluster/test_workload_mgmt_init.py +++ b/tests/custom_cluster/test_workload_mgmt_init.py @@ -52,7 +52,7 @@ class TestWorkloadManagementInitBase(CustomClusterTestSuite): def restart_cluster(self, schema_version="", wait_for_init_complete=True, cluster_size=3, additional_impalad_opts="", wait_for_backends=True, additional_catalogd_opts="", expect_startup_err=False): - """Wraps the existing customer cluster _start_impala_cluster function to restart the + """Wraps the existing custom cluster _start_impala_cluster function to restart the Impala cluster. Specifies coordinator/catalog startup flags to enable workload management and set the schema version. If wait_for_init_complete is True, this function blocks until the workload management init process completes. If diff --git a/tests/custom_cluster/test_workload_mgmt_sql_details.py b/tests/custom_cluster/test_workload_mgmt_sql_details.py index 4777c5e3c..89ee0bd5c 100644 --- a/tests/custom_cluster/test_workload_mgmt_sql_details.py +++ b/tests/custom_cluster/test_workload_mgmt_sql_details.py @@ -450,8 +450,39 @@ class TestWorkloadManagementSQLDetails(TestQueryLogTableBase): "group by tinyint_col, smallint_col", ["alltypes"], ["alltypes.tinyint_col", "alltypes.smallint_col", "alltypes.float_col"], - ['alltypes.smallint_col', 'alltypes.tinyint_col'], + ["alltypes.smallint_col", "alltypes.tinyint_col"], [], - ['alltypes.smallint_col', 'alltypes.tinyint_col'], + ["alltypes.smallint_col", "alltypes.tinyint_col"], [], "functional") + + @CustomClusterTestSuite.with_args(start_args="--use_calcite_planner=true", + cluster_size=1, impalad_graceful_shutdown=True, + impalad_args="--enable_workload_mgmt --query_log_write_interval_s=1", + catalogd_args="--enable_workload_mgmt") + def test_tpcds_8_decimal(self, vector): + """Runs the 
tpcds-decimal_v2-q8 query using the calcite planner and asserts the query + completes successfully. See IMPALA-13505 for details on why this query in + particular is tested.""" + + client = self.get_client(vector.get_value("protocol")) + assert client.execute("use tpcds").success + + res = client.execute("SELECT s_store_name, sum(ss_net_profit) FROM store_sales," + "date_dim, store, (SELECT ca_zip FROM (SELECT SUBSTRING(ca_zip, 1, 5) ca_zip " + "FROM customer_address WHERE SUBSTRING(ca_zip, 1, 5) IN (" + "'24128', '76232', '65084', '87816', '83926', '77556', '20548'," + "'26231', '43848', '15126', '91137', '61265', '98294', '25782', '17920'," + "'18426', '98235', '40081', '84093', '28577', '55565', '17183', '54601') " + "INTERSECT SELECT ca_zip FROM (SELECT SUBSTRING(ca_zip, 1, 5) ca_zip, " + "count(*) cnt FROM customer_address, customer " + "WHERE ca_address_sk = c_current_addr_sk AND c_preferred_cust_flag='Y' " + "GROUP BY ca_zip HAVING count(*) > 10)A1)A2) V1 " + "WHERE ss_store_sk = s_store_sk AND ss_sold_date_sk = d_date_sk AND d_qoy = 2 " + "AND d_year = 1998 AND (SUBSTRING(s_zip, 1, 2) = SUBSTRING(V1.ca_zip, 1, 2)) " + "GROUP BY s_store_name ORDER BY s_store_name LIMIT 100") + assert res.success + + # Wait for the query to be written to the completed queries table. + self.cluster.get_first_impalad().service.wait_for_metric_value( + "impala-server.completed-queries.written", 1, 60)
