This is an automated email from the ASF dual-hosted git repository. jasonmfehr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit c23d3cb58adc558fe5e5064f9db6f6471c18d4ea Author: Riza Suminto <[email protected]> AuthorDate: Fri Aug 15 15:30:31 2025 -0700 IMPALA-14311: Fix ASAN issue in test_query_cancel_load_tables verifiers.test_verify_metrics.TestValidateMetrics.test_metrics_are_zero fails in ASAN tests. This is because a client session opened by test_query_cancel_load_tables is not cleanly closed. This patch fixes the issue by creating the Impala client within the run method of web_pages_util.py using a with-as statement. This patch also attempts to fix flakiness in test_query_expiration by validating that time_limit_expire_handle is actually closed in the Coordinator log (IMPALA-13444). Testing: - Pass test_web_pages.py test in ASAN build. - Loop and pass test_query_expiration 50x in ASAN build. Change-Id: I6fcf13902478535f6fa15f267edc83891057993c Reviewed-on: http://gerrit.cloudera.org:8080/23302 Reviewed-by: Daniel Becker <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- tests/custom_cluster/test_query_expiration.py | 10 ++++++++-- tests/util/web_pages_util.py | 18 +++++++++++------- tests/webserver/test_web_pages.py | 6 +++--- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/tests/custom_cluster/test_query_expiration.py b/tests/custom_cluster/test_query_expiration.py index 3f088713f..ceffee6a3 100644 --- a/tests/custom_cluster/test_query_expiration.py +++ b/tests/custom_cluster/test_query_expiration.py @@ -119,9 +119,15 @@ class TestQueryExpiration(CustomClusterTestSuite): r"minimum memory reservation is greater than memory available.*\nQuery " + r"[0-9a-f]+:[0-9a-f]+ expired due to client inactivity \(timeout is 1s000ms\)") # hs2 client does not automaticaly close time_limit_expire_handle. - # manually close it. + # manually close it. It can take few miliseconds in slow build (ie., ASAN) until + # query actually closed in the coordinator. So make sure from the log that it is + # actually closed. 
+ tle_query_id = client.get_query_id(time_limit_expire_handle) client.close_query(time_limit_expire_handle) - self._check_num_executing(impalad, 2) + self.assert_impalad_log_contains( + 'INFO', r'ReleaseQueryState\(\): deleted query_id=' + tle_query_id, + timeout_s=4) + self._check_num_executing(impalad, 2, 0) # Both queries with query_timeout_s < 4 should generate this message. self.assert_impalad_log_contains('INFO', "Expiring query due to client inactivity: " r"[0-9a-f]+:[0-9a-f]+, last activity was at: \d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d", diff --git a/tests/util/web_pages_util.py b/tests/util/web_pages_util.py index 4b294d893..cb98d5d76 100644 --- a/tests/util/web_pages_util.py +++ b/tests/util/web_pages_util.py @@ -22,6 +22,7 @@ import requests from tests.util.retry import retry + def get_num_completed_backends(service, query_id): """Get the number of completed backends for the given query_id from the 'query_backends' web page.""" @@ -92,18 +93,21 @@ def assert_query_stopped(impalad, query_id): assert len(expected_queries) == 1 -def run(client, query, queue): +def run(test_class, query, options, queue): """Execute query and put results or errors into queue.""" - try: - queue.put(str(client.execute(query))) - except Exception as ex: - queue.put(str(ex)) + with test_class.create_impala_client() as client: + if options: + client.set_configuration(options) + try: + queue.put(str(client.execute(query))) + except Exception as ex: + queue.put(str(ex)) -def start(client, query): +def start(test_class, query, options=None): """Execute a query in a separate process. 
Returns (process, queue).""" queue = Queue() - proc = Process(target=run, args=(client, query, queue)) + proc = Process(target=run, args=(test_class, query, options, queue)) proc.start() return proc, queue diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py index c80a77f20..4a8d5fa10 100644 --- a/tests/webserver/test_web_pages.py +++ b/tests/webserver/test_web_pages.py @@ -1087,7 +1087,7 @@ class TestWebPage(ImpalaTestSuite): # Start the query completely async. The server doesn't return a response until # the query has exited the CREATED state, so we need to get the query ID another way. - proc, queue = start(self.client, "select count(*) from functional_parquet.alltypes") + proc, queue = start(self, "select count(*) from functional_parquet.alltypes") wait_for_state(impalad, 'CREATED') in_flight_queries = impalad.get_debug_webpage_json('queries')['in_flight_queries'] @@ -1113,11 +1113,11 @@ class TestWebPage(ImpalaTestSuite): impalad = ImpalaCluster.get_e2e_test_cluster().impalads[0].service wait_for_state(impalad, None) delay_created_action = "impalad_load_tables_delay:SLEEP@5000" - self.client.set_configuration({'debug_action': delay_created_action}) # Start the query completely async. The server doesn't return a response until # the query has exited the CREATED state, so we need to get the query ID another way. - proc, queue = start(self.client, "select count(*) from functional_parquet.alltypes") + proc, queue = start(self, "select count(*) from functional_parquet.alltypes", + {'debug_action': delay_created_action}) wait_for_state(impalad, 'CREATED') in_flight_queries = impalad.get_debug_webpage_json('queries')['in_flight_queries']
