This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit ac23deab4ddc05b6b076b0767994d7bc58e8ef6a Author: wzhou-code <[email protected]> AuthorDate: Mon Apr 3 18:06:49 2023 -0700 IMPALA-12036: Fix Web UI to show right resource pools Web queries site shows no resource pool unless it is specified with query option. The Planner could set TQueryCtx.request_pool in TQueryExecRequest when auto scaling is enabled. But the backend ignores the TQueryCtx.request_pool in TQueryExecRequest when getting resource pools for Web UI. This patch fixes the issue in ClientRequestState::request_pool() by checking TQueryCtx.request_pool in TQueryExecRequest. It also removes the error path in RequestPoolService::ResolveRequestPool() if requested_pool is empty string. Testing: - Updated TestExecutorGroups::test_query_cpu_count_divisor_default, TestExecutorGroups::test_query_cpu_count_divisor_two, and TestExecutorGroups::test_query_cpu_count_divisor_fraction to verify resource pools on Web queries site and Web admission site. - Updated expected error message in TestAdmissionController::test_set_request_pool. - Passed core test. Change-Id: Iceacb3a8ec3bd15a8029ba05d064bbbb81e3a766 Reviewed-on: http://gerrit.cloudera.org:8080/19688 Reviewed-by: Riza Suminto <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Kurt Deschler <[email protected]> Reviewed-by: Abhishek Rawat <[email protected]> --- be/src/scheduling/request-pool-service.cc | 6 ---- be/src/service/client-request-state.h | 17 ++++++++++- be/src/service/impala-server.cc | 1 + tests/custom_cluster/test_admission_controller.py | 9 +++--- tests/custom_cluster/test_executor_groups.py | 36 +++++++++++++++++++++++ tests/custom_cluster/test_web_pages.py | 2 ++ 6 files changed, 59 insertions(+), 12 deletions(-) diff --git a/be/src/scheduling/request-pool-service.cc b/be/src/scheduling/request-pool-service.cc index 90e48d35c..dd3d2ff83 100644 --- a/be/src/scheduling/request-pool-service.cc +++ b/be/src/scheduling/request-pool-service.cc @@ -83,8 +83,6 @@ static const string DEFAULT_POOL_NAME = "default-pool"; static const string RESOLVE_POOL_METRIC_NAME = "request-pool-service.resolve-pool-duration-ms"; -static const string ERROR_USER_TO_POOL_MAPPING_NOT_FOUND = - "No mapping found for request from user '$0' with requested pool '$1'"; static const string ERROR_USER_NOT_ALLOWED_IN_POOL = "Request from user '$0' with " "requested pool '$1' denied access to assigned pool '$2'"; static const string ERROR_USER_NOT_SPECIFIED = "User must be specified because " @@ -177,10 +175,6 @@ Status RequestPoolService::ResolveRequestPool(const TQueryCtx& ctx, if (result.status.status_code != TErrorCode::OK) { return Status(boost::algorithm::join(result.status.error_msgs, "; ")); } - if (result.resolved_pool.empty()) { - return Status(Substitute(ERROR_USER_TO_POOL_MAPPING_NOT_FOUND, - user, requested_pool)); - } if (!result.has_access) { return Status(Substitute(ERROR_USER_NOT_ALLOWED_IN_POOL, user, requested_pool, result.resolved_pool)); diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h index 2c9da9d2a..93a38c375 100644 --- a/be/src/service/client-request-state.h +++ b/be/src/service/client-request-state.h @@ -251,7 +251,14 @@ class ClientRequestState { /// control. /// Admission control resource pool associated with this query. std::string request_pool() const { - return query_ctx_.__isset.request_pool ? query_ctx_.request_pool : ""; + if (is_planning_done_.load() && exec_request_ != nullptr + && exec_request_->query_exec_request.query_ctx.__isset.request_pool) { + // If the request pool has been set by Planner, return the request pool selected + // by Planner. + return exec_request_->query_exec_request.query_ctx.request_pool; + } else { + return query_ctx_.__isset.request_pool ? query_ctx_.request_pool : ""; + } } int num_rows_fetched() const { return num_rows_fetched_; } @@ -426,6 +433,11 @@ class ClientRequestState { void SetBlacklistedExecutorAddresses( std::unordered_set<NetworkAddressPB>& executor_addresses); + /// Mark planning as done for this request. + /// This function should be called after QueryDriver::RunFrontendPlanner() is + /// returned without error. + void SetPlanningDone() { is_planning_done_.store(true); } + protected: /// Updates the end_time_us_ of this query if it isn't set. The end time is determined /// when this function is called for the first time, calling it multiple times does not @@ -464,6 +476,9 @@ class ClientRequestState { /// True if there was a transaction and it got committed or aborted. bool transaction_closed_ = false; + /// Indicates whether the planning is done for the request. + std::atomic_bool is_planning_done_{false}; + /// Executor for any child queries (e.g. compute stats subqueries). Always non-NULL. const boost::scoped_ptr<ChildQueryExecutor> child_query_executor_; diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc index c1a92caf9..c900e5fad 100644 --- a/be/src/service/impala-server.cc +++ b/be/src/service/impala-server.cc @@ -1269,6 +1269,7 @@ Status ImpalaServer::ExecuteInternal(const TQueryCtx& query_ctx, if (!is_external_req) { (*query_handle)->query_events()->MarkEvent("Planning finished"); } + (*query_handle)->SetPlanningDone(); (*query_handle)->set_user_profile_access(result.user_has_profile_access); (*query_handle)->summary_profile()->AddEventSequence( result.timeline.name, result.timeline); diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py index 654ca7e6f..87d0508bd 100644 --- a/tests/custom_cluster/test_admission_controller.py +++ b/tests/custom_cluster/test_admission_controller.py @@ -304,14 +304,13 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): queueA_mem_limit = "MEM_LIMIT=%s" % (128 * 1024 * 1024) try: for pool in ['', 'not_a_pool_name']: - expected_error =\ - "No mapping found for request from user '\S+' with requested pool '%s'"\ - % (pool) + expected_error = re.compile(r"Request from user '\S+' with requested pool " + "'%s' denied access to assigned pool" % (pool)) self.__check_pool_rejected(client, pool, expected_error) # Check rejected if user does not have access. - expected_error = "Request from user '\S+' with requested pool 'root.queueC' "\ - "denied access to assigned pool 'root.queueC'" + expected_error = re.compile(r"Request from user '\S+' with requested pool " + "'root.queueC' denied access to assigned pool 'root.queueC'") self.__check_pool_rejected(client, 'root.queueC', expected_error) # Also try setting a valid pool diff --git a/tests/custom_cluster/test_executor_groups.py b/tests/custom_cluster/test_executor_groups.py index b204ddbdb..739de16c7 100644 --- a/tests/custom_cluster/test_executor_groups.py +++ b/tests/custom_cluster/test_executor_groups.py @@ -147,6 +147,28 @@ class TestExecutorGroups(CustomClusterTestSuite): pool.""" return self.impalad_test_service.get_num_running_queries("default-pool") + def _verify_total_admitted_queries(self, resource_pool, expected_query_num): + """Verify the total number of queries that have been admitted to the given resource + pool on the Web admission site.""" + query_num = self.impalad_test_service.get_total_admitted_queries(resource_pool) + assert query_num == expected_query_num, \ + "Not matched number of queries admitted to %s pool on the Web admission site." \ + % (resource_pool) + + def _verify_query_num_for_resource_pool(self, resource_pool, expected_query_num): + """ Verify the number of queries which use the given resource pool on + the Web queries site.""" + queries_json = self.impalad_test_service.get_queries_json() + queries = queries_json.get("in_flight_queries") + \ + queries_json.get("completed_queries") + query_num = 0 + for query in queries: + if query["resource_pool"] == resource_pool: + query_num += 1 + assert query_num == expected_query_num, \ + "Not matched number of queries using %s pool on the Web queries site: %s." \ + % (resource_pool, json) + def _wait_for_num_executor_groups(self, num_exec_grps, only_healthy=False): """Waits for the number of executor groups to reach 'num_exec_grps'. If 'only_healthy' is True, only the healthy executor groups are accounted for, otherwise all groups @@ -905,6 +927,14 @@ class TestExecutorGroups(CustomClusterTestSuite): self._run_query_and_verify_profile(GROUPING_TEST_QUERY, CPU_DOP_OPTIONS, ["Executor Group: root.small-group", "ExecutorGroupsConsidered: 2", "Verdict: Match", "CpuAsk: 4", "CpuAskUnbounded: 1"]) + + # Check resource pools on the Web queries site and admission site + self._verify_query_num_for_resource_pool("root.small", 2) + self._verify_query_num_for_resource_pool("root.tiny", 1) + self._verify_query_num_for_resource_pool("root.large", 2) + self._verify_total_admitted_queries("root.small", 2) + self._verify_total_admitted_queries("root.tiny", 1) + self._verify_total_admitted_queries("root.large", 2) self.client.close() @pytest.mark.execute_serially @@ -915,6 +945,9 @@ class TestExecutorGroups(CustomClusterTestSuite): self._run_query_and_verify_profile(CPU_TEST_QUERY, CPU_DOP_OPTIONS, ["Executor Group: root.tiny-group", "EffectiveParallelism: 3", "ExecutorGroupsConsidered: 1"]) + # Check resource pools on the Web queries site and admission site + self._verify_query_num_for_resource_pool("root.tiny", 1) + self._verify_total_admitted_queries("root.tiny", 1) self.client.close() @pytest.mark.execute_serially @@ -935,6 +968,9 @@ class TestExecutorGroups(CustomClusterTestSuite): ["Executor Group: root.large-group", "EffectiveParallelism: 7", "ExecutorGroupsConsidered: 3", "CpuAsk: 234", "Verdict: no executor group set fit. Admit to last executor group set."]) + # Check resource pools on the Web queries site and admission site + self._verify_query_num_for_resource_pool("root.large", 2) + self._verify_total_admitted_queries("root.large", 2) self.client.close() @pytest.mark.execute_serially diff --git a/tests/custom_cluster/test_web_pages.py b/tests/custom_cluster/test_web_pages.py index 69b873ba9..e14853918 100644 --- a/tests/custom_cluster/test_web_pages.py +++ b/tests/custom_cluster/test_web_pages.py @@ -123,6 +123,7 @@ class TestWebPage(CustomClusterTestSuite): response = requests.get("http://localhost:25000/queries?json") response_json = response.text assert expected in response_json, "No matching statement found in the queries site." + assert '"resource_pool": "default-pool"' in response_json @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( @@ -140,6 +141,7 @@ class TestWebPage(CustomClusterTestSuite): response = requests.get("http://localhost:25000/queries?json") response_json = response.text assert expected in response_json, "No matching statement found in the queries site." + assert '"resource_pool": "default-pool"' in response_json # Checks if 'messages' exists/does not exist in 'result_stderr' based on the value of # 'should_exist'
