This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a8618c6a65f054bd8dac578ec4a5e1eacaaab7e3
Author: Abhishek Rawat <[email protected]>
AuthorDate: Tue Sep 23 11:53:23 2025 -0700

    IMPALA-10204: Make AdmitQuery params more efficient
    
    The admission request may contain lineage graphs and other
    fields that the admission control service doesn't need.
    For example, the admission control service previously held
    onto the full TQueryExecRequest object for the entire
    lifetime of a query, even after the admission decision was
    complete. This led to unnecessary memory consumption.
    
    This commit introduces two optimizations for reducing the
    memory footprint:
    1.  A lightweight copy of TQueryExecRequest is now created
    on the client side before sending to the admission control
    service. Fields that are not required for admission
    decisions (e.g., query_plan, lineage_graph) are cleared from
    this copy.
    2.  The AdmissionState now uses a unique_ptr to manage the
    TQueryExecRequest. This allows the object's memory to be
    explicitly released as soon as the query schedule is generated
    and the request object is no longer needed.
    
    During a customized, highly concurrent TPC-DS run without this
    change, the peak memory usage in admissiond was around 2GB.
    With this change, it required less than half that memory.
    
    Tests:
    Passed exhaustive tests.
    
    Change-Id: I1ba5e8818336bd1fc3ad604a0acee5eb7a1116c4
    Reviewed-on: http://gerrit.cloudera.org:8080/23546
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Abhishek Rawat <[email protected]>
---
 be/src/scheduling/admission-control-service.cc |  9 ++++--
 be/src/scheduling/admission-control-service.h  |  2 +-
 be/src/service/client-request-state.cc         | 38 ++++++++++++++++++++++++--
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/be/src/scheduling/admission-control-service.cc 
b/be/src/scheduling/admission-control-service.cc
index f39a3c6b9..4489f7f36 100644
--- a/be/src/scheduling/admission-control-service.cc
+++ b/be/src/scheduling/admission-control-service.cc
@@ -133,12 +133,13 @@ void AdmissionControlService::AdmitQuery(
 
   shared_ptr<AdmissionState> admission_state;
   admission_state = make_shared<AdmissionState>(req->query_id(), 
req->coord_id());
+  admission_state->query_exec_request = make_unique<TQueryExecRequest>();
 
   admission_state->summary_profile =
       RuntimeProfile::Create(&admission_state->profile_pool, "Summary");
 
   RESPOND_IF_ERROR(GetSidecar(req->query_exec_request_sidecar_idx(), 
rpc_context,
-      &admission_state->query_exec_request));
+      admission_state->query_exec_request.get()));
 
   for (const NetworkAddressPB& address : 
req->blacklisted_executor_addresses()) {
     admission_state->blacklisted_executor_addresses.emplace(address);
@@ -190,6 +191,8 @@ void AdmissionControlService::GetQueryStatus(const 
GetQueryStatusRequestPB* req,
       if (admission_state->admission_done) {
         if (admission_state->admit_status.ok()) {
           *resp->mutable_query_schedule() = *admission_state->schedule.get();
+          // Free TQueryExecRequest since it's not required after admission is 
done
+          admission_state->query_exec_request.reset();
         } else {
           status = admission_state->admit_status;
         }
@@ -339,8 +342,8 @@ void AdmissionControlService::AdmitFromThreadPool(const 
UniqueIdPB& query_id) {
     lock_guard<mutex> l(admission_state->lock);
     bool queued;
     AdmissionController::AdmissionRequest request = {admission_state->query_id,
-        admission_state->coord_id, admission_state->query_exec_request,
-        
admission_state->query_exec_request.query_ctx.client_request.query_options,
+        admission_state->coord_id, *admission_state->query_exec_request,
+        
admission_state->query_exec_request->query_ctx.client_request.query_options,
         admission_state->summary_profile,
         admission_state->blacklisted_executor_addresses};
     admission_state->admit_status =
diff --git a/be/src/scheduling/admission-control-service.h 
b/be/src/scheduling/admission-control-service.h
index 7bc98ff1c..5123e735d 100644
--- a/be/src/scheduling/admission-control-service.h
+++ b/be/src/scheduling/admission-control-service.h
@@ -92,7 +92,7 @@ class AdmissionControlService : public 
AdmissionControlServiceIf,
     // at any point after this AdmissionState has been added to 
'admission_state_map_'.
     UniqueIdPB query_id;
     UniqueIdPB coord_id;
-    TQueryExecRequest query_exec_request;
+    std::unique_ptr<TQueryExecRequest> query_exec_request;
     std::unordered_set<NetworkAddressPB> blacklisted_executor_addresses;
 
     // Protects all of the following members.
diff --git a/be/src/service/client-request-state.cc 
b/be/src/service/client-request-state.cc
index eb1c53c9e..e6206ccef 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -663,10 +663,42 @@ void ClientRequestState::FinishExecQueryOrDmlRequest() {
     otel_span_manager_->StartChildSpanAdmissionControl();
   }
 
+  const TQueryExecRequest* query_exec_request;
+  TQueryExecRequest req;
+  if (ExecEnv::GetInstance()->AdmissionServiceEnabled()) {
+    req = exec_req.query_exec_request;
+    if (req.__isset.query_plan) {
+      // Use the swap() to ensure the string's memory is deallocated.
+      // Using clear() sets the size to 0 but may not release the capacity.
+      std::string().swap(req.query_plan);
+      req.__isset.query_plan = false;
+    }
+    if (req.__isset.lineage_graph) {
+      req.lineage_graph = TLineageGraph();
+      req.__isset.lineage_graph = false;
+    }
+    if (req.__isset.result_set_metadata) {
+      req.result_set_metadata = TResultSetMetadata();
+      req.__isset.result_set_metadata = false;
+    }
+    if (req.__isset.finalize_params) {
+      req.finalize_params = TFinalizeParams();
+      req.__isset.finalize_params = false;
+    }
+    TClientRequest& client_req = req.query_ctx.client_request;
+    if (client_req.__isset.redacted_stmt) {
+      // Use the swap() to ensure the string's memory is deallocated.
+      std::string().swap(client_req.redacted_stmt);
+      client_req.__isset.redacted_stmt = false;
+    }
+    query_exec_request = &req;
+  } else {
+    query_exec_request = &exec_req.query_exec_request;
+  }
+
   Status admit_status = admission_control_client_->SubmitForAdmission(
-      {query_id_pb, ExecEnv::GetInstance()->backend_id(),
-          exec_req.query_exec_request, exec_req.query_options,
-          summary_profile_, blacklisted_executor_addresses_},
+      {query_id_pb, ExecEnv::GetInstance()->backend_id(), *query_exec_request,
+          exec_req.query_options, summary_profile_, 
blacklisted_executor_addresses_},
       query_events_, &schedule_, &wait_start_time_ms_, &wait_end_time_ms_,
       otel_span_manager_.get());
 

Reply via email to