This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new b1eff0c3d IMPALA-13137: Add additional client fetch metrics columns to 
the queries page
b1eff0c3d is described below

commit b1eff0c3d8a34e9fa02311e26e39d19f0f6f7968
Author: Surya Hebbar <[email protected]>
AuthorDate: Thu Jun 6 15:51:59 2024 +0530

    IMPALA-13137: Add additional client fetch metrics columns to the queries 
page
    
    For helping users to better understand query execution times,
    the following columns have been added to the queries page.
      - First Fetch (First row fetched)
      - Client Fetch Duration (Client Fetch Wait Timer's value)
    
    The duration needed to fetch the first row is already present in the
    query state record. This timestamp has to be found by matching the label
    "First row fetched" within the query state record's 'event_sequence'.
    
    The time taken for the client to fetch all rows since the first row is
    monitored by the "ClientFetchWaitTimer" in client request state.
    To incorporate this into the query state record, the following
    new attribute has been added.
      - client_fetch_wait_time_ns
    
    In order to constantly retrieve and update this value, a new method has
    been added to expose the private counter holding "ClientFetchWaitTimer".
    
    The duration for both these columns is printed in human readable format,
    i.e. seconds, minutes, hours, etc., after converting from nanoseconds.
    
    Tips for the two additional columns, mentioning relation between the
    profile's labels and column names have also been added to the queries
    page.
    
    Change-Id: I74a9393a7b38750de0c3f6230b6e5e048048c4b5
    Reviewed-on: http://gerrit.cloudera.org:8080/21482
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/runtime/coordinator.cc         |  4 +++-
 be/src/runtime/coordinator.h          |  2 ++
 be/src/service/client-request-state.h |  5 +++++
 be/src/service/impala-http-handler.cc | 28 ++++++++++++++++++++++++++++
 be/src/service/query-state-record.cc  |  1 +
 be/src/service/query-state-record.h   |  3 +++
 www/queries.tmpl                      | 18 ++++++++++++++++++
 7 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index a1572ee6c..d49887e52 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -121,6 +121,8 @@ PROFILE_DEFINE_COUNTER(NumCompletedBackends, STABLE_HIGH, 
TUnit::UNIT,"The numbe
 PROFILE_DEFINE_TIMER(FinalizationTimer, STABLE_LOW,
     "Total time spent in finalization (typically 0 except for INSERT into hdfs 
tables).");
 
+const string Coordinator::PROFILE_EVENT_LABEL_FIRST_ROW_FETCHED = "First row 
fetched";
+
 // Maximum number of fragment instances that can publish each broadcast filter.
 static const int MAX_BROADCAST_FILTER_PRODUCERS = 3;
 
@@ -1048,7 +1050,7 @@ Status Coordinator::GetNext(QueryResultSet* results, int 
max_rows, bool* eos,
 
   Status status = coord_sink_->GetNext(runtime_state, results, max_rows, eos, 
timeout_us);
   if (!first_row_fetched_ && results->size() > 0) {
-    query_events_->MarkEvent("First row fetched");
+    
query_events_->MarkEvent(Coordinator::PROFILE_EVENT_LABEL_FIRST_ROW_FETCHED);
     first_row_fetched_ = true;
   }
   RETURN_IF_ERROR(UpdateExecState(
diff --git a/be/src/runtime/coordinator.h b/be/src/runtime/coordinator.h
index 92fea991f..3a6b129ba 100644
--- a/be/src/runtime/coordinator.h
+++ b/be/src/runtime/coordinator.h
@@ -118,6 +118,8 @@ class TUpdateCatalogRequest;
 /// and unnest them
 class Coordinator { // NOLINT: The member variables could be re-ordered to 
save space
  public:
+  static const std::string PROFILE_EVENT_LABEL_FIRST_ROW_FETCHED;
+
   Coordinator(ClientRequestState* parent, const TExecRequest& exec_request,
       const QuerySchedulePB& query_schedule, RuntimeProfile::EventSequence* 
events);
   ~Coordinator();
diff --git a/be/src/service/client-request-state.h 
b/be/src/service/client-request-state.h
index 71fcf5d6a..8457728ae 100644
--- a/be/src/service/client-request-state.h
+++ b/be/src/service/client-request-state.h
@@ -398,6 +398,11 @@ class ClientRequestState {
     return wait_end_time_ms - wait_start_time_ms_;
   }
 
+  /// Returns the time taken for the client to fetch all rows.
+  int64_t client_fetch_wait_time_ns() const {
+    return client_wait_timer_->value();
+  }
+
   /// Sets the RetryState to RETRYING. Updates the runtime profile with the 
retry status
   /// and cause. Must be called while 'lock_' is held. Sets the query_status_. 
Future
   /// calls to UpdateQueryStatus will not have any effect. This is necessary 
to prevent
diff --git a/be/src/service/impala-http-handler.cc 
b/be/src/service/impala-http-handler.cc
index 38a5eb33e..26ef6390d 100644
--- a/be/src/service/impala-http-handler.cc
+++ b/be/src/service/impala-http-handler.cc
@@ -456,6 +456,14 @@ void ImpalaHttpHandler::AddQueryRecordTips(Document* 
document) {
   document->AddMember("tips_end_time", "The end time of the query, i.e. the 
time when the"
       " query is completed, canceled or failed.", document->GetAllocator());
 
+  document->AddMember("tips_first_fetch", "The time taken to fetch the first 
row."
+      " Available in the query profile as \"First row fetched\".",
+      document->GetAllocator());
+
+  document->AddMember("tips_client_fetch_duration", "Total time spent 
returning rows to "
+      "the client and other client-side processing. Available in the query 
profile as "
+      "\"ClientFetchWaitTimer\".", document->GetAllocator());
+
   document->AddMember("tips_duration", "The total duration of the query, 
including queued"
       " time.", document->GetAllocator());
 
@@ -538,6 +546,26 @@ void ImpalaHttpHandler::QueryStateToJson(const 
QueryStateRecord& record,
       TimePrecision::Millisecond).c_str(), document->GetAllocator());
   value->AddMember("end_time", end_time, document->GetAllocator());
 
+  vector<string>::const_iterator it = 
std::find(record.event_sequence.labels.begin(),
+      record.event_sequence.labels.end(),
+      Coordinator::PROFILE_EVENT_LABEL_FIRST_ROW_FETCHED);
+  int64_t first_fetch_ns = 0;
+  if (it != record.event_sequence.labels.end()) {
+    first_fetch_ns = record.event_sequence.timestamps[it
+        - record.event_sequence.labels.begin()];
+  }
+  const string& printed_first_fetch = PrettyPrinter::Print(first_fetch_ns,
+      TUnit::TIME_NS);
+  Value val_first_fetch(printed_first_fetch.c_str(), document->GetAllocator());
+  value->AddMember("first_fetch", val_first_fetch, document->GetAllocator());
+
+  const string& printed_client_fetch_duration = PrettyPrinter::Print(
+      record.client_fetch_wait_time_ns, TUnit::TIME_NS);
+  Value val_client_fetch_duration(printed_client_fetch_duration.c_str(),
+      document->GetAllocator());
+  value->AddMember("client_fetch_duration", val_client_fetch_duration,
+      document->GetAllocator());
+
   // record.end_time_us might still be zero if the query is not yet done
   // Use the current Unix time in that case. Note that the duration can be
   // negative if a system clock reset happened after the query was initiated.
diff --git a/be/src/service/query-state-record.cc 
b/be/src/service/query-state-record.cc
index db1b6c928..75aabd0c5 100644
--- a/be/src/service/query-state-record.cc
+++ b/be/src/service/query-state-record.cc
@@ -69,6 +69,7 @@ void QueryStateRecord::Init(const ClientRequestState& 
query_handle) {
   start_time_us = query_handle.start_time_us();
   end_time_us = query_handle.end_time_us();
   wait_time_ms = query_handle.wait_time_ms();
+  client_fetch_wait_time_ns = query_handle.client_fetch_wait_time_ns();
   query_handle.summary_profile()->GetTimeline(&timeline);
 
   Coordinator* coord = query_handle.GetCoordinator();
diff --git a/be/src/service/query-state-record.h 
b/be/src/service/query-state-record.h
index 6ada7a0e9..14a917b72 100644
--- a/be/src/service/query-state-record.h
+++ b/be/src/service/query-state-record.h
@@ -80,6 +80,9 @@ struct QueryStateRecord {
   /// The number of rows fetched by the client
   int64_t num_rows_fetched;
 
+  /// Total time spent returning rows to the client and other client-side 
processing.
+  int64_t client_fetch_wait_time_ns;
+
   /// The state of the query as of this snapshot. The possible values for the
   /// query_state = union(beeswax::QueryState, 
ClientRequestState::RetryState). This is
   /// necessary so that the query_state can accurately reflect if a query has 
been
diff --git a/www/queries.tmpl b/www/queries.tmpl
index 666d93767..efaef5100 100644
--- a/www/queries.tmpl
+++ b/www/queries.tmpl
@@ -42,6 +42,11 @@ command line parameter.</p>
     <th title="{{tips_default_db}}">Default Db</th>
     <th title="{{tips_query_type}}">Query Type</th>
     <th title="{{tips_start_time}}">Start Time</th>
+    <th>
+      <span title="{{tips_first_fetch}}">First&nbsp;Fetch</span>
+      <hr style="margin-top:0px;margin-bottom:0px;"/>
+      <span 
title="{{tips_client_fetch_duration}}">Client&nbsp;Fetch&nbsp;Duration</span>
+    </th>
     <th>
       <span title="{{tips_duration}}">Duration</span>
       <hr style="margin-top:0px;margin-bottom:0px;"/>
@@ -79,6 +84,7 @@ command line parameter.</p>
     <td>{{default_db}}</td>
     <td><samp>{{stmt_type}}</samp></td>
     <td>{{start_time}}</td>
+    <td>{{first_fetch}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{client_fetch_duration}}</td>
     <td>{{duration}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{queued_duration}}</td>
     <td>{{mem_usage}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{mem_est}}</td>
     <td>{{progress}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{query_progress}}</td>
@@ -107,6 +113,11 @@ command line parameter.</p>
     <th title="{{tips_query_type}}">Query Type</th>
     <th title="{{tips_start_time}}">Start Time</th>
     <th title="{{tips_waiting_time}}">Waiting Time</th>
+    <th>
+      <span title="{{tips_first_fetch}}">First&nbsp;Fetch</span>
+      <hr style="margin-top:0px;margin-bottom:0px;"/>
+      <span 
title="{{tips_client_fetch_duration}}">Client&nbsp;Fetch&nbsp;Duration</span>
+    </th>
     <th>
       <span title="{{tips_duration}}">Duration</span>
       <hr style="margin-top:0px;margin-bottom:0px;"/>
@@ -144,6 +155,7 @@ command line parameter.</p>
     <td><samp>{{stmt_type}}</samp></td>
     <td>{{start_time}}</td>
     <td>{{waiting_time}}</td>
+    <td>{{first_fetch}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{client_fetch_duration}}</td>
     <td>{{duration}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{queued_duration}}</td>
     <td>{{mem_usage}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{mem_est}}</td>
     <td>{{progress}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{query_progress}}</td>
@@ -168,6 +180,11 @@ command line parameter.</p>
     <th title="{{tips_query_type}}">Query Type</th>
     <th title="{{tips_start_time}}">Start Time</th>
     <th title="{{tips_end_time}}">End Time</th>
+    <th>
+      <span title="{{tips_first_fetch}}">First&nbsp;Fetch</span>
+      <hr style="margin-top:0px;margin-bottom:0px;"/>
+      <span 
title="{{tips_client_fetch_duration}}">Client&nbsp;Fetch&nbsp;Duration</span>
+    </th>
     <th>
       <span title="{{tips_duration}}">Duration</span>
       <hr style="margin-top:0px;margin-bottom:0px;"/>
@@ -201,6 +218,7 @@ command line parameter.</p>
     <td><samp>{{stmt_type}}</samp></td>
     <td>{{start_time}}</td>
     <td>{{end_time}}</td>
+    <td>{{first_fetch}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{client_fetch_duration}}</td>
     <td>{{duration}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{queued_duration}}</td>
     <td>{{mem_usage}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{mem_est}}</td>
     <td>{{progress}}<hr 
style="margin-top:0px;margin-bottom:0px;"/>{{query_progress}}</td>

Reply via email to