This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e934b1f4 IMPALA-11702: add resource metrics for kudu scanner to profile
7e934b1f4 is described below

commit 7e934b1f40bee858d73cbec11a5711fc2a302362
Author: zhangyifan27 <[email protected]>
AuthorDate: Thu Nov 3 18:36:15 2022 +0800

    IMPALA-11702: add resource metrics for kudu scanner to profile
    
    This patch adds counters for Kudu scanner resource metrics[1] to the
    query profile. The counters are read through the Kudu client's
    GetResourceMetrics interface[2] and updated just before each Kudu
    scanner is closed.
    
    The MaterializeTupleTime counter is also updated so that it only
    includes time spent materializing tuples and evaluating predicates.
    
    Tests
    - Ran some queries manually and checked the profiles.
    
    [1] https://github.com/apache/kudu/blob/a2fdba62d2129d0d7d0c2d0f175e8b2a6e2d2650/src/kudu/tserver/tserver.proto#L398
    [2] https://github.com/apache/kudu/blob/892bda293f238fddec47423d5c0b5be9576581f1/src/kudu/client/client.h#L2967
    
    Change-Id: If0e84e7756ce92af48fa55672157caafcd396547
    Reviewed-on: http://gerrit.cloudera.org:8080/19201
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/kudu/kudu-scan-node-base.cc | 28 ++++++++++++++++++++++++++++
 be/src/exec/kudu/kudu-scan-node-base.h  | 25 +++++++++++++++++++++++++
 be/src/exec/kudu/kudu-scanner.cc        | 23 +++++++++++++++++++----
 3 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/be/src/exec/kudu/kudu-scan-node-base.cc 
b/be/src/exec/kudu/kudu-scan-node-base.cc
index 8bc1e9761..7a00175fa 100644
--- a/be/src/exec/kudu/kudu-scan-node-base.cc
+++ b/be/src/exec/kudu/kudu-scan-node-base.cc
@@ -45,6 +45,20 @@ using kudu::client::KuduTable;
 namespace impala {
 
 PROFILE_DECLARE_COUNTER(ScanRangesComplete);
+PROFILE_DEFINE_TIMER(KuduScannerTotalDurationTime, STABLE_LOW,
+    "Total time taken for all scan rpc requests to complete for Kudu 
scanners.");
+PROFILE_DEFINE_TIMER(KuduScannerQueueDurationTime, STABLE_LOW,
+    "Total time taken between scan rpc requests being accepted and when they 
were "
+    "handled by Kudu scanners.");
+PROFILE_DEFINE_TIMER(KuduScannerCpuUserTime, STABLE_LOW,
+    "Total elapsed CPU user time for all scan rpc requests for Kudu 
scanners.");
+PROFILE_DEFINE_TIMER(KuduScannerCpuSysTime, STABLE_LOW,
+    "Total elapsed CPU system time for all scan rpc requests for Kudu 
scanners.");
+PROFILE_DEFINE_COUNTER(KuduScannerCfileCacheHitBytes, STABLE_LOW, TUnit::BYTES,
+    "Number of bytes that were read from the block cache because of a hit for 
Kudu "
+    "scanners.");
+PROFILE_DEFINE_COUNTER(KuduScannerCfileCacheMissBytes, STABLE_LOW, 
TUnit::BYTES,
+    "Number of bytes that were read because of a block cache miss for Kudu 
scanners.");
 
 const string KuduScanNodeBase::KUDU_ROUND_TRIPS = "TotalKuduScanRoundTrips";
 const string KuduScanNodeBase::KUDU_REMOTE_TOKENS = "KuduRemoteScanTokens";
@@ -69,12 +83,26 @@ KuduScanNodeBase::~KuduScanNodeBase() {
 Status KuduScanNodeBase::Prepare(RuntimeState* state) {
   RETURN_IF_ERROR(ScanNode::Prepare(state));
 
+  AddBytesReadCounters();
   scan_ranges_complete_counter_ =
       PROFILE_ScanRangesComplete.Instantiate(runtime_profile());
   kudu_round_trips_ = ADD_COUNTER(runtime_profile(), KUDU_ROUND_TRIPS, 
TUnit::UNIT);
   kudu_remote_tokens_ = ADD_COUNTER(runtime_profile(), KUDU_REMOTE_TOKENS, 
TUnit::UNIT);
   kudu_client_time_ = ADD_TIMER(runtime_profile(), KUDU_CLIENT_TIME);
 
+  kudu_scanner_total_duration_time_ =
+      PROFILE_KuduScannerTotalDurationTime.Instantiate(runtime_profile());
+  kudu_scanner_queue_duration_time_ =
+      PROFILE_KuduScannerQueueDurationTime.Instantiate(runtime_profile());
+  kudu_scanner_cpu_user_time_ =
+      PROFILE_KuduScannerCpuUserTime.Instantiate(runtime_profile());
+  kudu_scanner_cpu_sys_time_ =
+      PROFILE_KuduScannerCpuSysTime.Instantiate(runtime_profile());
+  kudu_scanner_cfile_cache_hit_bytes_ =
+      PROFILE_KuduScannerCfileCacheHitBytes.Instantiate(runtime_profile());
+  kudu_scanner_cfile_cache_miss_bytes_ =
+      PROFILE_KuduScannerCfileCacheMissBytes.Instantiate(runtime_profile());
+
   DCHECK(state->desc_tbl().GetTupleDescriptor(tuple_id_) != NULL);
   tuple_desc_ = state->desc_tbl().GetTupleDescriptor(tuple_id_);
   table_desc_ = static_cast<const 
KuduTableDescriptor*>(tuple_desc_->table_desc());
diff --git a/be/src/exec/kudu/kudu-scan-node-base.h 
b/be/src/exec/kudu/kudu-scan-node-base.h
index 79d8e593a..7d4dc093e 100644
--- a/be/src/exec/kudu/kudu-scan-node-base.h
+++ b/be/src/exec/kudu/kudu-scan-node-base.h
@@ -95,6 +95,13 @@ class KuduScanNodeBase : public ScanNode {
   RuntimeProfile::Counter* kudu_round_trips_ = nullptr;
   RuntimeProfile::Counter* kudu_remote_tokens_ = nullptr;
   RuntimeProfile::Counter* kudu_client_time_ = nullptr;
+  RuntimeProfile::Counter* kudu_scanner_total_duration_time_ = nullptr;
+  RuntimeProfile::Counter* kudu_scanner_queue_duration_time_ = nullptr;
+  RuntimeProfile::Counter* kudu_scanner_cpu_user_time_ = nullptr;
+  RuntimeProfile::Counter* kudu_scanner_cpu_sys_time_ = nullptr;
+  RuntimeProfile::Counter* kudu_scanner_cfile_cache_hit_bytes_ = nullptr;
+  RuntimeProfile::Counter* kudu_scanner_cfile_cache_miss_bytes_ = nullptr;
+
   static const std::string KUDU_ROUND_TRIPS;
   static const std::string KUDU_REMOTE_TOKENS;
   static const std::string KUDU_CLIENT_TIME;
@@ -102,5 +109,23 @@ class KuduScanNodeBase : public ScanNode {
   kudu::client::KuduClient* kudu_client() { return client_.get(); }
   RuntimeProfile::Counter* kudu_round_trips() const { return 
kudu_round_trips_; }
   RuntimeProfile::Counter* kudu_client_time() const { return 
kudu_client_time_; }
+  RuntimeProfile::Counter* kudu_scanner_total_duration_time() const {
+    return kudu_scanner_total_duration_time_;
+  }
+  RuntimeProfile::Counter* kudu_scanner_queue_duration_time() const {
+    return kudu_scanner_queue_duration_time_;
+  }
+  RuntimeProfile::Counter* kudu_scanner_cpu_user_time() const {
+    return kudu_scanner_cpu_user_time_;
+  }
+  RuntimeProfile::Counter* kudu_scanner_cpu_sys_time() const {
+    return kudu_scanner_cpu_sys_time_;
+  }
+  RuntimeProfile::Counter* kudu_scanner_cfile_cache_hit_bytes() const {
+    return kudu_scanner_cfile_cache_hit_bytes_;
+  }
+  RuntimeProfile::Counter* kudu_scanner_cfile_cache_miss_bytes() const {
+    return kudu_scanner_cfile_cache_miss_bytes_;
+  }
 };
 } // namespace impala
diff --git a/be/src/exec/kudu/kudu-scanner.cc b/be/src/exec/kudu/kudu-scanner.cc
index 4a404bcaf..62353eea4 100644
--- a/be/src/exec/kudu/kudu-scanner.cc
+++ b/be/src/exec/kudu/kudu-scanner.cc
@@ -19,10 +19,13 @@
 
 #include <string>
 #include <vector>
+
+#include <kudu/client/resource_metrics.h>
 #include <kudu/client/row_result.h>
 #include <kudu/client/value.h>
 #include <thrift/protocol/TDebugProtocol.h>
 
+#include "common/names.h"
 #include "exec/exec-node.inline.h"
 #include "exec/kudu/kudu-util.h"
 #include "exprs/scalar-expr-evaluator.h"
@@ -48,14 +51,13 @@
 #include "util/periodic-counter-updater.h"
 #include "util/runtime-profile-counters.h"
 
-#include "common/names.h"
-
 using kudu::client::KuduClient;
 using kudu::client::KuduPredicate;
 using kudu::client::KuduScanBatch;
 using kudu::client::KuduSchema;
 using kudu::client::KuduTable;
 using kudu::client::KuduValue;
+using kudu::client::ResourceMetrics;
 
 DEFINE_string(kudu_read_mode, "READ_LATEST", "(Advanced) Sets the Kudu scan 
ReadMode. "
     "Supported Kudu read modes are READ_LATEST and READ_AT_SNAPSHOT. Can be 
overridden "
@@ -68,7 +70,6 @@ DECLARE_int32(kudu_operation_timeout_ms);
 
 namespace impala {
 
-
 KuduScanner::KuduScanner(KuduScanNodeBase* scan_node, RuntimeState* state)
   : scan_node_(scan_node),
     state_(state),
@@ -124,6 +125,7 @@ Status 
KuduScanner::GetNextWithCountStarOptimization(RowBatch* row_batch, bool*
   int64_t tuple_buffer_size;
   uint8_t* tuple_buffer;
   int capacity = 1;
+  SCOPED_TIMER(scan_node_->materialize_tuple_timer());
   RETURN_IF_ERROR(row_batch->ResizeAndAllocateTupleBuffer(state_,
       row_batch->tuple_data_pool(), row_batch->row_desc()->GetRowSize(), 
&capacity,
       &tuple_buffer_size, &tuple_buffer));
@@ -141,7 +143,6 @@ Status 
KuduScanner::GetNextWithCountStarOptimization(RowBatch* row_batch, bool*
 }
 
 Status KuduScanner::GetNext(RowBatch* row_batch, bool* eos) {
-  SCOPED_TIMER(scan_node_->materialize_tuple_timer());
   // Optimized scanning for count(*), only write the NumRows
   if (scan_node_->optimize_count_star()) {
     return GetNextWithCountStarOptimization(row_batch, eos);
@@ -311,6 +312,19 @@ Status KuduScanner::OpenNextScanToken(const string& 
scan_token, bool* eos) {
 
 void KuduScanner::CloseCurrentClientScanner() {
   DCHECK_NOTNULL(scanner_.get());
+
+  std::map<std::string, int64_t> metrics = 
scanner_->GetResourceMetrics().Get();
+  COUNTER_ADD(scan_node_->bytes_read_counter(), metrics["bytes_read"]);
+  COUNTER_ADD(
+      scan_node_->kudu_scanner_total_duration_time(), 
metrics["total_duration_nanos"]);
+  COUNTER_ADD(
+      scan_node_->kudu_scanner_queue_duration_time(), 
metrics["queue_duration_nanos"]);
+  COUNTER_ADD(scan_node_->kudu_scanner_cpu_user_time(), 
metrics["cpu_user_nanos"]);
+  COUNTER_ADD(scan_node_->kudu_scanner_cpu_sys_time(), 
metrics["cpu_system_nanos"]);
+  COUNTER_ADD(
+      scan_node_->kudu_scanner_cfile_cache_hit_bytes(), 
metrics["cfile_cache_hit_bytes"]);
+  COUNTER_ADD(scan_node_->kudu_scanner_cfile_cache_miss_bytes(),
+      metrics["cfile_cache_miss_bytes"]);
   scanner_->Close();
   scanner_.reset();
 }
@@ -341,6 +355,7 @@ Status KuduScanner::HandleEmptyProjection(RowBatch* 
row_batch) {
 }
 
 Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch, Tuple** 
tuple_mem) {
+  SCOPED_TIMER(scan_node_->materialize_tuple_timer());
   // Short-circuit for empty projection cases.
   if (scan_node_->tuple_desc()->slots().empty()) {
     return HandleEmptyProjection(row_batch);

Reply via email to