This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1907153ab5092b13ed0ab0cb52b66705cc7a903f
Author: Riza Suminto <[email protected]>
AuthorDate: Fri Jan 3 15:27:32 2025 -0800

    IMPALA-13641: Lazily init Parquet column read counters
    
    ParquetUncompressedBytesReadPerColumn and
    ParquetCompressedBytesReadPerColumn exist in the runtime profile even
    when no Parquet file is read (e.g. when all scans are over text files).
    This patch lazily initializes those counters only if
    HdfsScanNodeBase::bytes_read_per_col_ is not empty.
    
    Testing:
    - Run and pass TestParquet::test_bytes_read_per_column.
    - Run TestTpcdsInsert and confirm that no Parquet-specific counters exist
      when reading a TEXTFILE table.
    
    Change-Id: I8ba767b69b8c432f0eb954aa54f86876b329160c
    Reviewed-on: http://gerrit.cloudera.org:8080/22297
    Reviewed-by: Michael Smith <[email protected]>
    Reviewed-by: Csaba Ringhofer <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/hdfs-scan-node-base.cc | 29 +++++++++++++++--------------
 be/src/exec/hdfs-scan-node-base.h  |  4 ----
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/be/src/exec/hdfs-scan-node-base.cc 
b/be/src/exec/hdfs-scan-node-base.cc
index 51046243c..70495e01f 100644
--- a/be/src/exec/hdfs-scan-node-base.cc
+++ b/be/src/exec/hdfs-scan-node-base.cc
@@ -641,11 +641,6 @@ Status HdfsScanNodeBase::Open(RuntimeState* state) {
   initial_range_actual_reservation_stats_ =
       PROFILE_InitialRangeActualReservation.Instantiate(runtime_profile());
 
-  uncompressed_bytes_read_per_column_counter_ =
-      
PROFILE_ParquetUncompressedBytesReadPerColumn.Instantiate(runtime_profile());
-  compressed_bytes_read_per_column_counter_ =
-      
PROFILE_ParquetCompressedBytesReadPerColumn.Instantiate(runtime_profile());
-
   bytes_read_local_ = PROFILE_BytesReadLocal.Instantiate(runtime_profile());
   bytes_read_short_circuit_ =
       PROFILE_BytesReadShortCircuit.Instantiate(runtime_profile());
@@ -1259,15 +1254,21 @@ void HdfsScanNodeBase::StopAndFinalizeCounters() {
   {
     shared_lock<shared_mutex> bytes_read_per_col_guard_read_lock(
         bytes_read_per_col_lock_);
-    for (const auto& bytes_read : bytes_read_per_col_) {
-      int64_t uncompressed_bytes_read = 
bytes_read.second.uncompressed_bytes_read.Load();
-      if (uncompressed_bytes_read > 0) {
-        uncompressed_bytes_read_per_column_counter_->UpdateCounter(
-            uncompressed_bytes_read);
-      }
-      int64_t compressed_bytes_read = 
bytes_read.second.compressed_bytes_read.Load();
-      if (compressed_bytes_read > 0) {
-        
compressed_bytes_read_per_column_counter_->UpdateCounter(compressed_bytes_read);
+    if (!bytes_read_per_col_.empty()) {
+      auto uncompressed_bytes_counter =
+          
PROFILE_ParquetUncompressedBytesReadPerColumn.Instantiate(runtime_profile());
+      auto compressed_bytes_counter =
+          
PROFILE_ParquetCompressedBytesReadPerColumn.Instantiate(runtime_profile());
+      for (const auto& bytes_read : bytes_read_per_col_) {
+        int64_t uncompressed_bytes_read =
+            bytes_read.second.uncompressed_bytes_read.Load();
+        if (uncompressed_bytes_read > 0) {
+          uncompressed_bytes_counter->UpdateCounter(uncompressed_bytes_read);
+        }
+        int64_t compressed_bytes_read = 
bytes_read.second.compressed_bytes_read.Load();
+        if (compressed_bytes_read > 0) {
+          compressed_bytes_counter->UpdateCounter(compressed_bytes_read);
+        }
       }
     }
   }
diff --git a/be/src/exec/hdfs-scan-node-base.h 
b/be/src/exec/hdfs-scan-node-base.h
index 02a157f56..ed4c6948b 100644
--- a/be/src/exec/hdfs-scan-node-base.h
+++ b/be/src/exec/hdfs-scan-node-base.h
@@ -785,10 +785,6 @@ class HdfsScanNodeBase : public ScanNode {
   RuntimeProfile::Counter* hdfs_open_file_timer_ = nullptr;
   RuntimeProfile::SummaryStatsCounter* initial_range_ideal_reservation_stats_ 
= nullptr;
   RuntimeProfile::SummaryStatsCounter* initial_range_actual_reservation_stats_ 
= nullptr;
-  RuntimeProfile::SummaryStatsCounter* 
compressed_bytes_read_per_column_counter_ =
-      nullptr;
-  RuntimeProfile::SummaryStatsCounter* 
uncompressed_bytes_read_per_column_counter_ =
-      nullptr;
 
   /// HDFS read thread concurrency bucket: bucket[i] refers to the number of 
sample
   /// taken where there are i concurrent hdfs read thread running. Created in 
Open().

Reply via email to