This is an automated email from the ASF dual-hosted git repository. asherman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0677b8a0bab5288713d2e1a00ff5ec3123ab284c Author: Riza Suminto <[email protected]> AuthorDate: Fri Nov 22 10:38:21 2024 -0800 IMPALA-13567: Update RowsRead counter more frequently HdfsColumnarScanner implementations update the RowsRead counter near the very end of the AssembleRows function. Ideally, RowsRead should be incremented more frequently, every time before calling TransferScratchTuples, where conjunct and runtime filter evaluation happens. That way, RowsRead is increased even if all rows in the scratch batch are filtered out by conjuncts or runtime filters. This patch moves the counter increment to just before TransferScratchTuples. Testing: - Pass core tests. Change-Id: I17f1c40a0e790750ffbd4e987ca181b82cc14c40 Reviewed-on: http://gerrit.cloudera.org:8080/22102 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/orc/hdfs-orc-scanner.cc | 2 +- be/src/exec/parquet/hdfs-parquet-scanner.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/exec/orc/hdfs-orc-scanner.cc b/be/src/exec/orc/hdfs-orc-scanner.cc index 631c89ea8..a8c15126d 100644 --- a/be/src/exec/orc/hdfs-orc-scanner.cc +++ b/be/src/exec/orc/hdfs-orc-scanner.cc @@ -1005,6 +1005,7 @@ Status HdfsOrcScanner::AssembleRows(RowBatch* row_batch) { end_of_stripe_ = true; return Status::OK(); } + COUNTER_ADD(scan_node_->rows_read_counter(), orc_root_batch_->numElements); num_rows_read += orc_root_batch_->numElements; } @@ -1013,7 +1014,6 @@ Status HdfsOrcScanner::AssembleRows(RowBatch* row_batch) { continue_execution &= !scan_node_->ReachedLimitShared() && !context_->cancelled(); } stripe_rows_read_ += num_rows_read; - COUNTER_ADD(scan_node_->rows_read_counter(), num_rows_read); // Merge Scanner-local counter into HdfsScanNode counter and reset. 
COUNTER_ADD(scan_node_->collection_items_read_counter(), coll_items_read_counter_); coll_items_read_counter_ = 0; diff --git a/be/src/exec/parquet/hdfs-parquet-scanner.cc b/be/src/exec/parquet/hdfs-parquet-scanner.cc index ad9d0c3ed..9e33ff360 100644 --- a/be/src/exec/parquet/hdfs-parquet-scanner.cc +++ b/be/src/exec/parquet/hdfs-parquet-scanner.cc @@ -2328,13 +2328,13 @@ Status HdfsParquetScanner::AssembleRowsWithoutLateMaterialization( last_num_tuples = scratch_batch_->num_tuples; } RETURN_IF_ERROR(CheckPageFiltering()); + COUNTER_ADD(scan_node_->rows_read_counter(), scratch_batch_->num_tuples); num_rows_read += scratch_batch_->num_tuples; int num_row_to_commit = TransferScratchTuples(row_batch); RETURN_IF_ERROR(CommitRows(row_batch, num_row_to_commit)); if (row_batch->AtCapacity()) break; } row_group_rows_read_ += num_rows_read; - COUNTER_ADD(scan_node_->rows_read_counter(), num_rows_read); // Merge Scanner-local counter into HdfsScanNode counter and reset. COUNTER_ADD(scan_node_->collection_items_read_counter(), coll_items_read_counter_); coll_items_read_counter_ = 0;
