wsjz commented on code in PR #11601:
URL: https://github.com/apache/doris/pull/11601#discussion_r945651384


##########
be/src/vec/exec/format/parquet/vparquet_reader.cpp:
##########
@@ -90,63 +95,240 @@ Status ParquetReader::_init_read_columns(const 
std::vector<SlotDescriptor*>& tup
         auto physical_type = 
_file_metadata->schema().get_column(parquet_col_id)->physical_type;
         column.parquet_type = physical_type;
         _read_columns.emplace_back(column);
+        VLOG_DEBUG << "slot_desc " << slot_desc->debug_string();
     }
     return Status::OK();
 }
 
-Status ParquetReader::read_next_batch(Block* block) {
-    int32_t group_id = 0;
-    RETURN_IF_ERROR(_row_group_reader->get_next_row_group(&group_id));
+Status ParquetReader::read_next_batch(Block* block, bool* eof) {
+    DCHECK(_total_groups == _row_group_readers.size());
+    if (_total_groups == 0) {
+        *eof = true;
+    }
+    bool _batch_eof = false;
+    auto row_group_reader = _row_group_readers[_current_row_group_id];
+    RETURN_IF_ERROR(row_group_reader->next_batch(block, _batch_size, 
&_batch_eof));
+    if (_batch_eof) {
+        _current_row_group_id++;
+        if (_current_row_group_id > _total_groups) {
+            *eof = true;
+        }

Review Comment:
   The `_current_row_group_id++;` statement can do this, so marking
`*eof = true` is implicit.
   



##########
be/src/vec/exec/format/parquet/vparquet_reader.cpp:
##########
@@ -90,63 +95,240 @@ Status ParquetReader::_init_read_columns(const 
std::vector<SlotDescriptor*>& tup
         auto physical_type = 
_file_metadata->schema().get_column(parquet_col_id)->physical_type;
         column.parquet_type = physical_type;
         _read_columns.emplace_back(column);
+        VLOG_DEBUG << "slot_desc " << slot_desc->debug_string();
     }
     return Status::OK();
 }
 
-Status ParquetReader::read_next_batch(Block* block) {
-    int32_t group_id = 0;
-    RETURN_IF_ERROR(_row_group_reader->get_next_row_group(&group_id));
+Status ParquetReader::read_next_batch(Block* block, bool* eof) {
+    DCHECK(_total_groups == _row_group_readers.size());
+    if (_total_groups == 0) {
+        *eof = true;
+    }
+    bool _batch_eof = false;
+    auto row_group_reader = _row_group_readers[_current_row_group_id];
+    RETURN_IF_ERROR(row_group_reader->next_batch(block, _batch_size, 
&_batch_eof));
+    if (_batch_eof) {
+        _current_row_group_id++;
+        if (_current_row_group_id > _total_groups) {
+            *eof = true;
+        }

Review Comment:
   The `_current_row_group_id++;` statement can do this, so marking
`*eof = false` is implicit.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to