This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new c9d0ffb6ccca fix(common): close parquet reader iterator on EOF (#18407)
c9d0ffb6ccca is described below

commit c9d0ffb6cccab81fd96e2666a6f97ff97ad9ce17
Author: Surya Prasanna <[email protected]>
AuthorDate: Wed Apr 1 15:21:07 2026 -0700

    fix(common): close parquet reader iterator on EOF (#18407)
    
    Auto-close the underlying parquet reader when ParquetReaderIterator reaches EOF.
    
    ---------
    
    Co-authored-by: sivabalan <[email protected]>
---
 .../apache/hudi/common/util/ParquetReaderIterator.java   | 16 +++++++++++++++-
 .../hudi/common/util/TestParquetReaderIterator.java      |  2 +-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git 
a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java
 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java
index 19494407e905..62ed1d8e4f44 100644
--- 
a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java
+++ 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java
@@ -37,6 +37,8 @@ public class ParquetReaderIterator<T> implements 
ClosableIterator<T> {
   private final ParquetReader<? extends T> parquetReader;
   // Holds the next entry returned by the parquet reader
   private T next;
+  // Tracks whether the reader has been closed
+  private boolean closed = false;
 
   public ParquetReaderIterator(ParquetReader<? extends T> parquetReader) {
     this.parquetReader = parquetReader;
@@ -45,11 +47,19 @@ public class ParquetReaderIterator<T> implements 
ClosableIterator<T> {
   @Override
   public boolean hasNext() {
     try {
+      // Short-circuit if already closed
+      if (closed) {
+        return false;
+      }
       // To handle when hasNext() is called multiple times for idempotency 
and/or the first time
       if (this.next == null) {
         this.next = parquetReader.read();
       }
-      return this.next != null;
+      boolean hasNextRecord = this.next != null;
+      if (!hasNextRecord) {
+        close();
+      }
+      return hasNextRecord;
     } catch (Exception e) {
       FileIOUtils.closeQuietly(parquetReader);
       throw new HoodieException("unable to read next record from parquet file 
", e);
@@ -75,8 +85,12 @@ public class ParquetReaderIterator<T> implements 
ClosableIterator<T> {
   }
 
   public void close() {
+    if (closed) {
+      return;
+    }
     try {
       parquetReader.close();
+      closed = true;
     } catch (IOException e) {
       throw new HoodieException("Exception while closing the parquet reader", 
e);
     }
diff --git 
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetReaderIterator.java
 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetReaderIterator.java
index eab1908cbaeb..8037d16c70d5 100644
--- 
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetReaderIterator.java
+++ 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetReaderIterator.java
@@ -65,6 +65,6 @@ public class TestParquetReaderIterator {
     // no more entries to iterate on
     assertFalse(iterator.hasNext());
     assertThrows(HoodieException.class, iterator::next, "should throw an 
exception since there is only 1 record");
-    verify(reader, times(1)).close();
+    verify(reader, times(2)).close();
   }
 }

Reply via email to