This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new c9d0ffb6ccca fix(common): close parquet reader iterator on EOF (#18407)
c9d0ffb6ccca is described below
commit c9d0ffb6cccab81fd96e2666a6f97ff97ad9ce17
Author: Surya Prasanna <[email protected]>
AuthorDate: Wed Apr 1 15:21:07 2026 -0700
fix(common): close parquet reader iterator on EOF (#18407)
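Auto-close the underlying parquet reader when ParquetReaderIterator reaches
EOF. A `closed` flag is added so that close() skips the reader once it has
been closed and hasNext() returns false immediately afterwards.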
---------
Co-authored-by: sivabalan <[email protected]>
---
.../apache/hudi/common/util/ParquetReaderIterator.java | 16 +++++++++++++++-
.../hudi/common/util/TestParquetReaderIterator.java | 2 +-
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git
a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java
b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java
index 19494407e905..62ed1d8e4f44 100644
---
a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java
+++
b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java
@@ -37,6 +37,8 @@ public class ParquetReaderIterator<T> implements
ClosableIterator<T> {
private final ParquetReader<? extends T> parquetReader;
// Holds the next entry returned by the parquet reader
private T next;
+ // Tracks whether the reader has been closed
+ private boolean closed = false;
public ParquetReaderIterator(ParquetReader<? extends T> parquetReader) {
this.parquetReader = parquetReader;
@@ -45,11 +47,19 @@ public class ParquetReaderIterator<T> implements
ClosableIterator<T> {
@Override
public boolean hasNext() {
try {
+ // Short-circuit if already closed
+ if (closed) {
+ return false;
+ }
// To handle when hasNext() is called multiple times for idempotency
and/or the first time
if (this.next == null) {
this.next = parquetReader.read();
}
- return this.next != null;
+ boolean hasNextRecord = this.next != null;
+ if (!hasNextRecord) {
+ close();
+ }
+ return hasNextRecord;
} catch (Exception e) {
FileIOUtils.closeQuietly(parquetReader);
throw new HoodieException("unable to read next record from parquet file
", e);
@@ -75,8 +85,12 @@ public class ParquetReaderIterator<T> implements
ClosableIterator<T> {
}
public void close() {
+ if (closed) {
+ return;
+ }
try {
parquetReader.close();
+ closed = true;
} catch (IOException e) {
throw new HoodieException("Exception while closing the parquet reader",
e);
}
diff --git
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetReaderIterator.java
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetReaderIterator.java
index eab1908cbaeb..8037d16c70d5 100644
---
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetReaderIterator.java
+++
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestParquetReaderIterator.java
@@ -65,6 +65,6 @@ public class TestParquetReaderIterator {
// no more entries to iterate on
assertFalse(iterator.hasNext());
assertThrows(HoodieException.class, iterator::next, "should throw an
exception since there is only 1 record");
- verify(reader, times(1)).close();
+ verify(reader, times(2)).close();
}
}