This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 2686e8578 GH-3411: Expose row group index (#3412)
2686e8578 is described below

commit 2686e85783fba2b3bf947d08b76ecee522581a90
Author: uros7251brick <[email protected]>
AuthorDate: Thu Mar 12 14:34:22 2026 +0100

    GH-3411: Expose row group index (#3412)
    
    * add getCurrentRowGroupIndex method to Parquet readers
    
    * Format with `mvn spotless:apply`
---
 .../hadoop/InternalParquetRecordReader.java        |  8 ++++++
 .../apache/parquet/hadoop/ParquetFileReader.java   |  8 ++++++
 .../org/apache/parquet/hadoop/ParquetReader.java   | 11 ++++++++
 .../apache/parquet/hadoop/ParquetRecordReader.java |  8 ++++++
 .../apache/parquet/hadoop/TestParquetReader.java   | 33 ++++++++++++++++++++++
 5 files changed, 68 insertions(+)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
index c9842c937..19b1d5426 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
@@ -294,6 +294,14 @@ class InternalParquetRecordReader<T> {
     return Collections.unmodifiableMap(setMultiMap);
   }
 
+  /**
+   * Returns the 0-based index of the row group currently being read, as tracked by
+   * {@code currentBlock}. Returns -1 if no row group has been read yet.
+   */
+  public int getCurrentRowGroupIndex() {
+    return currentBlock;
+  }
+
   /**
    * Returns the row index of the current row. If no row has been processed or 
if the
    * row index information is unavailable from the underlying @{@link 
PageReadStore}, returns -1.
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index 551b1bf6c..e0b0d76e0 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -1097,6 +1097,14 @@ public class ParquetFileReader implements Closeable {
     return blocks;
   }
 
+  /**
+   * Returns the 0-based index of the row group that was last read via {@link #readNextRowGroup()}
+   * or {@link #readNextFilteredRowGroup()}. Returns -1 if no row group has been read yet.
+   */
+  public int getCurrentRowGroupIndex() {
+    return currentBlock - 1; // presumably currentBlock is one past the last-read group; confirm
+  }
+
   public void setRequestedSchema(List<ColumnDescriptor> columns) {
     paths.clear();
     for (ColumnDescriptor col : columns) {
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
index 4514a829c..01ac69b33 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
@@ -144,6 +144,17 @@ public class ParquetReader<T> implements Closeable {
     }
   }
 
+  /**
+   * @return the 0-based index of the row group currently being read, or -1 if there is no active
+   *     underlying reader (e.g. nothing has been read yet, or the input has been exhausted).
+   */
+  public int getCurrentRowGroupIndex() {
+    if (reader == null) {
+      return -1;
+    }
+    return reader.getCurrentRowGroupIndex();
+  }
+
   /**
    * @return the row index of the last read row. If no row has been processed, 
returns -1.
    */
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordReader.java
index b217116aa..c0e52fc5c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordReader.java
@@ -207,6 +207,14 @@ public class ParquetRecordReader<T> extends RecordReader<Void, T> {
     return internalReader.nextKeyValue();
   }
 
+  /**
+   * @return the 0-based index of the row group currently being read, as reported by the wrapped
+   *     internal reader. If no row group has been read yet, returns -1.
+   */
+  public int getCurrentRowGroupIndex() {
+    return internalReader.getCurrentRowGroupIndex();
+  }
+
   /**
    * @return the row index of the current row. If no row has been processed, 
returns -1.
    */
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
index 4a4157e7a..807e61899 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
@@ -22,6 +22,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.in;
 import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
 import static org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.net.URISyntaxException;
@@ -201,6 +202,38 @@ public class TestParquetReader {
     assertEquals(reader.getCurrentRowIndex(), -1);
   }
 
+  @Test
+  public void testCurrentRowGroupIndex() throws Exception {
+    int expectedRowGroups;
+    try (ParquetFileReader fileReader =
+        ParquetFileReader.open(HadoopInputFile.fromPath(file, new 
Configuration()))) {
+      expectedRowGroups = fileReader.getRowGroups().size();
+    }
+    assertTrue("expected multiple row groups for this test", expectedRowGroups 
> 1);
+
+    try (ParquetReader<Group> reader = PhoneBookWriter.createReader(file, 
FilterCompat.NOOP, allocator)) {
+      // before the first read() there is no current row group, so the index is -1
+      assertEquals(-1, reader.getCurrentRowGroupIndex());
+
+      reader.read();
+      assertEquals(0, reader.getCurrentRowGroupIndex());
+      // a repeated call without an intervening read() must return the same index
+      assertEquals(0, reader.getCurrentRowGroupIndex());
+
+      int prevIdx = 0;
+      while (reader.read() != null) {
+        int idx = reader.getCurrentRowGroupIndex();
+        assertTrue(idx >= prevIdx); // the index never moves backwards
+        assertTrue(idx <= prevIdx + 1); // and advances by at most one row group per read()
+        prevIdx = idx;
+      }
+      // the last index observed while reading must be that of the file's final row group
+      assertEquals(expectedRowGroups - 1, prevIdx);
+      // once read() has returned null (input exhausted), the index is reported as -1 again
+      assertEquals(-1, reader.getCurrentRowGroupIndex());
+    }
+  }
+
   @Test
   public void testRangeFiltering() throws Exception {
     // The readUsers also validates the rowIndex for each returned row.

Reply via email to