This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 2686e8578 GH-3411: Expose row group index (#3412)
2686e8578 is described below
commit 2686e85783fba2b3bf947d08b76ecee522581a90
Author: uros7251brick <[email protected]>
AuthorDate: Thu Mar 12 14:34:22 2026 +0100
GH-3411: Expose row group index (#3412)
* add getCurrentRowGroupIndex method to Parquet readers
* Format with `mvn spotless:apply`
---
.../hadoop/InternalParquetRecordReader.java | 8 ++++++
.../apache/parquet/hadoop/ParquetFileReader.java | 8 ++++++
.../org/apache/parquet/hadoop/ParquetReader.java | 11 ++++++++
.../apache/parquet/hadoop/ParquetRecordReader.java | 8 ++++++
.../apache/parquet/hadoop/TestParquetReader.java | 33 ++++++++++++++++++++++
5 files changed, 68 insertions(+)
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
index c9842c937..19b1d5426 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
@@ -294,6 +294,14 @@ class InternalParquetRecordReader<T> {
return Collections.unmodifiableMap(setMultiMap);
}
+ /**
+ * Returns the 0-based index of the row group currently being read. Returns -1 if no row group
+ * has been read yet.
+ */
+ public int getCurrentRowGroupIndex() {
+ return currentBlock;
+ }
+
/**
* Returns the row index of the current row. If no row has been processed or if the
* row index information is unavailable from the underlying @{@link PageReadStore}, returns -1.
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index 551b1bf6c..e0b0d76e0 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -1097,6 +1097,14 @@ public class ParquetFileReader implements Closeable {
return blocks;
}
+ /**
+ * Returns the 0-based index of the row group that was last read via {@link #readNextRowGroup()}
+ * or {@link #readNextFilteredRowGroup()}. Returns -1 if no row group has been read yet.
+ */
+ public int getCurrentRowGroupIndex() {
+ return currentBlock - 1;
+ }
+
public void setRequestedSchema(List<ColumnDescriptor> columns) {
paths.clear();
for (ColumnDescriptor col : columns) {
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
index 4514a829c..01ac69b33 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
@@ -144,6 +144,17 @@ public class ParquetReader<T> implements Closeable {
}
}
+ /**
+ * @return the 0-based index of the row group currently being read. If no row group has been
+ * read yet, returns -1.
+ */
+ public int getCurrentRowGroupIndex() {
+ if (reader == null) {
+ return -1;
+ }
+ return reader.getCurrentRowGroupIndex();
+ }
+
/**
* @return the row index of the last read row. If no row has been processed, returns -1.
*/
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordReader.java
index b217116aa..c0e52fc5c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordReader.java
@@ -207,6 +207,14 @@ public class ParquetRecordReader<T> extends RecordReader<Void, T> {
return internalReader.nextKeyValue();
}
+ /**
+ * @return the 0-based index of the row group currently being read. If no row group has been
+ * read yet, returns -1.
+ */
+ public int getCurrentRowGroupIndex() {
+ return internalReader.getCurrentRowGroupIndex();
+ }
+
/**
* @return the row index of the current row. If no row has been processed, returns -1.
*/
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
index 4a4157e7a..807e61899 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
@@ -22,6 +22,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.in;
import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
import static org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.net.URISyntaxException;
@@ -201,6 +202,38 @@ public class TestParquetReader {
assertEquals(reader.getCurrentRowIndex(), -1);
}
+ @Test
+ public void testCurrentRowGroupIndex() throws Exception {
+ int expectedRowGroups;
+ try (ParquetFileReader fileReader =
+ ParquetFileReader.open(HadoopInputFile.fromPath(file, new Configuration()))) {
+ expectedRowGroups = fileReader.getRowGroups().size();
+ }
+ assertTrue("expected multiple row groups for this test", expectedRowGroups > 1);
+
+ try (ParquetReader<Group> reader = PhoneBookWriter.createReader(file, FilterCompat.NOOP, allocator)) {
+ // before reading anything, returns -1
+ assertEquals(-1, reader.getCurrentRowGroupIndex());
+
+ reader.read();
+ assertEquals(0, reader.getCurrentRowGroupIndex());
+ // idempotent
+ assertEquals(0, reader.getCurrentRowGroupIndex());
+
+ int prevIdx = 0;
+ while (reader.read() != null) {
+ int idx = reader.getCurrentRowGroupIndex();
+ assertTrue(idx >= prevIdx);
+ assertTrue(idx <= prevIdx + 1);
+ prevIdx = idx;
+ }
+ // last row group seen should be the final one
+ assertEquals(expectedRowGroups - 1, prevIdx);
+ // after exhaustion, returns -1
+ assertEquals(-1, reader.getCurrentRowGroupIndex());
+ }
+ }
+
@Test
public void testRangeFiltering() throws Exception {
// The readUsers also validates the rowIndex for each returned row.