This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 321111595df Add support for SNAPSHOT_WITH_DELETE validDocIdsType to
server APIs. (#16726)
321111595df is described below
commit 321111595dfae6828ce3a73513355bdcb6181d1d
Author: Abhishek Bafna <[email protected]>
AuthorDate: Wed Sep 3 08:24:26 2025 +0530
Add support for SNAPSHOT_WITH_DELETE validDocIdsType to server APIs.
(#16726)
* Add support for SNAPSHOT_WITH_DELETE validDocIdsType to server APIs.
* removing some redundant tests.
---------
Co-authored-by: abhishekbafna <[email protected]>
---
.../common/restlet/resources/ValidDocIdsType.java | 5 +
.../pinot/server/api/resources/TablesResource.java | 4 +
.../apache/pinot/server/api/BaseResourceTest.java | 7 +-
.../pinot/server/api/TablesResourceTest.java | 134 +++++++++++++++++++--
.../test/resources/data/test_data_with_delete.avro | Bin 0 -> 15615184 bytes
5 files changed, 138 insertions(+), 12 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/ValidDocIdsType.java
b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/ValidDocIdsType.java
index a8fbb129dc1..3ffb94e69d6 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/ValidDocIdsType.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/ValidDocIdsType.java
@@ -23,6 +23,11 @@ public enum ValidDocIdsType {
// Pinot segment. UpsertConfig's 'enableSnapshot' must be enabled for this
type.
SNAPSHOT,
+ // This indicates that the validDocIds bitmap is loaded from the snapshot
from the Pinot segment.
+ // The valid document ids here do take the deleted records into account.
UpsertConfig's 'enableSnapshot' must be
+ // enabled for this type. UpsertConfig's 'deleteRecordColumn' must be
provided for this type.
+ SNAPSHOT_WITH_DELETE,
+
// This indicates that the validDocIds bitmap is loaded from the real-time
server's in-memory.
//
// NOTE: Using in-memory based validDocids bitmap is a bit dangerous as it
will not give us the consistency in some
diff --git
a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java
b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java
index d8493ebaf66..a2a2e691141 100644
---
a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java
+++
b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java
@@ -761,6 +761,10 @@ public class TablesResource {
case SNAPSHOT:
return Pair.of(validDocIdsType,
((ImmutableSegmentImpl)
indexSegment).loadDocIdsFromSnapshot(V1Constants.VALID_DOC_IDS_SNAPSHOT_FILE_NAME));
+ case SNAPSHOT_WITH_DELETE:
+ return Pair.of(validDocIdsType,
+ ((ImmutableSegmentImpl) indexSegment).loadDocIdsFromSnapshot(
+ V1Constants.QUERYABLE_DOC_IDS_SNAPSHOT_FILE_NAME));
case IN_MEMORY:
return Pair.of(validDocIdsType,
indexSegment.getValidDocIds().getMutableRoaringBitmap());
case IN_MEMORY_WITH_DELETE:
diff --git
a/pinot-server/src/test/java/org/apache/pinot/server/api/BaseResourceTest.java
b/pinot-server/src/test/java/org/apache/pinot/server/api/BaseResourceTest.java
index 93b61911145..b3c194954f7 100644
---
a/pinot-server/src/test/java/org/apache/pinot/server/api/BaseResourceTest.java
+++
b/pinot-server/src/test/java/org/apache/pinot/server/api/BaseResourceTest.java
@@ -75,7 +75,10 @@ import static org.testng.Assert.assertTrue;
public abstract class BaseResourceTest {
- private static final String AVRO_DATA_PATH = "data/test_data-mv.avro";
+ protected String getAvroFileName() {
+ return "data/test_data-mv.avro";
+ }
+
private static final File TEMP_DIR = new File(FileUtils.getTempDirectory(),
"BaseResourceTest");
protected static final String TABLE_NAME = "testTable";
protected static final String LLC_SEGMENT_NAME_FOR_UPLOAD_SUCCESS =
@@ -104,7 +107,7 @@ public abstract class BaseResourceTest {
FileUtils.deleteQuietly(TEMP_DIR);
assertTrue(TEMP_DIR.mkdirs());
- URL resourceUrl = getClass().getClassLoader().getResource(AVRO_DATA_PATH);
+ URL resourceUrl =
getClass().getClassLoader().getResource(getAvroFileName());
assertNotNull(resourceUrl);
_avroFile = new File(resourceUrl.getFile());
diff --git
a/pinot-server/src/test/java/org/apache/pinot/server/api/TablesResourceTest.java
b/pinot-server/src/test/java/org/apache/pinot/server/api/TablesResourceTest.java
index 2ed2af463d3..42c78660142 100644
---
a/pinot-server/src/test/java/org/apache/pinot/server/api/TablesResourceTest.java
+++
b/pinot-server/src/test/java/org/apache/pinot/server/api/TablesResourceTest.java
@@ -213,10 +213,10 @@ public class TablesResourceTest extends BaseResourceTest {
Assert.assertEquals(jsonResponse.get("columns").size(), 2);
Assert.assertEquals(jsonResponse.get("indexes").size(), 2);
Assert.assertNotNull(jsonResponse.get("columns").get(0).get("indexSizeMap"));
-
Assert.assertEquals(jsonResponse.get("columns").get(0).get("indexSizeMap").get("forward_index").asText(),
"200008");
+
Assert.assertEquals(jsonResponse.get("columns").get(0).get("indexSizeMap").get("forward_index").asText(),
"400008");
Assert.assertEquals(jsonResponse.get("columns").get(0).get("indexSizeMap").get("dictionary").asText(),
"206384");
Assert.assertNotNull(jsonResponse.get("columns").get(1).get("indexSizeMap"));
-
Assert.assertEquals(jsonResponse.get("columns").get(1).get("indexSizeMap").get("forward_index").asText(),
"200008");
+
Assert.assertEquals(jsonResponse.get("columns").get(1).get("indexSizeMap").get("forward_index").asText(),
"400008");
Assert.assertEquals(jsonResponse.get("columns").get(1).get("indexSizeMap").get("dictionary").asText(),
"168976");
Assert.assertEquals(jsonResponse.get("indexes").get("column1").get("h3-index").asText(),
"NO");
Assert.assertEquals(jsonResponse.get("indexes").get("column1").get("fst-index").asText(),
"NO");
@@ -320,10 +320,10 @@ public class TablesResourceTest extends BaseResourceTest {
.get(String.class);
JsonNode validDocIdMetadata =
JsonUtils.stringToJsonNode(metadataResponse).get(0);
- Assert.assertEquals(validDocIdMetadata.get("totalDocs").asInt(), 100000);
+ Assert.assertEquals(validDocIdMetadata.get("totalDocs").asInt(), 200000);
Assert.assertEquals(validDocIdMetadata.get("totalValidDocs").asInt(), 8);
- Assert.assertEquals(validDocIdMetadata.get("totalInvalidDocs").asInt(),
99992);
- Assert.assertEquals(validDocIdMetadata.get("segmentCrc").asText(),
"1894900283");
+ Assert.assertEquals(validDocIdMetadata.get("totalInvalidDocs").asInt(),
199992);
+ Assert.assertEquals(validDocIdMetadata.get("segmentCrc").asText(),
"187068486");
Assert.assertEquals(validDocIdMetadata.get("validDocIdsType").asText(),
"SNAPSHOT");
}
@@ -346,12 +346,51 @@ public class TablesResourceTest extends BaseResourceTest {
.post(Entity.json(tableSegments), String.class);
JsonNode validDocIdsMetadata = JsonUtils.stringToJsonNode(response).get(0);
- Assert.assertEquals(validDocIdsMetadata.get("totalDocs").asInt(), 100000);
+ Assert.assertEquals(validDocIdsMetadata.get("totalDocs").asInt(), 200000);
Assert.assertEquals(validDocIdsMetadata.get("totalValidDocs").asInt(), 8);
- Assert.assertEquals(validDocIdsMetadata.get("totalInvalidDocs").asInt(),
99992);
- Assert.assertEquals(validDocIdsMetadata.get("segmentCrc").asText(),
"1894900283");
+ Assert.assertEquals(validDocIdsMetadata.get("totalInvalidDocs").asInt(),
199992);
+ Assert.assertEquals(validDocIdsMetadata.get("segmentCrc").asText(),
"187068486");
Assert.assertEquals(validDocIdsMetadata.get("validDocIdsType").asText(),
"SNAPSHOT");
-
Assert.assertEquals(validDocIdsMetadata.get("segmentSizeInBytes").asLong(),
1877636);
+
Assert.assertEquals(validDocIdsMetadata.get("segmentSizeInBytes").asLong(),
4514723);
+ Assert.assertTrue(validDocIdsMetadata.has("segmentCreationTimeMillis"));
+
Assert.assertTrue(validDocIdsMetadata.get("segmentCreationTimeMillis").asLong()
> 0);
+
+ // Verify server status information
+ Assert.assertTrue(validDocIdsMetadata.has("serverStatus"), "Server status
should be included in response");
+ String serverStatus = validDocIdsMetadata.get("serverStatus").asText();
+ Assert.assertNotNull(serverStatus, "Server status should not be null");
+ Assert.assertEquals(serverStatus, "NOT_STARTED", serverStatus);
+ }
+
+ @Test
+ public void testValidDocIdsMetadataPostForSnapshotWithDelete()
+ throws IOException {
+ IndexSegment segment = _realtimeIndexSegments.get(0);
+ // Verify the content of the downloaded snapshot from a realtime table.
+
downLoadAndVerifyValidDocIdsSnapshot(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME),
+ (ImmutableSegmentImpl) segment);
+
downLoadAndVerifyValidDocIdsSnapshotBitmap(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME),
+ (ImmutableSegmentImpl) segment);
+
+ List<String> segments = List.of(segment.getSegmentName());
+ TableSegments tableSegments = new TableSegments(segments);
+ String validDocIdsMetadataPath =
+ "/tables/" + TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME) +
"/validDocIdsMetadata";
+
+ // Test the new SNAPSHOT_WITH_DELETE validDocIdsType
+ String response = _webTarget.path(validDocIdsMetadataPath)
+ .queryParam("segmentNames", segment.getSegmentName())
+ .queryParam("validDocIdsType",
ValidDocIdsType.SNAPSHOT_WITH_DELETE.toString())
+ .request()
+ .post(Entity.json(tableSegments), String.class);
+ JsonNode validDocIdsMetadata = JsonUtils.stringToJsonNode(response).get(0);
+
+ Assert.assertEquals(validDocIdsMetadata.get("totalDocs").asInt(), 200000);
+ Assert.assertEquals(validDocIdsMetadata.get("totalValidDocs").asInt(), 8);
+ Assert.assertEquals(validDocIdsMetadata.get("totalInvalidDocs").asInt(),
199992);
+ Assert.assertEquals(validDocIdsMetadata.get("segmentCrc").asText(),
"187068486");
+ Assert.assertEquals(validDocIdsMetadata.get("validDocIdsType").asText(),
"SNAPSHOT_WITH_DELETE");
+
Assert.assertEquals(validDocIdsMetadata.get("segmentSizeInBytes").asLong(),
4514723);
Assert.assertTrue(validDocIdsMetadata.has("segmentCreationTimeMillis"));
Assert.assertTrue(validDocIdsMetadata.get("segmentCreationTimeMillis").asLong()
> 0);
@@ -413,6 +452,13 @@ public class TablesResourceTest extends BaseResourceTest {
FileUtils.writeByteArrayToFile(validDocIdsSnapshotFile,
RoaringBitmapUtils.serialize(validDocIdsSnapshot.getMutableRoaringBitmap()));
+ // Create the queryableDocIds snapshot file needed for SNAPSHOT_WITH_DELETE
+ File queryableDocIdsSnapshotFile =
+ new
File(SegmentDirectoryPaths.findSegmentDirectory(segment.getSegmentMetadata().getIndexDir()),
+ V1Constants.QUERYABLE_DOC_IDS_SNAPSHOT_FILE_NAME);
+ FileUtils.writeByteArrayToFile(queryableDocIdsSnapshotFile,
+
RoaringBitmapUtils.serialize(queryableDocIds.getMutableRoaringBitmap()));
+
// Check no type (default should be validDocIdsSnapshot)
Response response =
_webTarget.path(snapshotPath).request().get(Response.class);
Assert.assertEquals(response.getStatus(),
Response.Status.OK.getStatusCode());
@@ -472,7 +518,15 @@ public class TablesResourceTest extends BaseResourceTest {
V1Constants.VALID_DOC_IDS_SNAPSHOT_FILE_NAME);
FileUtils.writeByteArrayToFile(validDocIdsSnapshotFile,
RoaringBitmapUtils.serialize(validDocIdsSnapshot.getMutableRoaringBitmap()));
- String expectedSegmentCrc = "1894900283";
+
+ // Create the queryableDocIds snapshot file needed for SNAPSHOT_WITH_DELETE
+ File queryableDocIdsSnapshotFile =
+ new
File(SegmentDirectoryPaths.findSegmentDirectory(segment.getSegmentMetadata().getIndexDir()),
+ V1Constants.QUERYABLE_DOC_IDS_SNAPSHOT_FILE_NAME);
+ FileUtils.writeByteArrayToFile(queryableDocIdsSnapshotFile,
+
RoaringBitmapUtils.serialize(queryableDocIds.getMutableRoaringBitmap()));
+
+ String expectedSegmentCrc = "187068486";
// Check no type (default should be validDocIdsSnapshot)
ValidDocIdsBitmapResponse response =
_webTarget.path(snapshotPath).request().get(ValidDocIdsBitmapResponse.class);
@@ -521,6 +575,60 @@ public class TablesResourceTest extends BaseResourceTest {
queryableDocIds.getMutableRoaringBitmap());
}
+ @Test
+ public void testValidDocIdsMetadataGetForSnapshotWithDelete()
+ throws IOException {
+ IndexSegment segment = _realtimeIndexSegments.get(0);
+ // Verify the content of the downloaded snapshot from a realtime table.
+
downLoadAndVerifyValidDocIdsSnapshot(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME),
+ (ImmutableSegmentImpl) segment);
+
downLoadAndVerifyValidDocIdsSnapshotBitmap(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME),
+ (ImmutableSegmentImpl) segment);
+
+ String validDocIdsMetadataPath =
+ "/tables/" + TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME) +
"/validDocIdMetadata";
+
+ // Test GET endpoint with SNAPSHOT_WITH_DELETE validDocIdsType
+ String response = _webTarget.path(validDocIdsMetadataPath)
+ .queryParam("segmentNames", segment.getSegmentName())
+ .queryParam("validDocIdsType",
ValidDocIdsType.SNAPSHOT_WITH_DELETE.toString())
+ .request()
+ .get(String.class);
+ JsonNode validDocIdsMetadata = JsonUtils.stringToJsonNode(response).get(0);
+
+ Assert.assertEquals(validDocIdsMetadata.get("totalDocs").asInt(), 200000);
+ Assert.assertEquals(validDocIdsMetadata.get("totalValidDocs").asInt(), 8);
+ Assert.assertEquals(validDocIdsMetadata.get("totalInvalidDocs").asInt(),
199992);
+ Assert.assertEquals(validDocIdsMetadata.get("segmentCrc").asText(),
"187068486");
+ Assert.assertEquals(validDocIdsMetadata.get("validDocIdsType").asText(),
"SNAPSHOT_WITH_DELETE");
+ }
+
+ @Test
+ public void testValidDocIdsBitmapForSnapshotWithDelete()
+ throws IOException {
+ IndexSegment segment = _realtimeIndexSegments.get(0);
+ // Verify the content of the downloaded snapshot from a realtime table.
+
downLoadAndVerifyValidDocIdsSnapshot(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME),
+ (ImmutableSegmentImpl) segment);
+
downLoadAndVerifyValidDocIdsSnapshotBitmap(TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME),
+ (ImmutableSegmentImpl) segment);
+
+ String validDocIdsBitmapPath = "/segments/" +
TableNameBuilder.REALTIME.tableNameWithType(TABLE_NAME)
+ + "/" + segment.getSegmentName() + "/validDocIdsBitmap";
+
+ // Test validDocIdsBitmap endpoint with SNAPSHOT_WITH_DELETE
validDocIdsType
+ ValidDocIdsBitmapResponse response = _webTarget.path(validDocIdsBitmapPath)
+ .queryParam("validDocIdsType",
ValidDocIdsType.SNAPSHOT_WITH_DELETE.toString())
+ .request()
+ .get(ValidDocIdsBitmapResponse.class);
+
+ Assert.assertNotNull(response);
+ Assert.assertEquals(response.getSegmentCrc(), "187068486");
+ Assert.assertEquals(response.getSegmentName(), segment.getSegmentName());
+ Assert.assertEquals(response.getValidDocIdsType(),
ValidDocIdsType.SNAPSHOT_WITH_DELETE);
+ Assert.assertNotNull(response.getBitmap());
+ }
+
@Test
public void testUploadSegments()
throws Exception {
@@ -602,4 +710,10 @@ public class TablesResourceTest extends BaseResourceTest {
.request().get(Response.class);
Assert.assertEquals(response.getStatus(),
Response.Status.NOT_FOUND.getStatusCode());
}
+
+ // Override to use data with delete records
+ @Override
+ protected String getAvroFileName() {
+ return "data/test_data_with_delete.avro";
+ }
}
diff --git a/pinot-server/src/test/resources/data/test_data_with_delete.avro
b/pinot-server/src/test/resources/data/test_data_with_delete.avro
new file mode 100644
index 00000000000..96616430d60
Binary files /dev/null and
b/pinot-server/src/test/resources/data/test_data_with_delete.avro differ
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]