This is an automated email from the ASF dual-hosted git repository.
tingchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 22c4e446c69 Allow skip CRC check during segment load when server
starting. (#16432)
22c4e446c69 is described below
commit 22c4e446c69c1071a0ab531751ed323a7c576675
Author: Ting Chen <[email protected]>
AuthorDate: Tue Sep 2 10:21:09 2025 -0700
Allow skip CRC check during segment load when server starting. (#16432)
* Allow skip CRC check during segment load when server starting.
* Fix unit tests.
* Fix based on the comments.
* Revise based on comments
---
.../core/data/manager/BaseTableDataManager.java | 28 +++++++++++++++-------
.../data/manager/BaseTableDataManagerTest.java | 2 ++
.../helix/HelixInstanceDataManagerConfig.java | 8 +++++++
.../config/instance/InstanceDataManagerConfig.java | 2 ++
4 files changed, 32 insertions(+), 8 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
index e4db4f0b50a..0abd9bda1be 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
@@ -425,6 +425,12 @@ public abstract class BaseTableDataManager implements
TableDataManager {
_logger.info("Segment: {} has CRC: {} same as before, not replacing it",
segmentName, localMetadata.getCrc());
return;
}
+ if (!_instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad()) {
+ _logger.info("Skipping replacing segment: {} even though its CRC has
changed from: {} to: {} because "
+ + "instance.check.crc.on.segment.load is set to false", segmentName,
localMetadata.getCrc(),
+ zkMetadata.getCrc());
+ return;
+ }
_logger.info("Replacing segment: {} because its CRC has changed from: {}
to: {}", segmentName,
localMetadata.getCrc(), zkMetadata.getCrc());
downloadAndLoadSegment(zkMetadata, indexLoadingConfig);
@@ -806,7 +812,8 @@ public abstract class BaseTableDataManager implements
TableDataManager {
- Continue loading the segment from the index directory.
*/
boolean shouldDownload =
- forceDownload || (isSegmentStatusCompleted(zkMetadata) &&
!hasSameCRC(zkMetadata, localMetadata));
+ forceDownload || (isSegmentStatusCompleted(zkMetadata) &&
!hasSameCRC(zkMetadata, localMetadata)
+ && _instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad());
if (shouldDownload) {
// Create backup directory to handle failure of segment reloading.
createBackup(indexDir);
@@ -1205,20 +1212,25 @@ public abstract class BaseTableDataManager implements
TableDataManager {
- The "DONE" status confirms the COMMIT_END_METADATA call succeeded,
and the segment is available either in deep storage or with a peer
before discarding the local copy.
+ The only exception is if the server does not check CRC on segment load.
Then:
We need to fall back to downloading the segment from deep storage to load
it.
*/
- if (segmentMetadata == null || (isSegmentStatusCompleted(zkMetadata) &&
!hasSameCRC(zkMetadata, segmentMetadata))) {
- if (segmentMetadata == null) {
- _logger.info("Segment: {} does not exist", segmentName);
- } else if (!hasSameCRC(zkMetadata, segmentMetadata)) {
- _logger.info("Segment: {} has CRC changed from: {} to: {}",
segmentName, segmentMetadata.getCrc(),
- zkMetadata.getCrc());
- }
+ if (segmentMetadata == null) {
+ _logger.info("Segment: {} does not exist", segmentName);
closeSegmentDirectoryQuietly(segmentDirectory);
return false;
}
+ if (isSegmentStatusCompleted(zkMetadata) && !hasSameCRC(zkMetadata,
segmentMetadata)) {
+ _logger.warn("Segment: {} has CRC changed from: {} to: {}", segmentName,
segmentMetadata.getCrc(),
+ zkMetadata.getCrc());
+ if (_instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad()) {
+ closeSegmentDirectoryQuietly(segmentDirectory);
+ return false;
+ }
+ _logger.info("Skipping CRC check for segment: {} as configured. Proceed
to load segment.", segmentName);
+ }
try {
// If the segment is still kept by the server, then we can
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
index 6224fd05b15..2064d393129 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
@@ -675,6 +675,8 @@ public class BaseTableDataManagerTest {
private static InstanceDataManagerConfig
createDefaultInstanceDataManagerConfig() {
InstanceDataManagerConfig config = mock(InstanceDataManagerConfig.class);
when(config.getInstanceDataDir()).thenReturn(TEMP_DIR.getAbsolutePath());
+ // Check CRC matching on segment load time.
+ when(config.shouldCheckCRCOnSegmentLoad()).thenReturn(true);
return config;
}
diff --git
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
index 0ae3c648e2f..cb24365d94d 100644
---
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
+++
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
@@ -119,6 +119,9 @@ public class HelixInstanceDataManagerConfig implements
InstanceDataManagerConfig
private static final int DEFAULT_DELETED_TABLES_CACHE_TTL_MINUTES = 60;
private static final int DEFAULT_DELETED_SEGMENTS_CACHE_SIZE = 10_000;
private static final int DEFAULT_DELETED_SEGMENTS_CACHE_TTL_MINUTES = 2;
+ // By default, check CRC matching when loading segments.
+ private static final boolean DEFAULT_CHECK_CRC_ON_SEGMENT_LOAD = true;
+ private static final String CHECK_CRC_ON_SEGMENT_LOAD =
"check.crc.on.segment.load";
private final PinotConfiguration _serverConfig;
private final PinotConfiguration _upsertConfig;
@@ -331,4 +334,9 @@ public class HelixInstanceDataManagerConfig implements
InstanceDataManagerConfig
public boolean isUploadSegmentToDeepStore() {
return _serverConfig.getProperty(UPLOAD_SEGMENT_TO_DEEP_STORE,
DEFAULT_UPLOAD_SEGMENT_TO_DEEP_STORE);
}
+
+ @Override
+ public boolean shouldCheckCRCOnSegmentLoad() {
+ return _serverConfig.getProperty(CHECK_CRC_ON_SEGMENT_LOAD,
DEFAULT_CHECK_CRC_ON_SEGMENT_LOAD);
+ }
}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java
index 579e390bc0f..fcd7c6b61de 100644
---
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java
+++
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java
@@ -86,4 +86,6 @@ public interface InstanceDataManagerConfig {
Map<String, Map<String, String>> getTierConfigs();
boolean isUploadSegmentToDeepStore();
+
+ boolean shouldCheckCRCOnSegmentLoad();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]