yihua commented on a change in pull request #4878:
URL: https://github.com/apache/hudi/pull/4878#discussion_r816168666
##########
File path: hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
##########
@@ -438,27 +481,62 @@ private void validateLatestBaseFiles(HoodieTableFileSystemView metaFsView, Hoodi
/**
* Compare getLatestFileSlices between metadata table and fileSystem.
*/
- private void validateLatestFileSlices(HoodieTableFileSystemView metaFsView,
HoodieTableFileSystemView fsView, String partitionPath) {
+ private void validateLatestFileSlices(
+ HoodieMetadataValidationContext metadataTableBasedContext,
+ HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
- List<FileSlice> latestFileSlicesFromMetadataTable =
metaFsView.getLatestFileSlices(partitionPath).sorted(new
FileSliceCompactor()).collect(Collectors.toList());
- List<FileSlice> latestFileSlicesFromFS =
fsView.getLatestFileSlices(partitionPath).sorted(new
FileSliceCompactor()).collect(Collectors.toList());
+ List<FileSlice> latestFileSlicesFromMetadataTable =
metadataTableBasedContext.getSortedLatestFileSliceList(partitionPath);
+ List<FileSlice> latestFileSlicesFromFS =
fsBasedContext.getSortedLatestFileSliceList(partitionPath);
- LOG.info("Latest file list from metadata: " +
latestFileSlicesFromMetadataTable + ". For partition " + partitionPath);
- LOG.info("Latest file list from direct listing: " + latestFileSlicesFromFS
+ ". For partition " + partitionPath);
+ LOG.debug("Latest file list from metadata: " +
latestFileSlicesFromMetadataTable + ". For partition " + partitionPath);
+ LOG.debug("Latest file list from direct listing: " +
latestFileSlicesFromFS + ". For partition " + partitionPath);
- validateFileSlice(latestFileSlicesFromMetadataTable,
latestFileSlicesFromFS, partitionPath);
+ validate(latestFileSlicesFromMetadataTable, latestFileSlicesFromFS,
partitionPath, "file slices");
LOG.info("Validation of getLatestFileSlices succeeded for partition " +
partitionPath);
}
- private HoodieTableFileSystemView
createHoodieTableFileSystemView(HoodieSparkEngineContext engineContext, boolean
enableMetadataTable) {
+ private void validateAllColumnStats(
+ HoodieMetadataValidationContext metadataTableBasedContext,
+ HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
+ List<String> latestBaseFilenameList =
fsBasedContext.getSortedLatestBaseFileList(partitionPath)
+ .stream().map(BaseFile::getFileName).collect(Collectors.toList());
+ List<HoodieColumnRangeMetadata<String>> metadataBasedColStats =
metadataTableBasedContext
+ .getSortedColumnStatsList(partitionPath, latestBaseFilenameList);
+ List<HoodieColumnRangeMetadata<String>> fsBasedColStats = fsBasedContext
+ .getSortedColumnStatsList(partitionPath, latestBaseFilenameList);
- HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
- .enable(enableMetadataTable)
- .withAssumeDatePartitioning(cfg.assumeDatePartitioning)
- .build();
+ validate(metadataBasedColStats, fsBasedColStats, partitionPath, "column stats");
- return FileSystemViewManager.createInMemoryFileSystemView(engineContext,
- metaClient, metadataConfig);
+ LOG.info("Validation of column stats succeeded for partition " +
partitionPath);
+ }
+
+ private void validateBloomFilters(
+ HoodieMetadataValidationContext metadataTableBasedContext,
+ HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
+ List<String> latestBaseFilenameList =
fsBasedContext.getSortedLatestBaseFileList(partitionPath)
+ .stream().map(BaseFile::getFileName).collect(Collectors.toList());
+ List<BloomFilterData> metadataBasedBloomFilters = metadataTableBasedContext
Review comment:
Thanks for raising this. The same reasoning as above.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]