yihua commented on a change in pull request #4878:
URL: https://github.com/apache/hudi/pull/4878#discussion_r816389548



##########
File path: 
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
##########
@@ -438,27 +481,62 @@ private void 
validateLatestBaseFiles(HoodieTableFileSystemView metaFsView, Hoodi
   /**
    * Compare getLatestFileSlices between metadata table and fileSystem.
    */
-  private void validateLatestFileSlices(HoodieTableFileSystemView metaFsView, 
HoodieTableFileSystemView fsView, String partitionPath) {
+  private void validateLatestFileSlices(
+      HoodieMetadataValidationContext metadataTableBasedContext,
+      HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
 
-    List<FileSlice> latestFileSlicesFromMetadataTable = 
metaFsView.getLatestFileSlices(partitionPath).sorted(new 
FileSliceCompactor()).collect(Collectors.toList());
-    List<FileSlice> latestFileSlicesFromFS = 
fsView.getLatestFileSlices(partitionPath).sorted(new 
FileSliceCompactor()).collect(Collectors.toList());
+    List<FileSlice> latestFileSlicesFromMetadataTable = 
metadataTableBasedContext.getSortedLatestFileSliceList(partitionPath);
+    List<FileSlice> latestFileSlicesFromFS = 
fsBasedContext.getSortedLatestFileSliceList(partitionPath);
 
-    LOG.info("Latest file list from metadata: " + 
latestFileSlicesFromMetadataTable + ". For partition " + partitionPath);
-    LOG.info("Latest file list from direct listing: " + latestFileSlicesFromFS 
+ ". For partition " + partitionPath);
+    LOG.debug("Latest file list from metadata: " + 
latestFileSlicesFromMetadataTable + ". For partition " + partitionPath);
+    LOG.debug("Latest file list from direct listing: " + 
latestFileSlicesFromFS + ". For partition " + partitionPath);
 
-    validateFileSlice(latestFileSlicesFromMetadataTable, 
latestFileSlicesFromFS, partitionPath);
+    validate(latestFileSlicesFromMetadataTable, latestFileSlicesFromFS, 
partitionPath, "file slices");
     LOG.info("Validation of getLatestFileSlices succeeded for partition " + 
partitionPath);
   }
 
-  private HoodieTableFileSystemView 
createHoodieTableFileSystemView(HoodieSparkEngineContext engineContext, boolean 
enableMetadataTable) {
+  private void validateAllColumnStats(
+      HoodieMetadataValidationContext metadataTableBasedContext,
+      HoodieMetadataValidationContext fsBasedContext, String partitionPath) {
+    List<String> latestBaseFilenameList = 
fsBasedContext.getSortedLatestBaseFileList(partitionPath)
+        .stream().map(BaseFile::getFileName).collect(Collectors.toList());
+    List<HoodieColumnRangeMetadata<String>> metadataBasedColStats = 
metadataTableBasedContext
+        .getSortedColumnStatsList(partitionPath, latestBaseFilenameList);
+    List<HoodieColumnRangeMetadata<String>> fsBasedColStats = fsBasedContext
+        .getSortedColumnStatsList(partitionPath, latestBaseFilenameList);

Review comment:
       Thanks for raising the concern.  The idea is to keep failures caused by 
wrong column stats separate from failures caused by a mismatched latest base 
file list, for clearer validation.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to