nsivabalan commented on code in PR #5234:
URL: https://github.com/apache/hudi/pull/5234#discussion_r845436003


##########
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java:
##########
@@ -723,6 +722,121 @@ private void validateBloomFilters(
     }
   }
 
+  private void validateFileSlices(
+      List<FileSlice> fileSliceListFromMetadataTable, List<FileSlice> 
fileSliceListFromFS,
+      String partitionPath, HoodieTableMetaClient metaClient, String label) {
+    boolean mismatch = false;
+    if (fileSliceListFromMetadataTable.size() != fileSliceListFromFS.size()) {
+      mismatch = true;
+    } else if (!fileSliceListFromMetadataTable.equals(fileSliceListFromFS)) {
+      for (int i = 0; i < fileSliceListFromMetadataTable.size(); i++) {
+        FileSlice fileSlice1 = fileSliceListFromMetadataTable.get(i);
+        FileSlice fileSlice2 = fileSliceListFromFS.get(i);
+        if (!Objects.equals(fileSlice1.getFileGroupId(), 
fileSlice2.getFileGroupId())
+            || !Objects.equals(fileSlice1.getBaseInstantTime(), 
fileSlice2.getBaseInstantTime())
+            || !Objects.equals(fileSlice1.getBaseFile(), 
fileSlice2.getBaseFile())) {
+          mismatch = true;
+          break;
+        }
+        if (!areFileSliceCommittedLogFilesMatching(fileSlice1, fileSlice2, 
metaClient)) {
+          mismatch = true;
+          break;
+        } else {
+          LOG.warn(String.format("There are uncommitted log files in the 
latest file slices "
+              + "but the committed log files match: %s %s", fileSlice1, 
fileSlice2));
+        }
+      }
+    }
+
+    if (mismatch) {
+      String message = String.format("Validation of %s for partition %s 
failed."
+              + "\n%s from metadata: %s\n%s from file system and base files: 
%s",
+          label, partitionPath, label, fileSliceListFromMetadataTable, label, 
fileSliceListFromFS);
+      LOG.error(message);
+      throw new HoodieValidationException(message);
+    } else {
+      LOG.info(String.format("Validation of %s succeeded for partition %s", 
label, partitionPath));
+    }
+  }
+
+  /**
+   * Compares committed log files from two file slices.
+   *
+   * @param fs1        File slice 1
+   * @param fs2        File slice 2
+   * @param metaClient {@link HoodieTableMetaClient} instance
+   * @return {@code true} if matching; {@code false} otherwise.
+   */
+  private boolean areFileSliceCommittedLogFilesMatching(
+      FileSlice fs1, FileSlice fs2, HoodieTableMetaClient metaClient) {
+    Set<String> fs1LogPathSet =

Review Comment:
   Minor optimization: we can check the data table timeline for any inflight 
instants and whether they are committed in the metadata table (MDT), and only 
then proceed with further checks. If not, we can fail the validation right 
away. I'll leave it to you to make the call.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to