nsivabalan commented on code in PR #5234: URL: https://github.com/apache/hudi/pull/5234#discussion_r845436003
########## hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java: ########## @@ -723,6 +722,121 @@ private void validateBloomFilters( } } + private void validateFileSlices( + List<FileSlice> fileSliceListFromMetadataTable, List<FileSlice> fileSliceListFromFS, + String partitionPath, HoodieTableMetaClient metaClient, String label) { + boolean mismatch = false; + if (fileSliceListFromMetadataTable.size() != fileSliceListFromFS.size()) { + mismatch = true; + } else if (!fileSliceListFromMetadataTable.equals(fileSliceListFromFS)) { + for (int i = 0; i < fileSliceListFromMetadataTable.size(); i++) { + FileSlice fileSlice1 = fileSliceListFromMetadataTable.get(i); + FileSlice fileSlice2 = fileSliceListFromFS.get(i); + if (!Objects.equals(fileSlice1.getFileGroupId(), fileSlice2.getFileGroupId()) + || !Objects.equals(fileSlice1.getBaseInstantTime(), fileSlice2.getBaseInstantTime()) + || !Objects.equals(fileSlice1.getBaseFile(), fileSlice2.getBaseFile())) { + mismatch = true; + break; + } + if (!areFileSliceCommittedLogFilesMatching(fileSlice1, fileSlice2, metaClient)) { + mismatch = true; + break; + } else { + LOG.warn(String.format("There are uncommitted log files in the latest file slices " + + "but the committed log files match: %s %s", fileSlice1, fileSlice2)); + } + } + } + + if (mismatch) { + String message = String.format("Validation of %s for partition %s failed." + + "\n%s from metadata: %s\n%s from file system and base files: %s", + label, partitionPath, label, fileSliceListFromMetadataTable, label, fileSliceListFromFS); + LOG.error(message); + throw new HoodieValidationException(message); + } else { + LOG.info(String.format("Validation of %s succeeded for partition %s", label, partitionPath)); + } + } + + /** + * Compares committed log files from two file slices. 
+ * + * @param fs1 File slice 1 + * @param fs2 File slice 2 + * @param metaClient {@link HoodieTableMetaClient} instance + * @return {@code true} if matching; {@code false} otherwise. + */ + private boolean areFileSliceCommittedLogFilesMatching( + FileSlice fs1, FileSlice fs2, HoodieTableMetaClient metaClient) { + Set<String> fs1LogPathSet = Review Comment: Minor optimization: we can check the data table timeline to see if there are any inflights, and if it's committed in the MDT (metadata table), then proceed with further checks. If not, we can fail the validation right away. I'll leave it to you to make the call. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org