zhangyue19921010 commented on a change in pull request #4078: URL: https://github.com/apache/hudi/pull/4078#discussion_r781948726
########## File path: hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java ########## @@ -248,11 +253,32 @@ private HoodieInstant readCommit(GenericRecord record, boolean loadDetails) { break; } } + } catch (Exception originalException) { + // merge small archive files may left uncompleted archive file which will cause exception. + // need to ignore this kind of exception here. + try { + Path planPath = new Path(metaClient.getArchivePath(), "mergeArchivePlan"); + HoodieWrapperFileSystem fileSystem = metaClient.getFs(); + if (fileSystem.exists(planPath)) { + HoodieMergeArchiveFilePlan plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fileSystem, planPath).get(), HoodieMergeArchiveFilePlan.class); + String mergedArchiveFileName = plan.getMergedArchiveFileName(); + if (!StringUtils.isNullOrEmpty(mergedArchiveFileName) && fs.getPath().getName().equalsIgnoreCase(mergedArchiveFileName)) { + LOG.warn("Catch exception because of reading uncompleted merging archive file " + mergedArchiveFileName + ". Ignore it here."); + continue; + } + } + throw originalException; + } catch (Exception e) { + // If anything wrong during parsing merge archive plan, we need to throw the original exception. + // For example corrupted archive file and corrupted plan are both existed. + throw originalException; + } Review comment: We use this code to check whether originalException was caused by a corrupted mergedArchiveFile and, if so, ignore it. Any other exception needs to be thrown again. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org