This is an automated email from the ASF dual-hosted git repository. sivabalan pushed a commit to branch release-0.5.3 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 500b3907a5057b3945115132beb97ef6cae37687 Author: Alexander Filipchik <[email protected]> AuthorDate: Wed Apr 22 09:47:32 2020 -0700 [HUDI-795] Handle auto-deleted empty aux folder (#1515) Co-authored-by: Alex Filipchik <[email protected]> --- .../apache/hudi/table/HoodieCommitArchiveLog.java | 24 +++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java index fcc9673..aa2465d 100644 --- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java +++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java @@ -55,6 +55,7 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -219,14 +220,31 @@ public class HoodieCommitArchiveLog { * @throws IOException in case of error */ private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException { - List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem( - new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false); + List<HoodieInstant> instants = null; + boolean success = true; + try { + instants = + metaClient.scanHoodieInstantsFromFileSystem( + new Path(metaClient.getMetaAuxiliaryPath()), + HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, + false); + } catch (FileNotFoundException e) { + /* + * On some FSs deletion of all files in the directory can auto remove the directory itself. + * GCS is one example, as it doesn't have real directories and subdirectories. When client + * removes all the files from a "folder" on GCS it has to create a special "/" to keep the folder + * around. 
If this doesn't happen (timeout, misconfigured client, ...) folder will be deleted and + * in this case we should not break when aux folder is not found. + * GCS information: (https://cloud.google.com/storage/docs/gsutil/addlhelp/HowSubdirectoriesWork) + */ + LOG.warn("Aux path not found. Skipping: " + metaClient.getMetaAuxiliaryPath()); + return success; + } List<HoodieInstant> instantsToBeDeleted = instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(), thresholdInstant.getTimestamp(), HoodieTimeline.LESSER_OR_EQUAL)).collect(Collectors.toList()); - boolean success = true; for (HoodieInstant deleteInstant : instantsToBeDeleted) { LOG.info("Deleting instant " + deleteInstant + " in auxiliary meta path " + metaClient.getMetaAuxiliaryPath()); Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName());
