This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 86be8554820 [HUDI-5336] Fixing log file pattern match to ignore extraneous files (#7612)
86be8554820 is described below

commit 86be85548202f4726ed1ab702c3c439c3fbea4d2
Author: Sivabalan Narayanan <[email protected]>
AuthorDate: Thu Jan 19 17:41:54 2023 -0800

    [HUDI-5336] Fixing log file pattern match to ignore extraneous files (#7612)
    
    Co-authored-by: Y Ethan Guo <[email protected]>
---
 hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java      | 2 +-
 .../java/org/apache/hudi/common/functional/TestHoodieLogFormat.java   | 2 +-
 .../hudi/common/functional/TestHoodieLogFormatAppendFailure.java      | 4 ++--
 .../apache/hudi/common/table/view/TestHoodieTableFileSystemView.java  | 4 +++-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index 53e7fcc2721..bb9a8d7159d 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -80,7 +80,7 @@ public class FSUtils {
   // Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1_1-0-1
   // Archive log files are of this pattern - .commits_.archive.1_1-0-1
   public static final Pattern LOG_FILE_PATTERN =
-      Pattern.compile("\\.(.+)_(.*)\\.(.+)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?");
+      Pattern.compile("^\\.(.+)_(.*)\\.(log|archive)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?");
   private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
   private static final long MIN_CLEAN_TO_KEEP = 10;
   private static final long MIN_ROLLBACK_TO_KEEP = 10;
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
index 66b1c25cef7..4c12c5e8aa5 100755
--- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
@@ -345,7 +345,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
 
     for (int i = 0; i < 2; i++) {
       Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
-          .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive").overBaseCommit("")
+          .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits").overBaseCommit("")
           .withFs(localFs).build();
       writer.appendBlock(dataBlock);
       writer.close();
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
index d28dfe5e01e..309a3b04858 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
@@ -103,7 +103,7 @@ public class TestHoodieLogFormatAppendFailure {
     HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
 
     Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
-        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
+        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits")
         .overBaseCommit("").withFs(fs).build();
 
     writer.appendBlock(dataBlock);
@@ -134,7 +134,7 @@ public class TestHoodieLogFormatAppendFailure {
     // Opening a new Writer right now will throw IOException. The code should handle this, rollover the logfile and
     // return a new writer with a bumped up logVersion
     writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
-        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
+        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits")
         .overBaseCommit("").withFs(fs).build();
     header = new HashMap<>();
     header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
index 9e14611f80f..877b8fea3bf 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
@@ -306,12 +306,14 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
     String fileName2 =
         FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1, TEST_WRITE_TOKEN);
     // create a dummy log file mimicing cloud stores marker files
-    String fileName3 = "_DUMMY_" + fileName1.substring(1, fileName1.length());
+    String fileName3 = "_GCS_SYNCABLE_TEMPFILE_" + fileName1;
+    String fileName4 = "_DUMMY_" + fileName1.substring(1, fileName1.length());
     // this file should not be deduced as a log file.
 
     Paths.get(basePath, partitionPath, fileName1).toFile().createNewFile();
     Paths.get(basePath, partitionPath, fileName2).toFile().createNewFile();
     Paths.get(basePath, partitionPath, fileName3).toFile().createNewFile();
+    Paths.get(basePath, partitionPath, fileName4).toFile().createNewFile();
     HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
 
     HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);

Reply via email to