This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 2206326502a [HUDI-8449] Fix deletion of record from FILES partition on empty files list (#13346)
2206326502a is described below

commit 2206326502a6d1daecbe21bd927447a0a386fa53
Author: vamsikarnika <[email protected]>
AuthorDate: Fri May 30 10:41:24 2025 +0530

    [HUDI-8449] Fix deletion of record from FILES partition on empty files list (#13346)
---
 .../main/java/org/apache/hudi/metadata/MetadataPartitionType.java | 8 +++++++-
 .../java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java  | 3 +--
 .../java/org/apache/hudi/functional/TestHoodieBackedMetadata.java | 8 +++++++-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git 
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java 
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
index 6d35f63621e..4a434ff1d98 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
@@ -26,6 +26,7 @@ import org.apache.hudi.avro.model.HoodieSecondaryIndexInfo;
 import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.model.HoodieIndexDefinition;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.index.expression.HoodieExpressionIndex;
@@ -99,7 +100,12 @@ public enum MetadataPartitionType {
 
     @Override
     public HoodieMetadataPayload combineMetadataPayloads(HoodieMetadataPayload 
older, HoodieMetadataPayload newer) {
-      return new HoodieMetadataPayload(newer.key, newer.type, 
combineFileSystemMetadata(older, newer));
+      Map<String, HoodieMetadataFileInfo> combinedFileSystemMetadata = 
combineFileSystemMetadata(older, newer);
+      if (combinedFileSystemMetadata.isEmpty()) {
+        // if all the files in a partition are deleted after merge, delete the record
+        return new HoodieMetadataPayload(Option.empty());
+      }
+      return new HoodieMetadataPayload(newer.key, newer.type, 
combinedFileSystemMetadata);
     }
   },
   COLUMN_STATS(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS, 
"col-stats-", 3) {
diff --git 
a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java
 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java
index 29a27860b76..1bf9e0d8536 100644
--- 
a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java
+++ 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java
@@ -166,8 +166,7 @@ public class TestHoodieMetadataPayload extends 
HoodieCommonTestHarness {
         HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, 
Collections.emptyMap(), allDeletedFileList);
 
     HoodieMetadataPayload combinedPayload = 
allDeletionRecord.getData().preCombine(additionRecord.getData());
-    
assertEquals(HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, 
Collections.emptyMap(), Collections.emptyList()).getData(), combinedPayload);
-    assertTrue(combinedPayload.filesystemMetadata.isEmpty());
+    assertTrue(combinedPayload.isDeleted());
 
     // test all partition record
     HoodieRecord<HoodieMetadataPayload> allPartitionsRecord = 
HoodieMetadataPayload.createPartitionListRecord(Arrays.asList(PARTITION_NAME, 
PARTITION_NAME2, PARTITION_NAME3), false);
diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
 
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
index d2c92dd1273..9459733fdbf 100644
--- 
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
+++ 
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
@@ -3001,7 +3001,9 @@ public class TestHoodieBackedMetadata extends 
TestHoodieMetadataBase {
         .withFinalizeWriteParallelism(1)
         .withDeleteParallelism(1)
         
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build())
-        
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build())
+        .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
+            .withMaxNumDeltaCommitsBeforeCompaction(4)
+            .build())
         .build();
 
     try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
@@ -3037,6 +3039,10 @@ public class TestHoodieBackedMetadata extends 
TestHoodieMetadataBase {
       }
       writeStatuses = client.upsert(jsc.parallelize(upsertRecords, 1), 
newCommitTime);
       assertTrue(client.commit(newCommitTime, writeStatuses));
+
+      // assert entry is not present for deleted partition in metadata table
+      HoodieTableMetadata tableMetadata = metadata(client, storage);
+      
assertTrue(tableMetadata.getRecordsByKeyPrefixes(Collections.singletonList(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH),
 FILES.getPartitionPath(), false).isEmpty());
       // above upsert would have triggered clean
       validateMetadata(client);
       assertEquals(1, metadata(client, storage).getAllPartitionPaths().size());

Reply via email to