This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 2206326502a [HUDI-8449] Fix deletion of record from FILES partition on empty files list (#13346)
2206326502a is described below
commit 2206326502a6d1daecbe21bd927447a0a386fa53
Author: vamsikarnika <[email protected]>
AuthorDate: Fri May 30 10:41:24 2025 +0530
[HUDI-8449] Fix deletion of record from FILES partition on empty files list (#13346)
---
.../main/java/org/apache/hudi/metadata/MetadataPartitionType.java | 8 +++++++-
.../java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java | 3 +--
.../java/org/apache/hudi/functional/TestHoodieBackedMetadata.java | 8 +++++++-
3 files changed, 15 insertions(+), 4 deletions(-)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
index 6d35f63621e..4a434ff1d98 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
@@ -26,6 +26,7 @@ import org.apache.hudi.avro.model.HoodieSecondaryIndexInfo;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.model.HoodieIndexDefinition;
import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.index.expression.HoodieExpressionIndex;
@@ -99,7 +100,12 @@ public enum MetadataPartitionType {
@Override
public HoodieMetadataPayload combineMetadataPayloads(HoodieMetadataPayload
older, HoodieMetadataPayload newer) {
- return new HoodieMetadataPayload(newer.key, newer.type, combineFileSystemMetadata(older, newer));
+ Map<String, HoodieMetadataFileInfo> combinedFileSystemMetadata = combineFileSystemMetadata(older, newer);
+ if (combinedFileSystemMetadata.isEmpty()) {
+ // if all the files in a partition are deleted after merge, delete the record
+ return new HoodieMetadataPayload(Option.empty());
+ }
+ return new HoodieMetadataPayload(newer.key, newer.type, combinedFileSystemMetadata);
}
},
COLUMN_STATS(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS,
"col-stats-", 3) {
diff --git
a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java
b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java
index 29a27860b76..1bf9e0d8536 100644
---
a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java
+++
b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataPayload.java
@@ -166,8 +166,7 @@ public class TestHoodieMetadataPayload extends
HoodieCommonTestHarness {
HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME,
Collections.emptyMap(), allDeletedFileList);
HoodieMetadataPayload combinedPayload =
allDeletionRecord.getData().preCombine(additionRecord.getData());
- assertEquals(HoodieMetadataPayload.createPartitionFilesRecord(PARTITION_NAME, Collections.emptyMap(), Collections.emptyList()).getData(), combinedPayload);
- assertTrue(combinedPayload.filesystemMetadata.isEmpty());
+ assertTrue(combinedPayload.isDeleted());
// test all partition record
HoodieRecord<HoodieMetadataPayload> allPartitionsRecord =
HoodieMetadataPayload.createPartitionListRecord(Arrays.asList(PARTITION_NAME,
PARTITION_NAME2, PARTITION_NAME3), false);
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
index d2c92dd1273..9459733fdbf 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
+++
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
@@ -3001,7 +3001,9 @@ public class TestHoodieBackedMetadata extends
TestHoodieMetadataBase {
.withFinalizeWriteParallelism(1)
.withDeleteParallelism(1)
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build())
- .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build())
+ .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
+ .withMaxNumDeltaCommitsBeforeCompaction(4)
+ .build())
.build();
try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
@@ -3037,6 +3039,10 @@ public class TestHoodieBackedMetadata extends
TestHoodieMetadataBase {
}
writeStatuses = client.upsert(jsc.parallelize(upsertRecords, 1),
newCommitTime);
assertTrue(client.commit(newCommitTime, writeStatuses));
+
+ // assert entry is not present for deleted partition in metadata table
+ HoodieTableMetadata tableMetadata = metadata(client, storage);
+ assertTrue(tableMetadata.getRecordsByKeyPrefixes(Collections.singletonList(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH), FILES.getPartitionPath(), false).isEmpty());
// above upsert would have triggered clean
validateMetadata(client);
assertEquals(1, metadata(client, storage).getAllPartitionPaths().size());