This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 6381aacb2fd3 [HUDI-9575] Don't generate compaction plan for file slice
with no log files (#13536)
6381aacb2fd3 is described below
commit 6381aacb2fd3225693584ebb4804f43fecf1acaf
Author: Shuo Cheng <[email protected]>
AuthorDate: Wed Jul 16 09:00:54 2025 +0800
[HUDI-9575] Don't generate compaction plan for file slice with no log files
(#13536)
* It does not make sense to generate a compaction plan for a file slice that has no log files.
---------
Co-authored-by: danny0405 <[email protected]>
---
.../compact/plan/generators/BaseHoodieCompactionPlanGenerator.java | 7 ++++++-
.../java/org/apache/hudi/common/model/CompactionOperation.java | 3 ++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/BaseHoodieCompactionPlanGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/BaseHoodieCompactionPlanGenerator.java
index 53c55ba0bc8b..5f1ea93b02bf 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/BaseHoodieCompactionPlanGenerator.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/BaseHoodieCompactionPlanGenerator.java
@@ -53,6 +53,7 @@ import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
@@ -147,6 +148,10 @@ public abstract class BaseHoodieCompactionPlanGenerator<T extends HoodieRecordPa
// for both OCC and NB-CC, this is in-correct.
.filter(logFile ->
completionTimeQueryView.isCompletedBefore(compactionInstant,
logFile.getDeltaCommitTime()))
.sorted(HoodieLogFile.getLogFileComparator()).collect(toList());
+ if (logFiles.isEmpty()) {
+ // compaction is not needed if there is no log file.
+ return null;
+ }
totalLogFiles.add(logFiles.size());
totalFileSlices.add(1L);
// Avro generated classes are not inheriting Serializable. Using
CompactionOperation POJO
@@ -155,7 +160,7 @@ public abstract class BaseHoodieCompactionPlanGenerator<T extends HoodieRecordPa
Option<HoodieBaseFile> dataFile = s.getBaseFile();
return new CompactionOperation(dataFile, partitionPath, logFiles,
writeConfig.getCompactionStrategy().captureMetrics(writeConfig,
s));
- }), partitionPaths.size()).stream()
+ }).filter(Objects::nonNull), partitionPaths.size()).stream()
.map(CompactionUtils::buildHoodieCompactionOperation).collect(toList());
LOG.info("Total of {} compaction operations are retrieved for table {}",
operations.size(), hoodieTable.getConfig().getBasePath());
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/CompactionOperation.java b/hudi-common/src/main/java/org/apache/hudi/common/model/CompactionOperation.java
index 7f6e1b9648ea..d74713a3e3a1 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/model/CompactionOperation.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/CompactionOperation.java
@@ -21,6 +21,7 @@ package org.apache.hudi.common.model;
import org.apache.hudi.avro.model.HoodieCompactionOperation;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.storage.StoragePath;
import java.io.Serializable;
@@ -64,6 +65,7 @@ public class CompactionOperation implements Serializable {
public CompactionOperation(Option<HoodieBaseFile> dataFile, String
partitionPath, List<HoodieLogFile> logFiles,
Map<String, Double> metrics) {
+ ValidationUtils.checkArgument(!logFiles.isEmpty(), "log files should not
be empty.");
if (dataFile.isPresent()) {
this.baseInstantTime = dataFile.get().getCommitTime();
this.dataFileName = Option.of(dataFile.get().getFileName());
@@ -71,7 +73,6 @@ public class CompactionOperation implements Serializable {
this.dataFileCommitTime = Option.of(dataFile.get().getCommitTime());
this.bootstrapFilePath =
dataFile.get().getBootstrapBaseFile().map(BaseFile::getPath);
} else {
- assert logFiles.size() > 0;
this.dataFileName = Option.empty();
this.baseInstantTime = logFiles.get(0).getDeltaCommitTime();
this.id = new HoodieFileGroupId(partitionPath,
logFiles.get(0).getFileId());