lokeshj1703 commented on code in PR #13229:
URL: https://github.com/apache/hudi/pull/13229#discussion_r2085324876
##########
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/clean/TestCleanerInsertAndCleanByCommits.java:
##########
@@ -250,11 +251,13 @@ && compareTimestamps(
         commitTimes.remove(lastInstant.requestedTime());
       }
-      assertEquals(
-          expectedInstantTimeMap.get(
-              Pair.of(partitionPath, fileGroup.getFileGroupId().getFileId())),
-          commitTimes,
-          "Only contain acceptable versions of file should be present");
+      Set<String> expected = expectedInstantTimeMap.get(Pair.of(partitionPath, fileGroup.getFileGroupId().getFileId()));
+      Set<String> actual = commitTimes;
+      if (expected == null) {
Review Comment:
Fixed it
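
Below is a minimal, hypothetical sketch (not the PR's exact code) of the null-tolerant comparison this hunk moves to: when the expected map has no entry for a file group, the observed commit times should be empty; otherwise the two sets should match. The helper class and method names are illustrative only, and the truncated `if (expected == null)` branch above is not reproduced verbatim.

```java
import java.util.Set;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

final class NullTolerantAssertionSketch {
  // Hypothetical helper: compares expected vs. observed commit times for one file group.
  static void assertAcceptableVersions(Set<String> expected, Set<String> actual) {
    if (expected == null) {
      // No entry in the expected map means the file group should retain no versions.
      assertTrue(actual == null || actual.isEmpty(),
          "No versions of the file group should be present");
    } else {
      assertEquals(expected, actual,
          "Only acceptable versions of the file should be present");
    }
  }
}
```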
##########
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java:
##########
@@ -771,14 +783,14 @@ public void testSmallInsertHandlingForUpserts() throws Exception {
     Set<String> keys1 = recordsToRecordKeySet(inserts1);
     JavaRDD<HoodieRecord> insertRecordsRDD1 = jsc.parallelize(inserts1, 1);
-    List<WriteStatus> statuses = client.upsert(insertRecordsRDD1, commitTime1).collect();
+    JavaRDD<WriteStatus> rawStatuses = client.upsert(insertRecordsRDD1, commitTime1);
+    JavaRDD<WriteStatus> statuses = jsc.parallelize(rawStatuses.collect(), 1);
+    writeClient.commit(commitTime1, statuses, Option.empty(), COMMIT_ACTION, Collections.emptyMap(), Option.empty());
-    assertNoWriteErrors(statuses);
Review Comment:
Addressed
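
For context, the change replaces the auto-commit path with an explicit commit of the collected write statuses. The sketch below is hypothetical (class, method, and parameter names are mine); the `upsert(...)` and `commit(...)` calls and their argument lists are taken only from the hunk above and assume this PR's client signature.

```java
import java.util.Collections;
import java.util.List;

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.Option;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

final class ExplicitCommitSketch {
  // Hypothetical helper mirroring the test pattern: upsert, materialize the
  // statuses once, then complete the commit explicitly.
  static JavaRDD<WriteStatus> upsertAndCommit(JavaSparkContext jsc,
                                              SparkRDDWriteClient<?> client,
                                              JavaRDD<HoodieRecord> records,
                                              String commitTime,
                                              String commitAction) {
    // Run the upsert; the commit call below completes the transaction explicitly.
    JavaRDD<WriteStatus> rawStatuses = client.upsert(records, commitTime);
    // Collect once so the same statuses can be validated and committed.
    List<WriteStatus> collected = rawStatuses.collect();
    JavaRDD<WriteStatus> statuses = jsc.parallelize(collected, 1);
    // Argument list copied from the hunk above; it assumes this PR's commit signature.
    client.commit(commitTime, statuses, Option.empty(), commitAction,
        Collections.emptyMap(), Option.empty());
    return statuses;
  }
}
```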
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java:
##########
@@ -514,23 +533,65 @@ public boolean purgePendingClustering(String clusteringInstant) {
     return false;
   }
+  protected abstract HoodieWriteMetadata<O> convertToOutputMetadata(HoodieWriteMetadata<T> writeMetadata);
+
+  private Map<String, List<String>> getPartitionToReplacedFileIds(HoodieClusteringPlan clusteringPlan, HoodieWriteMetadata<?> writeMetadata) {
+    Set<HoodieFileGroupId> newFilesWritten = writeMetadata.getWriteStats().get().stream()
+        .map(s -> new HoodieFileGroupId(s.getPartitionPath(), s.getFileId())).collect(Collectors.toSet());
+
+    return ClusteringUtils.getFileGroupsFromClusteringPlan(clusteringPlan)
+        .filter(fg -> "org.apache.hudi.client.clustering.run.strategy.SparkSingleFileSortExecutionStrategy".equals(config.getClusteringExecutionStrategyClass())
+            || !newFilesWritten.contains(fg))
+        .collect(Collectors.groupingBy(HoodieFileGroupId::getPartitionPath, Collectors.mapping(HoodieFileGroupId::getFileId, Collectors.toList())));
+  }
+
   /**
-   * Delete expired partition by config.
-   *
-   * @param instantTime Instant Time for the action
-   * @return HoodieWriteMetadata
+   * Check if any validators are configured and run those validations. If any of the validations fail, throws HoodieValidationException.
    */
-  public HoodieWriteMetadata<T> managePartitionTTL(String instantTime) {
-    HoodieTable<?, I, ?, T> table = createTable(config, context.getStorageConf());
-    return table.managePartitionTTL(context, instantTime);
-  }
+  protected void runPrecommitValidationForClustering(HoodieWriteMetadata<O> writeMetadata, HoodieTable table, String instantTime) {
+    if (StringUtils.isNullOrEmpty(config.getPreCommitValidators())) {
+      return;
+    }
+    throw new HoodieIOException("Precommit validation not implemented for all engines yet");
+  }
+
+  private void commitClustering(HoodieWriteMetadata<O> clusteringWriteMetadata, HoodieTable table,
Review Comment:
Addressed
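
To make the replaced-file-ids computation easier to follow, here is a standalone sketch of just the grouping step, using only `java.util.stream` collectors; the wrapper class and method are hypothetical, while the `HoodieFileGroupId` accessors match the diff.

```java
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.hudi.common.model.HoodieFileGroupId;

final class ReplacedFileIdsSketch {
  // Groups the file groups selected for replacement into partitionPath -> list of fileIds.
  static Map<String, List<String>> groupByPartition(Stream<HoodieFileGroupId> replacedFileGroups) {
    return replacedFileGroups.collect(
        Collectors.groupingBy(HoodieFileGroupId::getPartitionPath,
            Collectors.mapping(HoodieFileGroupId::getFileId, Collectors.toList())));
  }
}
```

Note the filter in the hunk: every planned file group is treated as replaced when the single-file-sort strategy is configured, presumably because that strategy rewrites data under the same file IDs, so the "new files written" check would otherwise exclude them.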
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java:
##########
@@ -514,23 +533,65 @@ public boolean purgePendingClustering(String clusteringInstant) {
     return false;
   }
+  protected abstract HoodieWriteMetadata<O> convertToOutputMetadata(HoodieWriteMetadata<T> writeMetadata);
+
+  private Map<String, List<String>> getPartitionToReplacedFileIds(HoodieClusteringPlan clusteringPlan, HoodieWriteMetadata<?> writeMetadata) {
Review Comment:
Addressed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]