vinothchandar commented on code in PR #13347:
URL: https://github.com/apache/hudi/pull/13347#discussion_r2114857478
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SimpleConcurrentFileWritesConflictResolutionStrategy.java:
##########
@@ -53,12 +55,59 @@ public class
SimpleConcurrentFileWritesConflictResolutionStrategy
@Override
public Stream<HoodieInstant> getCandidateInstants(HoodieTableMetaClient
metaClient, HoodieInstant currentInstant,
Option<HoodieInstant>
lastSuccessfulInstant) {
+ if
(metaClient.getTableConfig().getTableVersion().greaterThanOrEquals(HoodieTableVersion.EIGHT))
{
+ return getCandidateInstantsV8AndAbove(metaClient, currentInstant,
lastSuccessfulInstant);
+ } else {
+ return getCandidateInstantsPreV8(metaClient, currentInstant,
lastSuccessfulInstant);
+ }
+ }
+
+ /**
+ * To find which instants are conflicting for table versions 8 and above, we
apply the following logic:
+ * <ul>
+ * <li>Get completed instants timeline only for commits that have happened
since the last successful write.</li>
+ * <li>Get any completed replace commit that happened since the last
successful write and any pending replace commit.</li>
+ * </ul>
+ * @param metaClient table meta client
+ * @param currentInstant the instant for the write this client is attempting
to commit
+ * @param lastSuccessfulInstant the last successful write before this commit
started
+ * @return a stream of instants that are candidates for conflict resolution
+ */
+ private Stream<HoodieInstant>
getCandidateInstantsV8AndAbove(HoodieTableMetaClient metaClient, HoodieInstant
currentInstant,
+
Option<HoodieInstant> lastSuccessfulInstant) {
+ HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
+ boolean isMoRTable = metaClient.getTableType() ==
HoodieTableType.MERGE_ON_READ;
+ Stream<HoodieInstant> completedCommitsInstantStream = activeTimeline
+ .getCommitsTimeline()
+ .filterCompletedInstants()
+ .filter(instant -> !isMoRTable ||
!instant.getAction().equals(HoodieTimeline.COMMIT_ACTION))
+ .findInstantsAfter(lastSuccessfulInstant.isPresent() ?
lastSuccessfulInstant.get().requestedTime() : HoodieTimeline.INIT_INSTANT_TS)
+ .getInstantsAsStream();
+
+ Stream<HoodieInstant> clusteringAndReplaceCommitInstants = activeTimeline
Review Comment:
Let's pull this into a private method that is called from both places?
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SimpleConcurrentFileWritesConflictResolutionStrategy.java:
##########
@@ -53,12 +55,59 @@ public class
SimpleConcurrentFileWritesConflictResolutionStrategy
@Override
public Stream<HoodieInstant> getCandidateInstants(HoodieTableMetaClient
metaClient, HoodieInstant currentInstant,
Option<HoodieInstant>
lastSuccessfulInstant) {
+ if
(metaClient.getTableConfig().getTableVersion().greaterThanOrEquals(HoodieTableVersion.EIGHT))
{
+ return getCandidateInstantsV8AndAbove(metaClient, currentInstant,
lastSuccessfulInstant);
+ } else {
+ return getCandidateInstantsPreV8(metaClient, currentInstant,
lastSuccessfulInstant);
+ }
+ }
+
+ /**
+ * To find which instants are conflicting for table versions 8 and above, we
apply the following logic:
+ * <ul>
+ * <li>Get completed instants timeline only for commits that have happened
since the last successful write.</li>
+ * <li>Get any completed replace commit that happened since the last
successful write and any pending replace commit.</li>
+ * </ul>
+ * @param metaClient table meta client
+ * @param currentInstant the instant for the write this client is attempting
to commit
+ * @param lastSuccessfulInstant the last successful write before this commit
started
+ * @return a stream of instants that are candidates for conflict resolution
+ */
+ private Stream<HoodieInstant>
getCandidateInstantsV8AndAbove(HoodieTableMetaClient metaClient, HoodieInstant
currentInstant,
+
Option<HoodieInstant> lastSuccessfulInstant) {
+ HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
+ boolean isMoRTable = metaClient.getTableType() ==
HoodieTableType.MERGE_ON_READ;
+ Stream<HoodieInstant> completedCommitsInstantStream = activeTimeline
+ .getCommitsTimeline()
+ .filterCompletedInstants()
+ .filter(instant -> !isMoRTable ||
!instant.getAction().equals(HoodieTimeline.COMMIT_ACTION))
+ .findInstantsAfter(lastSuccessfulInstant.isPresent() ?
lastSuccessfulInstant.get().requestedTime() : HoodieTimeline.INIT_INSTANT_TS)
Review Comment:
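Let's use `lastSuccessfulInstant.map().orElse()` here, e.g.
`lastSuccessfulInstant.map(HoodieInstant::requestedTime).orElse(HoodieTimeline.INIT_INSTANT_TS)`?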
##########
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleConcurrentFileWritesConflictResolutionStrategy.java:
##########
@@ -224,16 +228,23 @@ public void
testConcurrentWritesWithInterleavingScheduledCompaction() throws Exc
metaClient.reloadActiveTimeline();
List<HoodieInstant> candidateInstants =
strategy.getCandidateInstants(metaClient, currentInstant.get(),
lastSuccessfulInstant).collect(
Collectors.toList());
- // writer 1 conflicts with scheduled compaction plan 1
- Assertions.assertEquals(1, candidateInstants.size());
- ConcurrentOperation thatCommitOperation = new
ConcurrentOperation(candidateInstants.get(0), metaClient);
- ConcurrentOperation thisCommitOperation = new
ConcurrentOperation(currentInstant.get(), currentMetadata);
- Assertions.assertTrue(strategy.hasConflict(thisCommitOperation,
thatCommitOperation));
- Assertions.assertThrows(HoodieWriteConflictException.class, () ->
strategy.resolveConflict(null, thisCommitOperation, thatCommitOperation));
+ if (preTableVersion8) {
+ // writer 1 conflicts with scheduled compaction plan 1
+ Assertions.assertEquals(1, candidateInstants.size());
+ ConcurrentOperation thatCommitOperation = new
ConcurrentOperation(candidateInstants.get(0), metaClient);
+ ConcurrentOperation thisCommitOperation = new
ConcurrentOperation(currentInstant.get(), currentMetadata);
+ Assertions.assertTrue(strategy.hasConflict(thisCommitOperation,
thatCommitOperation));
+ Assertions.assertThrows(HoodieWriteConflictException.class, () ->
strategy.resolveConflict(null, thisCommitOperation, thatCommitOperation));
+ } else {
+ // writer will not have conflicts with compaction since ordering is now
based on completion time to avoid these conflicts
+ Assertions.assertTrue(candidateInstants.isEmpty());
+ }
}
- @Test
- public void testConcurrentWritesWithInterleavingSuccessfulCompaction()
throws Exception {
+ @ParameterizedTest
+ @ValueSource(booleans = {false, true})
+ public void testConcurrentWritesWithInterleavingSuccessfulCompaction(boolean
preTableVersion8) throws Exception {
+ initMetaClient(preTableVersion8, HoodieTableType.MERGE_ON_READ);
Review Comment:
I guess there are only two scenarios that are different, so parameterizing
the entire test class is overkill.
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SimpleConcurrentFileWritesConflictResolutionStrategy.java:
##########
@@ -53,12 +55,59 @@ public class
SimpleConcurrentFileWritesConflictResolutionStrategy
@Override
public Stream<HoodieInstant> getCandidateInstants(HoodieTableMetaClient
metaClient, HoodieInstant currentInstant,
Option<HoodieInstant>
lastSuccessfulInstant) {
+ if
(metaClient.getTableConfig().getTableVersion().greaterThanOrEquals(HoodieTableVersion.EIGHT))
{
+ return getCandidateInstantsV8AndAbove(metaClient, currentInstant,
lastSuccessfulInstant);
+ } else {
+ return getCandidateInstantsPreV8(metaClient, currentInstant,
lastSuccessfulInstant);
+ }
+ }
+
+ /**
+ * To find which instants are conflicting for table versions 8 and above, we
apply the following logic:
+ * <ul>
+ * <li>Get completed instants timeline only for commits that have happened
since the last successful write.</li>
+ * <li>Get any completed replace commit that happened since the last
successful write and any pending replace commit.</li>
+ * </ul>
+ * @param metaClient table meta client
+ * @param currentInstant the instant for the write this client is attempting
to commit
+ * @param lastSuccessfulInstant the last successful write before this commit
started
+ * @return a stream of instants that are candidates for conflict resolution
+ */
+ private Stream<HoodieInstant>
getCandidateInstantsV8AndAbove(HoodieTableMetaClient metaClient, HoodieInstant
currentInstant,
+
Option<HoodieInstant> lastSuccessfulInstant) {
+ HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
+ boolean isMoRTable = metaClient.getTableType() ==
HoodieTableType.MERGE_ON_READ;
+ Stream<HoodieInstant> completedCommitsInstantStream = activeTimeline
+ .getCommitsTimeline()
+ .filterCompletedInstants()
+ .filter(instant -> !isMoRTable ||
!instant.getAction().equals(HoodieTimeline.COMMIT_ACTION))
+ .findInstantsAfter(lastSuccessfulInstant.isPresent() ?
lastSuccessfulInstant.get().requestedTime() : HoodieTimeline.INIT_INSTANT_TS)
Review Comment:
Similar here. The only thing that's different is the extra filter, so we
could pull the rest into a private method shared by both paths, and then apply
the extra filter.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]