rpuch commented on code in PR #4684: URL: https://github.com/apache/ignite-3/pull/4684#discussion_r1833803412
########## modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java: ########## @@ -1086,16 +1097,64 @@ private void idleSafeTimeSync() { } } - private void sendSafeTimeSyncIfReplicaReady(CompletableFuture<Replica> replicaFuture) { - if (isCompletedSuccessfully(replicaFuture)) { - Replica replica = replicaFuture.join(); + private void sendSafeTimeSyncIfReplicaReady(CompletableFuture<Replica> replicaFuture, HybridTimestamp proposedSafeTime) { + if (!isCompletedSuccessfully(replicaFuture)) { + return; + } - ReplicaSafeTimeSyncRequest req = REPLICA_MESSAGES_FACTORY.replicaSafeTimeSyncRequest() - .groupId(toReplicationGroupIdMessage(replica.groupId())) - .build(); + Replica replica = replicaFuture.join(); - replica.processRequest(req, localNodeId); + if (!shouldAdvanceIdleSafeTime(replica, proposedSafeTime)) { + // If previous attempt might still be waiting on the Metastorage SafeTime, we should not send the command ourselves as this + // might be an indicator that Metastorage SafeTime has stuck for some time; if we send the command, it will have to add its + // future, increasing (most probably, uselessly) heap pressure. 
+ return; } + + getOrCreateContext(replica).lastIdleSafeTimeProposal = proposedSafeTime; + + ReplicaSafeTimeSyncRequest req = REPLICA_MESSAGES_FACTORY.replicaSafeTimeSyncRequest() + .groupId(toReplicationGroupIdMessage(replica.groupId())) + .proposedSafeTime(proposedSafeTime) + .build(); + + replica.processRequest(req, localNodeId).whenComplete((res, ex) -> { + if (ex != null) { + LOG.error("Could not advance safe time for {} to {}", replica.groupId(), proposedSafeTime); + } + }); + } + + private boolean shouldAdvanceIdleSafeTime(Replica replica, HybridTimestamp proposedSafeTime) { + if (placementDriver.getCurrentPrimaryReplica(replica.groupId(), proposedSafeTime) != null) { + // We know a primary is guaranteed to be valid at the proposed safe time, so, when processing the SafeTime propagation request, + // we'll not need to wait for Metastorage SafeTime, so processing will be cheap. + return true; + } + + // Ok, an attempt to advance idle safe time might cause a wait on Metastorage SafeTime. + // Let's see: if Metastorage SafeTime is still not enough to avoid waiting on it even for our PREVIOUS attempt to advance + // partition SafeTime, then it makes sense to skip the current attempt (as the future from the previous attempt is still there + // and takes memory; if we make the current attempt, we are guaranteed to add another future). There will be next attempt + // to advance the partition SafeTime. + + ReplicaContext context = getOrCreateContext(replica); + + HybridTimestamp lastIdleSafeTimeProposal = context.lastIdleSafeTimeProposal; + if (lastIdleSafeTimeProposal == null) { + // No previous attempt, we have to do it ourselves. + return true; + } + + // This is the Metastorage SafeTime that was needed to be achieved for previous attempt to check that this node is still a primary. + // If it's already achieved, then previous attempt is unblocked (most probably already finished), so we should proceed. 
+ HybridTimestamp requiredMsSafeTime = lastIdleSafeTimeProposal.addPhysicalTime(clockService.maxClockSkewMillis()); + + return clusterTime.currentSafeTime().compareTo(requiredMsSafeTime) >= 0; + } + + private ReplicaContext getOrCreateContext(Replica replica) { + return contexts.computeIfAbsent(replica.groupId(), k -> new ReplicaContext()); Review Comment: Added removal of the context when the replica is stopped. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org