aweisberg commented on code in PR #4696:
URL: https://github.com/apache/cassandra/pull/4696#discussion_r3029097913


##########
src/java/org/apache/cassandra/replication/MutationTrackingSyncCoordinator.java:
##########
@@ -0,0 +1,427 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.replication;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Iterables;
+
+import org.apache.cassandra.dht.Range;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.exceptions.RequestFailure;
+import org.apache.cassandra.locator.InetAddressAndPort;
+import org.apache.cassandra.net.Message;
+import org.apache.cassandra.net.RequestCallback;
+import org.apache.cassandra.net.Verb;
+import org.apache.cassandra.repair.RepairJobDesc;
+import org.apache.cassandra.repair.SharedContext;
+import org.apache.cassandra.repair.messages.MutationTrackingSyncRequest;
+import org.apache.cassandra.repair.messages.MutationTrackingSyncResponse;
+import org.apache.cassandra.repair.messages.RepairMessage;
+import org.apache.cassandra.tcm.ClusterMetadata;
+import org.apache.cassandra.utils.concurrent.AsyncPromise;
+
+public class MutationTrackingSyncCoordinator
+{
+    private static final Logger logger = 
LoggerFactory.getLogger(MutationTrackingSyncCoordinator.class);
+
+    private final SharedContext ctx;
+    private final RepairJobDesc desc;
+    // Copied from desc.keyspace by the constructor.
+    private final String keyspace;
+    // The only element of desc.ranges (extracted by the constructor).
+    private final Range<Token> range;
+    // Endpoints allowed to take part in the sync; null means no filtering.
+    private final Set<InetAddressAndPort> participants;
+    private final ClusterMetadata metadata;
+    // Succeeded by start() when no shard overlaps the range, and by complete().
+    private final AsyncPromise<Void> completionFuture = new AsyncPromise<>();
+
+    // Per-shard state: tracks what each node has reported for that shard
+    private final Map<Range<Token>, ShardSyncState> shardStates = new 
HashMap<>();
+
+    // Host IDs of participants for scoped offset collection/completion.
+    // Null means all shard participants (no filtering).
+    private Set<Integer> liveHostIds;
+
+    // Flipped to true by the first start() call; a second call throws.
+    private final AtomicBoolean started = new AtomicBoolean(false);
+
+    // Remote participants we are waiting for sync responses from. Completion 
is
+    // not possible until all responses have been received, since remote nodes 
may
+    // report targets that the local node doesn't know about yet.
+    private final Set<InetAddressAndPort> pendingSyncResponses = 
ConcurrentHashMap.newKeySet();
+
+    /**
+     * @param ctx shared context
+     * @param desc repair job descriptor
+     * @param participants the set of remote endpoints that should participate 
in this sync,
+     *                     as determined by the repair options (force, 
specific hosts).
+     *                     Only these endpoints will receive sync requests. If 
null,
+     *                     all remote replicas for overlapping shards will 
participate.
+     * @param metadata the snapshotted cluster metadata used to resolve 
endpoint-to-host-ID mappings
+     */
+    public MutationTrackingSyncCoordinator(SharedContext ctx, RepairJobDesc 
desc, Set<InetAddressAndPort> participants, ClusterMetadata metadata)
+    {
+        this.ctx = ctx;
+        this.desc = desc;
+        this.keyspace = desc.keyspace;
+        // A coordinator covers exactly one range: Guava's getOnlyElement throws
+        // if desc.ranges is empty or contains more than one element.
+        this.range = Iterables.getOnlyElement(desc.ranges);
+        // May be null, in which case start() applies no participant filtering.
+        this.participants = participants;
+        this.metadata = metadata;
+    }
+
+    /**
+     * Begins the sync: collects the shards of the keyspace whose range
+     * intersects the repaired range, builds one ShardSyncState per shard,
+     * registers this coordinator with the MutationTrackingService, captures
+     * local targets, sends sync requests to the remote participants and then
+     * checks whether the sync is already complete.
+     *
+     * If no shard intersects the range, the completion future is succeeded
+     * immediately and nothing is registered or sent.
+     *
+     * @throws IllegalStateException if this coordinator was already started
+     */
+    public void start()
+    {
+        if (!started.compareAndSet(false, true))
+            throw new IllegalStateException("Sync coordinator already 
started");
+
+        List<Shard> overlappingShards = new ArrayList<>();
+        MutationTrackingService.instance().forEachShardInKeyspace(keyspace, 
shard -> {
+            if (shard.range.intersects(range))
+                overlappingShards.add(shard);
+        });
+
+        if (overlappingShards.isEmpty())
+        {
+            completionFuture.setSuccess(null);
+            return;
+        }
+
+        // Convert participant endpoints to host IDs for scoped completion 
checks.
+        // If participants is null (no filtering), all shard participants are 
live.
+        if (participants != null)
+        {
+            liveHostIds = new HashSet<>();
+            for (InetAddressAndPort ep : participants)
+            {
+                liveHostIds.add(metadata.directory.peerId(ep).id());
+            }
+            // Always include the local node
+            
liveHostIds.add(metadata.directory.peerId(ctx.broadcastAddressAndPort()).id());
+        }
+
+        // liveHostIds is still null here when participants was null.
+        for (Shard shard : overlappingShards)
+        {
+            ShardSyncState state = new ShardSyncState(shard, liveHostIds);
+            shardStates.put(shard.range, state);
+        }
+
+        // Register to receive offset updates
+        MutationTrackingService.instance().registerSyncCoordinator(this);
+
+        // Capture local targets
+        // NOTE(review): recaptureTargets() returns nothing, so start() cannot
+        // tell whether it bailed out on a topology change; the steps below run
+        // either way - confirm they are harmless once the repair has failed.
+        recaptureTargets();
+
+        logger.info("Sync coordinator started for keyspace {} range {}, 
tracking {} shards",
+                   keyspace, range, overlappingShards.size());
+
+        // Send sync requests to all remote participants
+        sendSyncRequests();
+
+        // Check if already complete (e.g. single node, no targets)
+        checkIfReadyToComplete();
+    }
+
+    /**
+     * Marks the sync as successfully finished. The coordinator is unregistered
+     * from the MutationTrackingService only when trySuccess reports true
+     * (presumably only for the call that actually completes the future -
+     * confirm against AsyncPromise).
+     */
+    private void complete()
+    {
+        if (completionFuture.trySuccess(null))
+            MutationTrackingService.instance().unregisterSyncCoordinator(this);
+    }
+
+    /**
+     * Sends one MutationTrackingSyncRequest (carrying desc and liveHostIds) to
+     * every remote replica of the tracked shards, restricted to the configured
+     * participants when that set is non-null. Each target is recorded in
+     * pendingSyncResponses before any message is sent. Responses are routed to
+     * onSyncResponse; a failure from any participant fails the whole sync.
+     */
+    private void sendSyncRequests()
+    {
+        MutationTrackingSyncRequest request = new 
MutationTrackingSyncRequest(desc, liveHostIds);
+        // Collect remote replicas, filtering to only allowed participants if 
specified.
+        // This respects --force (which excludes dead nodes) and --hosts (which
+        // restricts to specific nodes).
+        Set<InetAddressAndPort> remoteParticipants = 
ConcurrentHashMap.newKeySet();
+        for (ShardSyncState state : shardStates.values())
+            remoteParticipants.addAll(state.shard.remoteReplicas());
+
+        if (participants != null)
+            remoteParticipants.retainAll(participants);
+
+        // Record every expected responder up front, before the first send.
+        pendingSyncResponses.addAll(remoteParticipants);
+
+        for (InetAddressAndPort participant : remoteParticipants)
+        {
+            logger.debug("Sending mutation tracking sync request to {} for 
{}", participant, desc);
+
+            RepairMessage.sendMessageWithRetries(ctx,
+                                                 
RepairMessage.notDone(completionFuture),
+                                                 request,
+                                                 Verb.MT_SYNC_REQ,
+                                                 participant,
+                                                 new 
RequestCallback<MutationTrackingSyncResponse>()
+                                                 {
+                                                     @Override
+                                                     public void 
onResponse(Message<MutationTrackingSyncResponse> msg)
+                                                     {
+                                                         
onSyncResponse(msg.from(), msg.payload);
+                                                     }
+
+                                                     @Override
+                                                     public void 
onFailure(InetAddressAndPort from, RequestFailure failure)
+                                                     {
+                                                         fail(new 
RuntimeException(
+                                                             
String.format("Mutation tracking sync failed: participant %s returned failure 
%s", from, failure.reason)));
+                                                     }
+
+                                                     // Opt in to onFailure being invoked for failed requests.
+                                                     @Override
+                                                     public boolean 
invokeOnFailure()
+                                                     {
+                                                         return true;
+                                                     }
+                                                 });
+        }
+    }
+
+    /**
+     * Calls captureTargets() on every tracked shard state, unless
+     * checkForTopologyChange() reports a change, in which case nothing is
+     * captured.
+     */
+    private void recaptureTargets()
+    {
+        if (checkForTopologyChange())
+            return;
+
+        for (ShardSyncState state : shardStates.values())
+        {
+            state.captureTargets();
+        }
+    }
+
+    /**
+     * Checks if any of the shards we're tracking have changed due to topology 
updates.
+     * @return true if topology changed (and repair was failed), false if all 
shards are still current
+     */
+    private boolean checkForTopologyChange()
+    {
+        for (ShardSyncState state : shardStates.values())
+        {
+            Shard currentShard = getCurrentShard(state.shard.range);
+            if (currentShard != state.shard)
+            {
+                failWithTopologyChange();

Review Comment:
   Topology changes aren't supported yet; see
   https://issues.apache.org/jira/browse/CASSANDRA-20386
   
   I'll take a look and see if I can at least induce one to exercise this 
failure path.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to