maedhroz commented on code in PR #4696: URL: https://github.com/apache/cassandra/pull/4696#discussion_r3024241591
########## src/java/org/apache/cassandra/replication/MutationTrackingSyncCoordinator.java: ########## @@ -0,0 +1,427 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.replication; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Iterables; + +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.repair.RepairJobDesc; +import org.apache.cassandra.repair.SharedContext; +import org.apache.cassandra.repair.messages.MutationTrackingSyncRequest; +import org.apache.cassandra.repair.messages.MutationTrackingSyncResponse; +import org.apache.cassandra.repair.messages.RepairMessage; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.concurrent.AsyncPromise; + +public class MutationTrackingSyncCoordinator +{ + private static final Logger logger = LoggerFactory.getLogger(MutationTrackingSyncCoordinator.class); + + private final SharedContext ctx; + private final RepairJobDesc desc; + private final String keyspace; + private final Range<Token> range; + private final Set<InetAddressAndPort> participants; + private final ClusterMetadata metadata; + private final AsyncPromise<Void> completionFuture = new AsyncPromise<>(); + + // Per-shard state: tracks what each node has reported for that shard + private final Map<Range<Token>, ShardSyncState> shardStates = new HashMap<>(); + + // Host IDs of participants for scoped offset collection/completion. + // Null means all shard participants (no filtering). + private Set<Integer> liveHostIds; + + private final AtomicBoolean started = new AtomicBoolean(false); + + // Remote participants we are waiting for sync responses from. Completion is + // not possible until all responses have been received, since remote nodes may + // report targets that the local node doesn't know about yet. + private final Set<InetAddressAndPort> pendingSyncResponses = ConcurrentHashMap.newKeySet(); + + /** + * @param ctx shared context + * @param desc repair job descriptor + * @param participants the set of remote endpoints that should participate in this sync, + * as determined by the repair options (force, specific hosts). + * Only these endpoints will receive sync requests. If null, + * all remote replicas for overlapping shards will participate. + * @param metadata the snapshotted cluster metadata used to resolve endpoint-to-host-ID mappings + */ + public MutationTrackingSyncCoordinator(SharedContext ctx, RepairJobDesc desc, Set<InetAddressAndPort> participants, ClusterMetadata metadata) + { + this.ctx = ctx; + this.desc = desc; + this.keyspace = desc.keyspace; + this.range = Iterables.getOnlyElement(desc.ranges); + this.participants = participants; + this.metadata = metadata; + } + + public void start() + { + if (!started.compareAndSet(false, true)) + throw new IllegalStateException("Sync coordinator already started"); + + List<Shard> overlappingShards = new ArrayList<>(); + MutationTrackingService.instance().forEachShardInKeyspace(keyspace, shard -> { + if (shard.range.intersects(range)) + overlappingShards.add(shard); + }); + + if (overlappingShards.isEmpty()) + { + completionFuture.setSuccess(null); + return; + } + + // Convert participant endpoints to host IDs for scoped completion checks. + // If participants is null (no filtering), all shard participants are live. + if (participants != null) + { + liveHostIds = new HashSet<>(); + for (InetAddressAndPort ep : participants) + { + liveHostIds.add(metadata.directory.peerId(ep).id()); + } + // Always include the local node + liveHostIds.add(metadata.directory.peerId(ctx.broadcastAddressAndPort()).id()); + } + + for (Shard shard : overlappingShards) + { + ShardSyncState state = new ShardSyncState(shard, liveHostIds); + shardStates.put(shard.range, state); + } + + // Register to receive offset updates + MutationTrackingService.instance().registerSyncCoordinator(this); + + // Capture local targets + recaptureTargets(); + + logger.info("Sync coordinator started for keyspace {} range {}, tracking {} shards", + keyspace, range, overlappingShards.size()); + + // Send sync requests to all remote participants + sendSyncRequests(); + + // Check if already complete (e.g. single node, no targets) + checkIfReadyToComplete(); + } + + private void complete() + { + if (completionFuture.trySuccess(null)) + MutationTrackingService.instance().unregisterSyncCoordinator(this); + } + + private void sendSyncRequests() + { + MutationTrackingSyncRequest request = new MutationTrackingSyncRequest(desc, liveHostIds); + // Collect remote replicas, filtering to only allowed participants if specified. + // This respects --force (which excludes dead nodes) and --hosts (which + // restricts to specific nodes). + Set<InetAddressAndPort> remoteParticipants = ConcurrentHashMap.newKeySet(); + for (ShardSyncState state : shardStates.values()) + remoteParticipants.addAll(state.shard.remoteReplicas()); + + if (participants != null) + remoteParticipants.retainAll(participants); + + pendingSyncResponses.addAll(remoteParticipants); + + for (InetAddressAndPort participant : remoteParticipants) + { + logger.debug("Sending mutation tracking sync request to {} for {}", participant, desc); + + RepairMessage.sendMessageWithRetries(ctx, + RepairMessage.notDone(completionFuture), + request, + Verb.MT_SYNC_REQ, + participant, + new RequestCallback<MutationTrackingSyncResponse>() + { + @Override + public void onResponse(Message<MutationTrackingSyncResponse> msg) + { + onSyncResponse(msg.from(), msg.payload); + } + + @Override + public void onFailure(InetAddressAndPort from, RequestFailure failure) + { + fail(new RuntimeException( + String.format("Mutation tracking sync failed: participant %s returned failure %s", from, failure.reason))); Review Comment: nit: Coverage tooling indicates this might not be tested. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]

