This is an automated email from the ASF dual-hosted git repository. samt pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
commit 496959a8f21adbcdfee0a0a586d9d2596a5d856f Author: Sam Tunnicliffe <[email protected]> AuthorDate: Mon Nov 10 09:09:56 2025 +0000 Don't update registration status if node state for decommissioned peer is found with the same address Patch by Sam Tunnicliffe; reviewed by Marcus Eriksson for CASSANDRA-21005 --- CHANGES.txt | 1 + .../apache/cassandra/service/CassandraDaemon.java | 8 +++- .../distributed/impl/AbstractCluster.java | 11 +++++ .../distributed/test/ring/DecommissionTest.java | 49 ++++++++++++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index ac61998c3a..7b7ff72e81 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Don't update registration status if node state for decommissioned peer is found with the same address (CASSANDRA-21005) * Avoid NPE when meta keyspace placements are empty before CMS is initialized (CASSANDRA-21004) * Gossip entries for hibernating non-members don't block truncate (CASSANDRA-21003) * Retry without time limit calculates wait time incorrectly (CASSANDRA-21002) diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index 2f85e6e58e..973dfbfe92 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ -90,6 +90,7 @@ import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.streaming.StreamManager; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.MultiStepOperation; +import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.JMXServerUtils; import org.apache.cassandra.utils.JVMStabilityInspector; @@ -278,7 +279,12 @@ public class CassandraDaemon disableAutoCompaction(Schema.instance.distributedKeyspaces().names()); CMSOperations.initJmx(); AccordOperations.initJmx(); - if (ClusterMetadata.current().myNodeId() != null) + NodeState nodeStateForLocalAddress = ClusterMetadata.current().myNodeState(); + // If another node with the same address was previously a member and was decommissioned, it can be + // present in ClusterMetadata with a LEFT state. That should not trigger _this_ node to update + // RegistrationStatus. During the startup process the old node will be expunged and this node + // will register, prompting another call to onRegistration. + if (nodeStateForLocalAddress != null && nodeStateForLocalAddress != NodeState.LEFT) RegistrationStatus.instance.onRegistration(); } catch (InterruptedException | ExecutionException | IOException e) diff --git a/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java b/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java index 224adabdb1..533a7b49d5 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java @@ -697,6 +697,17 @@ public abstract class AbstractCluster<I extends IInstance> implements ICluster<I return instance; } + public synchronized void unsafeRemoveNode(I toRemove) + { + instances.remove(toRemove); + instanceMap.remove(toRemove.broadcastAddress(), toRemove); + } + + public void unsafeUpdateNodeIdTopology(int num, NetworkTopology.DcAndRack location) + { + nodeIdTopology.put(num, location); + } + /** * WARNING: we index from 1 here, for consistency with inet address! */ diff --git a/test/distributed/org/apache/cassandra/distributed/test/ring/DecommissionTest.java b/test/distributed/org/apache/cassandra/distributed/test/ring/DecommissionTest.java index 6f67dac03b..db2f2eca6d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ring/DecommissionTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ring/DecommissionTest.java @@ -34,13 +34,19 @@ import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; import net.bytebuddy.implementation.bind.annotation.SuperCall; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.Constants; import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IInstanceConfig; +import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.NodeToolResult; +import org.apache.cassandra.distributed.api.TokenSupplier; import org.apache.cassandra.distributed.shared.ClusterUtils; import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.tcm.ClusterMetadata; @@ -55,6 +61,8 @@ import org.apache.cassandra.utils.FBUtilities; import static net.bytebuddy.matcher.ElementMatchers.named; import static org.apache.cassandra.distributed.api.Feature.GOSSIP; import static org.apache.cassandra.distributed.api.Feature.NETWORK; +import static org.apache.cassandra.distributed.shared.NetworkTopology.dcAndRack; +import static org.apache.cassandra.distributed.shared.NetworkTopology.networkTopology; import static org.apache.cassandra.distributed.test.ring.BootstrapTest.populate; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -75,6 +83,47 @@ public class DecommissionTest extends TestBaseImpl } } + @Test + public void testAddressReuseAfterDecommission() throws IOException, ExecutionException, InterruptedException + { + // Initially, all nodes should be in dc1/rack1. Node 3 will be decommissioned and a new node added re-using + // node 3's address. When the new node registers, it should be in dc2/rack2. + // For now, this requires the accord service to disabled. See CASSANDRA-21026 + try (Cluster cluster = builder().withNodes(3) + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(4)) + .withConfig(config -> config.with(NETWORK, GOSSIP) + .set("accord.enabled", false)) + .withNodeIdTopology(networkTopology(3, (id) -> dcAndRack("dc1", "rack1"))) + .start()) + { + assertEquals("dc1/rack1", cluster.get(1).callOnInstance(() -> DatabaseDescriptor.getLocator().local().toString())); + assertEquals("dc1/rack1", cluster.get(2).callOnInstance(() -> DatabaseDescriptor.getLocator().local().toString())); + assertEquals("dc1/rack1", cluster.get(3).callOnInstance(() -> DatabaseDescriptor.getLocator().local().toString())); + + IInvokableInstance toRemove = cluster.get(3); + toRemove.nodetoolResult("decommission", "--force").asserts().success(); + toRemove.shutdown().get(); + ClusterUtils.getDirectories(toRemove).forEach(File::tryDeleteRecursive); + cluster.unsafeRemoveNode(toRemove); + + // Now add a new node, using the same address as the one we just removed. This new node should register + // itself in dc2/rack2 and not inherit the location of its predecessor. + // Note: because we have removed the original node3 from the cluster completely, which is necessary because + // the cluster will complain about an id clash otherwise, this new node will also be "node3". However, it is + // completely distinct from the original one. + cluster.unsafeUpdateNodeIdTopology(toRemove.config().num(), dcAndRack("dc2", "rack2")); + IInstanceConfig config = cluster.newInstanceConfig() + .set("auto_bootstrap", true) + .set(Constants.KEY_DTEST_FULL_STARTUP, true); + IInvokableInstance newInstance = cluster.bootstrap(config); + newInstance.startup(); + + assertEquals("dc1/rack1", cluster.get(1).callOnInstance(() -> DatabaseDescriptor.getLocator().local().toString())); + assertEquals("dc1/rack1", cluster.get(2).callOnInstance(() -> DatabaseDescriptor.getLocator().local().toString())); + assertEquals("dc2/rack2", newInstance.callOnInstance(() -> DatabaseDescriptor.getLocator().local().toString())); + } + } + public static class BB { static void install(ClassLoader cl, int nodeNumber) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
