This is an automated email from the ASF dual-hosted git repository.

samt pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git

commit 496959a8f21adbcdfee0a0a586d9d2596a5d856f
Author: Sam Tunnicliffe <[email protected]>
AuthorDate: Mon Nov 10 09:09:56 2025 +0000

    Don't update registration status if node state for decommissioned peer is 
found with the same address
    
    Patch by Sam Tunnicliffe; reviewed by Marcus Eriksson for CASSANDRA-21005
---
 CHANGES.txt                                        |  1 +
 .../apache/cassandra/service/CassandraDaemon.java  |  8 +++-
 .../distributed/impl/AbstractCluster.java          | 11 +++++
 .../distributed/test/ring/DecommissionTest.java    | 49 ++++++++++++++++++++++
 4 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index ac61998c3a..7b7ff72e81 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 5.1
+ * Don't update registration status if node state for decommissioned peer is 
found with the same address (CASSANDRA-21005)
  * Avoid NPE when meta keyspace placements are empty before CMS is initialized 
(CASSANDRA-21004)
  * Gossip entries for hibernating non-members don't block truncate 
(CASSANDRA-21003)
  * Retry without time limit calculates wait time incorrectly (CASSANDRA-21002)
diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java 
b/src/java/org/apache/cassandra/service/CassandraDaemon.java
index 2f85e6e58e..973dfbfe92 100644
--- a/src/java/org/apache/cassandra/service/CassandraDaemon.java
+++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java
@@ -90,6 +90,7 @@ import org.apache.cassandra.service.snapshot.SnapshotManager;
 import org.apache.cassandra.streaming.StreamManager;
 import org.apache.cassandra.tcm.ClusterMetadata;
 import org.apache.cassandra.tcm.MultiStepOperation;
+import org.apache.cassandra.tcm.membership.NodeState;
 import org.apache.cassandra.utils.FBUtilities;
 import org.apache.cassandra.utils.JMXServerUtils;
 import org.apache.cassandra.utils.JVMStabilityInspector;
@@ -278,7 +279,12 @@ public class CassandraDaemon
             
disableAutoCompaction(Schema.instance.distributedKeyspaces().names());
             CMSOperations.initJmx();
             AccordOperations.initJmx();
-            if (ClusterMetadata.current().myNodeId() != null)
+            NodeState nodeStateForLocalAddress = 
ClusterMetadata.current().myNodeState();
+            // If another node with the same address was previously a member 
and was decommissioned, it can be
+            // present in ClusterMetadata with a LEFT state. That should not 
trigger _this_ node to update
+            // RegistrationStatus. During the startup process the old node 
will be expunged and this node
+            // will register, prompting another call to onRegistration.
+            if (nodeStateForLocalAddress != null && nodeStateForLocalAddress 
!= NodeState.LEFT)
                 RegistrationStatus.instance.onRegistration();
         }
         catch (InterruptedException | ExecutionException | IOException e)
diff --git 
a/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java 
b/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java
index 224adabdb1..533a7b49d5 100644
--- 
a/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java
+++ 
b/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java
@@ -697,6 +697,17 @@ public abstract class AbstractCluster<I extends IInstance> 
implements ICluster<I
         return instance;
     }
 
+    public synchronized void unsafeRemoveNode(I toRemove)
+    {
+        instances.remove(toRemove);
+        instanceMap.remove(toRemove.broadcastAddress(), toRemove);
+    }
+
+    public void unsafeUpdateNodeIdTopology(int num, NetworkTopology.DcAndRack 
location)
+    {
+        nodeIdTopology.put(num, location);
+    }
+
     /**
      * WARNING: we index from 1 here, for consistency with inet address!
      */
diff --git 
a/test/distributed/org/apache/cassandra/distributed/test/ring/DecommissionTest.java
 
b/test/distributed/org/apache/cassandra/distributed/test/ring/DecommissionTest.java
index 6f67dac03b..db2f2eca6d 100644
--- 
a/test/distributed/org/apache/cassandra/distributed/test/ring/DecommissionTest.java
+++ 
b/test/distributed/org/apache/cassandra/distributed/test/ring/DecommissionTest.java
@@ -34,13 +34,19 @@ import net.bytebuddy.ByteBuddy;
 import net.bytebuddy.dynamic.loading.ClassLoadingStrategy;
 import net.bytebuddy.implementation.MethodDelegation;
 import net.bytebuddy.implementation.bind.annotation.SuperCall;
+import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.cql3.QueryProcessor;
 import org.apache.cassandra.distributed.Cluster;
+import org.apache.cassandra.distributed.Constants;
 import org.apache.cassandra.distributed.api.ConsistencyLevel;
+import org.apache.cassandra.distributed.api.IInstanceConfig;
+import org.apache.cassandra.distributed.api.IInvokableInstance;
 import org.apache.cassandra.distributed.api.NodeToolResult;
+import org.apache.cassandra.distributed.api.TokenSupplier;
 import org.apache.cassandra.distributed.shared.ClusterUtils;
 import org.apache.cassandra.distributed.test.TestBaseImpl;
 import org.apache.cassandra.gms.Gossiper;
+import org.apache.cassandra.io.util.File;
 import org.apache.cassandra.service.StorageService;
 import org.apache.cassandra.streaming.StreamSession;
 import org.apache.cassandra.tcm.ClusterMetadata;
@@ -55,6 +61,8 @@ import org.apache.cassandra.utils.FBUtilities;
 import static net.bytebuddy.matcher.ElementMatchers.named;
 import static org.apache.cassandra.distributed.api.Feature.GOSSIP;
 import static org.apache.cassandra.distributed.api.Feature.NETWORK;
+import static 
org.apache.cassandra.distributed.shared.NetworkTopology.dcAndRack;
+import static 
org.apache.cassandra.distributed.shared.NetworkTopology.networkTopology;
 import static 
org.apache.cassandra.distributed.test.ring.BootstrapTest.populate;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -75,6 +83,47 @@ public class DecommissionTest extends TestBaseImpl
         }
     }
 
+    @Test
+    public void testAddressReuseAfterDecommission() throws IOException, 
ExecutionException, InterruptedException
+    {
+        // Initially, all nodes should be in dc1/rack1. Node 3 will be 
decommissioned and a new node added re-using
+        // node 3's address. When the new node registers, it should be in 
dc2/rack2.
+        // For now, this requires the accord service to disabled. See 
CASSANDRA-21026
+        try (Cluster cluster = builder().withNodes(3)
+                                        
.withTokenSupplier(TokenSupplier.evenlyDistributedTokens(4))
+                                        .withConfig(config -> 
config.with(NETWORK, GOSSIP)
+                                                                    
.set("accord.enabled", false))
+                                        .withNodeIdTopology(networkTopology(3, 
(id) -> dcAndRack("dc1", "rack1")))
+                                        .start())
+        {
+            assertEquals("dc1/rack1", cluster.get(1).callOnInstance(() -> 
DatabaseDescriptor.getLocator().local().toString()));
+            assertEquals("dc1/rack1", cluster.get(2).callOnInstance(() -> 
DatabaseDescriptor.getLocator().local().toString()));
+            assertEquals("dc1/rack1", cluster.get(3).callOnInstance(() -> 
DatabaseDescriptor.getLocator().local().toString()));
+
+            IInvokableInstance toRemove = cluster.get(3);
+            toRemove.nodetoolResult("decommission", 
"--force").asserts().success();
+            toRemove.shutdown().get();
+            
ClusterUtils.getDirectories(toRemove).forEach(File::tryDeleteRecursive);
+            cluster.unsafeRemoveNode(toRemove);
+
+            // Now add a new node, using the same address as the one we just 
removed. This new node should register
+            // itself in dc2/rack2 and not inherit the location of its 
predecessor.
+            // Note: because we have removed the original node3 from the 
cluster completely, which is necessary because
+            // the cluster will complain about an id clash otherwise, this new 
node will also be "node3". However, it is
+            // completely distinct from the original one.
+            cluster.unsafeUpdateNodeIdTopology(toRemove.config().num(), 
dcAndRack("dc2", "rack2"));
+            IInstanceConfig config = cluster.newInstanceConfig()
+                                            .set("auto_bootstrap", true)
+                                            
.set(Constants.KEY_DTEST_FULL_STARTUP, true);
+            IInvokableInstance newInstance = cluster.bootstrap(config);
+            newInstance.startup();
+
+            assertEquals("dc1/rack1", cluster.get(1).callOnInstance(() -> 
DatabaseDescriptor.getLocator().local().toString()));
+            assertEquals("dc1/rack1", cluster.get(2).callOnInstance(() -> 
DatabaseDescriptor.getLocator().local().toString()));
+            assertEquals("dc2/rack2", newInstance.callOnInstance(() -> 
DatabaseDescriptor.getLocator().local().toString()));
+        }
+    }
+
     public static class BB
     {
         static void install(ClassLoader cl, int nodeNumber)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to