sanpwc commented on code in PR #4433: URL: https://github.com/apache/ignite-3/pull/4433#discussion_r1791375085
########## modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/MetaStorageInfo.java: ########## @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.cluster.management; + +import java.io.Serializable; +import java.util.Set; +import java.util.UUID; +import org.apache.ignite.internal.cluster.management.network.messages.CmgMessageGroup; +import org.apache.ignite.internal.network.NetworkMessage; +import org.apache.ignite.internal.network.annotations.Transferable; +import org.jetbrains.annotations.Nullable; + +/** + * Holds information about Metastorage that is stored in the CMG. + */ +@Transferable(CmgMessageGroup.METASTORAGE_INFO) +public interface MetaStorageInfo extends NetworkMessage, Serializable { + /** + * Names of the nodes that host Meta storage. + */ + Set<String> metaStorageNodes(); + + /** + * Raft index in the Metastorage group under which the forced configuration is (or will be) saved, or {@code null} if no MG Review Comment: What if there were multiple MG force configuration updates. Will we see the latest one here? If true please add the details to the javadoc. Same for metastorageRepairingConfigIndex. ########## modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/ClusterManagementGroupManager.java: ########## @@ -1052,23 +1060,54 @@ public CompletableFuture<Void> onJoinReady() { } /** - * Changes metastorage nodes in the CMG. + * Changes metastorage nodes in the CMG for the graceful Metastorage reconfiguration procedure. * + * @param newMetastorageNodes Metastorage node names to set. * @return Future that completes when the command is executed by the CMG. */ public CompletableFuture<Void> changeMetastorageNodes(Set<String> newMetastorageNodes) { + return changeMetastorageNodesInternal(newMetastorageNodes, null); + } + + /** + * Changes metastorage nodes in the CMG for the forceful (with repair) Metastorage reconfiguration procedure. + * + * @param newMetastorageNodes Metastorage node names to set. + * @param metastorageRepairingConfigIndex Raft index in the Metastorage group under which the forced configuration is + * (or will be) saved. + * @return Future that completes when the command is executed by the CMG. + */ + public CompletableFuture<Void> changeMetastorageNodes(Set<String> newMetastorageNodes, long metastorageRepairingConfigIndex) { + return changeMetastorageNodesInternal(newMetastorageNodes, metastorageRepairingConfigIndex); + } + + private CompletableFuture<Void> changeMetastorageNodesInternal( + Set<String> newMetastorageNodes, + @Nullable Long metastorageRepairingConfigIndex + ) { if (!busyLock.enterBusy()) { return failedFuture(new NodeStoppingException()); } + UUID metastorageRepairClusterId = metastorageRepairingConfigIndex == null ? null : requiredClusterId(); + try { return raftServiceAfterJoin() - .thenCompose(service -> service.changeMetastorageNodes(newMetastorageNodes)); + .thenCompose(service -> service.changeMetastorageNodes( + newMetastorageNodes, + metastorageRepairClusterId, + metastorageRepairingConfigIndex + )); } finally { busyLock.leaveBusy(); } } + private UUID requiredClusterId() { + ClusterState clusterState = clusterStateStorageMgr.getClusterState(); + return requireNonNull(clusterState, "Still no cluster state").clusterTag().clusterId(); Review Comment: Full stop is missing. ########## modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/impl/MetaStorageLearnerManager.java: ########## @@ -123,4 +129,15 @@ private CompletableFuture<Void> updateConfigUnderLock(Supplier<CompletableFuture busyLock.leaveBusy(); } } + + /** + * Disables addition of learners one by one (as a reaction to nodes joining the validated nodes set). + * + * <p>This does NOT affect other ways of changing the learners. + * + * <p>This is only used by test code, and there is no method for enabling learners addition back as this is not needed in our tests. + */ + void disableLearnersAddition() { Review Comment: Let's mark it as @TestOnly? ########## modules/system-disaster-recovery/src/main/java/org/apache/ignite/internal/disaster/system/SystemDisasterRecoveryStorage.java: ########## @@ -97,4 +101,15 @@ void saveResetClusterMessage(ResetClusterMessage message) { public @Nullable ResetClusterMessage readVolatileResetClusterMessage() { return volatileResetClusterMessage; } + + @Override + public @Nullable UUID readWitnessedMetastorageRepairClusterId() { + VaultEntry entry = vault.get(WITNESSED_METASTORAGE_REPAIR_CLUSTER_ID_VAULT_KEY); Review Comment: Just in case. We do replicate id/index with cmg raft snapshot, right? ########## modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/raft/ClusterStateStorageManager.java: ########## @@ -107,6 +113,35 @@ List<LogicalNode> getValidatedNodes() { return storage.getWithPrefix(VALIDATED_NODE_PREFIX, (k, v) -> fromBytes(v)); } + /** + * Saves information about Metastorage repair. + * + * @param repairClusterId ID that the cluster has when performaing the repair. + * @param repairingConfigIndex Raft index in the Metastorage group under which the forced configuration is (or will be) saved. + */ + void saveMetastorageRepairInfo(UUID repairClusterId, long repairingConfigIndex) { + storage.put(METASTORAGE_REPAIR_CLUSTER_ID_KEY, uuidToBytes(repairClusterId)); Review Comment: Do we preserve atomicity of clusterId, index updates? Or in other words, is it possible to read clusterIdV2 and index v1? In case of node failure in the middle of update? ########## modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/impl/MetaStorageManagerImpl.java: ########## @@ -332,60 +337,98 @@ private void listenForRecovery(long targetRevision) { } } - private CompletableFuture<MetaStorageServiceImpl> initializeMetaStorage(Set<String> metaStorageNodes) { + private CompletableFuture<MetaStorageServiceImpl> initializeMetaStorage(MetaStorageInfo metaStorageInfo) { try { - String thisNodeName = clusterService.nodeName(); - - var disruptorConfig = new RaftNodeDisruptorConfiguration("metastorage", 1); - - CompletableFuture<? extends RaftGroupService> localRaftServiceFuture = metaStorageNodes.contains(thisNodeName) - ? startFollowerNode(metaStorageNodes, disruptorConfig) - : startLearnerNode(metaStorageNodes, disruptorConfig); - - raftNodeStarted.complete(null); + if (thisNodeDidNotWitnessMetaStorageRepair(metaStorageInfo)) { Review Comment: Curious whether race between reset and restart on old topology is possible. 1. Cluster = [A,B,C,D], CMG.peers = [D], MG.peers = [A,B,C] 2. [A,B,C] dies. 3. User calls MG::reset(D) 4. In the middle of reset procedure [A,B,C] restores and forms majority. 5. reset(D) also finishes and forms another majority for MG. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org