DaanHoogland commented on code in PR #10417: URL: https://github.com/apache/cloudstack/pull/10417#discussion_r1984022283
########## engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java: ########## @@ -388,6 +405,11 @@ public void onManagementServerCancelMaintenance() { _monitorExecutor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("AgentMonitor")); _monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS); } + if (newAgentConnectionsMonitor.isShutdown()) { + final int cleanupTimeInSecs = Wait.value(); + newAgentConnectionsMonitor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("NewAgentConnectionsMonitor")); + newAgentConnectionsMonitor.scheduleAtFixedRate(new AgentNewConnectionsMonitorTask(), cleanupTimeInSecs, cleanupTimeInSecs, TimeUnit.SECONDS); + } Review Comment: new method? ########## utils/src/main/java/com/cloud/utils/nio/NioConnection.java: ########## @@ -83,19 +83,21 @@ public abstract class NioConnection implements Callable<Boolean> { protected Set<SocketChannel> socketChannels = new HashSet<>(); protected Integer sslHandshakeTimeout = null; private final int factoryMaxNewConnectionsCount; + protected boolean blockNewConnections; public NioConnection(final String name, final int port, final int workers, final HandlerFactory factory) { _name = name; _isRunning = false; + blockNewConnections = false; _selector = null; _port = port; _workers = workers; _factory = factory; this.factoryMaxNewConnectionsCount = factory.getMaxConcurrentNewConnectionsCount(); - _executor = new ThreadPoolExecutor(workers, 5 * workers, 1, TimeUnit.DAYS, - new LinkedBlockingQueue<>(5 * workers), new NamedThreadFactory(name + "-Handler"), + _executor = new ThreadPoolExecutor(_workers, 5 * _workers, 1, TimeUnit.DAYS, + new LinkedBlockingQueue<>(5 * _workers), new NamedThreadFactory(_name + "-Handler"), new ThreadPoolExecutor.AbortPolicy()); - String sslHandshakeHandlerName = name + "-SSLHandshakeHandler"; + String sslHandshakeHandlerName = _name + "-SSLHandshakeHandler"; Review Comment: if we no longer use the method parameters, let's remove those. ########## plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java: ########## @@ -381,9 +430,9 @@ public ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaint throw new CloudRuntimeException("Management server is not in the right state to prepare for maintenance"); } - final List<ManagementServerHostVO> preparingForMaintenanceMsList = msHostDao.listBy(State.PreparingForMaintenance); - if (CollectionUtils.isNotEmpty(preparingForMaintenanceMsList)) { - throw new CloudRuntimeException("Cannot prepare for maintenance, there are other management servers preparing for maintenance"); + final List<ManagementServerHostVO> preparingForMaintenanceOrShutDownMsList = msHostDao.listBy(State.PreparingForMaintenance, State.PreparingForShutDown); + if (CollectionUtils.isNotEmpty(preparingForMaintenanceOrShutDownMsList)) { + throw new CloudRuntimeException("Cannot prepare for maintenance, there are other management servers preparing for maintenance/shutdown"); } Review Comment: new method ########## plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java: ########## @@ -319,22 +370,23 @@ public ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cm } if (State.Up.equals(msHost.getState())) { + final List<ManagementServerHostVO> preparingForMaintenanceOrShutDownMsList = msHostDao.listBy(State.PreparingForMaintenance, State.PreparingForShutDown); + if (CollectionUtils.isNotEmpty(preparingForMaintenanceOrShutDownMsList)) { + throw new CloudRuntimeException("Cannot trigger shutdown now, there are other management servers preparing for maintenance/shutdown"); + } msHostDao.updateState(msHost.getId(), State.PreparingForShutDown); } Review Comment: new method ########## agent/src/main/java/com/cloud/agent/Agent.java: ########## @@ -1003,9 +1010,12 @@ public void processResponse(final Response response, final Link link) { for (final IAgentControlListener listener : controlListeners) { listener.processControlResponse(response, (AgentControlAnswer)answer); } - } else if (answer instanceof PingAnswer && (((PingAnswer) answer).isSendStartup()) && reconnectAllowed) { - logger.info("Management server requested startup command to reinitialize the agent"); - sendStartup(link); + } else if (answer instanceof PingAnswer) { + if ((((PingAnswer) answer).isSendStartup()) && reconnectAllowed) { + logger.info("Management server requested startup command to reinitialize the agent"); + sendStartup(link); + } + shell.setAvoidHosts(((PingAnswer) answer).getAvoidMsList()); Review Comment: new method? ########## utils/src/main/java/com/cloud/utils/nio/NioConnection.java: ########## @@ -127,17 +129,31 @@ public void start() throws NioConnectionException { _isStartup = true; if (_executor.isShutdown()) { - _executor = new ThreadPoolExecutor(_workers, 5 * _workers, 1, TimeUnit.DAYS, new LinkedBlockingQueue<>(), new NamedThreadFactory(_name + "-Handler")); + _executor = new ThreadPoolExecutor(_workers, 5 * _workers, 1, TimeUnit.DAYS, + new LinkedBlockingQueue<>(5 * _workers), new NamedThreadFactory(_name + "-Handler"), + new ThreadPoolExecutor.AbortPolicy()); + } Review Comment: new method ########## utils/src/main/java/com/cloud/utils/nio/NioConnection.java: ########## @@ -127,17 +129,31 @@ public void start() throws NioConnectionException { _isStartup = true; if (_executor.isShutdown()) { - _executor = new ThreadPoolExecutor(_workers, 5 * _workers, 1, TimeUnit.DAYS, new LinkedBlockingQueue<>(), new NamedThreadFactory(_name + "-Handler")); + _executor = new ThreadPoolExecutor(_workers, 5 * _workers, 1, TimeUnit.DAYS, + new LinkedBlockingQueue<>(5 * _workers), new NamedThreadFactory(_name + "-Handler"), + new ThreadPoolExecutor.AbortPolicy()); + } + if (_sslHandshakeExecutor.isShutdown()) { + String sslHandshakeHandlerName = _name + "-SSLHandshakeHandler"; + if (factoryMaxNewConnectionsCount > 0) { + _sslHandshakeExecutor = new ThreadPoolExecutor(0, this.factoryMaxNewConnectionsCount, 30, + TimeUnit.MINUTES, new SynchronousQueue<>(), new NamedThreadFactory(sslHandshakeHandlerName), + new ThreadPoolExecutor.AbortPolicy()); + } else { + _sslHandshakeExecutor = Executors.newCachedThreadPool(new NamedThreadFactory(sslHandshakeHandlerName)); + } } Review Comment: new method ########## plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java: ########## @@ -294,19 +343,21 @@ public ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdown throw new CloudRuntimeException("Management server is not in the right state to prepare for shutdown"); } + final List<ManagementServerHostVO> preparingForMaintenanceOrShutDownMsList = msHostDao.listBy(State.PreparingForMaintenance, State.PreparingForShutDown); + if (CollectionUtils.isNotEmpty(preparingForMaintenanceOrShutDownMsList)) { + throw new CloudRuntimeException("Cannot prepare for shutdown, there are other management servers preparing for maintenance/shutdown"); + } Review Comment: new method ########## plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java: ########## @@ -257,8 +300,13 @@ public void cancelMaintenance() { jobManager.enableAsyncJobs(); cancelWaitForPendingJobs(); ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); - if (msHost != null && State.Maintenance.equals(msHost.getState())) { - onCancelMaintenance(); + if (msHost != null) { + if (State.PreparingForMaintenance.equals(msHost.getState())) { + onCancelPreparingForMaintenance(); + } + if (State.Maintenance.equals(msHost.getState())) { + onCancelMaintenance(); + } } Review Comment: new method ########## server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java: ########## @@ -342,35 +488,67 @@ public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorith List<DataCenterVO> dataCenterList = dcDao.listAll(); for (DataCenterVO dc : dataCenterList) { - Long dcId = dc.getId(); - List<Long> orderedHostIdList = getOrderedHostIdList(dcId); - List<Host> agentBasedHostsOfMsInDc = getAllAgentBasedHostsInDc(fromMsId, dcId); - if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDc)) { + List<Long> orderedHostIdList = getOrderedHostIdList(dc.getId()); + if (!migrateNonRoutingHostAgentsInZone(fromMsUuid, fromMsId, dc, migrationStartTimeInMs, + timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) { + return false; + } + List<Long> clusterIds = clusterDao.listAllClusterIds(dc.getId()); + if (CollectionUtils.isEmpty(clusterIds)) { continue; } - logger.debug(String.format("Migrating %d indirect agents from management server node %d (id: %s) of zone %s", agentBasedHostsOfMsInDc.size(), fromMsId, fromMsUuid, dc)); - for (final Host host : agentBasedHostsOfMsInDc) { - long migrationElapsedTimeInMs = System.currentTimeMillis() - migrationStartTime; - if (migrationElapsedTimeInMs >= timeoutDurationInMs) { - logger.debug(String.format("Stop migrating remaining indirect agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid)); + for (Long clusterId : clusterIds) { + if (!migrateRoutingHostAgentsInCluster(clusterId, fromMsUuid, fromMsId, dc, migrationStartTimeInMs, + timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) { return false; } + } + } Review Comment: extract method -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@cloudstack.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org