weizhouapache commented on code in PR #9840: URL: https://github.com/apache/cloudstack/pull/9840#discussion_r1824345632
########## agent/conf/agent.properties: ########## @@ -433,3 +433,9 @@ iscsi.session.cleanup.enabled=false # Implicit host tags managed by agent.properties # host.tags= + +# Timeout(in seconds) for SSL handshake when agent connects to server +#ssl.handshake.timeout= Review Comment: since this line is commented by default, can we add the default value ? ########## agent/conf/agent.properties: ########## @@ -433,3 +433,9 @@ iscsi.session.cleanup.enabled=false # Implicit host tags managed by agent.properties # host.tags= + +# Timeout(in seconds) for SSL handshake when agent connects to server +#ssl.handshake.timeout= + +# Wait(in seconds) during agent reconnections +#backoff.seconds= Review Comment: same here, add default value ? ########## server/src/main/java/com/cloud/alert/AlertManagerImpl.java: ########## @@ -257,6 +259,64 @@ public void sendAlert(AlertType alertType, long dataCenterId, Long podId, String } } + protected void recalculateHostCapacities() { + // Calculate CPU and RAM capacities + List<Long> hostIds = hostDao.listIdsByType(Host.Type.Routing); + if (hostIds.isEmpty()) { + return; + } + ConcurrentHashMap<Long, Future<Void>> futures = new ConcurrentHashMap<>(); + ExecutorService executorService = Executors.newFixedThreadPool(Math.max(1, + Math.min(CapacityManager.CapacityCalculateWorkers.value(), hostIds.size()))); + for (Long hostId : hostIds) { + futures.put(hostId, executorService.submit(() -> { + final HostVO host = hostDao.findById(hostId); + _capacityMgr.updateCapacityForHost(host); + return null; + })); + } + for (Map.Entry<Long, Future<Void>> entry: futures.entrySet()) { + try { + entry.getValue().get(); + } catch (InterruptedException | ExecutionException e) { + logger.error(String.format("Error during capacity calculation for host: %d due to : %s", + entry.getKey(), e.getMessage()), e); + } + } + executorService.shutdown(); + } + + protected void recalculateStorageCapacities() { + List<Long> storagePoolIds = _storagePoolDao.listAllIds(); + if (storagePoolIds.isEmpty()) { + return; + } + ConcurrentHashMap<Long, Future<Void>> futures = new ConcurrentHashMap<>(); + ExecutorService executorService = Executors.newFixedThreadPool(Math.max(1, Review Comment: `Math.max(1, xxx` is probably unnecessary, as there is a empty check on storagePoolIds. ########## agent/src/main/java/com/cloud/agent/Agent.java: ########## @@ -1136,22 +1175,16 @@ public void doTask(final Task task) throws TaskExecutionException { } else { //put the requests from mgt server into another thread pool, as the request may take a longer time to finish. Don't block the NIO main thread pool //processRequest(request, task.getLink()); - _executor.submit(new AgentRequestHandler(getType(), getLink(), request)); + requestHandler.submit(new AgentRequestHandler(getType(), getLink(), request)); } } catch (final ClassNotFoundException e) { logger.error("Unable to find this request "); } catch (final Exception e) { logger.error("Error parsing task", e); } } else if (task.getType() == Task.Type.DISCONNECT) { - try { Review Comment: any reason this is removed ? 
@shwstppr ########## api/src/main/java/org/apache/cloudstack/api/command/admin/domain/ListDomainsCmd.java: ########## @@ -142,7 +142,10 @@ protected void updateDomainResponse(List<DomainResponse> response) { if (CollectionUtils.isEmpty(response)) { return; } - _resourceLimitService.updateTaggedResourceLimitsAndCountsForDomains(response, getTag()); + EnumSet<DomainDetails> details = getDetails(); + if (details.contains(DomainDetails.all) || details.contains(DomainDetails.resource)) { + _resourceLimitService.updateTaggedResourceLimitsAndCountsForDomains(response, getTag()); Review Comment: changes make sense. do we need to fix it in 4.20 (seems to be related to PR #8362)? ########## server/src/main/java/com/cloud/alert/AlertManagerImpl.java: ########## @@ -257,6 +259,64 @@ public void sendAlert(AlertType alertType, long dataCenterId, Long podId, String } } + protected void recalculateHostCapacities() { + // Calculate CPU and RAM capacities + List<Long> hostIds = hostDao.listIdsByType(Host.Type.Routing); + if (hostIds.isEmpty()) { + return; + } + ConcurrentHashMap<Long, Future<Void>> futures = new ConcurrentHashMap<>(); + ExecutorService executorService = Executors.newFixedThreadPool(Math.max(1, + Math.min(CapacityManager.CapacityCalculateWorkers.value(), hostIds.size()))); + for (Long hostId : hostIds) { + futures.put(hostId, executorService.submit(() -> { + final HostVO host = hostDao.findById(hostId); + _capacityMgr.updateCapacityForHost(host); + return null; + })); + } + for (Map.Entry<Long, Future<Void>> entry: futures.entrySet()) { + try { + entry.getValue().get(); + } catch (InterruptedException | ExecutionException e) { + logger.error(String.format("Error during capacity calculation for host: %d due to : %s", + entry.getKey(), e.getMessage()), e); + } + } + executorService.shutdown(); + } + + protected void recalculateStorageCapacities() { + List<Long> storagePoolIds = _storagePoolDao.listAllIds(); Review Comment: does this consider the state of the storage pool ? maybe filter out the pools which are not needed, for example those in maintenance ? ########## engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java: ########## @@ -3793,12 +3803,12 @@ public int getTimeout() { public boolean processCommands(final long agentId, final long seq, final Command[] cmds) { boolean processed = false; for (final Command cmd : cmds) { + // FIXME: PingRoutingCommand handler is DB & CPU hotspot Review Comment: will you work on it in this pr or in a future pr ? ########## engine/schema/src/main/java/com/cloud/dc/dao/ClusterDaoImpl.java: ########## @@ -167,23 +171,17 @@ public List<HypervisorType> getAvailableHypervisorInZone(Long zoneId) { sc.setParameters("zoneId", zoneId); } List<ClusterVO> clusters = listBy(sc); - List<HypervisorType> hypers = new ArrayList<HypervisorType>(4); + List<HypervisorType> distinctHypervisors = new ArrayList<>(4); Review Comment: maybe remove 4 ? ########## plugins/hypervisors/simulator/src/main/java/com/cloud/resource/AgentRoutingResource.java: ########## @@ -122,7 +123,8 @@ public PingCommand getCurrentStatus(long id) { } } - config = _simMgr.getMockConfigurationDao().findByNameBottomUP(agentHost.getDataCenterId(), agentHost.getPodId(), agentHost.getClusterId(), agentHost.getId(), "PingRoutingWithNwGroupsCommand"); + //config = _simMgr.getMockConfigurationDao().findByNameBottomUP(agentHost.getDataCenterId(), agentHost.getPodId(), agentHost.getClusterId(), agentHost.getId(), "PingRoutingWithNwGroupsCommand"); Review Comment: remove it ?
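On the storage-capacity recalculation comment above: a minimal sketch, not part of the PR, of how pools that do not need recalculation (for example pools in maintenance) could be filtered out before the worker pool is created. It assumes StoragePoolVO#getStatus() and the StoragePoolStatus enum are usable from AlertManagerImpl; the helper name is hypothetical and imports are omitted.

```java
// Hypothetical helper, not part of the PR: only pools in the Up state are handed
// to the capacity-calculation workers; Maintenance, ErrorInMaintenance, etc. are skipped.
protected List<Long> listStoragePoolIdsEligibleForCapacityCalculation() {
    return _storagePoolDao.listAll().stream()
            .filter(pool -> StoragePoolStatus.Up.equals(pool.getStatus()))
            .map(StoragePoolVO::getId)
            .collect(Collectors.toList());
}
```

recalculateStorageCapacities() could then iterate over this list instead of the result of _storagePoolDao.listAllIds().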
########## engine/schema/src/main/java/com/cloud/upgrade/dao/DatabaseAccessObject.java: ########## @@ -114,6 +114,17 @@ public void createIndex(Connection conn, String tableName, String indexName, Str } } + public void renameIndex(Connection conn, String tableName, String oldName, String newName) { + String stmt = String.format("ALTER TABLE %s RENAME INDEX %s TO %s", tableName, oldName, newName); Review Comment: tested on mysql 8.0.x, looks good. better to test with mariadb 10.x as well ########## tools/marvin/setup.py: ########## @@ -27,7 +27,7 @@ raise RuntimeError("python setuptools is required to build Marvin") -VERSION = "4.20.0.0-SNAPSHOT" +VERSION = "4.20.0.0" Review Comment: this seems to be caused by the maven build; it may not be needed ########## plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/lifecycle/CloudStackPrimaryDataStoreLifeCycleImpl.java: ########## @@ -373,85 +375,54 @@ protected boolean createStoragePool(long hostId, StoragePool pool) { } } - @Override - public boolean attachCluster(DataStore store, ClusterScope scope) { - PrimaryDataStoreInfo primarystore = (PrimaryDataStoreInfo)store; - // Check if there is host up in this cluster - List<HostVO> allHosts = - _resourceMgr.listAllUpHosts(Host.Type.Routing, primarystore.getClusterId(), primarystore.getPodId(), primarystore.getDataCenterId()); + private Pair<List<Long>, Boolean> prepareOcfs2NodesIfNeeded(PrimaryDataStoreInfo primaryStore) { + if (!StoragePoolType.OCFS2.equals(primaryStore.getPoolType())) { + return new Pair<>(_hostDao.listIdsForUpRouting(primaryStore.getDataCenterId(), + primaryStore.getPodId(), primaryStore.getClusterId()), true); + } + List<HostVO> allHosts = _resourceMgr.listAllUpHosts(Host.Type.Routing, primaryStore.getClusterId(), + primaryStore.getPodId(), primaryStore.getDataCenterId()); if (allHosts.isEmpty()) { - primaryDataStoreDao.expunge(primarystore.getId()); - throw new CloudRuntimeException("No host up to associate a storage pool with in cluster " + primarystore.getClusterId()); + return new Pair<>(Collections.emptyList(), true); + } + List<Long> hostIds = allHosts.stream().map(HostVO::getId).collect(Collectors.toList()); + if (!_ocfs2Mgr.prepareNodes(allHosts, primaryStore)) { + return new Pair<>(hostIds, false); } + return new Pair<>(hostIds, true); + } - if (primarystore.getPoolType() == StoragePoolType.OCFS2 && !_ocfs2Mgr.prepareNodes(allHosts, primarystore)) { - logger.warn("Can not create storage pool " + primarystore + " on cluster " + primarystore.getClusterId()); - primaryDataStoreDao.expunge(primarystore.getId()); + @Override + public boolean attachCluster(DataStore store, ClusterScope scope) { + PrimaryDataStoreInfo primaryStore = (PrimaryDataStoreInfo)store; + Pair<List<Long>, Boolean> result = prepareOcfs2NodesIfNeeded(primaryStore); + List<Long> hostIds = result.first(); + if (hostIds.isEmpty()) { + primaryDataStoreDao.expunge(primaryStore.getId()); + throw new CloudRuntimeException("No host up to associate a storage pool with in cluster " + primaryStore.getClusterId()); + } + if (!result.second()) { + logger.warn("Can not create storage pool {} on cluster {}", primaryStore, primaryStore.getClusterId()); + primaryDataStoreDao.expunge(primaryStore.getId()); return false; } - - boolean success = false; - for (HostVO h : allHosts) { - success = createStoragePool(h.getId(), primarystore); - if (success) { + for (Long hId : hostIds) { + if (createStoragePool(hId, primaryStore)) { break; } } - logger.debug("In createPool Adding the pool to each of the 
hosts"); - List<HostVO> poolHosts = new ArrayList<HostVO>(); - for (HostVO h : allHosts) { - try { - storageMgr.connectHostToSharedPool(h.getId(), primarystore.getId()); - poolHosts.add(h); - } catch (StorageConflictException se) { - primaryDataStoreDao.expunge(primarystore.getId()); - throw new CloudRuntimeException("Storage has already been added as local storage"); - } catch (Exception e) { - logger.warn("Unable to establish a connection between " + h + " and " + primarystore, e); - String reason = storageMgr.getStoragePoolMountFailureReason(e.getMessage()); - if (reason != null) { - throw new CloudRuntimeException(reason); - } - } - } - - if (poolHosts.isEmpty()) { - logger.warn("No host can access storage pool " + primarystore + " on cluster " + primarystore.getClusterId()); - primaryDataStoreDao.expunge(primarystore.getId()); - throw new CloudRuntimeException("Failed to access storage pool"); - } - + storageMgr.connectHostsToPool(store, hostIds, scope, true, true); Review Comment: good refactoring ########## utils/src/main/java/com/cloud/utils/nio/NioConnection.java: ########## @@ -190,9 +209,25 @@ public Boolean call() throws NioConnectionException { abstract void unregisterLink(InetSocketAddress saddr); + protected boolean rejectConnectionIfBusy(final SocketChannel socketChannel) throws IOException { + if (activeAcceptConnections.get() < sslHandshakeMaxWorkers) { Review Comment: will reject and drop cause some issues ? any wait and retry mechanism ? ########## server/src/main/java/org/apache/cloudstack/acl/RoleManagerImpl.java: ########## @@ -594,7 +594,7 @@ public String getConfigComponentName() { @Override public ConfigKey<?>[] getConfigKeys() { - return new ConfigKey<?>[] {RoleService.EnableDynamicApiChecker}; + return new ConfigKey<?>[] {RoleService.EnableDynamicApiChecker, RoleService.DynamicApiCheckerCachePeriod}; Review Comment: does this work ? 
``` return new ConfigKey<?>[] {EnableDynamicApiChecker, DynamicApiCheckerCachePeriod}; ``` ########## agent/src/main/java/com/cloud/agent/Agent.java: ########## @@ -444,59 +466,87 @@ protected void cancelTasks() { * when host is added back */ protected void cleanupAgentZoneProperties() { - _shell.setPersistentProperty(null, "zone", ""); - _shell.setPersistentProperty(null, "cluster", ""); - _shell.setPersistentProperty(null, "pod", ""); + shell.setPersistentProperty(null, "zone", ""); + shell.setPersistentProperty(null, "cluster", ""); + shell.setPersistentProperty(null, "pod", ""); + } + + public void lockStartupTask(final Link link) { + logger.debug("Creating startup task for link: {}", getLinkLog(link)); + StartupTask currentTask = startupTask.get(); + if (currentTask != null) { + logger.warn("A Startup task is already locked or in progress, cannot create for link {}", + getLinkLog(link)); + return; + } + currentTask = new StartupTask(link); + if (startupTask.compareAndSet(null, currentTask)) { + selfTaskExecutor.schedule(currentTask, startupWait, TimeUnit.SECONDS); + return; + } + logger.warn("Failed to lock a StartupTask for link: {}", getLinkLog(link)); } - public synchronized void lockStartupTask(final Link link) { - _startup = new StartupTask(link); - _timer.schedule(_startup, _startupWait); + protected boolean cancelStartupTask() { + StartupTask task = startupTask.getAndSet(null); + if (task != null) { + task.cancel(); + return true; + } + return false; } public void sendStartup(final Link link) { - final StartupCommand[] startup = _resource.initialize(); + final StartupCommand[] startup = serverResource.initialize(); if (startup != null) { - final String msHostList = _shell.getPersistentProperty(null, "host"); + final String msHostList = shell.getPersistentProperty(null, "host"); final Command[] commands = new Command[startup.length]; for (int i = 0; i < startup.length; i++) { setupStartupCommand(startup[i]); startup[i].setMSHostList(msHostList); commands[i] = startup[i]; } - final Request request = new Request(_id != null ? _id : -1, -1, commands, false, false); + final Request request = new Request(id != null ? 
id : -1, -1, commands, false, false); request.setSequence(getNextSequence()); logger.debug("Sending Startup: {}", request.toString()); lockStartupTask(link); try { link.send(request.toBytes()); } catch (final ClosedChannelException e) { - logger.warn("Unable to send request: {}", request.toString()); + logger.warn("Unable to send request to {} due to '{}', request: {}", + getLinkLog(link), e.getMessage(), request); } - if (_resource instanceof ResourceStatusUpdater) { - ((ResourceStatusUpdater) _resource).registerStatusUpdater(this); + if (serverResource instanceof ResourceStatusUpdater) { + ((ResourceStatusUpdater) serverResource).registerStatusUpdater(this); } } } - protected void setupStartupCommand(final StartupCommand startup) { - InetAddress addr; + protected String retrieveHostname() { + if (logger.isTraceEnabled()) { + logger.trace(" Retrieving hostname " + serverResource.getClass().getSimpleName()); Review Comment: ```suggestion logger.trace(" Retrieving hostname with resource=" + serverResource.getClass().getSimpleName()); ``` ########## plugins/hypervisors/simulator/src/main/java/com/cloud/resource/AgentRoutingResource.java: ########## @@ -109,7 +109,8 @@ public Type getType() { public PingCommand getCurrentStatus(long id) { TransactionLegacy txn = TransactionLegacy.open(TransactionLegacy.SIMULATOR_DB); try { - MockConfigurationVO config = _simMgr.getMockConfigurationDao().findByNameBottomUP(agentHost.getDataCenterId(), agentHost.getPodId(), agentHost.getClusterId(), agentHost.getId(), "PingCommand"); + //MockConfigurationVO config = _simMgr.getMockConfigurationDao().findByNameBottomUP(agentHost.getDataCenterId(), agentHost.getPodId(), agentHost.getClusterId(), agentHost.getId(), "PingCommand"); Review Comment: can line 112 be removed ? ########## framework/db/src/main/java/com/cloud/utils/db/TransactionLegacy.java: ########## @@ -1334,7 +1334,7 @@ private static DataSource createDbcpDataSource(String uri, String username, Stri poolableConnectionFactory.setDefaultTransactionIsolation(isolationLevel); } return new PoolingDataSource<>(connectionPool); - } + } Review Comment: is indent in line 1337 needed ? ########## api/src/main/java/org/apache/cloudstack/api/command/user/account/ListAccountsCmd.java: ########## @@ -149,7 +149,10 @@ protected void updateAccountResponse(List<AccountResponse> response) { if (CollectionUtils.isEmpty(response)) { return; } - _resourceLimitService.updateTaggedResourceLimitsAndCountsForAccounts(response, getTag()); + EnumSet<DomainDetails> details = getDetails(); + if (details.contains(DomainDetails.all) || details.contains(DomainDetails.resource)) { + _resourceLimitService.updateTaggedResourceLimitsAndCountsForAccounts(response, getTag()); Review Comment: create a PR for 4.20 ? 
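Stepping back to the Agent.java startup-task rework earlier in this thread: the new lockStartupTask()/cancelStartupTask() pair replaces the synchronized method and Timer with an AtomicReference and a scheduled executor. A small self-contained sketch of that compare-and-set idiom, with illustrative names that are not the PR's own:

```java
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

// Illustrative only: at most one pending task can hold the slot at a time,
// without any synchronized blocks, mirroring the reworked startup-task handling.
public class SingleFlightScheduler {
    private final AtomicReference<Runnable> pending = new AtomicReference<>();
    private final ScheduledExecutorService executor = new ScheduledThreadPoolExecutor(1);

    public boolean scheduleOnce(Runnable task, long delaySeconds) {
        if (!pending.compareAndSet(null, task)) {
            return false;                              // another task already holds the slot
        }
        executor.schedule(() -> {
            try {
                task.run();
            } finally {
                pending.compareAndSet(task, null);     // release the slot when done
            }
        }, delaySeconds, TimeUnit.SECONDS);
        return true;
    }

    public boolean cancel() {
        return pending.getAndSet(null) != null;        // clear the slot, report if anything was pending
    }
}
```

The same pattern is what cancelStartupTask() in the diff relies on: getAndSet(null) both clears the reference and reports whether a task was actually pending.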
########## agent/src/main/java/com/cloud/agent/Agent.java: ########## @@ -444,59 +466,87 @@ protected void cancelTasks() { * when host is added back */ protected void cleanupAgentZoneProperties() { - _shell.setPersistentProperty(null, "zone", ""); - _shell.setPersistentProperty(null, "cluster", ""); - _shell.setPersistentProperty(null, "pod", ""); + shell.setPersistentProperty(null, "zone", ""); + shell.setPersistentProperty(null, "cluster", ""); + shell.setPersistentProperty(null, "pod", ""); + } + + public void lockStartupTask(final Link link) { + logger.debug("Creating startup task for link: {}", getLinkLog(link)); + StartupTask currentTask = startupTask.get(); + if (currentTask != null) { + logger.warn("A Startup task is already locked or in progress, cannot create for link {}", + getLinkLog(link)); + return; + } + currentTask = new StartupTask(link); + if (startupTask.compareAndSet(null, currentTask)) { + selfTaskExecutor.schedule(currentTask, startupWait, TimeUnit.SECONDS); + return; + } + logger.warn("Failed to lock a StartupTask for link: {}", getLinkLog(link)); } - public synchronized void lockStartupTask(final Link link) { - _startup = new StartupTask(link); - _timer.schedule(_startup, _startupWait); + protected boolean cancelStartupTask() { + StartupTask task = startupTask.getAndSet(null); + if (task != null) { + task.cancel(); + return true; + } + return false; } public void sendStartup(final Link link) { - final StartupCommand[] startup = _resource.initialize(); + final StartupCommand[] startup = serverResource.initialize(); if (startup != null) { - final String msHostList = _shell.getPersistentProperty(null, "host"); + final String msHostList = shell.getPersistentProperty(null, "host"); final Command[] commands = new Command[startup.length]; for (int i = 0; i < startup.length; i++) { setupStartupCommand(startup[i]); startup[i].setMSHostList(msHostList); commands[i] = startup[i]; } - final Request request = new Request(_id != null ? _id : -1, -1, commands, false, false); + final Request request = new Request(id != null ? 
id : -1, -1, commands, false, false); request.setSequence(getNextSequence()); logger.debug("Sending Startup: {}", request.toString()); lockStartupTask(link); try { link.send(request.toBytes()); } catch (final ClosedChannelException e) { - logger.warn("Unable to send request: {}", request.toString()); + logger.warn("Unable to send request to {} due to '{}', request: {}", + getLinkLog(link), e.getMessage(), request); } - if (_resource instanceof ResourceStatusUpdater) { - ((ResourceStatusUpdater) _resource).registerStatusUpdater(this); + if (serverResource instanceof ResourceStatusUpdater) { + ((ResourceStatusUpdater) serverResource).registerStatusUpdater(this); } } } - protected void setupStartupCommand(final StartupCommand startup) { - InetAddress addr; + protected String retrieveHostname() { + if (logger.isTraceEnabled()) { + logger.trace(" Retrieving hostname " + serverResource.getClass().getSimpleName()); + } + final String result = Script.runSimpleBashScript(Script.getExecutableAbsolutePath("hostname"), 500); Review Comment: hostname might not be found in /usr/bin refer to #8310 #8633 ########## engine/schema/src/main/java/com/cloud/dc/ClusterDetailsDaoImpl.java: ########## @@ -82,6 +85,23 @@ public Map<String, String> findDetails(long clusterId) { return details; } + @Override + public Map<String, String> findDetails(long clusterId, Collection<String> names) { + if (CollectionUtils.isEmpty(names)) { + return new HashMap<>(); + } + SearchBuilder<ClusterDetailsVO> sb = createSearchBuilder(); Review Comment: same here, define a final variable ? ########## engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java: ########## @@ -223,17 +231,20 @@ public boolean configure(final String name, final Map<String, Object> params) th registerForHostEvents(new SetHostParamsListener(), true, true, false); - _executor = new ThreadPoolExecutor(threads, threads, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentTaskPool")); + final int agentTaskThreads = DirectAgentLoadSize.value(); + _executor = new ThreadPoolExecutor(agentTaskThreads, agentTaskThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentTaskPool")); _connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentConnectTaskPool")); // allow core threads to time out even when there are no items in the queue _connectExecutor.allowCoreThreadTimeOut(true); - _connection = new NioServer("AgentManager", Port.value(), Workers.value() + 10, this, caService); + _connection = new NioServer("AgentManager", Port.value(), Workers.value() + 10, + RemoteAgentSslHandshakeMinWorkers.value(), RemoteAgentSslHandshakeMaxWorkers.value(), this, + caService, RemoteAgentSslHandshakeTimeout.value()); logger.info("Listening on {} with {} workers.", Port.value(), Workers.value()); // executes all agent commands other than cron and ping - _directAgentExecutor = new ScheduledThreadPoolExecutor(DirectAgentPoolSize.value(), new NamedThreadFactory("DirectAgent")); + _directAgentExecutor = new ThreadPoolExecutor(Math.max(agentTaskThreads/10, 1), DirectAgentPoolSize.value(), 120L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("DirectAgent")); Review Comment: can you explain why the core threads count is set to `Math.max(agentTaskThreads/10, 1)` ? 
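On the core-thread question just above, one general java.util.concurrent detail seems relevant (standard ThreadPoolExecutor behaviour, not something specific to this PR): with an unbounded LinkedBlockingQueue, the pool never grows beyond corePoolSize, because threads beyond the core count are only created when the queue is full. With the constructor shown above, the DirectAgent pool would effectively stay at Math.max(agentTaskThreads/10, 1) threads and queue everything else, so DirectAgentPoolSize.value() as maximumPoolSize would not be reached. A small runnable demo with illustrative numbers:

```java
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

// Demonstrates that a ThreadPoolExecutor over an unbounded LinkedBlockingQueue
// never creates more than corePoolSize threads, regardless of maximumPoolSize.
public class PoolSizingDemo {
    public static void main(String[] args) throws InterruptedException {
        ThreadPoolExecutor pool = new ThreadPoolExecutor(
                2,                      // corePoolSize, e.g. Math.max(agentTaskThreads / 10, 1)
                50,                     // maximumPoolSize, e.g. DirectAgentPoolSize.value()
                120L, TimeUnit.SECONDS,
                new LinkedBlockingQueue<>());
        for (int i = 0; i < 100; i++) {
            pool.submit(() -> {
                try { Thread.sleep(200); } catch (InterruptedException ignored) { }
            });
        }
        Thread.sleep(500);
        System.out.println("Threads actually created: " + pool.getPoolSize()); // prints 2
        pool.shutdownNow();
    }
}
```

If the intent is to let the pool scale up towards DirectAgentPoolSize.value() under load, a bounded queue or a larger core size would be needed; if the intent is a smaller steady-state pool, the maximum is effectively unused.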
########## engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java: ########## @@ -1602,4 +1701,75 @@ private String createSqlFindHostToExecuteCommand(boolean useDisabledHosts) { } return String.format(sqlFindHostInZoneToExecuteCommand, hostResourceStatus); } + + @Override + public boolean isHostUp(long hostId) { + GenericSearchBuilder<HostVO, Status> sb = createSearchBuilder(Status.class); + sb.and("id", sb.entity().getId(), Op.EQ); + sb.selectFields(sb.entity().getStatus()); + SearchCriteria<Status> sc = sb.create(); + List<Status> statuses = customSearch(sc, null); + return CollectionUtils.isNotEmpty(statuses) && Status.Up.equals(statuses.get(0)); + } + + @Override + public List<Long> findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(final Long zoneId, final Long clusterId, + final List<ResourceState> resourceStates, final List<Type> types, + final List<Hypervisor.HypervisorType> hypervisorTypes) { + GenericSearchBuilder<HostVO, Long> sb = createSearchBuilder(Long.class); + sb.selectFields(sb.entity().getId()); + sb.and("zoneId", sb.entity().getDataCenterId(), SearchCriteria.Op.EQ); + sb.and("clusterId", sb.entity().getClusterId(), SearchCriteria.Op.EQ); + sb.and("resourceState", sb.entity().getResourceState(), SearchCriteria.Op.IN); + sb.and("type", sb.entity().getType(), SearchCriteria.Op.IN); + if (CollectionUtils.isNotEmpty(hypervisorTypes)) { + sb.and().op(sb.entity().getHypervisorType(), SearchCriteria.Op.NULL); + sb.or("hypervisorTypes", sb.entity().getHypervisorType(), SearchCriteria.Op.IN); + sb.cp(); + } + sb.done(); + SearchCriteria<Long> sc = sb.create(); + if (zoneId != null) { + sc.setParameters("zoneId", zoneId); + } + if (clusterId != null) { + sc.setParameters("clusterId", clusterId); + } + if (CollectionUtils.isNotEmpty(hypervisorTypes)) { + sc.setParameters("hypervisorTypes", hypervisorTypes.toArray()); + } + sc.setParameters("resourceState", resourceStates.toArray()); + sc.setParameters("type", types.toArray()); + return customSearch(sc, null); + } + + @Override + public List<Long> listAllIds() { + return listIdsBy(null, null, null, null, null, null, null); + } + + @Override + public List<HypervisorType> listDistinctHypervisorTypes(final Long zoneId) { + GenericSearchBuilder<HostVO, HypervisorType> sb = createSearchBuilder(HypervisorType.class); Review Comment: define as final ? ########## engine/schema/src/main/java/org/apache/cloudstack/storage/datastore/db/PrimaryDataStoreDao.java: ########## @@ -154,4 +154,6 @@ Pair<List<Long>, Integer> searchForIdsAndCount(Long storagePoolId, String storag String keyword, Filter searchFilter); List<StoragePoolVO> listByIds(List<Long> ids); + + List<Long> listAllIds(); Review Comment: this is defined in multiple Daos consider moving to GenericDaoBase ? ########## engine/schema/src/main/java/com/cloud/capacity/dao/CapacityDaoImpl.java: ########## @@ -671,6 +671,18 @@ public CapacityVO findByHostIdType(Long hostId, short capacityType) { return findOneBy(sc); } + @Override + public List<CapacityVO> listByHostIdTypes(Long hostId, List<Short> capacityTypes) { + SearchBuilder<CapacityVO> sb = createSearchBuilder(); Review Comment: define a final variable ? 
similar as `private final SearchBuilder<CapacityVO> _allFieldsSearch;` ########## engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java: ########## @@ -4988,89 +5016,58 @@ private void scanStalledVMInTransitionStateOnDisconnectedHosts() { } } - private List<Long> listStalledVMInTransitionStateOnUpHost(final long hostId, final Date cutTime) { - final String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status = 'UP' " + - "AND h.id = ? AND i.power_state_update_time < ? AND i.host_id = h.id " + - "AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " + - "AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)" + - "AND i.removed IS NULL"; - - final List<Long> l = new ArrayList<>(); - try (TransactionLegacy txn = TransactionLegacy.open(TransactionLegacy.CLOUD_DB)) { - String cutTimeStr = DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), cutTime); - - try { - PreparedStatement pstmt = txn.prepareAutoCloseStatement(sql); - - pstmt.setLong(1, hostId); - pstmt.setString(2, cutTimeStr); - pstmt.setInt(3, JobInfo.Status.IN_PROGRESS.ordinal()); - final ResultSet rs = pstmt.executeQuery(); - while (rs.next()) { - l.add(rs.getLong(1)); - } - } catch (SQLException e) { - logger.error("Unable to execute SQL [{}] with params {\"h.id\": {}, \"i.power_state_update_time\": \"{}\"} due to [{}].", sql, hostId, cutTimeStr, e.getMessage(), e); - } + private List<VMInstanceVO> listStalledVMInTransitionStateOnUpHost( + final List<VMInstanceVO> transitioningVms, final long cutTime) { + if (CollectionUtils.isEmpty(transitioningVms)) { + return transitioningVms; } - return l; + List<Long> vmIdsInProgress = vmIdsInProgressCache.get(); + return transitioningVms.stream() + .filter(v -> v.getPowerStateUpdateTime().getTime() < cutTime && !vmIdsInProgress.contains(v.getId())) + .collect(Collectors.toList()); } - private List<Long> listVMInTransitionStateWithRecentReportOnUpHost(final long hostId, final Date cutTime) { - final String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status = 'UP' " + - "AND h.id = ? AND i.power_state_update_time > ? 
AND i.host_id = h.id " + - "AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " + - "AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)" + - "AND i.removed IS NULL"; - - final List<Long> l = new ArrayList<>(); - try (TransactionLegacy txn = TransactionLegacy.open(TransactionLegacy.CLOUD_DB)) { - String cutTimeStr = DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), cutTime); - int jobStatusInProgress = JobInfo.Status.IN_PROGRESS.ordinal(); - - try { - PreparedStatement pstmt = txn.prepareAutoCloseStatement(sql); - - pstmt.setLong(1, hostId); - pstmt.setString(2, cutTimeStr); - pstmt.setInt(3, jobStatusInProgress); - final ResultSet rs = pstmt.executeQuery(); - while (rs.next()) { - l.add(rs.getLong(1)); - } - } catch (final SQLException e) { - logger.error("Unable to execute SQL [{}] with params {\"h.id\": {}, \"i.power_state_update_time\": \"{}\", \"j.job_status\": {}} due to [{}].", sql, hostId, cutTimeStr, jobStatusInProgress, e.getMessage(), e); - } - return l; + private List<VMInstanceVO> listVMInTransitionStateWithRecentReportOnUpHost( + final List<VMInstanceVO> transitioningVms, final long cutTime) { + if (CollectionUtils.isEmpty(transitioningVms)) { + return transitioningVms; } + List<Long> vmIdsInProgress = vmIdsInProgressCache.get(); + return transitioningVms.stream() + .filter(v -> v.getPowerStateUpdateTime().getTime() > cutTime && !vmIdsInProgress.contains(v.getId())) + .collect(Collectors.toList()); } private List<Long> listStalledVMInTransitionStateOnDisconnectedHosts(final Date cutTime) { - final String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status != 'UP' " + - "AND i.power_state_update_time < ? AND i.host_id = h.id " + - "AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " + - "AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)" + - "AND i.removed IS NULL"; + final String sql = "SELECT i.*\n" + Review Comment: for safe use, I suggest to use a space instead of "\n" ########## server/src/main/java/com/cloud/api/query/QueryManagerImpl.java: ########## @@ -2384,7 +2384,7 @@ public Pair<List<Long>, Integer> searchForServerIdsAndCount(ListHostsCmd cmd) { sc.setParameters("name", name); } if (type != null) { - sc.setParameters("type", "%" + type); + sc.setParameters("type", type); Review Comment: :+1: ########## engine/schema/src/main/java/com/cloud/storage/dao/StoragePoolHostDaoImpl.java: ########## @@ -169,23 +169,23 @@ public List<Long> findHostsConnectedToPools(List<Long> poolIds) { } @Override - public List<Pair<Long, Integer>> getDatacenterStoragePoolHostInfo(long dcId, boolean sharedOnly) { - ArrayList<Pair<Long, Integer>> l = new ArrayList<Pair<Long, Integer>>(); + public boolean hasDatacenterStoragePoolHostInfo(long dcId, boolean sharedOnly) { + Long poolCount = 0L; String sql = sharedOnly ? 
SHARED_STORAGE_POOL_HOST_INFO : STORAGE_POOL_HOST_INFO; TransactionLegacy txn = TransactionLegacy.currentTxn(); - PreparedStatement pstmt = null; - try { - pstmt = txn.prepareAutoCloseStatement(sql); + try (PreparedStatement pstmt = txn.prepareAutoCloseStatement(sql)) { pstmt.setLong(1, dcId); - ResultSet rs = pstmt.executeQuery(); while (rs.next()) { - l.add(new Pair<Long, Integer>(rs.getLong(1), rs.getInt(2))); + poolCount = rs.getLong(1); Review Comment: the SQLs (SHARED_STORAGE_POOL_HOST_INFO : STORAGE_POOL_HOST_INFO) return the storage_pool_host_ref.id, not the count of pools or hosts; is that intended ? ########## server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java: ########## @@ -2428,7 +2428,7 @@ protected void checkIfZoneIsDeletable(final long zoneId) { // Check if there are any non-removed hosts in the zone. - if (!_hostDao.listByDataCenterId(zoneId).isEmpty()) { + if (!_hostDao.listEnabledIdsByDataCenterId(zoneId).isEmpty()) { Review Comment: if a host is disabled, can the zone be deleted ? ########## server/src/main/java/com/cloud/storage/StorageManagerImpl.java: ########## @@ -1540,6 +1545,77 @@ protected String getStoragePoolNonDestroyedVolumesLog(long storagePoolId) { return sb.toString(); } + protected void cleanupConnectedHostConnectionForFailedStorage(DataStore primaryStore, List<Long> poolHostIds) { + for (Long hostId : poolHostIds) { + try { + disconnectHostFromSharedPool(hostId, primaryStore.getId()); + } catch (StorageUnavailableException | StorageConflictException e) { + logger.error("Error during cleaning up failed storage host connection", e); + } + } + } + + @Override + public void connectHostsToPool(DataStore primaryStore, List<Long> hostIds, Scope scope, + boolean handleExceptionsPartially, boolean errorOnNoUpHost) throws CloudRuntimeException { Review Comment: something like `if (hostIds.size() == 0) { return; }` ?
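On the empty-list suggestion above, the guard might look roughly like this; the signature is taken from the diff, the body is elided, and CollectionUtils.isEmpty is only an assumption about the preferred style:

```java
@Override
public void connectHostsToPool(DataStore primaryStore, List<Long> hostIds, Scope scope,
        boolean handleExceptionsPartially, boolean errorOnNoUpHost) throws CloudRuntimeException {
    // Early return when there is nothing to connect, as suggested in the review comment.
    if (CollectionUtils.isEmpty(hostIds)) {
        logger.debug("No hosts to connect to pool {}, nothing to do", primaryStore);
        return;
    }
    // ... existing connection logic from the PR ...
}
```

CollectionUtils.isEmpty also covers a null hostIds list, which a plain hostIds.size() == 0 check would not.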