[ 
https://issues.apache.org/jira/browse/CLOUDSTACK-10326?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16449828#comment-16449828
 ] 

ASF GitHub Bot commented on CLOUDSTACK-10326:
---------------------------------------------

nvazquez closed pull request #2493: CLOUDSTACK-10326: Prevent hosts fall into 
Maintenance when there are running VMs on it
URL: https://github.com/apache/cloudstack/pull/2493
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/engine/schema/src/com/cloud/vm/dao/VMInstanceDao.java 
b/engine/schema/src/com/cloud/vm/dao/VMInstanceDao.java
index 69efea42df9..6fda4a15c32 100755
--- a/engine/schema/src/com/cloud/vm/dao/VMInstanceDao.java
+++ b/engine/schema/src/com/cloud/vm/dao/VMInstanceDao.java
@@ -150,4 +150,6 @@
     VMInstanceVO findVMByHostNameInZone(String hostName, long zoneId);
 
     boolean isPowerStateUpToDate(long instanceId);
+
+    List<VMInstanceVO> listNonMigratingVmsByHostEqualsLastHost(long hostId);
 }
diff --git a/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java 
b/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java
index 6e97d1275a6..1565f53233b 100755
--- a/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java
+++ b/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java
@@ -93,6 +93,7 @@
     protected GenericSearchBuilder<VMInstanceVO, String> 
DistinctHostNameSearch;
     protected SearchBuilder<VMInstanceVO> HostAndStateSearch;
     protected SearchBuilder<VMInstanceVO> StartingWithNoHostSearch;
+    protected SearchBuilder<VMInstanceVO> NotMigratingSearch;
 
     @Inject
     ResourceTagDao _tagsDao;
@@ -280,6 +281,11 @@ protected void init() {
         DistinctHostNameSearch.join("nicSearch", nicSearch, 
DistinctHostNameSearch.entity().getId(), nicSearch.entity().getInstanceId(), 
JoinBuilder.JoinType.INNER);
         DistinctHostNameSearch.done();
 
+        NotMigratingSearch = createSearchBuilder();
+        NotMigratingSearch.and("host", 
NotMigratingSearch.entity().getHostId(), Op.EQ);
+        NotMigratingSearch.and("lastHost", 
NotMigratingSearch.entity().getLastHostId(), Op.EQ);
+        NotMigratingSearch.and("state", 
NotMigratingSearch.entity().getState(), Op.NEQ);
+        NotMigratingSearch.done();
     }
 
     @Override
@@ -304,6 +310,15 @@ protected void init() {
         return listBy(sc);
     }
 
+    @Override
+    public List<VMInstanceVO> listNonMigratingVmsByHostEqualsLastHost(long 
hostId) {
+        SearchCriteria<VMInstanceVO> sc = NotMigratingSearch.create();
+        sc.setParameters("host", hostId);
+        sc.setParameters("lastHost", hostId);
+        sc.setParameters("state", State.Migrating);
+        return listBy(sc);
+    }
+
     @Override
     public List<VMInstanceVO> listByZoneId(long zoneId) {
         SearchCriteria<VMInstanceVO> sc = AllFieldsSearch.create();
diff --git a/server/src/com/cloud/resource/ResourceManagerImpl.java 
b/server/src/com/cloud/resource/ResourceManagerImpl.java
index 2966d41d8bf..25bfc4c9c67 100755
--- a/server/src/com/cloud/resource/ResourceManagerImpl.java
+++ b/server/src/com/cloud/resource/ResourceManagerImpl.java
@@ -1296,10 +1296,17 @@ public boolean checkAndMaintain(final long hostId) {
             if (host.getType() != Host.Type.Storage) {
                 final List<VMInstanceVO> vos = _vmDao.listByHostId(hostId);
                 final List<VMInstanceVO> vosMigrating = 
_vmDao.listVmsMigratingFromHost(hostId);
+                final List<VMInstanceVO> failedMigratedVms = 
_vmDao.listNonMigratingVmsByHostEqualsLastHost(hostId);
                 if (vos.isEmpty() && vosMigrating.isEmpty()) {
-                    resourceStateTransitTo(host, 
ResourceState.Event.InternalEnterMaintenance, _nodeId);
-                    hostInMaintenance = true;
-                    
ActionEventUtils.onCompletedActionEvent(CallContext.current().getCallingUserId(),
 CallContext.current().getCallingAccountId(), EventVO.LEVEL_INFO, 
EventTypes.EVENT_MAINTENANCE_PREPARE, "completed maintenance for host " + 
hostId, 0);
+                    if (!failedMigratedVms.isEmpty()) {
+                        s_logger.debug("Unable to migrate " + 
failedMigratedVms.size() + " VM(s) from host " + host.getUuid());
+                        resourceStateTransitTo(host, 
ResourceState.Event.UnableToMigrate, _nodeId);
+                    } else {
+                        s_logger.debug("Host " + host.getUuid() + " entering 
in Maintenance");
+                        resourceStateTransitTo(host, 
ResourceState.Event.InternalEnterMaintenance, _nodeId);
+                        hostInMaintenance = true;
+                        
ActionEventUtils.onCompletedActionEvent(CallContext.current().getCallingUserId(),
 CallContext.current().getCallingAccountId(), EventVO.LEVEL_INFO, 
EventTypes.EVENT_MAINTENANCE_PREPARE, "completed maintenance for host " + 
hostId, 0);
+                    }
                 }
             }
         } catch (final NoTransitionException e) {


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Prevent hosts fall into Maintenance when there are running VMs on it
> --------------------------------------------------------------------
>
>                 Key: CLOUDSTACK-10326
>                 URL: https://issues.apache.org/jira/browse/CLOUDSTACK-10326
>             Project: CloudStack
>          Issue Type: Bug
>      Security Level: Public(Anyone can view this level - this is the 
> default.) 
>    Affects Versions: 4.11.0.0
>            Reporter: Nicolas Vazquez
>            Assignee: Nicolas Vazquez
>            Priority: Major
>             Fix For: 4.11.1.0
>
>         Attachments: CLOUDSTACK-10326-Debug.png, 
> CLOUDSTACK-10326-InitialState.png, CLOUDSTACK-10326-Migrating.png, 
> CLOUDSTACK-10326-MigrationFailed.png
>
>
> This issue was discovered, fixed and tested on KVM, but applies to every 
> hypervisor.
> h2. Background
> When maintenance mode is enabled on a host, the host state is set to 
> 'PrepareForMaintenance' and its running VMs are migrated to another host. 
> After every VM has been migrated, the host goes to the 'Maintenance' state.
> Checks are performed in the ResourceManagerImpl.checkAndMaintain() method:
>  * List VMs with host_id = HOST_ID
>  * List VMs with last_host_id = HOST_ID and state=Migrating
> When both queries are empty, then the host can be put into Maintenance.
> When a VM is being migrated to DEST_HOST, its host_id column is set to 
> DEST_HOST, last_host_id = ORIGIN_HOST and state = Migrating. If the 
> migration then fails, host_id is set back to ORIGIN_HOST, so that 
> host_id = last_host_id = ORIGIN_HOST.
> h2. Issue
> This sequence:
>  * Enable maintenance mode on ORIGIN_HOST
>  * VMs start being migrated to a host, say DEST_HOST
>  * checkAndMaintain() starts:
>  ** First check passes (no VM with host_id = ORIGIN_HOST_ID as those are 
> being migrated)
>  ** Before the second check, one or more migrations fail
>  ** Second check passes, however there are VMs running on the host as 
> migrations have failed.
>  * Host goes into Maintenance state.
> Screenshots are attached; the query executed in each case was:
> select id, name, instance_name, state, host_id, last_host_id from vm_instance;



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to