Updated Branches: refs/heads/master 25fa21f19 -> 5b0314fff
CLOUDSTACK-3535: add kvminvestigator to investigate kvm host Conflicts: plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/5b0314ff Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/5b0314ff Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/5b0314ff Branch: refs/heads/master Commit: 5b0314fff906a7e7fa30724ecaa7846200cb2383 Parents: 25fa21f Author: Edison Su <sudi...@gmail.com> Authored: Wed Aug 7 11:44:00 2013 -0700 Committer: Edison Su <sudi...@gmail.com> Committed: Wed Aug 7 11:44:19 2013 -0700 ---------------------------------------------------------------------- client/tomcatconf/applicationContext.xml.in | 4 ++ client/tomcatconf/componentContext.xml.in | 1 + .../kvm/src/com/cloud/ha/KVMInvestigator.java | 72 ++++++++++++++++++++ .../kvm/resource/LibvirtComputingResource.java | 23 +++++++ .../cloud/agent/manager/AgentManagerImpl.java | 24 ++++--- .../manager/ClusteredAgentManagerImpl.java | 18 ++--- 6 files changed, 122 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cloudstack/blob/5b0314ff/client/tomcatconf/applicationContext.xml.in ---------------------------------------------------------------------- diff --git a/client/tomcatconf/applicationContext.xml.in b/client/tomcatconf/applicationContext.xml.in index 0f55134..b832b09 100644 --- a/client/tomcatconf/applicationContext.xml.in +++ b/client/tomcatconf/applicationContext.xml.in @@ -481,6 +481,10 @@ <property name="name" value="XenServerInvestigator"/> </bean> + <bean id="KVMInvestigator" class="com.cloud.ha.KVMInvestigator"> + <property name="name" value="KVMInvestigator"/> + </bean> + <bean id="UserVmDomRInvestigator" class="com.cloud.ha.UserVmDomRInvestigator"> <property name="name" value="PingInvestigator"/> </bean> 
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/5b0314ff/client/tomcatconf/componentContext.xml.in ---------------------------------------------------------------------- diff --git a/client/tomcatconf/componentContext.xml.in b/client/tomcatconf/componentContext.xml.in index 1fbec61..5ca0750 100644 --- a/client/tomcatconf/componentContext.xml.in +++ b/client/tomcatconf/componentContext.xml.in @@ -173,6 +173,7 @@ <ref bean="XenServerInvestigator"/> <ref bean="UserVmDomRInvestigator"/> <ref bean="ManagementIPSystemVMInvestigator"/> + <ref bean="KVMInvestigator"/> </list> </property> </bean> http://git-wip-us.apache.org/repos/asf/cloudstack/blob/5b0314ff/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java ---------------------------------------------------------------------- diff --git a/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java new file mode 100644 index 0000000..65024f8 --- /dev/null +++ b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package com.cloud.ha; + +import com.cloud.agent.AgentManager; +import com.cloud.agent.api.Answer; +import com.cloud.agent.api.CheckOnHostCommand; +import com.cloud.host.Host; +import com.cloud.host.HostVO; +import com.cloud.host.Status; +import com.cloud.host.dao.HostDao; +import com.cloud.hypervisor.Hypervisor; +import com.cloud.resource.ResourceManager; +import com.cloud.utils.component.AdapterBase; +import org.apache.log4j.Logger; + +import javax.inject.Inject; +import java.util.List; + +public class KVMInvestigator extends AdapterBase implements Investigator { + private final static Logger s_logger = Logger.getLogger(KVMInvestigator.class); + @Inject + HostDao _hostDao; + @Inject + AgentManager _agentMgr; + @Inject + ResourceManager _resourceMgr; + @Override + public Boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) { + Status status = isAgentAlive(host); + if (status == null) { + return null; + } + return status == Status.Up ? true : null; + } + + @Override + public Status isAgentAlive(Host agent) { + if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM) { + return null; + } + CheckOnHostCommand cmd = new CheckOnHostCommand(agent); + List<HostVO> neighbors = _resourceMgr.listAllHostsInCluster(agent.getClusterId()); + for (HostVO neighbor : neighbors) { + if (neighbor.getId() == agent.getId() || neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM) { + continue; + } + Answer answer = _agentMgr.easySend(neighbor.getId(), cmd); + + return answer.getResult() ? 
Status.Down : Status.Up; + + } + + return null; + } +} http://git-wip-us.apache.org/repos/asf/cloudstack/blob/5b0314ff/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java ---------------------------------------------------------------------- diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 0aba152..542136a 100755 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -55,6 +55,7 @@ import java.util.concurrent.Future; import javax.ejb.Local; import javax.naming.ConfigurationException; +import com.cloud.agent.api.CheckOnHostCommand; import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.libvirt.Connect; @@ -1278,6 +1279,8 @@ ServerResource { return storageHandler.handleStorageCommands((StorageSubSystemCommand)cmd); } else if (cmd instanceof PvlanSetupCommand) { return execute((PvlanSetupCommand) cmd); + } else if (cmd instanceof CheckOnHostCommand) { + return execute((CheckOnHostCommand)cmd); } else { s_logger.warn("Unsupported command "); return Answer.createUnsupportedCommandAnswer(cmd); @@ -1411,6 +1414,26 @@ ServerResource { } + protected Answer execute(CheckOnHostCommand cmd) { + ExecutorService executors = Executors.newSingleThreadExecutor(); + List<NfsStoragePool> pools = _monitor.getStoragePools(); + KVMHAChecker ha = new KVMHAChecker(pools, cmd.getHost().getPrivateNetwork().getIp()); + Future<Boolean> future = executors.submit(ha); + try { + Boolean result = future.get(); + if (result) { + return new Answer(cmd, false, "Heart is still beating..."); + } else { + return new Answer(cmd); + } + } catch (InterruptedException e) { + return new Answer(cmd, false, "can't get status of host:"); + } catch 
(ExecutionException e) { + return new Answer(cmd, false, "can't get status of host:"); + } + + } + protected Storage.StorageResourceType getStorageResourceType() { return Storage.StorageResourceType.STORAGE_POOL; } http://git-wip-us.apache.org/repos/asf/cloudstack/blob/5b0314ff/server/src/com/cloud/agent/manager/AgentManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index b1b862d..cf59b31 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -521,19 +521,19 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl ConnectionException ce = (ConnectionException)e; if (ce.isSetupError()) { s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage()); - handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); throw ce; } else { s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + e.getMessage()); - handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true); + handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true); return attache; } } else if (e instanceof HypervisorVersionChangedException) { - handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true); + handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true); throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); } else { s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect 
process for " + hostId + " due to " + e.getMessage(), e); - handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); } } @@ -547,7 +547,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl // this is tricky part for secondary storage // make it as disconnected, wait for secondary storage VM to be up // return the attache instead of null, even it is disconnectede - handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); } agentStatusTransitTo(host, Event.Ready, _nodeId); @@ -744,7 +744,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl return true; } - protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState) { + protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState, boolean removeAgent) { long hostId = attache.getId(); s_logger.info("Host " + hostId + " is disconnecting with event " + event); @@ -779,8 +779,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl s_logger.debug("Deregistering link for " + hostId + " with state " + nextStatus); } - //remove the attache - removeAgent(attache, nextStatus); + //For KVM, if removeagent is false, don't remove the agent in agentmap, pingtimeout will pick it up. 
+ if (host.getHypervisorType() != HypervisorType.KVM || removeAgent) { + removeAgent(attache, nextStatus); + } //update the DB if (host != null && transitState) { @@ -859,7 +861,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } } - handleDisconnectWithoutInvestigation(attache, event, true); + handleDisconnectWithoutInvestigation(attache, event, true, true); host = _hostDao.findById(host.getId()); if (host.getStatus() == Status.Alert || host.getStatus() == Status.Down) { _haMgr.scheduleRestartForVmsOnHost(host, true); @@ -885,7 +887,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl if (_investigate == true) { handleDisconnectWithInvestigation(_attache, _event); } else { - handleDisconnectWithoutInvestigation(_attache, _event, true); + handleDisconnectWithoutInvestigation(_attache, _event, true, false); } } catch (final Exception e) { s_logger.error("Exception caught while handling disconnect: ", e); @@ -979,7 +981,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl AgentAttache attache = null; attache = findAttache(hostId); if (attache != null) { - handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); } return true; } else if (event == Event.ShutdownRequested) { http://git-wip-us.apache.org/repos/asf/cloudstack/blob/5b0314ff/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index bc72aff..0c3d6e1 100755 --- a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -310,19 +310,19 @@ public class ClusteredAgentManagerImpl extends 
AgentManagerImpl implements Clust } @Override - protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState) { - return handleDisconnect(attache, event, false, true); + protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState, boolean removeAgent) { + return handleDisconnect(attache, event, false, true, removeAgent); } @Override protected boolean handleDisconnectWithInvestigation(AgentAttache attache, Status.Event event) { - return handleDisconnect(attache, event, true, true); + return handleDisconnect(attache, event, true, true, true); } - protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast) { + protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast, boolean removeAgent) { boolean res; if (!investigate) { - res = super.handleDisconnectWithoutInvestigation(agent, event, true); + res = super.handleDisconnectWithoutInvestigation(agent, event, true, removeAgent); } else { res = super.handleDisconnectWithInvestigation(agent, event); } @@ -365,7 +365,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust return true; } - return super.handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, false); + return super.handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, false, true); } return true; @@ -1058,7 +1058,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust AgentAttache attache = findAttache(hostId); if (attache != null) { - result = handleDisconnect(attache, Event.AgentDisconnected, false, false); + result = handleDisconnect(attache, Event.AgentDisconnected, false, false, true); } if (result) { @@ -1134,7 +1134,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust try { s_logger.debug("Management server 
" + _nodeId + " failed to rebalance agent " + hostId); _hostTransferDao.completeAgentTransfer(hostId); - handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed, true); + handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed, true, true); } catch (Exception ex) { s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup"); } @@ -1151,7 +1151,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust synchronized (_agents) { ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId); if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) { - handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance, true); + handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance, true, true); ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId); if (forwardAttache == null) { s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");