Martin Peřina has uploaded a new change for review. Change subject: core: Replace FenceExecutor in Start/Stop/RestartVDSCommands ......................................................................
core: Replace FenceExecutor in Start/Stop/RestartVDSCommands Replaces FenceExecutor with HostFenceActionExecutor in Start/Stop/RestartVDSCommands. Change-Id: I2cda348997927decc1b61bf4ce06c4489859c1d3 Bug-Url: https://bugzilla.redhat.com/1182510 Signed-off-by: Martin Perina <[email protected]> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StartVdsCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StopVdsCommand.java M backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/StartVdsCommandTest.java M backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties 6 files changed, 103 insertions(+), 586 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/66/38966/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java index 124e703..e5ada8e 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java @@ -1,24 +1,23 @@ package org.ovirt.engine.core.bll; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.concurrent.Callable; import org.apache.commons.lang.StringUtils; import org.ovirt.engine.core.bll.context.CommandContext; -import org.ovirt.engine.core.bll.pm.PowerManagementHelper; -import org.ovirt.engine.core.bll.pm.PowerManagementHelper.AgentsIterator; +import org.ovirt.engine.core.bll.pm.HostFenceActionExecutor; import org.ovirt.engine.core.bll.validator.FenceValidator; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.action.FenceVdsActionParameters; import org.ovirt.engine.core.common.action.VdcActionType; -import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; -import org.ovirt.engine.core.common.businessentities.FenceStatusReturnValue; -import org.ovirt.engine.core.common.businessentities.FenceAgent; +import org.ovirt.engine.core.common.businessentities.FencingPolicy; +import org.ovirt.engine.core.common.businessentities.VDS; import org.ovirt.engine.core.common.businessentities.VDSStatus; import org.ovirt.engine.core.common.businessentities.VM; +import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; +import org.ovirt.engine.core.common.businessentities.pm.FenceOperationResult; +import org.ovirt.engine.core.common.businessentities.pm.FenceOperationResult.Status; import org.ovirt.engine.core.common.errors.VdcBLLException; import org.ovirt.engine.core.common.errors.VdcBllErrors; import org.ovirt.engine.core.common.errors.VdcBllMessages; @@ -26,20 +25,14 @@ import org.ovirt.engine.core.common.utils.Pair; import org.ovirt.engine.core.common.vdscommands.SetVdsStatusVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; -import org.ovirt.engine.core.common.vdscommands.VDSFenceReturnValue; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.dal.dbbroker.DbFacade; import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableBase; -import org.ovirt.engine.core.utils.ThreadUtils; public abstract class FenceVdsBaseCommand<T extends FenceVdsActionParameters> extends VdsCommand<T> { - private static final int SLEEP_BEFORE_FIRST_ATTEMPT = 5000; private static final String INTERNAL_FENCE_USER = "Engine"; - private static final String VDSM_STATUS_UNKONWN = "unknown"; - private static final int UNKNOWN_RESULT_ALLOWED = 3; protected FenceValidator fenceValidator; - protected FenceExecutor fenceExecutor; /** * Constructor for command creation when compensation is applied on startup @@ -48,7 +41,6 @@ */ protected FenceVdsBaseCommand(Guid commandId) { super(commandId); - fenceExecutor = new FenceExecutor(getVds(), getParameters().getFencingPolicy()); fenceValidator = new FenceValidator(); } @@ -58,7 +50,6 @@ public FenceVdsBaseCommand(T parameters, CommandContext commandContext) { super(parameters, commandContext); - fenceExecutor = new FenceExecutor(getVds(), getParameters().getFencingPolicy()); fenceValidator = new FenceValidator(); } @@ -103,10 +94,13 @@ log.info("Power-Management: {} of host '{}' initiated.", getAction(), getVdsName()); audit(AuditLogType.FENCE_OPERATION_STARTED); VDSStatus lastStatus = getVds().getStatus(); - VDSFenceReturnValue result = null; + FenceOperationResult result = null; try { setup(); - result = fence(); + result = createHostFenceActionExecutor( + getVds(), + getParameters().getFencingPolicy() + ).fence(getParameters().getAction()); handleResult(result); if (getSucceeded()) { log.info("Power-Management: {} host '{}' succeeded.", getAction(), getVdsName()); @@ -118,7 +112,7 @@ } finally { if (!getSucceeded()) { setStatus(lastStatus); - if (!wasSkippedDueToPolicy(result)) { + if (result.getStatus() != Status.SKIPPED_DUE_TO_POLICY) { // show alert only if command was not skipped due to fencing policy alertIfPowerManagementOperationFailed(); } @@ -130,118 +124,36 @@ } } + protected HostFenceActionExecutor createHostFenceActionExecutor(VDS fencedHost, FencingPolicy fencingPolicy) { + return new HostFenceActionExecutor(fencedHost, fencingPolicy); + } + private void audit(AuditLogType auditMessage) { AuditLogableBase logable = new AuditLogableBase(); - logable.addCustomValue("Action", getAction().name()); + logable.addCustomValue("Action", getAction().name().toLowerCase()); logable.addCustomValue("VdsName", getVds().getName()); logable.setVdsId(getVdsId()); auditLogDirector.log(logable, auditMessage); } - private void handleResult(VDSFenceReturnValue result) { - if (wasSkippedDueToPolicy(result)) { - // when fencing is skipped due to policy we want to suppress command result logging, because - // we fire an alert in VdsNotRespondingTreatment - setCommandShouldBeLogged(false); - setSucceeded(false); - } else { - setSucceeded(result.getSucceeded()); + private void handleResult(FenceOperationResult result) { + switch (result.getStatus()) { + case SKIPPED_DUE_TO_POLICY: + // when fencing is skipped due to policy we want to suppress command result logging, because + // we fire an alert in VdsNotRespondingTreatment + setCommandShouldBeLogged(false); + setSucceeded(false); + break; + + case SUCCESS: + handleSpecificCommandActions(); + setSucceeded(true); + break; + + default: + setSucceeded(false); } setActionReturnValue(result); - } - - /** - * Attempt fencing using agents by order. - */ - private VDSFenceReturnValue fence() { - // loop over agents and try to fence. - AgentsIterator iterator = PowerManagementHelper.getAgentsIterator(getVds().getFenceAgents()); - VDSFenceReturnValue result = null; - while (iterator.hasNext()) { - result = fence(iterator.next()); - if (result.getSucceeded()) { - break; - } - } - return result; - } - - /** - * Attempt to fence the host using agent\agents with next order. - * - */ - private VDSFenceReturnValue fence(List<FenceAgent> agents) { - if (agents.size() == 1) { - return fence(agents.get(0), getFenceRetries()); - } else if (agents.size() > 1) { - return fenceConcurrently(agents); - } else { // 0 agents, we never reach here. - return null; - } - } - - /** - * Creates tasks based on the supplied agents. - */ - protected List<Callable<VDSFenceReturnValue>> createTasks(List<FenceAgent> agents) { - List<Callable<VDSFenceReturnValue>> tasks = new ArrayList<Callable<VDSFenceReturnValue>>(); - for (FenceAgent agent : agents) { - tasks.add(createTask(agent)); - } - return tasks; - } - - /** - * Creates a task based on the supplied agent. - */ - protected Callable<VDSFenceReturnValue> createTask(final FenceAgent agent) { - return (new Callable<VDSFenceReturnValue>() { - @Override - public VDSFenceReturnValue call() { - return fence(agent, getFenceRetries()); - } - }); - } - - private VDSFenceReturnValue fence(FenceAgent fenceAgent, int retries) { - VDSFenceReturnValue fenceExecutionResult = fenceExecutor.fence(getAction(), fenceAgent); - if (wasSkippedDueToStatus(fenceExecutionResult)) { - log.info("Attemp to {} host using fence agent '{}' skipped, host is already at the requested state.", - getAction().name().toLowerCase(), - fenceAgent.getId()); - } else if (wasSkippedDueToPolicy(fenceExecutionResult)) { - // fencing execution was skipped due to fencing policy - return fenceExecutionResult; - } else { - if (fenceExecutionResult.getSucceeded()) { - boolean requiredStatusAchieved = waitForStatus(); - int i = 0; - while (!requiredStatusAchieved && i < retries) { - fenceExecutionResult = fenceExecutor.fence(getAction(), fenceAgent); - requiredStatusAchieved = waitForStatus(); - i++; - } - if (requiredStatusAchieved) { - handleSpecificCommandActions(); - } else { - auditFailure(); - } - fenceExecutionResult.setSucceeded(requiredStatusAchieved); - } else { - logAgentFailure(fenceExecutionResult); - } - } - return fenceExecutionResult; - } - - private void logAgentFailure(final VDSFenceReturnValue result) { - if (!wasSkippedDueToPolicy(result)) { - log.error("Failed to {} host using fence agent {} (if other agents are running, {} may still succeed).", - getAction().name().toLowerCase(), - result.getFenceAgentUsed().getId() == null ? "New Agent (no ID)" : result.getFenceAgentUsed() - .getId(), - getAction().name().toLowerCase()); - } } protected void setStatus() { @@ -265,72 +177,6 @@ return StringUtils.isEmpty(userName) ? INTERNAL_FENCE_USER : userName; } - protected boolean waitForStatus() { - int i = 1; - int j = 1; - boolean requiredStatusReached = false; - String requiredStatus = getRequiredStatus(); - String hostName = getVds().getName(); - log.info("Waiting for host '{}' to reach status '{}'", hostName, requiredStatus); - // Waiting before first attempt to check the host status. - // This is done because if we will attempt to get host status immediately - // in most cases it will not turn from on/off to off/on and we will need - // to wait a full cycle for it. - ThreadUtils.sleep(getSleepBeforeFirstAttempt()); - int retries = getWaitForStatusRerties(); - while (!requiredStatusReached && i <= retries) { - log.info("Attempt {} to get host '{}' status", i, hostName); - VDSFenceReturnValue returnValue = fenceExecutor.checkHostStatus(); - if (returnValue != null && returnValue.getSucceeded()) { - String status = ((FenceStatusReturnValue) returnValue.getReturnValue()).getStatus(); - if (status.equalsIgnoreCase(VDSM_STATUS_UNKONWN)) { - // Allow command to fail temporarily - if (j <= UNKNOWN_RESULT_ALLOWED && i <= retries) { - ThreadUtils.sleep(getDelayInSeconds() * 1000); - i++; - j++; - } else { - // No need to retry , agent definitions are corrupted - log.error("Host '{}' PM Agent definitions are corrupted, aborting fence operation.", hostName); - break; - } - } - else { - if (requiredStatus.equalsIgnoreCase(status)) { - requiredStatusReached = true; - log.info("Host '{}' status is '{}'", hostName, requiredStatus); - } else { - i++; - if (i <= retries) { - ThreadUtils.sleep(getDelayInSeconds() * 1000); - } - } - } - } else { - log.error("Failed to get host '{}' status.", hostName); - break; - } - } - return requiredStatusReached; - } - - protected void auditFailure() { - // Send an Alert - String actionName = (getParameters().getParentCommand() == VdcActionType.RestartVds) ? - FenceActionType.RESTART.name() : getAction().name(); - AuditLogableBase auditLogable = new AuditLogableBase(); - auditLogable.addCustomValue("Host", getVds().getName()); - auditLogable.addCustomValue("Status", actionName); - auditLogable.setVdsId(getVds().getId()); - auditLogDirector.log(auditLogable, AuditLogType.VDS_ALERT_FENCE_STATUS_VERIFICATION_FAILED); - log.error("Failed to verify host '{}' {} status. Have retried {} times with delay of {} seconds between each retry.", - getVds().getName(), - getAction().name(), - getWaitForStatusRerties(), - getDelayInSeconds()); - - } - @Override protected Map<String, Pair<String, String>> getExclusiveLocks() { return createFenceExclusiveLocksMap(getVdsId()); @@ -342,70 +188,8 @@ VdcBllMessages.POWER_MANAGEMENT_ACTION_ON_ENTITY_ALREADY_IN_PROGRESS)); } - protected void logConcurrentAgentsFailure(FenceActionType action, - List<FenceAgent> agents, - VDSFenceReturnValue result) { - StringBuilder builder = new StringBuilder(); - for (FenceAgent agent : agents) { - builder.append(agent.getId()).append(", "); - } - String agentIds = builder.toString(); - agentIds = agentIds.substring(0, agentIds.length() - 2); - log.error("Failed to {} host using fence agents '{}' concurrently: {}", - action.name(), - agentIds, - result.getExceptionString()); - - } - - protected FenceProxyLocator createProxyHostLocator() { - return new FenceProxyLocator(getVds(), getParameters().getFencingPolicy()); - } - protected List<VM> getVmList() { return getVmDAO().getAllRunningForVds(getVdsId()); - } - - /** - * in the specific scenario where this stop/start command is executed in the context of a restart, we interpret - * 'skipped' as having occurred to due a fencing policy violation. - */ - private boolean wasSkippedDueToPolicy(VDSFenceReturnValue result) { - return result != null - && result.isSkippedDueToPolicy() - && getParameters().getParentCommand() == VdcActionType.RestartVds; - } - - /** - * if stop/start command returned with status=skipped, and the command was NOT run in the context of a restart then - * we interpret the skip as having occurred because the host is already at the required state. - */ - private boolean wasSkippedDueToStatus(VDSFenceReturnValue result) { - return result != null - && result.isSkippedDueToStatus() - && getParameters().getParentCommand() != VdcActionType.RestartVds; - } - - public FenceValidator getFenceValidator() { - return fenceValidator; - } - - public void setFenceValidator(FenceValidator fenceValidator) { - this.fenceValidator = fenceValidator; - } - - // Exported to a method for mocking purposes (when running unit-tests, we don't want to wait 5 seconds for each - // test...) - int getSleepBeforeFirstAttempt() { - return SLEEP_BEFORE_FIRST_ATTEMPT; - } - - public FenceExecutor getFenceExecutor() { - return fenceExecutor; - } - - public void setFenceExecutor(FenceExecutor fenceExecutor) { - this.fenceExecutor = fenceExecutor; } /** @@ -420,46 +204,10 @@ protected abstract void teardown(); - protected abstract String getRequiredStatus(); - protected abstract void handleSpecificCommandActions(); - - /** - * Gets the number of times to retry a get status PM operation after stop/start PM operation. - */ - protected abstract int getWaitForStatusRerties(); - - /** - * Attempt to fence the host using several agents with the same 'order' (thus considered 'concurrent'). Return the - * result of one of the agents (the most relevant one is chosen). - */ - protected abstract VDSFenceReturnValue fenceConcurrently(List<FenceAgent> agents); - - /** - * Get the number of time to retry the fence operation, if the first attempt fails. - */ - protected abstract int getFenceRetries(); - - /** - * Gets the number of seconds to delay between each retry. - */ - protected abstract int getDelayInSeconds(); /** * Get the fence action */ protected abstract FenceActionType getAction(); - - /** - * Returns numbers of agents for which fencing operation was skipped - */ - protected int countSkippedOperations(List<VDSFenceReturnValue> results) { - int numOfSkipped = 0; - for (VDSFenceReturnValue result : results) { - if (wasSkippedDueToPolicy(result)) { - numOfSkipped++; - } - } - return numOfSkipped; - } } diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java index 0d72114..d9ea2cd 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java @@ -14,6 +14,7 @@ import org.apache.commons.lang.StringUtils; import org.ovirt.engine.core.bll.context.CommandContext; +import org.ovirt.engine.core.bll.pm.HostFenceActionExecutor; import org.ovirt.engine.core.bll.validator.FenceValidator; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.action.FenceVdsActionParameters; @@ -23,14 +24,15 @@ import org.ovirt.engine.core.common.action.VdcActionType; import org.ovirt.engine.core.common.action.VdcReturnValueBase; import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; -import org.ovirt.engine.core.common.businessentities.FenceStatusReturnValue; import org.ovirt.engine.core.common.businessentities.VDSStatus; +import org.ovirt.engine.core.common.businessentities.pm.FenceOperationResult; +import org.ovirt.engine.core.common.businessentities.pm.FenceOperationResult.Status; +import org.ovirt.engine.core.common.businessentities.pm.HostPowerStatus; import org.ovirt.engine.core.common.errors.VdcBllMessages; import org.ovirt.engine.core.common.locks.LockingGroup; import org.ovirt.engine.core.common.utils.Pair; import org.ovirt.engine.core.common.vdscommands.SetVdsStatusVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; -import org.ovirt.engine.core.common.vdscommands.VDSFenceReturnValue; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.dal.dbbroker.DbFacade; @@ -120,7 +122,7 @@ final String sessionId = getParameters().getSessionId(); // do not try to stop Host if Host is reported as Down via PM - if (FenceExecutor.isStatusOff(new FenceExecutor(getVds()).checkHostStatus())) { + if (isHostPoweredOff()) { returnValue.setSucceeded(true); } else { @@ -151,6 +153,13 @@ runVdsCommand(VDSCommandType.SetVdsStatus, new SetVdsStatusVDSCommandParameters(vdsId, VDSStatus.NonResponsive)); } + } + + protected boolean isHostPoweredOff() { + HostFenceActionExecutor executor = new HostFenceActionExecutor(getVds()); + FenceOperationResult result = executor.fence(FenceActionType.STATUS); + return result.getStatus() == Status.SUCCESS + && result.getPowerStatus() == HostPowerStatus.OFF; } private void executeFenceVdsManuallyAction(final Guid vdsId, String sessionId) { @@ -206,11 +215,9 @@ */ protected boolean wasSkippedDueToPolicy(VdcReturnValueBase result) { boolean skipped = false; - if (result.getActionReturnValue() instanceof VDSFenceReturnValue) { - VDSFenceReturnValue fenceReturnValue = result.getActionReturnValue(); - if (fenceReturnValue.getReturnValue() instanceof FenceStatusReturnValue) { - skipped = ((FenceStatusReturnValue) fenceReturnValue.getReturnValue()).getIsSkippedDueToPolicy(); - } + if (result.getActionReturnValue() instanceof FenceOperationResult) { + FenceOperationResult fenceResult = result.getActionReturnValue(); + skipped = fenceResult.getStatus() == Status.SKIPPED_DUE_TO_POLICY; } return skipped; } diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StartVdsCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StartVdsCommand.java index 1441df1..0e421f7 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StartVdsCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StartVdsCommand.java @@ -1,26 +1,15 @@ package org.ovirt.engine.core.bll; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.Future; - import org.ovirt.engine.core.bll.context.CommandContext; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.action.FenceVdsActionParameters; import org.ovirt.engine.core.common.action.LockProperties; import org.ovirt.engine.core.common.action.LockProperties.Scope; import org.ovirt.engine.core.common.action.VdcActionType; -import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; -import org.ovirt.engine.core.common.businessentities.FenceAgent; import org.ovirt.engine.core.common.businessentities.VDS; import org.ovirt.engine.core.common.businessentities.VDSStatus; -import org.ovirt.engine.core.common.config.Config; -import org.ovirt.engine.core.common.config.ConfigValues; +import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; import org.ovirt.engine.core.common.errors.VdcBllMessages; -import org.ovirt.engine.core.common.vdscommands.VDSFenceReturnValue; -import org.ovirt.engine.core.compat.Guid; -import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil; /** * Send a Start action to a power control device. @@ -29,11 +18,9 @@ * to clear the VMs and start them. * * @see RestartVdsCommand - * @see FenceVdsBaseCommand#restartVdsVms() */ @NonTransactiveCommandAttribute public class StartVdsCommand<T extends FenceVdsActionParameters> extends FenceVdsBaseCommand<T> { - private static final String VDSM_STATUS_ON = "on"; public StartVdsCommand(T parameters) { this(parameters, null); } @@ -67,64 +54,18 @@ return retValue; } - @Override - /** - * Attempt to 'start' the host using the provided agents. This method receives several agents which have the same - * 'order' (thus considered 'concurrent') and runs them concurrently until the first succeeds. It returns the first - * agent that succeeded, or any failed agent if none succeeded. - */ - protected VDSFenceReturnValue fenceConcurrently(List<FenceAgent> agents) { - // create a 'task' for each agent, and insert tasks into a concurrent 'executor'. - ExecutorCompletionService<VDSFenceReturnValue> tasksExecutor = ThreadPoolUtil.createCompletionService(); - List<Future<VDSFenceReturnValue>> futures = - ThreadPoolUtil.submitTasks(tasksExecutor, createTasks(agents)); - VDSFenceReturnValue result = null; - for (int i = 0; i < agents.size(); ++i) { - try { - result = tasksExecutor.take().get(); - if (result != null && result.getSucceeded()) { - cancelFutureTasks(futures, result.getFenceAgentUsed().getId()); - break; - } - } catch (ExecutionException | InterruptedException e) { - log.warn("Attempt to start host '{}' using one of its agents has failed: {}", - getVdsName(), - e.getMessage()); - log.debug("Exception", e); - } - } - if (result != null && !result.getSucceeded()) { - logConcurrentAgentsFailure(FenceActionType.START, agents, result); - } - return result; - } - - private void cancelFutureTasks(List<Future<VDSFenceReturnValue>> futures, Guid agentId) { - if (!futures.isEmpty()) { - log.info("Start of host '{}' succeeded using fencing agent '{}'," - + " cancelling concurrent attempts by other agents to start the host", - getVdsName(), - agentId); - } - for (Future<VDSFenceReturnValue> future : futures) { - try { - log.info("Cancelling agent '{}' ", future.get().getFenceAgentUsed().getId()); - } catch (InterruptedException | ExecutionException e) { - // do nothing - } - future.cancel(true); - } - } protected boolean legalStatusForStartingVds(VDSStatus status) { - return status == VDSStatus.Down || status == VDSStatus.NonResponsive || status == VDSStatus.Reboot || status == VDSStatus.Maintenance; + return status == VDSStatus.Down + || status == VDSStatus.NonResponsive + || status == VDSStatus.Reboot + || status == VDSStatus.Maintenance; } @Override protected void setStatus() { if (getParameters().isChangeHostToMaintenanceOnStart()) { setStatus(VDSStatus.Maintenance); - } - else { + } else { setStatus(VDSStatus.NonResponsive); } } @@ -161,33 +102,10 @@ } @Override - protected int getWaitForStatusRerties() { - return Config.<Integer> getValue(ConfigValues.FenceStartStatusRetries); - } - - @Override - protected int getDelayInSeconds() { - return Config.<Integer> getValue(ConfigValues.FenceStartStatusDelayBetweenRetriesInSec); - } - - @Override protected void freeLock() { if (getParameters().getParentCommand() != VdcActionType.RestartVds) { super.freeLock(); } - } - - @Override - protected int getFenceRetries() { - // in case of 'Start' allow one retry since there is a chance that Agent & Host use the same power supply - // and a Start command had failed (because we just get success on the script invocation and not on its - // result). - return 1; - } - - @Override - protected String getRequiredStatus() { - return VDSM_STATUS_ON; } @Override diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StopVdsCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StopVdsCommand.java index fe115dc..49aa614 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StopVdsCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/StopVdsCommand.java @@ -2,29 +2,22 @@ import static org.ovirt.engine.core.common.errors.VdcBllMessages.VAR__ACTION__STOP; -import org.ovirt.engine.core.bll.context.CommandContext; - -import java.util.LinkedList; import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorCompletionService; +import org.ovirt.engine.core.bll.context.CommandContext; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.action.FenceVdsActionParameters; import org.ovirt.engine.core.common.action.LockProperties; import org.ovirt.engine.core.common.action.LockProperties.Scope; import org.ovirt.engine.core.common.action.VdcActionType; -import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; -import org.ovirt.engine.core.common.businessentities.FenceAgent; import org.ovirt.engine.core.common.businessentities.VDSStatus; import org.ovirt.engine.core.common.businessentities.VM; -import org.ovirt.engine.core.common.config.Config; -import org.ovirt.engine.core.common.config.ConfigValues; +import org.ovirt.engine.core.common.businessentities.VdsSpmStatus; +import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; import org.ovirt.engine.core.common.errors.VdcBllMessages; +import org.ovirt.engine.core.common.vdscommands.SpmStopVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.UpdateVdsVMsClearedVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; -import org.ovirt.engine.core.common.vdscommands.VDSFenceReturnValue; -import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,7 +32,6 @@ */ @NonTransactiveCommandAttribute public class StopVdsCommand<T extends FenceVdsActionParameters> extends FenceVdsBaseCommand<T> { - private final String VDSM_STATUS_OFF = "off"; private static final Logger log = LoggerFactory.getLogger(StopVdsCommand.class); public StopVdsCommand(T parameters) { @@ -119,105 +111,10 @@ } @Override - protected int getWaitForStatusRerties() { - return Config.<Integer> getValue(ConfigValues.FenceStopStatusRetries); - } - - @Override - protected int getDelayInSeconds() { - return Config.<Integer> getValue(ConfigValues.FenceStopStatusDelayBetweenRetriesInSec); - } - - @Override protected void freeLock() { if (getParameters().getParentCommand() != VdcActionType.RestartVds) { super.freeLock(); } - } - - @Override - protected int getFenceRetries() { - return 0; - } - - @Override - /** - * Attempt to stop the host using the provided agents concurrently. - * - */ - protected VDSFenceReturnValue fenceConcurrently(List<FenceAgent> agents) { - try { - // create a 'task' for each agent, and insert tasks into an 'executor' which executes tasks - // concurrently. (meaning: attempt to stop the host using the agents concurrently). - ExecutorCompletionService<VDSFenceReturnValue> tasksExecutor = - ThreadPoolUtil.createCompletionService(createTasks(agents)); - - // run the tasks concurrently, return all results in a list. - List<VDSFenceReturnValue> results = collectResults(tasksExecutor, agents.size()); - - // If any agent has failed, return the result for that agent, since one failure is enough to fail 'Stop'. If - // no agent has failed, return any successful agent. - VDSFenceReturnValue result = getMostRelevantResult(results); - - if (result != null) { - if (result.getSucceeded()) { - int numOfSkipped = countSkippedOperations(results); - if (numOfSkipped == 0) { - // no agent reported that stop operation was skipped, continue with stop operation - handleSpecificCommandActions(); - } else { - // check if all agents reported that stop operation has skipped, - // if so, just return, skipping is handled in caller - if (numOfSkipped < agents.size()) { - // not all agents skipped stop operation, mark as error - result.setSucceeded(false); - } - } - } else { - logConcurrentAgentsFailure(FenceActionType.STOP, agents, result); - } - } - return result; - } catch (InterruptedException | ExecutionException e) { - log.error("Exception", e); - } - return null; - } - - /** - * Execute the supplied tasks concurrently. - * - * @return list of result objects for the executed tasks. - * @throws ExecutionException - */ - private List<VDSFenceReturnValue> collectResults(ExecutorCompletionService<VDSFenceReturnValue> tasksExecutor, - int threadNum) - throws InterruptedException, ExecutionException { - List<VDSFenceReturnValue> executedTasks = new LinkedList<>(); - for (int i = 0; i < threadNum; i++) { - executedTasks.add(tasksExecutor.take().get()); - } - return executedTasks; - } - - /** - * Invoked when stopping a host concurrently using 2 agents. If any agent has failed, return the result for that - * agent, since one failure is enough to fail 'Stop'. If no agent has failed, return any successful agent. - */ - private VDSFenceReturnValue getMostRelevantResult(List<VDSFenceReturnValue> results) - throws InterruptedException, - ExecutionException { - for (VDSFenceReturnValue result : results) { - if (!result.getSucceeded()) { - return result; - } - } - return results.get(0); - } - - @Override - protected String getRequiredStatus() { - return VDSM_STATUS_OFF; } @Override @@ -234,6 +131,19 @@ protected void setup() { // Set status immediately to prevent a race (BZ 636950/656224) setStatus(); + + stopSpm(); + } + + private void stopSpm() { + // get the host spm status again from the database in order to test it's current state. + VdsSpmStatus spmStatus = getDbFacade().getVdsDao().get(getVds().getId()).getSpmStatus(); + // try to stop SPM if action is Restart or Stop and the vds is SPM + if (spmStatus != VdsSpmStatus.None) { + getBackend().getResourceManager().RunVdsCommand( + VDSCommandType.SpmStop, + new SpmStopVDSCommandParameters(getVds().getId(), getVds().getStoragePoolId())); + } } @Override diff --git a/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/StartVdsCommandTest.java b/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/StartVdsCommandTest.java index 7ecaccd..bb7b65f 100644 --- a/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/StartVdsCommandTest.java +++ b/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/StartVdsCommandTest.java @@ -9,7 +9,6 @@ import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.stub; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -18,28 +17,29 @@ import java.util.List; import org.junit.Before; -import org.junit.ClassRule; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mock; import org.mockito.runners.MockitoJUnitRunner; import org.ovirt.engine.core.bll.interfaces.BackendInternal; +import org.ovirt.engine.core.bll.pm.HostFenceActionExecutor; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.action.FenceVdsActionParameters; import org.ovirt.engine.core.common.businessentities.AuditLog; -import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; import org.ovirt.engine.core.common.businessentities.FenceAgent; -import org.ovirt.engine.core.common.businessentities.FenceStatusReturnValue; +import org.ovirt.engine.core.common.businessentities.FencingPolicy; import org.ovirt.engine.core.common.businessentities.VDS; import org.ovirt.engine.core.common.businessentities.VDSGroup; import org.ovirt.engine.core.common.businessentities.VDSStatus; import org.ovirt.engine.core.common.businessentities.VM; -import org.ovirt.engine.core.common.config.ConfigValues; +import org.ovirt.engine.core.common.businessentities.pm.FenceActionType; +import org.ovirt.engine.core.common.businessentities.pm.FenceOperationResult; +import org.ovirt.engine.core.common.businessentities.pm.FenceOperationResult.Status; +import org.ovirt.engine.core.common.businessentities.pm.HostPowerStatus; import org.ovirt.engine.core.common.errors.VdcBLLException; import org.ovirt.engine.core.common.interfaces.VDSBrokerFrontend; import org.ovirt.engine.core.common.vdscommands.SetVdsStatusVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; -import org.ovirt.engine.core.common.vdscommands.VDSFenceReturnValue; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.dal.dbbroker.DbFacade; import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector; @@ -48,7 +48,6 @@ import org.ovirt.engine.core.dao.VdsDAO; import org.ovirt.engine.core.dao.VdsGroupDAO; import org.ovirt.engine.core.dao.VmDAO; -import org.ovirt.engine.core.utils.MockConfigRule; @RunWith(MockitoJUnitRunner.class) public class StartVdsCommandTest extends DbDependentTestBase { @@ -57,12 +56,8 @@ private static final Guid FENCECD_HOST_ID = new Guid("11111111-1111-1111-1111-111111111111"); private static final Guid FENCECD_HOST_CLUSTER_ID = new Guid("22222222-2222-2222-2222-222222222222"); - @ClassRule - public static MockConfigRule configRule = - new MockConfigRule(MockConfigRule.mockConfig(ConfigValues.FenceStartStatusRetries, 2), - MockConfigRule.mockConfig(ConfigValues.FenceStartStatusDelayBetweenRetriesInSec, 1)); @Mock - private FenceExecutor executor; + private HostFenceActionExecutor executor; private FenceAgent agent1; private FenceAgent agent2; private DbFacade dbFacade; @@ -139,10 +134,9 @@ FenceVdsActionParameters params = new FenceVdsActionParameters(); params.setVdsId(FENCECD_HOST_ID); command = new StartVdsCommand<>(params); - command.setFenceExecutor(executor); - command = spy(command); - stub(command.getSleepBeforeFirstAttempt()).toReturn(0); command.setAuditLogDirector(auditLogDirector); + command = spy(command); + doReturn(executor).when(command).createHostFenceActionExecutor(any(VDS.class), any(FencingPolicy.class)); command.setVdsGroupId(FENCECD_HOST_CLUSTER_ID); } @@ -159,33 +153,14 @@ return vds; } - private void mockCheckStatus(String status) { - VDSFenceReturnValue retValue = new VDSFenceReturnValue(); - retValue.setSucceeded(true); - FenceStatusReturnValue stat = new FenceStatusReturnValue(status, ""); - retValue.setReturnValue(stat); - when(executor.checkHostStatus()).thenReturn(retValue); - } - - private VDSFenceReturnValue createStatus(String value) { - VDSFenceReturnValue retValue = new VDSFenceReturnValue(); - retValue.setSucceeded(true); - FenceStatusReturnValue stat = new FenceStatusReturnValue(value, ""); - retValue.setReturnValue(stat); - return retValue; - } - - private void mockVdsSingleAgent() { - VDS vds = createHost(); - vds.getFenceAgents().remove(1); // remove the second agent - when(vdsDao.get(FENCECD_HOST_ID)).thenReturn(vds); - } - - private void mockExecutor(FenceAgent agent, boolean success) { - VDSFenceReturnValue returnValue = new VDSFenceReturnValue(); - returnValue.setSucceeded(success); - returnValue.setFenceAgentUsed(agent); - when(executor.fence(eq(FenceActionType.START), eq(agent))).thenReturn(returnValue); + private void mockExecutor(boolean success) { + FenceOperationResult result; + if (success) { + result = new FenceOperationResult(Status.SUCCESS, HostPowerStatus.ON); + } else { + result = new FenceOperationResult(Status.ERROR, HostPowerStatus.UNKNOWN); + } + doReturn(result).when(executor).fence(any(FenceActionType.class)); } /** @@ -195,6 +170,7 @@ */ @Test() public void onFailureResetInitialStatus() { + mockExecutor(false); try { command.executeCommand(); } catch (VdcBLLException exception) { @@ -206,44 +182,19 @@ } /** - * This test verifies that when the fence operation is successful, it is not attempted again with the next agent - */ - @Test - public void onSuccessDontTryNextAgent() { - mockExecutor(agent1, true); - mockCheckStatus("on"); - // this won't happen, because second agent isn't reached. - when(executor.fence(eq(FenceActionType.START), eq(agent2))).thenThrow(new IllegalStateException()); - command.executeCommand(); - } - - /** - * This test verifies that when the fence operation fails using the first agent, the second agent is used. - */ - @Test - public void onFailureTryNextAgent() { - mockExecutor(agent1, false); - mockExecutor(agent2, true); - mockCheckStatus("on"); - command.executeCommand(); - assertTrue(command.getSucceeded()); - } - - /** * This test verifies that when the fence operation is successful, the return value contains all that is should: * succeeded=true, the agents used, and the object returned. */ @Test public void onSuccessReturnValueOk() { - mockExecutor(agent1, true); - mockCheckStatus("on"); + mockExecutor(true); command.executeCommand(); assertTrue(command.getSucceeded()); Object commandReturnValue = command.getActionReturnValue(); assertNotNull(commandReturnValue); - assertTrue(commandReturnValue instanceof VDSFenceReturnValue); - VDSFenceReturnValue commandReturnValueCasted = (VDSFenceReturnValue)commandReturnValue; - assertEquals(commandReturnValueCasted.getFenceAgentUsed(), agent1); + assertTrue(commandReturnValue instanceof FenceOperationResult); + FenceOperationResult commandReturnValueCasted = (FenceOperationResult) commandReturnValue; + assertEquals(Status.SUCCESS, commandReturnValueCasted.getStatus()); } /** @@ -254,8 +205,7 @@ */ @Test public void onSuccessAudit() { - mockExecutor(agent1, true); - mockCheckStatus("on"); + mockExecutor(true); command.executeCommand(); verify(auditLogDirector, times(2)).log(any(AuditLogableBase.class), any(AuditLogType.class)); @@ -268,30 +218,14 @@ * saved to the database. In case of success, there are 2 audit messages. In case of failure the second audit * message isn't reached, but because of the auditing of the alert, there *are still* 2 audit messages. */ - @Test() - public void onFaliureAlertShown() { - mockVdsSingleAgent(); - mockExecutor(agent1, false); - mockCheckStatus("on"); + @Test + public void onFailureAlertShown() { + mockExecutor(false); try { command.executeCommand(); - } catch (VdcBLLException exception) { - verify(auditLogDirector, times(2)).log(any(AuditLogableBase.class), any(AuditLogType.class)); - return; + fail(); + } catch (VdcBLLException ex) { + verify(auditLogDirector, times(3)).log(any(AuditLogableBase.class), any(AuditLogType.class)); } - fail(); - } - - /** - * After fence-operation is performed, the command waits for the desired status to be reached. This test verifies - * that wait-for-status is retried according to the number of retries specified. - */ - @Test - public void onFailureRetryWaitForStatus() { - mockVdsSingleAgent(); - mockExecutor(agent1, true); - when(executor.checkHostStatus()).thenReturn(createStatus("off")).thenReturn(createStatus("on")); - command.executeCommand(); - assertTrue(command.getSucceeded()); } } diff --git a/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties b/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties index 1442cd2..f6ec37f 100644 --- a/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties +++ b/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties @@ -591,9 +591,9 @@ FENCE_DISABLED_IN_CLUSTER_POLICY=Fencing is disabled in Fencing Policy of the Cluster ${VdsGroupName}, so HA VMs running on a non-responsive host will not be restarted elsewhere. VDS_TIME_DRIFT_ALERT=Host ${VdsName} has time-drift of ${Actual} seconds while maximum configured value is ${Max} seconds. PROXY_HOST_SELECTION=Host ${Proxy} from ${Origin} was chosen as a proxy to execute fencing on Host ${VdsName}. -FENCE_OPERATION_STARTED=${Action} of Host ${VdsName} initiated. -FENCE_OPERATION_SUCCEEDED=${Action} of Host ${VdsName} succeeded. -FENCE_OPERATION_FAILED=${Action} of Host ${VdsName} failed. +FENCE_OPERATION_STARTED=Power management ${Action} of Host ${VdsName} initiated. +FENCE_OPERATION_SUCCEEDED=Power management ${Action} of Host ${VdsName} succeeded. +FENCE_OPERATION_FAILED=Power management ${Action} of Host ${VdsName} failed. FENCE_USING_AGENT_AND_PROXY_HOST=${Action} Host ${Host} using Proxy-Host ${ProxyHost} and fence-agent ${Agent}. FENCE_OPERATION_FAILED_USING_PROXY=${Action} Host ${Host} using Proxy-Host ${ProxyHost} and fence-agent ${Agent} has failed. RECONSTRUCT_MASTER_FAILED_NO_MASTER=No valid Data Storage Domains are available in Data Center ${StoragePoolName} (please check your storage infrastructure). -- To view, visit https://gerrit.ovirt.org/38966 To unsubscribe, visit https://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2cda348997927decc1b61bf4ce06c4489859c1d3 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: master Gerrit-Owner: Martin Peřina <[email protected]> _______________________________________________ Engine-patches mailing list [email protected] http://lists.ovirt.org/mailman/listinfo/engine-patches
