We want to give the error state priority over EWRONG_NODE, as a
service may be in the error state because of EWRONG_NODE.

Change the error message a bit and add the possibility to skip
logging it; a future patch will use this option to spam the log
less.

Signed-off-by: Thomas Lamprecht <t.lampre...@proxmox.com>
---
 src/PVE/HA/LRM.pm                          | 20 +++++++++++---------
 src/test/test-resource-failure5/log.expect |  6 +++---
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/PVE/HA/LRM.pm b/src/PVE/HA/LRM.pm
index 060ae9d..9e8d046 100644
--- a/src/PVE/HA/LRM.pm
+++ b/src/PVE/HA/LRM.pm
@@ -643,6 +643,17 @@ sub exec_resource_agent {
        return EUNKNOWN_SERVICE;
     }
 
+    # process error state early
+    if ($cmd eq 'error') {
+       # allow to skip output, do not spam logs
+       return SUCCESS if defined($params[1]) && $params[1];
+
+       $haenv->log('err', "service $sid is in an error state and needs manual" 
.
+                   "intervention. Look up 'ERROR RECOVERY' in the 
documentation.");
+
+       return SUCCESS; # error always succeeds
+    }
+
     if ($service_config->{node} ne $nodename) {
        $haenv->log('err', "service '$sid' not on this node");
        return EWRONG_NODE;
@@ -713,15 +724,6 @@ sub exec_resource_agent {
 
        return SUCCESS;
 
-    } elsif ($cmd eq 'error') {
-
-       if ($running) {
-           $haenv->log("err", "service $sid is in an error state while 
running");
-       } else {
-           $haenv->log("warning", "service $sid is not running and in an error 
state");
-       }
-       return SUCCESS; # error always succeeds
-
     }
 
     $haenv->log("err", "implement me (cmd '$cmd')");
diff --git a/src/test/test-resource-failure5/log.expect 
b/src/test/test-resource-failure5/log.expect
index b6e7807..db0f90c 100644
--- a/src/test/test-resource-failure5/log.expect
+++ b/src/test/test-resource-failure5/log.expect
@@ -30,9 +30,9 @@ warn    143    node2/lrm: unable to start service fa:130
 err     143    node2/lrm: unable to start service fa:130 on local node after 1 
retries
 err     160    node1/crm: recovery policy for service fa:130 failed, entering 
error state!
 info    160    node1/crm: service 'fa:130': state changed from 'started' to 
'error'
-warn    163    node2/lrm: service fa:130 is not running and in an error state
-warn    183    node2/lrm: service fa:130 is not running and in an error state
-warn    203    node2/lrm: service fa:130 is not running and in an error state
+err     163    node2/lrm: service fa:130 is in an error state and needs 
manualintervention. Look up 'ERROR RECOVERY' in the documentation.
+err     183    node2/lrm: service fa:130 is in an error state and needs 
manualintervention. Look up 'ERROR RECOVERY' in the documentation.
+err     203    node2/lrm: service fa:130 is in an error state and needs 
manualintervention. Look up 'ERROR RECOVERY' in the documentation.
 info    220      cmdlist: execute service fa:130 disabled
 info    220    node1/crm: service 'fa:130': state changed from 'error' to 
'stopped'
 info    820     hardware: exit simulation - done
-- 
2.1.4


_______________________________________________
pve-devel mailing list
pve-devel@pve.proxmox.com
http://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to