Hi all!
I know there was already a similar thread on this ML in the past, but I
found nothing there that solves my problem.
Just a short problem description:
A simple 2-node cluster with 12 resources (6 IPaddr and 6 LSB) in 2 groups,
plus 2 extra IPMI STONITH resources - see the attached configuration.
Everything works well until the start of the 5th resource in the second
group fails with rc=1.
Immediately after the failed start, lrmd/crmd report "unknown error" and
attrd sets the fail-count for this resource (ubis_alarmmelder_1) to
INFINITY. What are the typical causes of such an error? Where should I
start looking?
Versions:
pacemaker : 1.1.5 (Build: c86cb93c5a57c1f507a21be69d24fd28dee85397)
cluster-glue : 1.0.7 (Build: 6fa74ce2ed7ef6df41be2b634cd4aa89c318a8dc)
resource-agents: 1.0.4 (Build: 7a11934b142d1daf42a04fbaa0391a3ac47cee4c)
heartbeat: 3.0.5
TIA!
Nikita Michalko
node $id="3037bfe7-37a5-4950-b3fc-39629012cc39" castor
node $id="ed784724-a9dd-418e-b3b7-0621c4c99902" pollux
primitive IPaddr_10_112_68_106 ocf:heartbeat:IPaddr \
params ip="10.112.68.106" cidr_netmask="26" broadcast="10.112.68.0" \
meta migration-threshold="3" \
op monitor interval="60s" timeout="60s"
primitive IPaddr_10_112_68_107 ocf:heartbeat:IPaddr \
params ip="10.112.68.107" cidr_netmask="26" broadcast="10.112.68.0" \
meta migration-threshold="3" \
op monitor interval="60s" timeout="60s"
primitive IPaddr_10_112_69_106 ocf:heartbeat:IPaddr \
params ip="10.112.69.106" cidr_netmask="26" broadcast="10.112.69.0" \
meta migration-threshold="3" \
op monitor interval="60s" timeout="60s"
primitive IPaddr_10_112_69_107 ocf:heartbeat:IPaddr \
params ip="10.112.69.107" cidr_netmask="26" broadcast="10.112.69.0" \
meta migration-threshold="3" \
op monitor interval="60s" timeout="60s"
primitive IPaddr_192_168_68_106 ocf:heartbeat:IPaddr \
params ip="192.168.68.106" cidr_netmask="24" broadcast="192.168.68.0" \
meta migration-threshold="3" \
op monitor interval="60s" timeout="60s"
primitive IPaddr_192_168_68_107 ocf:heartbeat:IPaddr \
params ip="192.168.68.107" cidr_netmask="24" broadcast="192.168.68.0" \
meta migration-threshold="3" \
op monitor interval="60s" timeout="60s"
primitive ipmi1_ston stonith:external/ipmi \
params hostname="pollux" ipaddr="10.112.68.112" userid="ADMIN" passwd="pentagon" \
op start interval="0" timeout="30s" \
op monitor interval="120s" timeout="120s" \
meta target-role="Started"
primitive ipmi2_ston stonith:external/ipmi \
params hostname="castor" ipaddr="10.112.68.111" userid="ADMIN" passwd="pentagon" \
op start interval="0" timeout="60s" \
op monitor interval="120s" timeout="120s" \
meta target-role="Started"
primitive ubis_alarmmelder_1 lsb:ubis_alarmmelder \
op monitor interval="120s" timeout="120s"
primitive ubis_applmain_1 lsb:ubis_applmain \
meta migration-threshold="3" \
op monitor interval="120s" timeout="110s"
primitive ubis_fax_1 lsb:ubis_fax \
meta migration-threshold="3" \
op monitor interval="120s" timeout="120s"
primitive ubis_mserv_1 lsb:ubis_mserv \
meta migration-threshold="3" \
op monitor interval="120s" timeout="120s"
primitive ubis_nserv_1 lsb:ubis_nserv \
meta migration-threshold="3" \
op monitor interval="120s" timeout="120s"
primitive ubis_up_mkctab_1 lsb:ubis_up_mkctab \
meta migration-threshold="3" \
op monitor interval="120s" timeout="120s"
group group_1 IPaddr_10_112_68_106 IPaddr_192_168_68_106 IPaddr_10_112_69_106 ubis_applmain_1 \
meta migration-threshold="3" \
meta is-managed="true" target-role="Started"
group group_11 IPaddr_10_112_68_107 IPaddr_192_168_68_107 IPaddr_10_112_69_107 ubis_up_mkctab_1 ubis_alarmmelder_1 ubis_nserv_1 ubis_mserv_1 ubis_fax_1 \
meta migration-threshold="3" target-role="Started" \
meta is-managed="true" target-role="Started"
location node_pref_gr1 group_1 2: 3037bfe7-37a5-4950-b3fc-39629012cc39
location node_pref_gr11 group_11 2: ed784724-a9dd-418e-b3b7-0621c4c99902
location node_pref_ipmi1 ipmi1_ston inf: 3037bfe7-37a5-4950-b3fc-39629012cc39
location node_pref_ipmi2 ipmi2_ston inf: ed784724-a9dd-418e-b3b7-0621c4c99902
property $id="cib-bootstrap-options" \
dc-version="1.1.5-c86cb93c5a57c1f507a21be69d24fd28dee85397" \
cluster-infrastructure="Heartbeat" \
expected-quorum-votes="2" \
symmetric-cluster="true" \
no-quorum-policy="ignore" \
stonith-enabled="true" \
stonith-action="reboot" \
startup-fencing="true" \
stop-orphan-resources="true" \
stop-orphan-actions="true" \
remove-after-stop="false" \
default-action-timeout="110s" \
default-resource-stickiness="2" \
is-managed-default="true" \
cluster-delay="30s" \
last-lrm-refresh="1310456071"
rsc_defaults $id="rsc_defaults-options" \
target-role="Started"
op_defaults $id="op_defaults-options" \
timeout="110s"
_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker
Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker