OK, attached is the full cibadmin -Q output.
Thanks,
Brad
On 09/08/2011 02:07 PM, Florian Haas wrote:
On 09/08/11 20:59, Brad Johnson wrote:
We have a 2 node cluster with a single resource. The resource must run
on only a single node at one time. Using the ocf:pacemaker:ping RA we
are pinging a WAN gateway and a LAN host on each node so the resource
runs on the node with the greatest connectivity. The problem is when a
ping host goes down (so both nodes lose connectivity to it), the
resource moves to the other node due to timing differences in how fast
they update the score attribute. The dampening value has no effect,
since it delays both nodes by the same amount. These unnecessary
fail-overs aren't acceptable since they are disruptive to the network
for no reason.
Is there a way to dampen the ping update by different amounts on the
active and passive nodes? Or some other way to configure the cluster to
try to keep the resource where it is during these tie score scenarios?
We are running Pacemaker 1.0.10 with Heartbeat 3.0.5. Here are our
current resource and constraints sections:
Just those two aren't enormously helpful, as the current state of your
node attributes isn't evident from them.
If you could just dump your full CIB and upload it to
pastebin/pastie/fpaste etc., it would be easier to make recommendations.
Cheers,
Florian
_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker
Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker
<cib validate-with="pacemaker-1.0" crm_feature_set="3.0.1" have-quorum="1"
dc-uuid="121292d3-60cc-4e13-9ec6-5b6ea889f20e" admin_epoch="0" epoch="9"
num_updates="1">
<configuration>
<!-- Cluster-wide options, split across two property sets. -->
<crm_config>
<!-- Set "cluster_props": shutdown escalation after 60s, quorum loss is
     ignored, STONITH is disabled, and dc-deadtime is 20s. -->
<cluster_property_set id="cluster_props">
<nvpair name="shutdown-escalation" id="nvpair.shutdown" value="60s"/>
<nvpair name="no-quorum-policy" id="nvpair.quorum" value="ignore"/>
<nvpair name="stonith-enabled" id="nvpair.stonith" value="false"/>
<nvpair name="dc-deadtime" id="nvpair.dcdead" value="20s"/>
</cluster_property_set>
<!-- Set "cib-bootstrap-options": records dc-version and the cluster
     infrastructure (Heartbeat).
     NOTE(review): dc-version reports 1.0.9 here, while the message body
     states Pacemaker 1.0.10; confirm which version is actually running. -->
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
value="1.0.9-da7075976b5ff0bee71074385f8fd02f296ec8a3"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure"
name="cluster-infrastructure" value="Heartbeat"/>
</cluster_property_set>
</crm_config>
<!-- The two cluster members. Each id is reused as the id of the matching
     node_state, transient_attributes and lrm elements in the status
     section of this dump. -->
<nodes>
<node id="121292d3-60cc-4e13-9ec6-5b6ea889f20e" uname="clarilink00123456"
type="normal"/>
<node id="e6fe941b-8bc6-4812-9d3e-f210ab1b06de"
uname="shieldlink00123457" type="normal"/>
</nodes>
<!-- Resource definitions: one group holding the managed service, plus a
     cloned ping resource used for connectivity scoring. -->
<resources>
<!-- group_1 wraps the single heartbeat-class primitive pld_1. The two
     meta_attributes elements with ids ending in ".meta" are empty. -->
<group id="group_1">
<meta_attributes id="group-group_1.meta"/>
<primitive class="heartbeat" id="pld_1" type="pld">
<meta_attributes id="primitive-pld_1.meta"/>
<operations>
<op id="pld_1_mon" interval="20s" name="monitor" timeout="60s"/>
</operations>
<!-- resource-stickiness for pld_1 is 1000.
     NOTE(review): this is lower than the ping multiplier of 2000 set on
     pld_pingd below, so a one-host difference in the pingd attribute
     between the nodes would presumably outweigh stickiness; likely
     relevant to the fail-overs described in the message. Confirm against
     the Pacemaker scoring documentation. -->
<meta_attributes id="pld_1-meta_attributes">
<nvpair id="pld_1-resource-stickiness" name="resource-stickiness"
value="1000"/>
</meta_attributes>
</primitive>
</group>
<!-- Clone "Connected" of an ocf:pacemaker:ping primitive, monitored every
     13s. Parameters: dampen 12s, multiplier 2000, two hosts in host_list,
     3 attempts, timeout 4. The status section records pingd=4000 on both
     nodes, which matches 2 hosts x 2000.
     No "name" parameter is set, yet the location constraint and the status
     section use an attribute called "pingd"; presumably that is the
     agent's default attribute name (TODO confirm).
     The host_list value is wrapped across two lines by the mail client. -->
<clone id="Connected">
<primitive id="pld_pingd" provider="pacemaker" class="ocf" type="ping">
<instance_attributes id="ping-attrs">
<nvpair id="pingd-dampen" name="dampen" value="12s"/>
<nvpair id="pingd-multiplier" name="multiplier" value="2000"/>
<nvpair id="pingd-hosts" name="host_list" value="173.160.121.110
192.168.10.98"/>
<nvpair id="pingd-attempts" name="attempts" value="3"/>
<nvpair id="pingd-timeout" name="timeout" value="4"/>
</instance_attributes>
<operations>
<op id="ping-monitor-interval" interval="13s" name="monitor"/>
</operations>
</primitive>
</clone>
</resources>
<!-- Single location constraint for group_1. The rule applies on nodes where
     the "pingd" attribute is defined, and takes its score from that
     attribute (score-attribute="pingd") rather than from a fixed value. -->
<constraints>
<rsc_location id="pingd-constraint" rsc="group_1">
<rule id="pingd-constraint-rule" score-attribute="pingd">
<expression id="expression.id2244454" attribute="pingd"
operation="defined"/>
</rule>
</rsc_location>
</constraints>
</configuration>
<status>
<node_state id="121292d3-60cc-4e13-9ec6-5b6ea889f20e"
uname="clarilink00123456" ha="active" in_ccm="true" crmd="online" join="member"
expected="member" crm-debug-origin="do_update_resource" shutdown="0">
<transient_attributes id="121292d3-60cc-4e13-9ec6-5b6ea889f20e">
<instance_attributes id="status-121292d3-60cc-4e13-9ec6-5b6ea889f20e">
<nvpair
id="status-121292d3-60cc-4e13-9ec6-5b6ea889f20e-probe_complete"
name="probe_complete" value="true"/>
<nvpair id="status-121292d3-60cc-4e13-9ec6-5b6ea889f20e-pingd"
name="pingd" value="4000"/>
<nvpair
id="status-121292d3-60cc-4e13-9ec6-5b6ea889f20e-192.168.10.98"
name="192.168.10.98" value="ping"/>
</instance_attributes>
<instance_attributes id="status">
<nvpair id="status-foobar" name="foobar" value="up"/>
</instance_attributes>
</transient_attributes>
<lrm id="121292d3-60cc-4e13-9ec6-5b6ea889f20e">
<lrm_resources>
<lrm_resource id="pld_pingd:0" type="ping" class="ocf"
provider="pacemaker">
<lrm_rsc_op id="pld_pingd:0_monitor_0" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="5:0:7:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:7;5:0:7:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="3"
rc-code="7" op-status="0" interval="0" last-run="1315494733"
last-rc-change="1315494733" exec-time="520" queue-time="0"
op-digest="08e7c2c1509e3cb253552cdd1f4e8805"/>
<lrm_rsc_op id="pld_pingd:0_stop_0" operation="stop"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="13:4:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;13:4:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="8"
rc-code="0" op-status="0" interval="0" last-run="1315495022"
last-rc-change="1315495022" exec-time="50" queue-time="0"
op-digest="08e7c2c1509e3cb253552cdd1f4e8805"/>
<lrm_rsc_op id="pld_pingd:0_start_0" operation="start"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="4:4:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;4:4:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="9"
rc-code="0" op-status="0" interval="0" last-run="1315495022"
last-rc-change="1315495022" exec-time="4080" queue-time="0"
op-digest="2108486d8a382d013de2f5836e16ace1"/>
<lrm_rsc_op id="pld_pingd:0_monitor_13000" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="1:4:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;1:4:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="10"
rc-code="0" op-status="0" interval="13000" last-run="1315497078"
last-rc-change="1315495027" exec-time="4070" queue-time="0"
op-digest="e359a31a378e2f376fa463936f69ff7c"/>
</lrm_resource>
<lrm_resource id="pld_1" type="pld" class="heartbeat">
<lrm_rsc_op id="pld_1_monitor_0" operation="monitor"
crm-debug-origin="build_active_RAs" crm_feature_set="3.0.1"
transition-key="4:0:7:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:7;4:0:7:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="2"
rc-code="7" op-status="0" interval="0" last-run="1315494733"
last-rc-change="1315494733" exec-time="2820" queue-time="0"
op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="pld_1_start_0" operation="start"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="9:25:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;9:25:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="12"
rc-code="0" op-status="0" interval="0" last-run="1315506014"
last-rc-change="1315506014" exec-time="530" queue-time="0"
op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="pld_1_monitor_20000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="10:25:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;10:25:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
call-id="13" rc-code="0" op-status="0" interval="20000" last-run="1315506015"
last-rc-change="1315506015" exec-time="450" queue-time="0"
op-digest="873ed4f07792aa8ff18f3254244675ea"/>
<lrm_rsc_op id="pld_1_stop_0" operation="stop"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="8:14:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;8:14:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="11"
rc-code="0" op-status="0" interval="0" last-run="1315498807"
last-rc-change="1315498807" exec-time="450" queue-time="0"
op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
</lrm_resources>
</lrm>
</node_state>
<node_state id="e6fe941b-8bc6-4812-9d3e-f210ab1b06de"
uname="shieldlink00123457" ha="active" join="member"
crm-debug-origin="do_update_resource" in_ccm="true" crmd="online" shutdown="0"
expected="member">
<lrm id="e6fe941b-8bc6-4812-9d3e-f210ab1b06de">
<lrm_resources>
<lrm_resource id="pld_pingd:1" type="ping" class="ocf"
provider="pacemaker">
<lrm_rsc_op id="pld_pingd:1_monitor_0" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="8:8:7:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:7;8:8:7:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="3"
rc-code="7" op-status="0" interval="0" last-run="1315497138"
last-rc-change="1315497138" exec-time="90" queue-time="0"
op-digest="2108486d8a382d013de2f5836e16ace1"/>
<lrm_rsc_op id="pld_pingd:1_start_0" operation="start"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="17:8:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;17:8:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="4"
rc-code="0" op-status="0" interval="0" last-run="1315497140"
last-rc-change="1315497140" exec-time="240" queue-time="0"
op-digest="2108486d8a382d013de2f5836e16ace1"/>
<lrm_rsc_op id="pld_pingd:1_monitor_13000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="17:9:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;17:9:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="5"
rc-code="0" op-status="0" interval="13000" last-run="1315497141"
last-rc-change="1315497141" exec-time="290" queue-time="0"
op-digest="e359a31a378e2f376fa463936f69ff7c"/>
</lrm_resource>
<lrm_resource id="pld_1" type="pld" class="heartbeat">
<lrm_rsc_op id="pld_1_monitor_0" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="7:8:7:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:7;7:8:7:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="2"
rc-code="7" op-status="0" interval="0" last-run="1315497138"
last-rc-change="1315497138" exec-time="370" queue-time="0"
op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="pld_1_start_0" operation="start"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="9:14:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;9:14:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="6"
rc-code="0" op-status="0" interval="0" last-run="1315498849"
last-rc-change="1315498849" exec-time="250" queue-time="0"
op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
<lrm_rsc_op id="pld_1_monitor_20000" operation="monitor"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="9:15:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;9:15:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="7"
rc-code="0" op-status="0" interval="20000" last-run="1315498851"
last-rc-change="1315498851" exec-time="500" queue-time="0"
op-digest="873ed4f07792aa8ff18f3254244675ea"/>
<lrm_rsc_op id="pld_1_stop_0" operation="stop"
crm-debug-origin="do_update_resource" crm_feature_set="3.0.1"
transition-key="8:25:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d"
transition-magic="0:0;8:25:0:edf12b5f-32b5-47ec-90d7-66b09e6d456d" call-id="8"
rc-code="0" op-status="0" interval="0" last-run="1315506054"
last-rc-change="1315506054" exec-time="500" queue-time="0"
op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
</lrm_resources>
</lrm>
<transient_attributes id="e6fe941b-8bc6-4812-9d3e-f210ab1b06de">
<instance_attributes id="status-e6fe941b-8bc6-4812-9d3e-f210ab1b06de">
<nvpair
id="status-e6fe941b-8bc6-4812-9d3e-f210ab1b06de-probe_complete"
name="probe_complete" value="true"/>
<nvpair id="status-e6fe941b-8bc6-4812-9d3e-f210ab1b06de-pingd"
name="pingd" value="4000"/>
</instance_attributes>
</transient_attributes>
</node_state>
</status>
</cib>
_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker
Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker