On Thu, Jan 31, 2013 at 3:04 AM, James Guthrie <j...@open.ch> wrote: > Hi all, > > I'm having a bit of difficulty with the way that my cluster is behaving on > failure of a resource. > > The objective of my clustering setup is to provide a virtual IP, to which a > number of other services are bound. The services are bound to the VIP with > constraints to force the service to be running on the same host as the VIP. > > I have been testing the way that the cluster behaves if it is unable to start > a resource. What I observe is the following: the cluster tries to start the > resource on node 1,
Can you define "the resource"? You have a few and it matters :) > fails 10 times, reaches the migration threshold, moves the resource to the > other host, fails 10 times, reaches the migration threshold. Now it has > reached the migration threshold on all possible hosts. I was then expecting > that it would stop the resource on all nodes and run all of the other > resources as though nothing were wrong. What I see though is that the cluster > demotes all master/slave resources, despite the fact that only one of them is > failing. > > I wasn't able to find a parameter which would dictate what the behaviour > should be if the migration failed on all available hosts. I must therefore > believe that the constraints configuration I'm using isn't doing quite what I > hope it's doing. > > Below is the configuration xml I am using on the hosts (no crmsh config, > sorry). > > I am using Corosync 2.3.0 and Pacemaker 1.1.8, built from source. > > Regards, > James > > <!-- Configuration file for pacemaker --> > <resources> > <!--resource for conntrackd--> > <master id="master-conntrackd"> > <meta_attributes id="master-conntrackd-meta_attributes"> > <nvpair id="master-conntrackd-meta_attributes-notify" name="notify" > value="true"/> > <nvpair id="master-conntrackd-meta_attributes-interleave" > name="interleave" value="true"/> > <nvpair id="master-conntrackd-meta_attributes-target-role" > name="target-role" value="Master"/> > <nvpair id="master-conntrackd-meta_attributes-failure-timeout" > name="failure-timeout" value="600"/> > <nvpair id="master-conntrackd-meta_attributes-migration-threshold" > name="migration-threshold" value="10"/> > </meta_attributes> > <primitive id="conntrackd" class="ocf" provider="OSAG" type="conntrackd"> > <operations> > <op id="conntrackd-slave-check" name="monitor" interval="60" > role="Slave" /> > <op id="conntrackd-master-check" name="monitor" interval="61" > role="Master" /> > </operations> > </primitive> > </master> > <master 
id="master-condition"> > <meta_attributes id="master-condition-meta_attributes"> > <nvpair id="master-condition-meta_attributes-notify" name="notify" > value="false"/> > <nvpair id="master-condition-meta_attributes-interleave" > name="interleave" value="true"/> > <nvpair id="master-condition-meta_attributes-target-role" > name="target-role" value="Master"/> > <nvpair id="master-condition-meta_attributes-failure-timeout" > name="failure-timeout" value="600"/> > <nvpair id="master-condition-meta_attributes-migration-threshold" > name="migration-threshold" value="10"/> > </meta_attributes> > <primitive id="condition" class="ocf" provider="OSAG" type="condition"> > <instance_attributes id="condition-attrs"> > </instance_attributes> > <operations> > <op id="condition-slave-check" name="monitor" interval="10" > role="Slave" /> > <op id="condition-master-check" name="monitor" interval="11" > role="Master" /> > </operations> > </primitive> > </master> > <master id="master-ospfd.init"> > <meta_attributes id="master-ospfd-meta_attributes"> > <nvpair id="master-ospfd-meta_attributes-notify" name="notify" > value="false"/> > <nvpair id="master-ospfd-meta_attributes-interleave" name="interleave" > value="true"/> > <nvpair id="master-ospfd-meta_attributes-target-role" > name="target-role" value="Master"/> > <nvpair id="master-ospfd-meta_attributes-failure-timeout" > name="failure-timeout" value="600"/> > <nvpair id="master-ospfd-meta_attributes-migration-threshold" > name="migration-threshold" value="10"/> > </meta_attributes> > <primitive id="ospfd" class="ocf" provider="OSAG" type="osaginit"> > <instance_attributes id="ospfd-attrs"> > <nvpair id="ospfd-script" name="script" value="ospfd.init"/> > </instance_attributes> > <operations> > <op id="ospfd-slave-check" name="monitor" interval="10" role="Slave" > /> > <op id="ospfd-master-check" name="monitor" interval="11" > role="Master" /> > </operations> > </primitive> > </master> > <master id="master-ripd.init"> > 
<meta_attributes id="master-ripd-meta_attributes"> > <nvpair id="master-ripd-meta_attributes-notify" name="notify" > value="false"/> > <nvpair id="master-ripd-meta_attributes-interleave" name="interleave" > value="true"/> > <nvpair id="master-ripd-meta_attributes-target-role" name="target-role" > value="Master"/> > <nvpair id="master-ripd-meta_attributes-failure-timeout" > name="failure-timeout" value="600"/> > <nvpair id="master-ripd-meta_attributes-migration-threshold" > name="migration-threshold" value="10"/> > </meta_attributes> > <primitive id="ripd" class="ocf" provider="OSAG" type="osaginit"> > <instance_attributes id="ripd-attrs"> > <nvpair id="ripd-script" name="script" value="ripd.init"/> > </instance_attributes> > <operations> > <op id="ripd-slave-check" name="monitor" interval="10" role="Slave" /> > <op id="ripd-master-check" name="monitor" interval="11" role="Master" > /> > </operations> > </primitive> > </master> > <master id="master-squid.init"> > <meta_attributes id="master-squid-meta_attributes"> > <nvpair id="master-squid-meta_attributes-notify" name="notify" > value="false"/> > <nvpair id="master-squid-meta_attributes-interleave" name="interleave" > value="true"/> > <nvpair id="master-squid-meta_attributes-target-role" > name="target-role" value="Master"/> > <nvpair id="master-squid-meta_attributes-failure-timeout" > name="failure-timeout" value="600"/> > <nvpair id="master-squid-meta_attributes-migration-threshold" > name="migration-threshold" value="10"/> > </meta_attributes> > <primitive id="squid" class="ocf" provider="OSAG" type="osaginit"> > <instance_attributes id="squid-attrs"> > <nvpair id="squid-script" name="script" value="squid.init"/> > </instance_attributes> > <operations> > <op id="squid-slave-check" name="monitor" interval="10" role="Slave" > /> > <op id="squid-master-check" name="monitor" interval="11" > role="Master" /> > </operations> > </primitive> > </master> > > <!--resource for interface checks --> > <clone 
id="clone-IFcheck"> > <primitive id="IFcheck" class="ocf" provider="OSAG" type="ifmonitor"> > <instance_attributes id="resIFcheck-attrs"> > <nvpair id="IFcheck-interfaces" name="interfaces" value="eth0 eth1"/> > <nvpair id="IFcheck-multiplier" name="multiplier" value="200"/> > <nvpair id="IFcheck-dampen" name="dampen" value="6s" /> > </instance_attributes> > <operations> > <op id="IFcheck-monitor" interval="3s" name="monitor"/> > </operations> > </primitive> > </clone> > > <!--resource for ISP checks--> > <clone id="clone-ISPcheck"> > <primitive id="ISPcheck" class="ocf" provider="OSAG" type="ispcheck"> > <instance_attributes id="ISPcheck-attrs"> > <nvpair id="ISPcheck-ipsec" name="ipsec-check" value="1" /> > <nvpair id="ISPcheck-ping" name="ping-check" value="1" /> > <nvpair id="ISPcheck-multiplier" name="multiplier" value="200"/> > <nvpair id="ISPcheck-dampen" name="dampen" value="60s"/> > </instance_attributes> > <operations> > <op id="ISPcheck-monitor" interval="30s" name="monitor"/> > </operations> > </primitive> > </clone> > > <!--Virtual IP group--> > <group id="VIP-group"> > <primitive id="eth1-0-192.168.1.10" class="ocf" provider="heartbeat" > type="IPaddr2"> > <meta_attributes id="meta-VIP-1"> > <nvpair id="VIP-1-failure-timeout" name="failure-timeout" value="60"/> > <nvpair id="VIP-1-migration-threshold" name="migration-threshold" > value="50"/> > </meta_attributes> > <instance_attributes id="VIP-1-instance_attributes"> > <nvpair id="VIP-1-IP" name = "ip" value="192.168.1.10"/> > <nvpair id="VIP-1-nic" name="nic" value="eth1"/> > <nvpair id="VIP-1-cidr" name="cidr_netmask" value="24"/> > <nvpair id="VIP-1-iflabel" name="iflabel" value="0"/> > <nvpair id="VIP-1-arp-sender" name="arp_sender" value="send_arp"/> > </instance_attributes> > <operations> > <op id="VIP-1-monitor" interval="10s" name="monitor"/> > </operations> > </primitive> > </group> > </resources> > > <!--resource constraints--> > <constraints> > <!--set VIP location based on the following 
two rules--> > <rsc_location id="VIPs" rsc="VIP-group"> > <!--prefer host with more interfaces--> > <rule id="VIP-prefer-connected-rule-1" score-attribute="ifcheck" > > <expression id="VIP-prefer-most-connected-1" attribute="ifcheck" > operation="defined"/> > </rule> > <!--prefer host with better ISP connectivity--> > <rule id="VIP-prefer-connected-rule-2" score-attribute="ispcheck"> > <expression id="VIP-prefer-most-connected-2" attribute="ispcheck" > operation="defined"/> > </rule> > </rsc_location> > <!--conntrack master must run where the VIPs are--> > <rsc_colocation id="conntrack-master-with-VIPs" rsc="master-conntrackd" > with-rsc="VIP-group" rsc-role="Master" score="INFINITY" /> > <rsc_colocation id="condition-master-with-VIPs" rsc="master-condition" > with-rsc="VIP-group" rsc-role="Master" score="INFINITY" /> > <!--services masters must run where the VIPs are--> > <rsc_colocation id="ospfd-master-with-VIPs" rsc="master-ospfd.init" > with-rsc="VIP-group" rsc-role="Master" score="INFINITY" /> > <rsc_colocation id="ripd-master-with-VIPs" rsc="master-ripd.init" > with-rsc="VIP-group" rsc-role="Master" score="INFINITY" /> > <rsc_colocation id="squid-master-with-VIPs" rsc="master-squid.init" > with-rsc="VIP-group" rsc-role="Master" score="INFINITY" /> > <!--prefer as master the following hosts in ascending order--> > <rsc_location id="VIP-master-xi" rsc="VIP-group" node="xi" score="0"/> > <rsc_location id="VIP-master-nu" rsc="VIP-group" node="nu" score="20"/> > <rsc_location id="VIP-master-mu" rsc="VIP-group" node="mu" score="40"/> > </constraints> > _______________________________________________ > Pacemaker mailing list: Pacemaker@oss.clusterlabs.org > http://oss.clusterlabs.org/mailman/listinfo/pacemaker > > Project Home: http://www.clusterlabs.org > Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf > Bugs: http://bugs.clusterlabs.org _______________________________________________ Pacemaker mailing list: Pacemaker@oss.clusterlabs.org 
http://oss.clusterlabs.org/mailman/listinfo/pacemaker Project Home: http://www.clusterlabs.org Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf Bugs: http://bugs.clusterlabs.org