On Thu, Jan 31, 2013 at 3:04 AM, James Guthrie <j...@open.ch> wrote:
> Hi all,
>
> I'm having a bit of difficulty with the way that my cluster is behaving on 
> failure of a resource.
>
> The objective of my clustering setup is to provide a virtual IP, to which a 
> number of other services are bound. The services are bound to the VIP with 
> constraints to force the service to be running on the same host as the VIP.
>
> I have been testing the way that the cluster behaves if it is unable to start 
> a resource. What I observe is the following: the cluster tries to start the 
> resource on node 1,

Can you define "the resource"?  You have a few and it matters :)

> fails 10 times, reaches the migration threshold, moves the resource to the 
> other host, fails 10 times, reaches the migration threshold. Now it has 
> reached the migration threshold on all possible hosts. I was then expecting 
> that it would stop the resource on all nodes and run all of the other 
> resources as though nothing were wrong. What I see though is that the cluster 
> demotes all master/slave resources, despite the fact that only one of them is 
> failing.
>
> I wasn't able to find a parameter which would dictate what the behaviour 
> should be if the migration failed on all available hosts. I must therefore 
> believe that the constraints configuration I'm using isn't doing quite what I 
> hope it's doing.
>
> Below is the configuration XML I am using on the hosts (no crmsh config, 
> sorry).
>
> I am using Corosync 2.3.0 and Pacemaker 1.1.8, built from source.
>
> Regards,
> James
>
> <!-- Configuration file for pacemaker -->
> <resources>
>   <!--resource for conntrackd-->
>   <master id="master-conntrackd">
>     <meta_attributes id="master-conntrackd-meta_attributes">
>       <nvpair id="master-conntrackd-meta_attributes-notify" name="notify" 
> value="true"/>
>       <nvpair id="master-conntrackd-meta_attributes-interleave" 
> name="interleave" value="true"/>
>       <nvpair id="master-conntrackd-meta_attributes-target-role" 
> name="target-role" value="Master"/>
>       <nvpair id="master-conntrackd-meta_attributes-failure-timeout" 
> name="failure-timeout" value="600"/>
>       <nvpair id="master-conntrackd-meta_attributes-migration-threshold" 
> name="migration-threshold" value="10"/>
>     </meta_attributes>
>     <primitive id="conntrackd" class="ocf" provider="OSAG" type="conntrackd">
>       <operations>
>         <op id="conntrackd-slave-check" name="monitor" interval="60" 
> role="Slave" />
>         <op id="conntrackd-master-check" name="monitor" interval="61" 
> role="Master" />
>       </operations>
>     </primitive>
>   </master>
>   <master id="master-condition">
>     <meta_attributes id="master-condition-meta_attributes">
>       <nvpair id="master-condition-meta_attributes-notify" name="notify" 
> value="false"/>
>       <nvpair id="master-condition-meta_attributes-interleave" 
> name="interleave" value="true"/>
>       <nvpair id="master-condition-meta_attributes-target-role" 
> name="target-role" value="Master"/>
>       <nvpair id="master-condition-meta_attributes-failure-timeout" 
> name="failure-timeout" value="600"/>
>       <nvpair id="master-condition-meta_attributes-migration-threshold" 
> name="migration-threshold" value="10"/>
>     </meta_attributes>
>     <primitive id="condition" class="ocf" provider="OSAG" type="condition">
>       <instance_attributes id="condition-attrs">
>       </instance_attributes>
>       <operations>
>         <op id="condition-slave-check" name="monitor" interval="10" 
> role="Slave" />
>         <op id="condition-master-check" name="monitor" interval="11" 
> role="Master" />
>       </operations>
>     </primitive>
>   </master>
>   <master id="master-ospfd.init">
>     <meta_attributes id="master-ospfd-meta_attributes">
>       <nvpair id="master-ospfd-meta_attributes-notify" name="notify" 
> value="false"/>
>       <nvpair id="master-ospfd-meta_attributes-interleave" name="interleave" 
> value="true"/>
>       <nvpair id="master-ospfd-meta_attributes-target-role" 
> name="target-role" value="Master"/>
>       <nvpair id="master-ospfd-meta_attributes-failure-timeout" 
> name="failure-timeout" value="600"/>
>       <nvpair id="master-ospfd-meta_attributes-migration-threshold" 
> name="migration-threshold" value="10"/>
>     </meta_attributes>
>     <primitive id="ospfd" class="ocf" provider="OSAG" type="osaginit">
>       <instance_attributes id="ospfd-attrs">
>         <nvpair id="ospfd-script" name="script" value="ospfd.init"/>
>       </instance_attributes>
>       <operations>
>         <op id="ospfd-slave-check" name="monitor" interval="10" role="Slave" 
> />
>         <op id="ospfd-master-check" name="monitor" interval="11" 
> role="Master" />
>       </operations>
>     </primitive>
>   </master>
>   <master id="master-ripd.init">
>     <meta_attributes id="master-ripd-meta_attributes">
>       <nvpair id="master-ripd-meta_attributes-notify" name="notify" 
> value="false"/>
>       <nvpair id="master-ripd-meta_attributes-interleave" name="interleave" 
> value="true"/>
>       <nvpair id="master-ripd-meta_attributes-target-role" name="target-role" 
> value="Master"/>
>       <nvpair id="master-ripd-meta_attributes-failure-timeout" 
> name="failure-timeout" value="600"/>
>       <nvpair id="master-ripd-meta_attributes-migration-threshold" 
> name="migration-threshold" value="10"/>
>     </meta_attributes>
>     <primitive id="ripd" class="ocf" provider="OSAG" type="osaginit">
>       <instance_attributes id="ripd-attrs">
>         <nvpair id="ripd-script" name="script" value="ripd.init"/>
>       </instance_attributes>
>       <operations>
>         <op id="ripd-slave-check" name="monitor" interval="10" role="Slave" />
>         <op id="ripd-master-check" name="monitor" interval="11" role="Master" 
> />
>       </operations>
>     </primitive>
>   </master>
>   <master id="master-squid.init">
>     <meta_attributes id="master-squid-meta_attributes">
>       <nvpair id="master-squid-meta_attributes-notify" name="notify" 
> value="false"/>
>       <nvpair id="master-squid-meta_attributes-interleave" name="interleave" 
> value="true"/>
>       <nvpair id="master-squid-meta_attributes-target-role" 
> name="target-role" value="Master"/>
>       <nvpair id="master-squid-meta_attributes-failure-timeout" 
> name="failure-timeout" value="600"/>
>       <nvpair id="master-squid-meta_attributes-migration-threshold" 
> name="migration-threshold" value="10"/>
>     </meta_attributes>
>     <primitive id="squid" class="ocf" provider="OSAG" type="osaginit">
>       <instance_attributes id="squid-attrs">
>         <nvpair id="squid-script" name="script" value="squid.init"/>
>       </instance_attributes>
>       <operations>
>         <op id="squid-slave-check" name="monitor" interval="10" role="Slave" 
> />
>         <op id="squid-master-check" name="monitor" interval="11" 
> role="Master" />
>       </operations>
>     </primitive>
>   </master>
>
>   <!--resource for interface checks -->
>   <clone id="clone-IFcheck">
>     <primitive id="IFcheck" class="ocf" provider="OSAG" type="ifmonitor">
>       <instance_attributes id="resIFcheck-attrs">
>         <nvpair id="IFcheck-interfaces" name="interfaces" value="eth0 eth1"/>
>         <nvpair id="IFcheck-multiplier" name="multiplier" value="200"/>
>         <nvpair id="IFcheck-dampen" name="dampen" value="6s" />
>       </instance_attributes>
>       <operations>
>         <op id="IFcheck-monitor" interval="3s" name="monitor"/>
>       </operations>
>     </primitive>
>   </clone>
>
>   <!--resource for ISP checks-->
>   <clone id="clone-ISPcheck">
>     <primitive id="ISPcheck" class="ocf" provider="OSAG" type="ispcheck">
>       <instance_attributes id="ISPcheck-attrs">
>         <nvpair id="ISPcheck-ipsec" name="ipsec-check" value="1" />
>         <nvpair id="ISPcheck-ping" name="ping-check" value="1" />
>         <nvpair id="ISPcheck-multiplier" name="multiplier" value="200"/>
>         <nvpair id="ISPcheck-dampen" name="dampen" value="60s"/>
>       </instance_attributes>
>       <operations>
>         <op id="ISPcheck-monitor" interval="30s" name="monitor"/>
>       </operations>
>     </primitive>
>   </clone>
>
>   <!--Virtual IP group-->
>   <group id="VIP-group">
>     <primitive id="eth1-0-192.168.1.10" class="ocf" provider="heartbeat" 
> type="IPaddr2">
>       <meta_attributes id="meta-VIP-1">
>         <nvpair id="VIP-1-failure-timeout" name="failure-timeout" value="60"/>
>         <nvpair id="VIP-1-migration-threshold" name="migration-threshold" 
> value="50"/>
>       </meta_attributes>
>       <instance_attributes id="VIP-1-instance_attributes">
>         <nvpair id="VIP-1-IP" name = "ip" value="192.168.1.10"/>
>         <nvpair id="VIP-1-nic" name="nic" value="eth1"/>
>         <nvpair id="VIP-1-cidr" name="cidr_netmask" value="24"/>
>         <nvpair id="VIP-1-iflabel" name="iflabel" value="0"/>
>         <nvpair id="VIP-1-arp-sender" name="arp_sender" value="send_arp"/>
>       </instance_attributes>
>       <operations>
>         <op id="VIP-1-monitor" interval="10s" name="monitor"/>
>       </operations>
>     </primitive>
>   </group>
> </resources>
>
> <!--resource constraints-->
> <constraints>
>   <!--set VIP location based on the following two rules-->
>   <rsc_location id="VIPs" rsc="VIP-group">
>     <!--prefer host with more interfaces-->
>     <rule id="VIP-prefer-connected-rule-1" score-attribute="ifcheck" >
>       <expression id="VIP-prefer-most-connected-1" attribute="ifcheck" 
> operation="defined"/>
>     </rule>
>     <!--prefer host with better ISP connectivity-->
>     <rule id="VIP-prefer-connected-rule-2" score-attribute="ispcheck">
>       <expression id="VIP-prefer-most-connected-2" attribute="ispcheck" 
> operation="defined"/>
>     </rule>
>   </rsc_location>
>   <!--conntrack master must run where the VIPs are-->
>   <rsc_colocation id="conntrack-master-with-VIPs" rsc="master-conntrackd" 
> with-rsc="VIP-group" rsc-role="Master" score="INFINITY" />
>   <rsc_colocation id="condition-master-with-VIPs" rsc="master-condition" 
> with-rsc="VIP-group" rsc-role="Master" score="INFINITY" />
>   <!--services masters must run where the VIPs are-->
>   <rsc_colocation id="ospfd-master-with-VIPs" rsc="master-ospfd.init" 
> with-rsc="VIP-group" rsc-role="Master" score="INFINITY" />
>   <rsc_colocation id="ripd-master-with-VIPs" rsc="master-ripd.init" 
> with-rsc="VIP-group" rsc-role="Master" score="INFINITY" />
>   <rsc_colocation id="squid-master-with-VIPs" rsc="master-squid.init" 
> with-rsc="VIP-group" rsc-role="Master" score="INFINITY" />
>   <!--prefer as master the following hosts in ascending order-->
>   <rsc_location id="VIP-master-xi" rsc="VIP-group" node="xi" score="0"/>
>   <rsc_location id="VIP-master-nu" rsc="VIP-group" node="nu" score="20"/>
>   <rsc_location id="VIP-master-mu" rsc="VIP-group" node="mu" score="40"/>
> </constraints>
> _______________________________________________
> Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
> http://oss.clusterlabs.org/mailman/listinfo/pacemaker
>
> Project Home: http://www.clusterlabs.org
> Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
> Bugs: http://bugs.clusterlabs.org

_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org

Reply via email to