Attached is my cib.xml file.
I have a two node DRBD cluster setup in Active/Active. For whatever reason, it
seems all my resources are attached to Node2. What I mean by that is that
although the resources show that they are collocated, whenever I turn Node2
off or unplug a cable from Node2, then the cluster goes down. I wait to see if
they come back up on the other node (although they should already be running as
it is an Active/Active cluster) but they never do, even after 10 minutes. With
Node2 off, I can't even ping the collocated IP address. However, if I turn off
Node1 while Node2 is running, nothing goes down.
I am using the LCMC to give me a graphical overview of the setup and the screen
seems to indicate that everything is okay. I believe it has to do with my
fencing agent, fence_pcmk. I know that even though it is set to turn a
node off if there is an issue, the node never seems to shut down. It complains
that devices are busy and it can't reboot.
I am just hoping someone can take a look at my configuration and see if there
is anything that stands out. If it is the fencing agent, is there a better
fencing agent?
William
<cib epoch="279" num_updates="0" admin_epoch="14" validate-with="pacemaker-1.2" crm_feature_set="3.0.6" update-origin=" NODE 2" update-client="crmd" cib-last-written="Wed Aug 1 03:56:44 2012" have-quorum="1">
<configuration>
<crm_config>
<!-- Cluster-wide options. -->
<cluster_property_set id="cib-bootstrap-options">
<!-- Fencing is enabled cluster-wide, but see the "Fencing" clone below:
     the only configured stonith resource is fence_pcmk, which is not a
     real fencing device. -->
<nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="true"/>
<nvpair id="cib-bootstrap-options-stonith-action" name="stonith-action" value="poweroff"/>
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14"/>
<!-- no-quorum-policy=ignore is the usual setting for a two-node cluster
     (a lone survivor can never have quorum), but it makes WORKING fencing
     essential: without it a node failure can leave resources stuck. -->
<nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="ignore"/>
<nvpair id="cib-bootstrap-options-cluster-recheck-interval" name="cluster-recheck-interval" value="5min"/>
<!-- Membership/messaging layer is CMAN (RHEL6-era stack). -->
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="cman"/>
<nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1343364281"/>
</cluster_property_set>
</crm_config>
<nodes>
<!-- NOTE(review): node ids/unames are inconsistent — id="NODE1" has no
     space while uname=" NODE 1" carries a leading space, and the second
     node uses id=" NODE 2". If these are not just anonymization artifacts
     of this post, whitespace mismatches between id/uname and the real
     cluster node names will break node matching and fencing target lookup
     — verify against the actual `uname -n` / `crm_node -l` output. -->
<node id="NODE1" type="normal" uname=" NODE 1">
<instance_attributes id="nodes- NODE 1">
<nvpair id="nodes- NODE 1-standby" name="standby" value="off"/>
</instance_attributes>
</node>
<node id=" NODE 2" type="normal" uname=" NODE 2">
<instance_attributes id="nodes- NODE 2">
<nvpair id="nodes- NODE 2-standby" name="standby" value="off"/>
</instance_attributes>
</node>
</nodes>
<resources>
<!-- Load-shared cluster IP: globally-unique=true + clusterip_hash makes
     IPaddr2 use the iptables CLUSTERIP target, with both clone instances
     normally split across the two nodes. clone-node-max=2 allows both
     instances to collapse onto one node when the other fails — which is
     exactly the failover the poster reports NOT happening; if the IP is
     unreachable with one node down, check whether both instances actually
     restarted on the survivor (crm_mon) rather than staying stopped. -->
<clone id="ClusterIPClone">
<meta_attributes id="ClusterIPClone-meta_attributes">
<nvpair id="ClusterIPClone-meta_attributes-globally-unique" name="globally-unique" value="true"/>
<nvpair id="ClusterIPClone-meta_attributes-clone-max" name="clone-max" value="2"/>
<nvpair id="ClusterIPClone-meta_attributes-clone-node-max" name="clone-node-max" value="2"/>
</meta_attributes>
<primitive class="ocf" id="ClusterIP" provider="heartbeat" type="IPaddr2">
<instance_attributes id="ClusterIP-instance_attributes">
<nvpair id="ClusterIP-instance_attributes-ip" name="ip" value="10.89.99.30"/>
<nvpair id="ClusterIP-instance_attributes-cidr_netmask" name="cidr_netmask" value="22"/>
<nvpair id="ClusterIP-instance_attributes-clusterip_hash" name="clusterip_hash" value="sourceip"/>
</instance_attributes>
<operations>
<op id="ClusterIP-monitor-30s" interval="30s" name="monitor"/>
</operations>
<meta_attributes id="ClusterIP-meta_attributes">
<nvpair id="ClusterIP-meta_attributes-is-managed" name="is-managed" value="true"/>
</meta_attributes>
</primitive>
</clone>
<!-- DLM (distributed lock manager) control daemon, one instance per node.
     GFS2 cannot mount without a running DLM, yet no constraint in this CIB
     orders ClusterFSClone after dlm_clone or colocates them — consider
     adding an order (dlm_clone then ClusterFSClone) and a colocation so a
     recovering node starts things in a workable sequence. -->
<clone id="dlm_clone">
<meta_attributes id="dlm_clone-meta_attributes">
<nvpair id="dlm_clone-meta_attributes-clone-max" name="clone-max" value="2"/>
<nvpair id="dlm_clone-meta_attributes-clone-node-max" name="clone-node-max" value="1"/>
</meta_attributes>
<primitive class="ocf" id="dlm" provider="pacemaker" type="controld">
<operations>
<op id="dlm-monitor-60s" interval="60s" name="monitor"/>
</operations>
<meta_attributes id="dlm-meta_attributes">
<nvpair id="dlm-meta_attributes-is-managed" name="is-managed" value="true"/>
</meta_attributes>
</primitive>
</clone>
<!-- DRBD master/slave set in dual-primary mode (master-max=2), as required
     for an Active/Active GFS2 setup; notify=true is mandatory for the
     linbit drbd agent. -->
<master id="ClusterDataClone">
<meta_attributes id="ClusterDataClone-meta_attributes">
<nvpair id="ClusterDataClone-meta_attributes-master-max" name="master-max" value="2"/>
<nvpair id="ClusterDataClone-meta_attributes-master-node-max" name="master-node-max" value="1"/>
<nvpair id="ClusterDataClone-meta_attributes-clone-max" name="clone-max" value="2"/>
<nvpair id="ClusterDataClone-meta_attributes-clone-node-max" name="clone-node-max" value="1"/>
<nvpair id="ClusterDataClone-meta_attributes-notify" name="notify" value="true"/>
</meta_attributes>
<primitive class="ocf" id="ClusterData" provider="linbit" type="drbd">
<instance_attributes id="ClusterData-instance_attributes">
<nvpair id="ClusterData-instance_attributes-drbd_resource" name="drbd_resource" value="nfs"/>
</instance_attributes>
<!-- FIX: the original contained a duplicated, unclosed <operations> tag
     here, which made the document ill-formed XML (the CIB as posted could
     not have parsed as-is). -->
<operations>
<op id="ClusterData-monitor-60s" interval="60s" name="monitor" role="Master"/>
<!-- Added Slave-role monitor: the drbd agent expects a monitor op per
     role, with distinct intervals; without it the Slave side is never
     health-checked by Pacemaker. -->
<op id="ClusterData-monitor-59s" interval="59s" name="monitor" role="Slave"/>
</operations>
<meta_attributes id="ClusterData-meta_attributes">
<nvpair id="ClusterData-meta_attributes-is-managed" name="is-managed" value="true"/>
</meta_attributes>
</primitive>
</master>
<!-- GFS2 filesystem mounted on both nodes on top of the dual-primary DRBD
     device. Constrained below to run only where DRBD is Master and only
     after promotion. NOTE(review): there is no monitor operation on this
     primitive, so Pacemaker will not detect a lost mount — consider adding
     one; also no ordering against dlm_clone, which GFS2 requires. -->
<clone id="ClusterFSClone">
<primitive class="ocf" id="ClusterFS" provider="heartbeat" type="Filesystem">
<instance_attributes id="ClusterFS-instance_attributes">
<nvpair id="ClusterFS-instance_attributes-device" name="device" value="/dev/drbd/by-res/nfs"/>
<nvpair id="ClusterFS-instance_attributes-directory" name="directory" value="/Storage"/>
<nvpair id="ClusterFS-instance_attributes-fstype" name="fstype" value="gfs2"/>
</instance_attributes>
<meta_attributes id="ClusterFS-meta_attributes">
<nvpair id="ClusterFS-meta_attributes-is-managed" name="is-managed" value="true"/>
</meta_attributes>
</primitive>
</clone>
<!-- NOTE(review): fence_pcmk is NOT a real fencing agent. It is a
     pass-through shim intended to be listed in CMAN's cluster.conf so that
     CMAN redirects its fencing requests INTO Pacemaker. Configuring it as
     the stonith device inside Pacemaker itself leaves the cluster with no
     agent that can actually power a node off — consistent with the
     reported symptom ("the node never seems to shut down / devices busy"),
     and it explains why resources never fail over: Pacemaker waits for a
     fence confirmation that can never arrive. Replace this with an agent
     matching the hardware (fence_ipmilan, fence_ilo, fence_drac,
     fence_apc, fence_virsh for VMs, ...), keeping fence_pcmk only in
     cluster.conf. -->
<clone id="Fencing">
<primitive class="stonith" id="pcmk-fencing" type="fence_pcmk">
<instance_attributes id="pcmk-fencing-instance_attributes">
<!-- NOTE(review): pcmk_host_list is a space-separated list; host names
     that themselves contain spaces (" NODE 1 NODE 2") cannot match —
     verify the real (unredacted) values have no embedded whitespace. -->
<nvpair id="pcmk-fencing-instance_attributes-pcmk_host_list" name="pcmk_host_list" value=" NODE 1 NODE 2"/>
</instance_attributes>
<operations>
<op id="pcmk-fencing-monitor-60s" interval="60s" name="monitor"/>
</operations>
</primitive>
</clone>
</resources>
<constraints>
<!-- Filesystem may only run where DRBD holds the Master role... -->
<rsc_colocation id="fs_on_drbd" rsc="ClusterFSClone" score="INFINITY" with-rsc="ClusterDataClone" with-rsc-role="Master"/>
<!-- ...and only after DRBD has been promoted there. -->
<rsc_order first="ClusterDataClone" first-action="promote" id="ClusterFS-after-ClusterData" score="INFINITY" then="ClusterFSClone" then-action="start"/>
<!-- NOTE(review): missing constraints — nothing orders ClusterFSClone
     after dlm_clone (GFS2 needs DLM running first), and ClusterIPClone
     is not colocated with or ordered after the filesystem. Consider:
       order dlm_clone -> ClusterFSClone
       order ClusterFSClone -> ClusterIPClone (+ colocation)
     so a surviving node can bring everything up in a valid sequence. -->
</constraints>
<!-- Defaults applied to every resource. -->
<rsc_defaults>
<meta_attributes id="rsc-options">
<!-- NOTE(review): target-role values are conventionally capitalized
     ("Started"); confirm this lowercase value is accepted by this
     Pacemaker version. -->
<nvpair id="rsc-options-target-role" name="target-role" value="started"/>
<!-- NOTE(review): allow-migrate=true as a cluster-wide default applies
     live migration semantics to agents that may not implement
     migrate_to/migrate_from — safer to set it per-resource. -->
<nvpair id="rsc-options-allow-migrate" name="allow-migrate" value="true"/>
<nvpair id="rsc-options-resource-stickiness" name="resource-stickiness" value="100"/>
</meta_attributes>
</rsc_defaults>
<!-- Default timeout for all resource operations that do not set their own. -->
<op_defaults>
<meta_attributes id="op-options">
<nvpair id="op-options-timeout" name="timeout" value="240s"/>
</meta_attributes>
</op_defaults>
</configuration>
</cib>
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems