I installed a two-node cluster following this guide, which I found via clusterlabs.org:
http://www.howtoforge.com/installation-and-setup-guide-for-drbd-openais-pacemaker-xen-on-opensuse-11.1
The guide is for openSUSE, but I did it on CentOS 5, since all the packages are available there as well.

Basically, what I have so far is a Xen guest instance sitting on a DRBD-backed file system. Everything seems to be working fine except for one thing: when I shut down openais (service openais stop) on the active node for failover testing, the passive node tries to take over all the resources but gets stuck at the file system resource. The logs show that the file system requires a check, so manual intervention is needed to bring it online. However, if I do 'crm resource move xen_rsc passive_node', the failover transitions cleanly every time.
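
For reference, the test sequence looks roughly like this (a sketch rather than an exact transcript; the e2fsck/cleanup lines are just an illustration of the kind of manual intervention needed, and they assume an ext3 file system on /dev/drbd0):

# failover test that gets stuck at the file system resource:
service openais stop              # run on the active node
crm_mon                           # watch the takeover from the passive node

# manual intervention to bring the file system online on the surviving node:
e2fsck -f /dev/drbd0              # assumes ext3 on /dev/drbd0
crm resource cleanup xen_fs

# the manual move that works every time (crm resource move adds a
# cli-prefer-* location constraint like the one visible in the config below):
crm resource move xen_rsc <passive node>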

Can I borrow someone's sharp eyes to get a clue as to what might be causing this?


[root@ilo141 ~]# crm configure show
node ilo141 \
       attributes standby="off"
node ilo142 \
       attributes standby="off"
primitive drbd_xen ocf:linbit:drbd \
       params drbd_resource="r0" \
       op monitor interval="15s"
primitive ns1 ocf:heartbeat:Xen \
       params xmfile="/xen/ns1" \
       op monitor interval="10s" \
       op start interval="0s" timeout="30s" \
       op stop interval="0s" timeout="300s" \
       meta target-role="Started"
primitive xen_fs ocf:heartbeat:Filesystem \
       params device="/dev/drbd0" directory="/xen" \
       meta target-role="Started"
ms ms_drbd_xen drbd_xen \
       meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location cli-prefer-ns1 ns1 \
       rule $id="cli-prefer-rule-ns1" inf: #uname eq ilo142
colocation fs_on_drbd inf: xen_fs ms_drbd_xen:Master
colocation ns1-with-xen_fs inf: ns1 xen_fs
order fs_after_drbd inf: ms_drbd_xen:promote xen_fs:start
order ns1-after-xen_fs inf: xen_fs:start ns1:start
property $id="cib-bootstrap-options" \
       dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
       cluster-infrastructure="openais" \
       expected-quorum-votes="2" \
       no-quorum-policy="ignore" \
       stonith-enabled="false" \
       default-resource-stickiness="1000" \
       last-lrm-refresh="1260156983"


cat /etc/drbd.conf

global {
   usage-count yes;
}
common {
 syncer { rate 100M; }
}
resource r0 {
 protocol C;
 handlers {
    pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
    pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
    local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
   fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
   after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh";
 }
 startup {
 }
 disk {
   on-io-error   detach;
   fencing resource-only;
 }
 net {
   allow-two-primaries;
   after-sb-0pri disconnect;
   after-sb-1pri disconnect;
   after-sb-2pri disconnect;
   rr-conflict disconnect;
 }
 syncer {
   rate 100M;
   al-extents 257;
 }
 on ilo142 {
   device     /dev/drbd0;
   disk       /dev/VolGroup00/drbdr0;
   address    172.16.1.2:7788;
   meta-disk  internal;
 }
 on ilo141 {
   device    /dev/drbd0;
   disk       /dev/VolGroup00/drbdr0;
   address    172.16.1.1:7788;
   meta-disk internal;
 }
}




Thanks,
Daniel


