First time posting to a mailing list, hope I get this right.
I have a 2 node DRBD backed SCST/SRP single target(ib_srpt) setup working great
using pacemaker/corosync. I am using this for the data store for a mail
server. Where I am running into an issue is that the initiators are running on
vmware ESXi 4.1 hosts; when a failover occurs on the target, the vm host
initiators go dead and you have to rescan to pick up the target via the new
path, causing the vm guest to go down until the new path is discovered.
Hope that makes sense.
What I see as the potential problem is that lvm and scst are only active on the
primary node, thus the secondary node is undiscoverable by the ESXi host until it
fails over. I am not sure what the answer is but my thought process is I am
trying to figure out if it is possible to have:
1. on the node1 (primary node) drbd(primary), lvm, scst with the target in
read/write mode
2. on the node2 (secondary node) drbd(secondary), lvm, scst with the target in
read mode
and when node1 fails over, the node1 scst target goes read-only and the node2 scst
target would switch to read/write. What I am trying to achieve is the vm
host seeing the target and paths at all times.
Hopefully there is an easier solution to this and that I am not making things
more difficult. I have been researching this for weeks and at the point of
frustration. Any guidance would be appreciated.
Side note: I modified SCSTTarget RA to work with ib_srpt as it was not written
for it originally and did not find another RA out there specifically for my
setup.
Thank you for any help you may be able to provide.
Setup:
Initiator machines vmware ESXi 4.1
Target machines
2 nodes running CentOS 2.6.32-279.19.1.el6.x86_64
DRBD:
kmod-drbd84-8.4.2-1.el6_3.elrepo.x86_64
Pacemaker/Corosync:
pacemaker-libs-1.1.7-6.el6.x86_64
pacemaker-cli-1.1.7-6.el6.x86_64
pacemaker-1.1.7-6.el6.x86_64
pacemaker-cluster-libs-1.1.7-6.el6.x86_64
corosync-1.4.1-7.el6_3.1.x86_64
corosynclib-1.4.1-7.el6_3.1.x86_64
SCST/SRPT:
scst-tools-2.6.32-279.19.1.el6-2.2.1-1.ab.x86_64
kernel-module-scst-iscsi-2.6.32-279.19.1.el6-2.2.1-1.ab.x86_64
kernel-module-scst-core-2.6.32-279.19.1.el6-2.2.1-1.ab.x86_64
kernel-module-scst-srpt-2.6.32-279.19.1.el6-2.2.1-1.ab.x86_64
scst config:
HANDLER vdisk_fileio {
DEVICE disk00 {
filename /dev/drbd-stor/mail-stor
nv_cache 1
}
}
TARGET_DRIVER ib_srpt {
TARGET 0002:c902:0020:2020 {
enabled 1
cpu_mask ff
rel_tgt_id 1
GROUP data {
LUN 0 disk00
INITIATOR 0x8102c902002020210002c903000f2bf3
INITIATOR 0x8102c902002020220002c903000f2bf3
INITIATOR 0x8102c902002020210002c903000c67bd
INITIATOR 0x8102c902002020220002c903000c67bd
INITIATOR 0x8102c902002008ed0002c903000c67bd
INITIATOR 0x8102c902002008ee0002c903000c67bd
INITIATOR 0x8102c902002008ee0002c903000f2bf3
INITIATOR 0x8102c902002008ed0002c903000f2bf3
cpu_mask ff
}
}
}
drbd config:
resource r0 {
device /dev/drbd1;
disk /dev/mapper/lun-lun00;
meta-disk internal;
net {
protocol C;
max-buffers 8000;
max-epoch-size 8000;
cram-hmac-alg sha1;
shared-secret "secret";
}
handlers {
split-brain "/usr/lib/drbd/notify-split-brain.sh
[email protected]";
}
disk {
resync-rate 10M;
}
on mail-stor01.domain.com {
address 172.24.252.1:7790;
}
on mail-stor02.domain.com {
address 172.24.252.2:7790;
}
}
crm configure show:
node mail-stor01.domain.com
node mail-stor02.domain.com
primitive drbd-r0 ocf:linbit:drbd \
params drbd_resource="r0" \
operations $id="drbd-r0-operations" \
op monitor start-delay="0" interval="25"
primitive lvm-r0 ocf:heartbeat:LVM \
params volgrpname="drbd-stor" \
meta is-managed="true" target-role="Started" \
op monitor interval="10" timeout="30" depth="0" \
op start interval="0" timeout="500" \
op stop interval="0" timeout="500"
primitive node1-stonith stonith:fence_drac5 \
params action="off" ipaddr="drac-mailstor01" login="cluster"
passwd="secret" ipport="22" inet4_only="true" secure="true" verbose="true"
debug="/var/log/stonith" pcmk_host_check="static-list"
pcmk_host_list="mail-stor01.domain.com" \
meta target-role="started" is-managed="true"
primitive node2-stonith stonith:fence_drac5 \
params action="off" ipaddr="drac-mailstor02" login="cluster"
passwd="secret" ipport="22" inet4_only="true" secure="true" verbose="true"
debug="/var/log/stonith" pcmk_host_check="static-list"
pcmk_host_list="mail-stor02.domain.com" \
meta target-role="started" is-managed="true"
primitive p_ping ocf:pacemaker:ping \
params host_list="172.24.252.126 172.24.252.254" multiplier="100"
dampen="5s" \
op monitor interval="60" timeout="60" \
op start interval="0" timeout="60" \
op stop interval="0" timeout="60"
ms ms-drbd-r0 drbd-r0 \
meta clone-max="2" notify="true" target-role="Started"
resource-stickiness="0"
clone c_ping p_ping \
meta target-role="Started"
location loc-node1-stonith node1-stonith -inf: mail-stor01.domain.com
location loc-node2-stonith node2-stonith -inf: mail-stor02.domain.com
location pref-ping-lvm-drbd ms-drbd-r0 \
rule $id="pref-ping-lvm-rule" -inf: not_defined ping or ping lte 100
location pref-drbd-r0 ms-drbd-r0 \
rule $id="pref-drbd-r0-rule" $role="master" 100: #uname eq
mail-stor01.domain.com
colocation lvm-drbd-r0 inf: lvm-r0 ms-drbd-r0:Master
order or-drbd-lvm inf: ms-drbd-r0:promote lvm-r0:start
property $id="cib-bootstrap-options" \
default-resource-stickiness="200" \
expected-quorum-votes="2" \
dc-version="1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14" \
no-quorum-policy="ignore" \
cluster-infrastructure="openais" \
last-lrm-refresh="1355877808" \
stonith-enabled="true" \
stonith-action="poweroff"
Corosync.conf
totem {
version: 2
token: 5000
token_retransmits_before_loss_const: 10
join: 1000
consensus: 2500
vsftype: none
max_messages: 20
send_join: 45
clear_node_high_bit: yes
secauth: off
threads: 0
# RRP can have three modes (rrp_mode): if set to active, Corosync uses both
# interfaces actively. If set to passive, Corosync uses the second interface
# only if the first ring fails. If rrp_mode is set to none, RRP is disabled.
rrp_mode: active
interface {
ringnumber: 0
bindnetaddr: 172.24.0.0
mcastaddr: 226.94.1.1
mcastport: 4000
ttl: 1
}
interface {
ringnumber: 1
bindnetaddr: 172.24.16.0
mcastaddr: 226.94.1.1
mcastport: 4000
ttl: 1
}
}
logging {
fileline: off
to_stderr: no
to_logfile: yes
to_syslog: no
logfile: /var/log/cluster/corosync.log
debug: off
timestamp: on
syslog_facility: daemon
}
amf {
mode: disabled
}
aisexec {
user: root
group: root
}
service {
name: pacemaker
ver: 0
}
------
Jason Thomas [email protected]
_______________________________________________
drbd-user mailing list
[email protected]
http://lists.linbit.com/mailman/listinfo/drbd-user