I have a pretty basic setup by most people's standards, but there must be something that is not quite right about it. Sometimes when I force a resource failover from one server to the other, the clients with the NFS mounts don't cleanly migrate to the new server. I configured this using a few different "Pacemaker-DRBD-NFS" guides out there for reference (I believe they were the Linbit guides).

Sorry in advance for the long email.

Here is the config:
------------------------------
------------------------------
* Two identical servers
* Four exported NFS shares total (so I can independently fail over individual shares and run half on one server and half on the other)
* Bonded interface using LACP for "outgoing" client access
* Direct ethernet connection between the two servers (for Pacemaker/Corosync and DRBD)

Package versions (installed from either Debian Squeeze or Backports)
* lvm 2.02.66-5
* drbd 8.3.7-2.1
* nfs-kernel-server 1.2.2-4squeeze2
* pacemaker 1.1.7-1~bpo60+1
* corosync 1.4.2-1~bpo60+1

Each NFS share is built from the same stack of components and has its own virtual IP:

Hardware RAID -> /dev/sdb -> LVM -> DRBD single master (one resource for each share)
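
To make that concrete, here is roughly what one share (vni-storage, which becomes /dev/drbd2 in the config below) looks like. The VG/LV names, sizes, replication addresses and port here are illustrative/from memory, not copied from the real files:

    # LV carved out of the hardware RAID volume (names/sizes illustrative)
    lvcreate -L 500G -n vni-storage vg_data

    # /etc/drbd.d/vni-storage.res (DRBD 8.3 syntax, addresses illustrative)
    resource vni-storage {
      on storage1 {
        device    /dev/drbd2;
        disk      /dev/vg_data/vni-storage;
        address   192.168.1.1:7790;
        meta-disk internal;
      }
      on storage2 {
        device    /dev/drbd2;
        disk      /dev/vg_data/vni-storage;
        address   192.168.1.2:7790;
        meta-disk internal;
      }
    }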


Here is the pacemaker config (I really hope it doesn't get mangled):
====================
node storage1 \
    attributes standby="off"
node storage2 \
    attributes standby="off"
primitive p_drbd_distribion_storage ocf:linbit:drbd \
    params drbd_resource="distribion-storage" \
    op monitor interval="15" role="Master" \
    op monitor interval="30" role="Slave"
primitive p_drbd_vni_storage ocf:linbit:drbd \
    params drbd_resource="vni-storage" \
    op monitor interval="15" role="Master" \
    op monitor interval="30" role="Slave"
primitive p_drbd_xen_data1 ocf:linbit:drbd \
    params drbd_resource="xen-data1" \
    op monitor interval="15" role="Master" \
    op monitor interval="30" role="Slave"
primitive p_drbd_xen_data2 ocf:linbit:drbd \
    params drbd_resource="xen-data2" \
    op monitor interval="15" role="Master" \
    op monitor interval="30" role="Slave"
primitive p_exportfs_distribion_storage ocf:heartbeat:exportfs \
    params fsid="1" directory="/data/distribion-storage" options="rw,async,no_root_squash,subtree_check" clientspec="10.205.152.0/21" wait_for_leasetime_on_stop="false" \
    op monitor interval="30s"
primitive p_exportfs_vni_storage ocf:heartbeat:exportfs \
    params fsid="2" directory="/data/vni-storage" options="rw,async,no_root_squash,subtree_check" clientspec="10.205.152.0/21" wait_for_leasetime_on_stop="false" \
    op monitor interval="30s"
primitive p_exportfs_xen_data1 ocf:heartbeat:exportfs \
    params fsid="3" directory="/data/xen-data1" options="rw,async,no_root_squash,subtree_check" clientspec="10.205.152.0/21" wait_for_leasetime_on_stop="false" \
    op monitor interval="30s"
primitive p_exportfs_xen_data2 ocf:heartbeat:exportfs \
    params fsid="4" directory="/data/xen-data2" options="rw,async,no_root_squash,subtree_check" clientspec="10.205.152.0/21" wait_for_leasetime_on_stop="false" \
    op monitor interval="30s"
primitive p_fs_distribion_storage ocf:heartbeat:Filesystem \
params fstype="xfs" directory="/data/distribion-storage" device="/dev/drbd1" \
    meta target-role="Started"
primitive p_fs_vni_storage ocf:heartbeat:Filesystem \
    params fstype="xfs" directory="/data/vni-storage" device="/dev/drbd2"
primitive p_fs_xen_data1 ocf:heartbeat:Filesystem \
    params fstype="xfs" directory="/data/xen-data1" device="/dev/drbd3" \
    meta target-role="Started"
primitive p_fs_xen_data2 ocf:heartbeat:Filesystem \
    params fstype="xfs" directory="/data/xen-data2" device="/dev/drbd4" \
    meta target-role="Started"
primitive p_ip_distribion_storage ocf:heartbeat:IPaddr2 \
    params ip="10.205.154.137" cidr_netmask="21" \
    op monitor interval="20s"
primitive p_ip_vni_storage ocf:heartbeat:IPaddr2 \
    params ip="10.205.154.138" cidr_netmask="21" \
    op monitor interval="20s"
primitive p_ip_xen_data1 ocf:heartbeat:IPaddr2 \
    params ip="10.205.154.139" cidr_netmask="21" \
    op monitor interval="20s"
primitive p_ip_xen_data2 ocf:heartbeat:IPaddr2 \
    params ip="10.205.154.140" cidr_netmask="21" \
    op monitor interval="20s"
primitive p_lsb_nfsserver lsb:nfs-kernel-server \
    op monitor interval="30s"
primitive p_ping ocf:pacemaker:ping \
    params host_list="10.205.154.66" multiplier="100" \
    op monitor interval="15s" timeout="5s"
group g_nfs_distribion_storage p_ip_distribion_storage p_fs_distribion_storage p_exportfs_distribion_storage
group g_nfs_vni_storage p_ip_vni_storage p_fs_vni_storage p_exportfs_vni_storage \
    meta is-managed="true" target-role="Started"
group g_nfs_xen_data1 p_ip_xen_data1 p_fs_xen_data1 p_exportfs_xen_data1
group g_nfs_xen_data2 p_ip_xen_data2 p_fs_xen_data2 p_exportfs_xen_data2
ms ms_drbd_distribion_storage p_drbd_distribion_storage \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
ms ms_drbd_vni_storage p_drbd_vni_storage \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" is-managed="true" target-role="Started"
ms ms_drbd_xen_data1 p_drbd_xen_data1 \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
ms ms_drbd_xen_data2 p_drbd_xen_data2 \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
clone cl_lsb_nfsserver p_lsb_nfsserver \
    meta target-role="Started"
clone cl_ping p_ping \
    meta globally-unique="false"
location l_live_distribion_storage g_nfs_distribion_storage \
    rule $id="l_live_distribion_storage-rule" -inf: not_defined pingd or pingd lte 0
location l_live_vni_storage g_nfs_vni_storage \
    rule $id="l_live_vni_storage-rule" -inf: not_defined pingd or pingd lte 0
location l_live_xen_data1 g_nfs_xen_data1 \
    rule $id="l_live_xen_data1-rule" -inf: not_defined pingd or pingd lte 0
location l_live_xen_data2 g_nfs_xen_data2 \
    rule $id="l_live_xen_data2-rule" -inf: not_defined pingd or pingd lte 0
colocation c_p_fs_distribion_storage_on_ms_drbd_distribion_storage inf: g_nfs_distribion_storage ms_drbd_distribion_storage:Master
colocation c_p_fs_vni_storage_on_ms_drbd_vni_storage inf: g_nfs_vni_storage ms_drbd_vni_storage:Master
colocation c_p_fs_xen_data1_on_ms_drbd_xen_data1 inf: g_nfs_xen_data1 ms_drbd_xen_data1:Master
colocation c_p_fs_xen_data2_on_ms_drbd_xen_data2 inf: g_nfs_xen_data2 ms_drbd_xen_data2:Master
order o_ms_drbd_distribion_storage_before_p_fs_distribion_storage inf: ms_drbd_distribion_storage:promote g_nfs_distribion_storage:start
order o_ms_drbd_vni_storage_before_p_fs_vni_storage inf: ms_drbd_vni_storage:promote g_nfs_vni_storage:start
order o_ms_drbd_xen_data1_before_p_fs_xen_data1 inf: ms_drbd_xen_data1:promote p_exportfs_vni_storage:start
order o_ms_drbd_xen_data2_before_p_fs_xen_data2 inf: ms_drbd_xen_data2:promote g_nfs_xen_data2:start
property $id="cib-bootstrap-options" \
    dc-version="1.1.6-9971ebba4494012a93c03b40a2c58ec0eb60f50c" \
    cluster-infrastructure="openais" \
    expected-quorum-votes="2" \
    stonith-enabled="false" \
    no-quorum-policy="ignore" \
    last-lrm-refresh="1350405150"
rsc_defaults $id="rsc-options" \
    resource-stickiness="200"
====================

------------------------------
------------------------------

Now on to the issue I'm experiencing. I have a particular client machine that mounts /data/vni-storage via NFS as its Apache document root (/var/www). If I log into that client and do an "ls /var/www", I'll see the files. I then manually force a resource migration ("crm resource migrate g_nfs_vni_storage storage2"). The resource migrates successfully on the back end (as shown by crm status), and everything seems fine. However, if I issue an "ls /var/www" again on the client, it basically hangs and never properly "sees" the share at its new location. If I wait long enough (usually a matter of minutes), it will sometimes eventually spit out an I/O error. I've even had instances (without any intervention on my part) where the ocf:heartbeat:exportfs resource would time out (according to the logs) and "re-export" itself. The server will still show everything running fine, but the client will now be reporting a "stale NFS file handle" error.
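
In other words, the sequence is roughly this (paraphrasing my shell history; the resource and node names are the ones from the config above):

    # on the client
    ls /var/www        # works, files are listed

    # on either cluster node
    crm resource migrate g_nfs_vni_storage storage2
    crm status         # g_nfs_vni_storage now running on storage2, all green

    # back on the client
    ls /var/www        # hangs; after several minutes it sometimes returns an
                       # I/O error, or later a "stale NFS file handle"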

I've done some research to try to understand the issue, and several posts point out that the fsid parameter needs to be identical on both cluster servers so that the NFS file handles handed out to clients stay valid after a failover. In fact, I've had that parameter set on p_exportfs_vni_storage since the initial deployment about six months ago.
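
If I'm reading the exportfs RA right, the export it sets up on whichever node currently holds the group should boil down to something like the following (my reconstruction, not copied from any logs), and since the fsid comes from the one cluster resource it is by definition the same on both nodes:

    # roughly what p_exportfs_vni_storage does on the active node
    exportfs -o rw,async,no_root_squash,subtree_check,fsid=2 \
        10.205.152.0/21:/data/vni-storage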

I then had an issue the other day where I had to manually migrate the share to the other server, which ultimately led to issues with some of the OTHER NFS shares (namely g_nfs_xen_data1). This was a bad share to have trouble with, as it is an NFS storage repository for our XenServer VM guests, which led to all sorts of "disk failure" issues on the guests.

After looking around some more today, my first thought was that multiple NFS shares might not be well supported (even though I really need them to be this way). I took a look at the resource script for exportfs (/usr/lib/ocf/resource.d/heartbeat/exportfs), and I noticed that when the script makes a copy of /var/lib/nfs/rmtab in the backup_rmtab function, it filters out any shares that don't match the exported directory of the active resource. It looks like this may become a problem when the restore_rmtab function is later called after a resource migration, because now /var/lib/nfs/rmtab will only contain the directory for the active resource and not the other three NFS mounts. Maybe this leads to the failover issue?
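
Paraphrased from memory (this is the gist of what I think the agent does, not a verbatim copy), the part that worries me is essentially:

    # backup_rmtab: only rmtab entries matching THIS resource's directory are
    # saved; the backup file lives inside the exported directory itself, so it
    # travels with the DRBD device
    grep ":${OCF_RESKEY_directory}:" /var/lib/nfs/rmtab > "$rmtab_backup"

    # restore_rmtab: on start after a failover, that filtered backup is fed
    # back into /var/lib/nfs/rmtab on the new node -- entries for my other
    # three exports were never part of this resource's backup in the first place
    sort -u "$rmtab_backup" /var/lib/nfs/rmtab > "$tmp" && mv "$tmp" /var/lib/nfs/rmtab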


So to sum it up:

Was ocf:heartbeat:exportfs intended to work with multiple, separate NFS shares? Given the way the rmtab file is backed up, it doesn't look like it. If it wasn't, what would be the recommended course of action? Even if I manage the exports outside of Pacemaker, I still have to worry about keeping /var/lib/nfs/rmtab copied over to the other server.
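
(By "copied over" I mean something crude like the following, e.g. a cron entry pushing rmtab across the direct link to the peer; purely illustrative, not something I actually have in place:)

    # /etc/crontab on whichever node is active (illustrative only)
    * * * * *  root  rsync -a /var/lib/nfs/rmtab storage2:/var/lib/nfs/rmtab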

Regarding the client getting the "stale NFS file handle" error and having trouble failing over: could this be related to Apache keeping a lot of files open on that share (primarily log files)? Would that affect the NFS client's ability to "reconnect" to the new server?
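
For what it's worth, these are the kinds of checks I can run on the client if it would help diagnose this (standard tools, nothing exotic):

    # processes holding files open under the NFS mount
    fuser -vm /var/www
    lsof /var/www

    # mount options actually in effect (hard/soft, NFS version, etc.)
    grep /var/www /proc/mounts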

Are there any other obvious mistakes in my Pacemaker config, or improvements I could make?

Thanks.


--
Justin Pasher
