Afternoon all,

We have a 2-node failover cluster using the IBM IMM for STONITH via the external/ipmi plugin. We recently moved our database filesystems from OCFS2 to ext3 because of a bug we discovered; the one disk that still needs to be available to both nodes (shared scripts, logs, etc.) remains OCFS2. All SAN disks are visible to both nodes at all times, although obviously each is only mounted via the appropriate resource group.
I have the following questions around best practice for this type of configuration:

1) I'm planning on implementing sfex resources (a small LVM volume on the same volume group as the data being protected) as an additional safety feature alongside the existing external/ipmi STONITH control. Is this best practice in case the IBM IMM is unavailable, credentials change, etc. and the STONITH action is not carried out? (A rough sketch of what I have in mind follows the question list.)

2) Is there any risk to a healthy node if an unhealthy node with a shared OCFS2 volume mounted goes down? The quorum policy is set to ignore. This seems to cause no issues, but I want to confirm it is the designed behaviour.

3) Does a node need its own STONITH resource to be able to self-fence, or is this covered by internal Pacemaker functionality? We currently use location constraints to ensure STONITH resources don't run on the node they are meant to fence, as per the documentation.

4) What is the best way to disable STONITH non-disruptively for node maintenance? Is it a case of setting the stonith-enabled property in the CIB to false, stopping the STONITH resources, and then stopping openais? (The sequence I was planning to test is sketched below.)

5) Is there an OCF-compliant resource agent for Derby / JavaDB that anyone knows of? We use an old init-style script at the moment, and I'm afraid it will trip us up and STONITH a node on shutdown at some stage. (A minimal skeleton of what I'd otherwise write myself is also below.)

Are there any other considerations to be in a best practice position? We have a window of change coming up and I want to create the best environment possible.
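For 1), this is roughly what I have in mind, based on my reading of the sfex agent's metadata. The LV name, index and timeouts are placeholders for our environment, not a tested configuration:

primitive SFEX_NEWS ocf:heartbeat:sfex \
        params device="/dev/vg_db_news/lv_sfex_news" index="1" \
        op monitor interval="10s" timeout="30s" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="60"

I would then put SFEX_NEWS into the NEWS group directly after VG_DB_NEWS (the lock LV lives on that VG, so the VG has to be active first) and before the filesystem resources, with an equivalent SFEX_FEEDS resource in the FEEDS group.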
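For 4), the sequence I was planning to test (crm shell commands; please correct me if maintenance-mode or a different order is the preferred route), assuming server-002 is the node being worked on:

# move everything off the maintenance node
crm node standby server-002

# once resources have migrated, disable fencing cluster-wide
crm configure property stonith-enabled=false

# stop the cluster stack on the maintenance node
/etc/init.d/openais stop      # rcopenais stop on SLES

# after maintenance, reverse the steps
/etc/init.d/openais start
crm configure property stonith-enabled=true
crm node online server-002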
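For 5), if nothing exists I was going to wrap our existing init script in a minimal OCF agent along these lines. This is only a skeleton to show the shape; the init script path, the port check and the metadata output are placeholders for our setup, not tested code:

#!/bin/sh
# Minimal OCF wrapper around our existing Derby init script.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

derby_monitor() {
    # Placeholder check: Derby network server listening on its default port (1527)?
    if netstat -ltn 2>/dev/null | grep -q ':1527 '; then
        return $OCF_SUCCESS
    fi
    return $OCF_NOT_RUNNING
}

derby_start() {
    derby_monitor && return $OCF_SUCCESS
    /etc/init.d/derby start || return $OCF_ERR_GENERIC
    # wait until monitor reports running so Pacemaker sees a clean start
    while ! derby_monitor; do sleep 1; done
    return $OCF_SUCCESS
}

derby_stop() {
    derby_monitor || return $OCF_SUCCESS
    /etc/init.d/derby stop || return $OCF_ERR_GENERIC
    while derby_monitor; do sleep 1; done
    return $OCF_SUCCESS
}

case "$1" in
    start)     derby_start ;;
    stop)      derby_stop ;;
    monitor)   derby_monitor ;;
    meta-data) echo "<resource-agent name=\"derby\"/>"; exit $OCF_SUCCESS ;;  # proper metadata XML still to write
    *)         exit $OCF_ERR_UNIMPLEMENTED ;;
esac
exit $?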
Please feel free to critique the below configuration as it stands.

Many thanks,
Dave.

node server-001
node server-002
primitive DERBYDB lsb:derby
primitive FS_DB_NEWS ocf:heartbeat:Filesystem \
        params device="/dev/vg_db_news/lv_db_news" directory="/DB_NEWS" fstype="ext3" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta migration-threshold="3" failure-timeout="180"
primitive FS_DB_FEEDS ocf:heartbeat:Filesystem \
        params device="/dev/vg_db_feeds/lv_db_feeds" directory="/DB_FEEDS" fstype="ext3" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta migration-threshold="3" failure-timeout="180"
primitive FS_DB_SHARED ocf:heartbeat:Filesystem \
        params device="/dev/mapper/07ea2ffab5c4ae011_part1" directory="/DB_SHARED" fstype="ocfs2" options="acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta target-role="Started"
primitive FS_LOGS_NEWS ocf:heartbeat:Filesystem \
        params device="/dev/mapper/0c2ebc3735c4ae011_part1" directory="/LOGS_NEWS" fstype="ext3" options="data=writeback,noatime,acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta migration-threshold="3" failure-timeout="180"
primitive FS_LOGS_FEEDS ocf:heartbeat:Filesystem \
        params device="/dev/mapper/0345899885c4ae011_part1" directory="/LOGS_FEEDS" fstype="ext3" options="data=writeback,noatime,acl" \
        op monitor interval="40s" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60" \
        meta migration-threshold="3" failure-timeout="180"
primitive IP_NEWS_15 ocf:heartbeat:IPaddr2 \
        params ip="192.168.15.92" cidr_netmask="24" \
        op monitor interval="30s" \
        meta migration-threshold="3" failure-timeout="180"
primitive IP_NEWS_72 ocf:heartbeat:IPaddr2 \
        params ip="192.168.72.92" cidr_netmask="24" \
        op monitor interval="30s" \
        meta migration-threshold="3" failure-timeout="180"
primitive IP_FEEDS_15 ocf:heartbeat:IPaddr2 \
        params ip="192.168.15.93" cidr_netmask="24" \
        op monitor interval="30s" \
        meta migration-threshold="3" failure-timeout="180"
primitive IP_FEEDS_72 ocf:heartbeat:IPaddr2 \
        params ip="192.168.72.93" cidr_netmask="24" \
        op monitor interval="30s" \
        meta migration-threshold="3" failure-timeout="180"
primitive MAIL_ALERT ocf:heartbeat:MailTo \
        params email="the...@thatcompany.com" \
        op monitor interval="60" timeout="10"
primitive PGSQL_FEEDS1 ocf:heartbeat:pgsql \
        params pgdata="/DB_FEEDS/feeds1/dbdata/data/" pgport="5432" pgdba="feeds1" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30" \
        meta migration-threshold="3" failure-timeout="180"
primitive PGSQL_FEEDS2 ocf:heartbeat:pgsql \
        params pgdata="/DB_FEEDS/feeds2/dbdata/data/" pgport="5434" pgdba="feeds2" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30" \
        meta migration-threshold="3" failure-timeout="180"
primitive PGSQL_NEWS ocf:heartbeat:pgsql \
        params pgdata="/DB_NEWS/news/dbdata/data/" pgport="5433" pgdba="news" \
        op start interval="0" timeout="120" \
        op stop interval="0" timeout="120" \
        op monitor interval="60" timeout="30" \
        meta migration-threshold="3" failure-timeout="180"
primitive STONITH-DB-001 stonith:external/ipmi \
        params hostname="server-001" ipaddr="192.168.72.80" userid="user" passwd="password" interface="lan" \
        op monitor interval="60s" timeout="30s" \
        meta target-role="Started"
primitive STONITH-DB-002 stonith:external/ipmi \
        params hostname="server-002" ipaddr="192.168.72.81" userid="user" passwd="password" interface="lan" \
        op monitor interval="60s" timeout="30s" \
        meta target-role="Started"
primitive VG_DB_NEWS ocf:heartbeat:LVM \
        params volgrpname="vg_db_news" \
        op monitor interval="60" timeout="60"
primitive VG_DB_FEEDS ocf:heartbeat:LVM \
        params volgrpname="vg_db_feeds" \
        op monitor interval="60" timeout="60"
primitive clvm ocf:lvm2:clvmd \
        params daemon_timeout="30" \
        op start interval="0" timeout="90" \
        op stop interval="0" timeout="100"
primitive dlm ocf:pacemaker:controld \
        op monitor interval="60" timeout="60"
primitive o2cb ocf:ocfs2:o2cb \
        op monitor interval="60" timeout="60"
group NEWS VG_DB_NEWS FS_LOGS_NEWS FS_DB_NEWS IP_NEWS_15 IP_NEWS_72 DERBYDB PGSQL_NEWS \
        meta target-role="Started"
group FEEDS VG_DB_FEEDS FS_LOGS_FEEDS FS_DB_FEEDS IP_FEEDS_15 IP_FEEDS_72 PGSQL_FEEDS1 PGSQL_FEEDS2 \
        meta target-role="Started"
group OCFS2_SHARED dlm o2cb clvm FS_DB_SHARED
clone CL_MAIL_ALERT MAIL_ALERT
clone CL_OCFS2_SHARED OCFS2_SHARED \
        meta interleave="true"
location LOC_NEWS NEWS 25: server-001
location LOC_FEEDS FEEDS 25: server-002
location LOC_STONITH-001 STONITH-DB-001 -inf: server-001
location LOC_STONITH-002 STONITH-DB-002 -inf: server-002
colocation COL_DB_SHARED_NEWS inf: NEWS CL_OCFS2_SHARED
colocation COL_DB_SHARED_FEEDS inf: FEEDS CL_OCFS2_SHARED
order DB_SHARE_FIRST_NEWS 0: CL_OCFS2_SHARED NEWS
order DB_SHARE_FIRST_FEEDS 0: CL_OCFS2_SHARED FEEDS
property $id="cib-bootstrap-options" \
        dc-version="1.1.5-5bd2b9154d7d9f86d7f56fe0a74072a5a6590c60" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        no-quorum-policy="ignore" \
        start-failure-is-fatal="false" \
        stonith-enabled="true" \
        last-lrm-refresh="1346358565"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"