Hello,

I'm trying to set up a 2-node, active-passive HA cluster for MySQL using heartbeat and Pacemaker. The operating system is Debian Linux 6.0.5 64-bit, and I am using the heartbeat packages installed via apt-get. The servers involved are the SQL nodes of a running MySQL cluster, so the only service I need HA for is the MySQL daemon (mysqld).

What I would like to do is have a single virtual IP address which clients use to query MySQL, and have the IP and mysqld fail over to the passive node in the event of a failure on the active node. I have read through a lot of the heartbeat and Pacemaker documentation, and here are the resources I have configured for the cluster:

* A custom LSB script for mysqld (compliant with Pacemaker's requirements as outlined in the documentation)
* An iLO2-based STONITH device using riloe (both servers are HP Proliant DL380 G5)
* A virtual IP address for mysqld using IPaddr2

I believe I have configured everything correctly, but I'm not positive. Anyway, when I start heartbeat and pacemaker (/etc/init.d/heartbeat start), everything seems to be ok. However, the virtual IP never comes up, and the output of "crm_resource -LV" indicates that something is wrong:

root@ha1:~# crm_resource -LV
crm_resource[28988]: 2012/08/22_14:41:23 WARN: unpack_rsc_op: Processing failed op stonith_start_0 on ha1: unknown error (1)
 stonith        (stonith:external/riloe) Started
 MysqlIP        (ocf::heartbeat:IPaddr2) Stopped
 mysqld (lsb:mysqld) Started

When I attempt to stop heartbeat and Pacemaker (/etc/init.d/heartbeat stop) it says "Stopping High-Availability services:" and then hangs for about 5 minutes before finally stopping the services.

So, I'm left with a couple of questions. Is there something wrong with my configuration? Is there a reason why the HA services can't shut down in a timely manner? Is there something else I need to do to get the virtual IP working? Thanks in advance for any help!

    - Dave

P.S. My full config as reported by "cibadmin --query" is as follows (iLO2 password removed):

<cib validate-with="pacemaker-1.0" crm_feature_set="3.0.1" have-quorum="1" admin_epoch="0" epoch="26" num_updates="8" cib-last-written="Wed Aug 22 11:16:59 2012" dc-uuid="1b48f410-44d1-4e89-8b52-ff23b32db1bc">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.0.9-74392a28b7f31d7ddc86689598bd23114f58978b"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="Heartbeat"/>
<nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="true"/>
</cluster_property_set>
</crm_config>
<nodes>
<node id="1b48f410-44d1-4e89-8b52-ff23b32db1bc" uname="ha1" type="normal"/>
<node id="9790fe6e-67b2-4817-abf4-966b5aa6948c" uname="ha2" type="normal"/>
</nodes>
<resources>
<primitive class="stonith" id="stonith" type="external/riloe">
<instance_attributes id="stonith-instance_attributes">
<nvpair id="stonith-instance_attributes-hostlist" name="hostlist" value="ha2"/>
<nvpair id="stonith-instance_attributes-ilo_hostname" name="ilo_hostname" value="10.0.1.112"/>
<nvpair id="stonith-instance_attributes-ilo_user" name="ilo_user" value="Administrator"/>
<nvpair id="stonith-instance_attributes-ilo_password" name="ilo_password" value="XXXXXXXX"/>
<nvpair id="stonith-instance_attributes-ilo_can_reset" name="ilo_can_reset" value="1"/>
<nvpair id="stonith-instance_attributes-ilo_protocol" name="ilo_protocol" value="2"/>
<nvpair id="stonith-instance_attributes-ilo_powerdown_method" name="ilo_powerdown_method" value="button"/>
</instance_attributes>
</primitive>
<primitive class="ocf" id="MysqlIP" provider="heartbeat" type="IPaddr2">
<instance_attributes id="MysqlIP-instance_attributes">
<nvpair id="MysqlIP-instance_attributes-ip" name="ip" value="192.168.25.9"/>
<nvpair id="MysqlIP-instance_attributes-cidr_netmask" name="cidr_netmask" value="32"/>
</instance_attributes>
<operations>
<op id="MysqlIP-monitor-30s" interval="30s" name="monitor"/>
</operations>
</primitive>
<primitive id="mysqld" class="lsb" type="mysqld">
</primitive>
</resources>
<constraints/>
<rsc_defaults/>
<op_defaults/>
</configuration>
<status>
<node_state id="1b48f410-44d1-4e89-8b52-ff23b32db1bc" uname="ha1" ha="active" in_ccm="true" crmd="online" join="member" expected="member" crm-debug-origin="do_update_resource" shutdown="0">
<lrm id="1b48f410-44d1-4e89-8b52-ff23b32db1bc">
<lrm_resources>
<lrm_resource id="stonith" type="external/riloe" class="stonith">
<lrm_rsc_op id="stonith_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.1" transition-key="4:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44" transition-magic="0:7;4:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44" call-id="2" rc-code="7" op-status="0" interval="0" last-run="1345660607" last-rc-change="1345660607" exec-time="0" queue-time="0" op-digest="c9a588fa10b441aa64c0a83229e8f3e1"/> <lrm_rsc_op id="stonith_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.0.1" transition-key="4:2:0:c09f049e-ed06-4d25-bc48-143a70b97e44" transition-magic="0:1;4:2:0:c09f049e-ed06-4d25-bc48-143a70b97e44" call-id="5" rc-code="1" op-status="0" interval="0" last-run="1345660607" last-rc-change="1345660607" exec-time="21050" queue-time="0" op-digest="c9a588fa10b441aa64c0a83229e8f3e1"/>
</lrm_resource>
<lrm_resource id="mysqld" type="mysqld" class="lsb">
<lrm_rsc_op id="mysqld_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.1" transition-key="6:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44" transition-magic="0:0;6:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44" call-id="4" rc-code="0" op-status="0" interval="0" last-run="1345660606" last-rc-change="1345660606" exec-time="10" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
<lrm_resource id="MysqlIP" type="IPaddr2" class="ocf" provider="heartbeat">
<lrm_rsc_op id="MysqlIP_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.1" transition-key="5:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44" transition-magic="0:7;5:0:7:c09f049e-ed06-4d25-bc48-143a70b97e44" call-id="3" rc-code="7" op-status="0" interval="0" last-run="1345660606" last-rc-change="1345660606" exec-time="20" queue-time="0" op-digest="9611b7026c2dc135fbd13d3537b42d16"/>
</lrm_resource>
</lrm_resources>
</lrm>
<transient_attributes id="1b48f410-44d1-4e89-8b52-ff23b32db1bc">
<instance_attributes id="status-1b48f410-44d1-4e89-8b52-ff23b32db1bc">
<nvpair id="status-1b48f410-44d1-4e89-8b52-ff23b32db1bc-probe_complete" name="probe_complete" value="true"/> <nvpair id="status-1b48f410-44d1-4e89-8b52-ff23b32db1bc-fail-count-stonith" name="fail-count-stonith" value="INFINITY"/> <nvpair id="status-1b48f410-44d1-4e89-8b52-ff23b32db1bc-last-failure-stonith" name="last-failure-stonith" value="1345660629"/>
</instance_attributes>
</transient_attributes>
</node_state>
</status>
</cib>

--

Dave Parker
Systems Administrator
Utica College
Integrated Information Technology Services
(315) 792-3229
Registered Linux User #408177


_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org

Reply via email to