Hi,
what is the most bulletproof way to set up a two-node cluster
so both run a PostgreSQL instance?
Is it better to create two pgsql resources both bound to
a certain machine or a cloned pgsql resource?
I would need it for replication, either with a full active/active
system with SkyTools' Londiste or an active passive
replication with SkyTools' WALMgr solution.
Occasionally I see "multirunning" state for my resources
no matter which way I choose. Although it's triggerable
more easily with the cloned resource.
Also, I have set up a virtual IP with IPaddr OCF RA.
Takeover takes place instantly if I put the "master" machine
into standby mode or e.g. stop PostgreSQL manually
with its SysV script as I have specified a monitor action
for it with 2 seconds interval and fencing on failure.
But I noticed that somehow IP takeover doesn't take place
if I pull the plug on the virtual ethernet card(s).
I tried it with two Fedora 6 systems inside VMWare.
I have set up pingd with my host machine as the ping node;
services do stop on the machine that is separated from the network,
but the virtual IP isn't started on the still-connected machine.
Attached are my ha.cf, cib.xml and the referenced extra scripts.
Thanks in advance and best regards,
Zoltán Böszörményi
--
----------------------------------
Zoltán Böszörményi
Cybertec Schönig & Schönig GmbH
http://www.postgresql.at/
<cib generated="true" admin_epoch="0" have_quorum="true" ignore_dtd="false" num_peers="1" cib_feature_revision="1.3" epoch="27" num_updates="1872" cib-last-written="Thu Feb 7 17:09:41 2008" ccm_transition="1" dc_uuid="2923132b-6302-4dbc-824b-a526c19f0fad">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<attributes>
<nvpair id="id-stonith-enabled" name="stonith-enabled" value="False"/>
<nvpair name="last-lrm-refresh" id="cib-bootstrap-options-last-lrm-refresh" value="1202406954"/>
</attributes>
</cluster_property_set>
</crm_config>
<nodes>
<node id="2923132b-6302-4dbc-824b-a526c19f0fad" uname="ws232.ltsp" type="normal">
<instance_attributes id="nodes-2923132b-6302-4dbc-824b-a526c19f0fad">
<attributes>
<nvpair id="standby-2923132b-6302-4dbc-824b-a526c19f0fad" name="standby" value="off"/>
</attributes>
</instance_attributes>
</node>
<node id="63614ba5-2fc3-4cd6-8af3-4540458bd8ac" uname="ws231.ltsp" type="normal"/>
</nodes>
<resources>
<primitive id="virt_ip" class="ocf" type="IPaddr" provider="heartbeat" resource_stickiness="40">
<instance_attributes id="virt_ip_instance_attrs">
<attributes>
<nvpair id="f1045083-e2d1-403b-b03a-2ca7d925c562" name="ip" value="192.168.0.78"/>
<nvpair id="virt_ip_target_role" name="target_role" value="started"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" type="pgsql" provider="heartbeat" id="pgsql_master">
<instance_attributes id="pgsql_master_instance_attrs">
<attributes>
<nvpair id="8d5bd33e-60be-4c49-b05a-2949aa660ffc" name="pgdata" value="/var/lib/pgsql/data"/>
</attributes>
</instance_attributes>
<operations>
<op name="monitor" interval="2" timeout="30" disabled="false" role="Started" prereq="nothing" on_fail="fence" id="7ac5c64e-cae0-4ac0-8be2-ce7530885972" start_delay="10"/>
<op id="8ee9d149-e0a3-4ed0-9e5f-44362e7f925a" name="stop" timeout="120" disabled="false" role="Started" prereq="fencing" on_fail="block" start_delay="0"/>
</operations>
</primitive>
<primitive class="ocf" type="pgsql" provider="heartbeat" id="pgsql_slave">
<instance_attributes id="pgsql_slave_instance_attrs">
<attributes>
<nvpair id="47fd4a7f-244e-47e7-abcc-0370e64c0bae" name="pgdata" value="/var/lib/pgsql/data"/>
</attributes>
</instance_attributes>
<operations>
<op name="monitor" interval="2" timeout="30" disabled="false" role="Started" prereq="nothing" on_fail="fence" id="f4412c2a-cfd8-4d99-b397-64782ac12584" start_delay="10"/>
<op id="47025af6-f5d0-4ac1-aa78-81236cc552e4" name="stop" timeout="120" disabled="false" role="Started" prereq="fencing" on_fail="block" start_delay="0"/>
</operations>
</primitive>
<primitive class="ocf" type="LondisteTicker" provider="heartbeat" id="ticker">
<instance_attributes id="ticker_instance_attrs">
<attributes>
<nvpair name="target_role" id="ticker_target_role" value="started"/>
<nvpair id="36b33172-b917-4ed8-859a-2ec8805c906d" name="configdir" value="/etc/cluster"/>
<nvpair id="afe2b8fd-8ca2-4c0d-be2c-2c6a6adf427a" name="pidfile" value="/etc/cluster/pgqadm_somedb.pid"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" type="LondisteReplay" provider="heartbeat" id="replay">
<instance_attributes id="replay_instance_attrs">
<attributes>
<nvpair id="d60e62c6-d5cd-49f7-a051-8827dc5668be" name="configdir" value="/etc/cluster"/>
<nvpair id="24e6b600-8dcd-45ab-a268-4c6c1a189716" name="pidfile" value="/etc/cluster/test_to_subcriber.pid"/>
<nvpair id="replay_target_role" name="target_role" value="started"/>
</attributes>
</instance_attributes>
</primitive>
<primitive id="slavemigration" class="ocf" type="SlaveMigration" provider="heartbeat">
<instance_attributes id="slavemigration_instance_attrs">
<attributes>
<nvpair id="e5d68525-f5ea-44c8-8072-aadddc540154" name="masterip" value="192.168.0.231 192.168.0.78"/>
<nvpair id="ed45bafe-4f78-48de-83fa-df2cdee714cb" name="slavehostname" value="ws231"/>
<nvpair id="169a707f-bd0b-41fc-b0d5-502420607789" name="pgdata" value="/var/lib/pgsql/data"/>
<nvpair id="4626cd33-e27f-4f2b-b865-be104bf688d9" name="pghba_ok" value="/var/lib/pgsql/data/pg_hba.conf.ok"/>
<nvpair id="66164e0f-3a74-4de6-a609-797dac78a7cc" name="pghba_failed" value="/var/lib/pgsql/data/pg_hba.conf.failed"/>
<nvpair id="slavemigration_target_role" name="target_role" value="started"/>
</attributes>
</instance_attributes>
</primitive>
</resources>
<constraints>
<rsc_location id="ticker_place" rsc="ticker">
<rule id="prefered_ticker_place" score="-INFINITY">
<expression attribute="#uname" id="fe6db73f-ba41-4608-9556-76f2487d16dd" operation="eq" value="ws231.ltsp"/>
</rule>
</rsc_location>
<rsc_location id="replay_place" rsc="replay">
<rule id="prefered_replay_place" score="-INFINITY">
<expression attribute="#uname" id="9057f011-5592-4735-842c-0eb6d254b5f6" operation="eq" value="ws231.ltsp"/>
</rule>
</rsc_location>
<rsc_colocation id="coloc_slavemigr" from="slavemigration" to="virt_ip" score="INFINITY"/>
<rsc_location id="place_pgmaster" rsc="pgsql_master">
<rule id="prefered_place_pgmaster" score="-INFINITY">
<expression attribute="#uname" id="89727b1a-4d5f-4233-8111-1d56758a5369" operation="eq" value="ws231.ltsp"/>
</rule>
</rsc_location>
<rsc_location id="place_pgslave" rsc="pgsql_slave">
<rule id="prefered_place_pgslave" score="-INFINITY">
<expression attribute="#uname" id="96233464-f58c-496a-9c9c-20a606c9e910" operation="eq" value="ws232.ltsp"/>
</rule>
</rsc_location>
<rsc_location id="virt_ip_place" rsc="virt_ip">
<rule id="prefered_virt_ip_place" score="20">
<expression attribute="#uname" id="0c7f39c2-1813-49a0-bfcb-d54da4663ebc" operation="eq" value="ws232.ltsp"/>
</rule>
<rule id="virt_ip_connected" score_attribute="pingd">
<expression id="virt_ip_connected_defined" attribute="pingd" operation="defined"/>
</rule>
<rule id="virt_ip_unconnected" score="-INFINITY" boolean_op="or">
<expression id="virt_ip_unconnected_undefined" attribute="pingd" operation="not_defined"/>
<expression id="virt_ip_unconnected_zero" attribute="pingd" operation="lte" value="0"/>
</rule>
</rsc_location>
</constraints>
</configuration>
<status>
<node_state id="2923132b-6302-4dbc-824b-a526c19f0fad" uname="ws232.ltsp" crmd="online" crm-debug-origin="do_update_resource" shutdown="0" in_ccm="true" ha="active" join="member" expected="member">
<transient_attributes id="2923132b-6302-4dbc-824b-a526c19f0fad">
<instance_attributes id="status-2923132b-6302-4dbc-824b-a526c19f0fad">
<attributes>
<nvpair id="status-2923132b-6302-4dbc-824b-a526c19f0fad-pingd" name="pingd" value="100"/>
<nvpair id="status-2923132b-6302-4dbc-824b-a526c19f0fad-probe_complete" name="probe_complete" value="true"/>
</attributes>
</instance_attributes>
</transient_attributes>
<lrm id="2923132b-6302-4dbc-824b-a526c19f0fad">
<lrm_resources>
<lrm_resource id="ticker" type="LondisteTicker" class="ocf" provider="heartbeat">
<lrm_rsc_op id="ticker_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="6:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:7;6:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="5" crm_feature_set="1.0.7" rc_code="7" op_status="0" interval="0" op_digest="862844f7ed3f36c8cf0a44851631172e"/>
<lrm_rsc_op id="ticker_start_0" operation="start" crm-debug-origin="do_update_resource" transition_key="12:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:0;12:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="10" crm_feature_set="1.0.7" rc_code="0" op_status="0" interval="0" op_digest="862844f7ed3f36c8cf0a44851631172e"/>
</lrm_resource>
<lrm_resource id="pgsql_slave" type="pgsql" class="ocf" provider="heartbeat">
<lrm_rsc_op id="pgsql_slave_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="5:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:7;5:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="4" crm_feature_set="1.0.7" rc_code="7" op_status="0" interval="0" op_digest="e797d2da96da1607248530709e40cc9c"/>
</lrm_resource>
<lrm_resource id="pgsql_master" type="pgsql" class="ocf" provider="heartbeat">
<lrm_rsc_op id="pgsql_master_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="4:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:7;4:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="3" crm_feature_set="1.0.7" rc_code="7" op_status="0" interval="0" op_digest="e797d2da96da1607248530709e40cc9c"/>
<lrm_rsc_op id="pgsql_master_start_0" operation="start" crm-debug-origin="do_update_resource" transition_key="10:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:0;10:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="9" crm_feature_set="1.0.7" rc_code="0" op_status="0" interval="0" op_digest="e797d2da96da1607248530709e40cc9c"/>
<lrm_rsc_op id="pgsql_master_monitor_2000" operation="monitor" crm-debug-origin="do_update_resource" transition_key="7:1:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:0;7:1:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="13" crm_feature_set="1.0.7" rc_code="0" op_status="0" interval="2000" op_digest="e797d2da96da1607248530709e40cc9c"/>
</lrm_resource>
<lrm_resource id="virt_ip" type="IPaddr" class="ocf" provider="heartbeat">
<lrm_rsc_op id="virt_ip_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="3:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:7;3:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="2" crm_feature_set="1.0.7" rc_code="7" op_status="0" interval="0" op_digest="6e442c32355ceb766a05f9e249149250"/>
<lrm_rsc_op id="virt_ip_start_0" operation="start" crm-debug-origin="do_update_resource" transition_key="9:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:0;9:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="8" crm_feature_set="1.0.7" rc_code="0" op_status="0" interval="0" op_digest="6e442c32355ceb766a05f9e249149250"/>
</lrm_resource>
<lrm_resource id="slavemigration" type="SlaveMigration" class="ocf" provider="heartbeat">
<lrm_rsc_op id="slavemigration_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="8:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:7;8:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="7" crm_feature_set="1.0.7" rc_code="7" op_status="0" interval="0" op_digest="567f5cb70ff89896e74189d5d767cc98"/>
<lrm_rsc_op id="slavemigration_start_0" operation="start" crm-debug-origin="do_update_resource" transition_key="14:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:0;14:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="12" crm_feature_set="1.0.7" rc_code="0" op_status="0" interval="0" op_digest="567f5cb70ff89896e74189d5d767cc98"/>
</lrm_resource>
<lrm_resource id="replay" type="LondisteReplay" class="ocf" provider="heartbeat">
<lrm_rsc_op id="replay_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="7:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:7;7:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="6" crm_feature_set="1.0.7" rc_code="7" op_status="0" interval="0" op_digest="a9529b525221f6197cfbf2a1cb46d09f"/>
<lrm_rsc_op id="replay_start_0" operation="start" crm-debug-origin="do_update_resource" transition_key="13:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" transition_magic="0:0;13:0:405f015f-bd93-4dd7-ab1f-ee6d695d6aca" call_id="11" crm_feature_set="1.0.7" rc_code="0" op_status="0" interval="0" op_digest="a9529b525221f6197cfbf2a1cb46d09f"/>
</lrm_resource>
</lrm_resources>
</lrm>
</node_state>
</status>
</cib>
#
# There are lots of options in this file. All you have to have is a set
# of nodes listed {"node ...} one of {serial, bcast, mcast, or ucast},
# and a value for "auto_failback".
#
# ATTENTION: As the configuration file is read line by line,
# THE ORDER OF DIRECTIVE MATTERS!
#
# In particular, make sure that the udpport, serial baud rate
# etc. are set before the heartbeat media are defined!
# debug and log file directives go into effect when they
# are encountered.
#
# All will be fine if you keep them ordered as in this example.
#
#
# Note on logging:
# If any of debugfile, logfile and logfacility are defined then they
# will be used. If debugfile and/or logfile are not defined and
# logfacility is defined then the respective logging and debug
# messages will be logged to syslog. If logfacility is not defined
# then debugfile and logfile will be used to log messages. If
# logfacility is not defined and debugfile and/or logfile are not
# defined then defaults will be used for debugfile and logfile as
# required and messages will be sent there.
#
# File to write debug messages to
#debugfile /var/log/ha-debug
#
#
# File to write other messages to
#
logfile /var/log/ha-log
#
#
# Facility to use for syslog()/logger
#
#logfacility local0
#
#
# A note on specifying "how long" times below...
#
# The default time unit is seconds
# 10 means ten seconds
#
# You can also specify them in milliseconds
# 1500ms means 1.5 seconds
#
#
# keepalive: how long between heartbeats?
#
#keepalive 2
#
# deadtime: how long-to-declare-host-dead?
#
# If you set this too low you will get the problematic
# split-brain (or cluster partition) problem.
# See the FAQ for how to use warntime to tune deadtime.
#
#deadtime 30
#
# warntime: how long before issuing "late heartbeat" warning?
# See the FAQ for how to use warntime to tune deadtime.
#
#warntime 10
#
#
# Very first dead time (initdead)
#
# On some machines/OSes, etc. the network takes a while to come up
# and start working right after you've been rebooted. As a result
# we have a separate dead time for when things first come up.
# It should be at least twice the normal dead time.
#
#initdead 120
#
#
# What UDP port to use for bcast/ucast communication?
#
#udpport 694
#
# Baud rate for serial ports...
#
#baud 19200
#
# serial serialportname ...
#serial /dev/ttyS0 # Linux
#serial /dev/cuaa0 # FreeBSD
#serial /dev/cuad0 # FreeBSD 6.x
#serial /dev/cua/a # Solaris
#
#
# What interfaces to broadcast heartbeats over?
#
#bcast eth0 # Linux
#bcast eth1 eth2 # Linux
#bcast le0 # Solaris
#bcast le1 le2 # Solaris
#
# Set up a multicast heartbeat medium
# mcast [dev] [mcast group] [port] [ttl] [loop]
#
# [dev] device to send/rcv heartbeats on
# [mcast group] multicast group to join (class D multicast address
# 224.0.0.0 - 239.255.255.255)
# [port] udp port to sendto/rcvfrom (set this value to the
# same value as "udpport" above)
# [ttl] the ttl value for outbound heartbeats. this effects
# how far the multicast packet will propagate. (0-255)
# Must be greater than zero.
# [loop] toggles loopback for outbound multicast heartbeats.
# if enabled, an outbound packet will be looped back and
# received by the interface it was sent on. (0 or 1)
# Set this value to zero.
#
#
#mcast eth0 225.0.0.1 694 1 0
#
# Set up a unicast / udp heartbeat medium
# ucast [dev] [peer-ip-addr]
#
# [dev] device to send/rcv heartbeats on
# [peer-ip-addr] IP address of peer to send packets to
#
#ucast eth0 192.168.1.2
#
#
# About boolean values...
#
# Any of the following case-insensitive values will work for true:
# true, on, yes, y, 1
# Any of the following case-insensitive values will work for false:
# false, off, no, n, 0
#
#
#
# auto_failback: determines whether a resource will
# automatically fail back to its "primary" node, or remain
# on whatever node is serving it until that node fails, or
# an administrator intervenes.
#
# The possible values for auto_failback are:
# on - enable automatic failbacks
# off - disable automatic failbacks
# legacy - enable automatic failbacks in systems
# where all nodes do not yet support
# the auto_failback option.
#
# auto_failback "on" and "off" are backwards compatible with the old
# "nice_failback on" setting.
#
# See the FAQ for information on how to convert
# from "legacy" to "on" without a flash cut.
# (i.e., using a "rolling upgrade" process)
#
# The default value for auto_failback is "legacy", which
# will issue a warning at startup. So, make sure you put
# an auto_failback directive in your ha.cf file.
# (note: auto_failback can be any boolean or "legacy")
#
#auto_failback on
#
#
# Basic STONITH support
# Using this directive assumes that there is one stonith
# device in the cluster. Parameters to this device are
# read from a configuration file. The format of this line is:
#
# stonith <stonith_type> <configfile>
#
# NOTE: it is up to you to maintain this file on each node in the
# cluster!
#
#stonith baytech /etc/ha.d/conf/stonith.baytech
#
# STONITH support
# You can configure multiple stonith devices using this directive.
# The format of the line is:
# stonith_host <hostfrom> <stonith_type> <params...>
# <hostfrom> is the machine the stonith device is attached
# to or * to mean it is accessible from any host.
# <stonith_type> is the type of stonith device (a list of
# supported drives is in /usr/lib/stonith.)
# <params...> are driver specific parameters. To see the
# format for a particular device, run:
# stonith -l -t <stonith_type>
#
#
# Note that if you put your stonith device access information in
# here, and you make this file publically readable, you're asking
# for a denial of service attack ;-)
#
# To get a list of supported stonith devices, run
# stonith -L
# For detailed information on which stonith devices are supported
# and their detailed configuration options, run this command:
# stonith -h
#
#stonith_host * baytech 10.0.0.3 mylogin mysecretpassword
#stonith_host ken3 rps10 /dev/ttyS1 kathy 0
#stonith_host kathy rps10 /dev/ttyS1 ken3 0
#
# Watchdog is the watchdog timer. If our own heart doesn't beat for
# a minute, then our machine will reboot.
# NOTE: If you are using the software watchdog, you very likely
# wish to load the module with the parameter "nowayout=0" or
# compile it without CONFIG_WATCHDOG_NOWAYOUT set. Otherwise even
# an orderly shutdown of heartbeat will trigger a reboot, which is
# very likely NOT what you want.
#
#watchdog /dev/watchdog
#
# Tell what machines are in the cluster
# node nodename ... -- must match uname -n
#node ken3
#node kathy
#
# Less common options...
#
# Treats 10.10.10.254 as a pseudo-cluster-member
# Used together with ipfail below...
# note: don't use a cluster node as ping node
#
#ping 10.10.10.254
#
# Treats 10.10.10.254 and 10.10.10.253 as a pseudo-cluster-member
# called group1. If either 10.10.10.254 or 10.10.10.253 are up
# then group1 is up
# Used together with ipfail below...
#
#ping_group group1 10.10.10.254 10.10.10.253
#
# HBA ping directive for Fibre Channel
# Treats fc-card-name as pseudo-cluster-member
# used with ipfail below ...
#
# You can obtain HBAAPI from http://hbaapi.sourceforge.net. You need
# to get the library specific to your HBA directly from the vender
# To install HBAAPI stuff, all You need to do is to compile the common
# part you obtained from the sourceforge. This will produce libHBAAPI.so
# which you need to copy to /usr/lib. You need also copy hbaapi.h to
# /usr/include.
#
# The fc-card-name is the name obtained from the hbaapitest program
# that is part of the hbaapi package. Running hbaapitest will produce
# a verbose output. One of the first line is similar to:
# Apapter number 0 is named: qlogic-qla2200-0
# Here fc-card-name is qlogic-qla2200-0.
#
#hbaping fc-card-name
#
#
# Processes started and stopped with heartbeat. Restarted unless
# they exit with rc=100
#
#respawn userid /path/name/to/run
#respawn hacluster /usr/lib/heartbeat/ipfail
#
# Access control for client api
# default is no access
#
#apiauth client-name gid=gidlist uid=uidlist
#apiauth ipfail gid=haclient uid=hacluster
###########################
#
# Unusual options.
#
###########################
#
# hopfudge maximum hop count minus number of nodes in config
#hopfudge 1
#
# deadping - dead time for ping nodes
#deadping 30
#
# hbgenmethod - Heartbeat generation number creation method
# Normally these are stored on disk and incremented as needed.
#hbgenmethod time
#
# realtime - enable/disable realtime execution (high priority, etc.)
# defaults to on
#realtime off
#
# debug - set debug level
# defaults to zero
debug 1
#
# API Authentication - replaces the fifo-permissions-based system of the past
#
#
# You can put a uid list and/or a gid list.
# If you put both, then a process is authorized if it qualifies under either
# the uid list, or under the gid list.
#
# The groupname "default" has special meaning. If it is specified, then
# this will be used for authorizing groupless clients, and any client groups
# not otherwise specified.
#
# There is a subtle exception to this. "default" will never be used in the
# following cases (actual default auth directives noted in brackets)
# ipfail (uid=HA_CCMUSER)
# ccm (uid=HA_CCMUSER)
# ping (gid=HA_APIGROUP)
# cl_status (gid=HA_APIGROUP)
#
# This is done to avoid creating a gaping security hole and matches the most
# likely desired configuration.
#
#apiauth ipfail uid=hacluster
#apiauth ccm uid=hacluster
#apiauth cms uid=hacluster
#apiauth ping gid=haclient uid=alanr,root
#apiauth default gid=haclient
# message format in the wire, it can be classic or netstring,
# default: classic
#msgfmt classic/netstring
# Do we use logging daemon?
# If logging daemon is used, logfile/debugfile/logfacility in this file
# are not meaningful any longer. You should check the config file for logging
# daemon (the default is /etc/logd.cf)
# more information can be found in
# http://www.linux-ha.org/ha_2ecf_2fUseLogdDirective
# Setting use_logd to "yes" is recommended
#
# use_logd yes/no
#
# the interval we reconnect to logging daemon if the previous connection failed
# default: 60 seconds
#conn_logd_time 60
#
#
# Configure compression module
# It could be zlib or bz2, depending on whether u have the corresponding
# library in the system.
#compression bz2
#
# Configure compression threshold
# This value determines the threshold to compress a message,
# e.g. if the threshold is 1, then any message with size greater than 1 KB
# will be compressed, the default is 2 (KB)
#compression_threshold 2
node ws232.ltsp ws231.ltsp
#bcast bond0
#ucast bond0 157.177.2.31
crm on
ping 192.168.0.1
#ping 157.177.6.210
respawn root /usr/lib64/heartbeat/pingd -m 100 -d 5s
#mcast bond0 224.0.0.1 694 1 0
bcast eth0
bcast eth1
#!/bin/sh
#
#
# OCF RA for monitoring Londiste Replay process
#
# based on:
#
# Dummy OCF RA. Does nothing but wait a few seconds, can be
# configured to fail occasionally.
#
# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
# Source the OCF shell function library (ocf_log, $OCF_* return codes)
# from the first location that exists; without it we cannot operate as
# an OCF resource agent at all.
if [ -f "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs" ]; then
	. "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
elif [ -f /usr/lib/heartbeat/ocf-shellfuncs ]; then
	. /usr/lib/heartbeat/ocf-shellfuncs
elif [ -f /usr/lib64/heartbeat/ocf-shellfuncs ]; then
	. /usr/lib64/heartbeat/ocf-shellfuncs
else
	# NOTE(review): $OCF_ERR_CONFIGURED is defined by the very library we
	# failed to find, so it is empty here and 'exit' falls back to the
	# last command's status — best effort only.
	exit $OCF_ERR_CONFIGURED
fi
#######################################################################
# Print the OCF resource-agent metadata for LondisteReplay.
# Fixes vs. the original: the advertised action is "validate-all" (which
# is what this script actually implements in its case dispatch), not
# "verify-all"; the documented pidfile default matches the script's
# actual default (/etc/cluster/pgqadm.pid); parameter descriptions now
# describe the replay daemon rather than PGQADM.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="LondisteReplay" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This is a LondisteReplay Resource Agent. It starts/stops/monitors
londiste.py's replay process.
</longdesc>
<shortdesc lang="en">LondisteReplay resource agent</shortdesc>
<parameters>
<parameter name="configdir" unique="0">
<longdesc lang="en">
Directory where londiste.ini is located.
</longdesc>
<shortdesc lang="en">Configuration directory</shortdesc>
<content type="string" default="/etc/cluster" />
</parameter>
<parameter name="pidfile" unique="0">
<longdesc lang="en">
Pidfile the londiste replay daemon uses to indicate its started state.
</longdesc>
<shortdesc lang="en">Pidfile</shortdesc>
<content type="string" default="/etc/cluster/pgqadm.pid" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" timeout="20" interval="10" depth="0"
start-delay="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
# Swallow SIGTERM so that lrmd is the one that actually makes us exit.
sigterm_handler() {
	ocf_log info "They use TERM to bring us down. No such luck."
	return
}
trap sigterm_handler TERM
# Print a short usage synopsis for this resource agent.
dummy_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}
# Check whether the londiste replay daemon recorded in $PIDFILE is alive.
# Sets the global PID as a side effect (dummy_stop relies on it).
# Returns OCF_SUCCESS, OCF_NOT_RUNNING, or OCF_ERR_GENERIC.
#
# Fixes vs. the original: an empty pidfile no longer becomes PID=0 via
# $(( )) (which mapped to OCF_ERR_GENERIC and, with on_fail="fence",
# fenced the node); expansions are quoted; grep|wc -l replaced by a
# quiet grep.
dummy_monitor() {
	PID=""
	if [ -f "$PIDFILE" ]; then
		PID=`cat "$PIDFILE" 2>/dev/null`
	fi
	# No pidfile, or a pidfile with no pid recorded yet: not running.
	if [ -z "$PID" ]; then
		return $OCF_NOT_RUNNING
	fi
	# Garbage in the pidfile is a hard error — we cannot check it.
	case "$PID" in
		*[!0-9]*) return $OCF_ERR_GENERIC ;;
	esac
	PGQPROCFILE="/proc/$PID/cmdline"
	# Pidfile exists but the process is gone: the daemon died uncleanly.
	if [ ! -f "$PGQPROCFILE" ]; then
		return $OCF_ERR_GENERIC
	fi
	# Guard against the pid having been recycled by an unrelated process.
	if ! grep -ia londiste "$PGQPROCFILE" >/dev/null 2>&1; then
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
# Start the londiste replay daemon unless it is already running.
# Returns OCF_SUCCESS when running or successfully launched,
# OCF_ERR_GENERIC when the monitor reports an error state.
dummy_start() {
	dummy_monitor
	MONRET=$?
	if [ "$MONRET" = "$OCF_SUCCESS" ]; then
		# Already running: starting again must still report success.
		return $OCF_SUCCESS
	fi
	if [ "$MONRET" = "$OCF_NOT_RUNNING" ]; then
		# londiste.py daemonizes itself and writes the pidfile once up.
		londiste.py "$CONFIGDIR/londiste.ini" replay &
		return $OCF_SUCCESS
	fi
	# Monitor reported an error (e.g. dead process behind a pidfile):
	# refuse to start on top of an unknown state.
	return $OCF_ERR_GENERIC
}
# Stop the londiste replay daemon and wait until it has cleaned up its
# pidfile.  Stopping an already-stopped resource reports success.
#
# Fixes vs. the original: a stale pidfile (process already gone) no
# longer makes the loop spin forever until the CRM's stop timeout; the
# 'usleep 20' 20-microsecond busy-wait is replaced by sleep 1, which is
# ample given the 120s stop timeout configured in the CIB.
dummy_stop() {
	dummy_monitor
	if [ $? = $OCF_SUCCESS ]; then
		kill -TERM "$PID"
	fi
	# The daemon removes $PIDFILE on orderly shutdown.  Also watch the
	# process itself so a leftover pidfile cannot hang us.
	while [ -f "$PIDFILE" ]; do
		if [ -n "$PID" ] && [ ! -d "/proc/$PID" ]; then
			# Process is gone but the pidfile remained: clean it up.
			rm -f "$PIDFILE"
			break
		fi
		sleep 1
	done
	return $OCF_SUCCESS
}
# Validation is not implemented; report that with the standard OCF code.
# (The original exited with the misspelled $OC_ERR_UNIMPLEMENTED, which
# is unset, so the intended status was never returned.)
dummy_validate() {
	exit $OCF_ERR_UNIMPLEMENTED
}
# Defaults for the OCF instance parameters; the CIB overrides these via
# OCF_RESKEY_* environment variables.
CONFIGDIR=${OCF_RESKEY_configdir:-/etc/cluster}
PIDFILE=${OCF_RESKEY_pidfile:-/etc/cluster/pgqadm.pid}

# Dispatch on the requested OCF action (quoted: $__OCF_ACTION may be
# empty when the script is invoked outside the cluster manager).
case "$__OCF_ACTION" in
meta-data)	meta_data
		exit $OCF_SUCCESS
		;;
start)		dummy_start
		;;
stop)		dummy_stop
		;;
monitor)	dummy_monitor
		;;
validate-all)	dummy_validate
		;;
usage|help)	dummy_usage
		exit $OCF_SUCCESS
		;;
*)		dummy_usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
# Propagate the action's status, logging it for debugging.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
#!/bin/sh
#
#
# OCF RA for monitoring Londiste Ticker process
#
# based on:
#
# Dummy OCF RA. Does nothing but wait a few seconds, can be
# configured to fail occasionally.
#
# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
# Source the OCF shell function library (ocf_log, $OCF_* return codes)
# from the first location that exists; without it we cannot operate as
# an OCF resource agent at all.
if [ -f "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs" ]; then
	. "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
elif [ -f /usr/lib/heartbeat/ocf-shellfuncs ]; then
	. /usr/lib/heartbeat/ocf-shellfuncs
elif [ -f /usr/lib64/heartbeat/ocf-shellfuncs ]; then
	. /usr/lib64/heartbeat/ocf-shellfuncs
else
	# NOTE(review): $OCF_ERR_CONFIGURED is defined by the very library we
	# failed to find, so it is empty here and 'exit' falls back to the
	# last command's status — best effort only.
	exit $OCF_ERR_CONFIGURED
fi
#######################################################################
# Print the OCF resource-agent metadata for LondisteTicker.
# Fixes vs. the original: the advertised action is "validate-all" (which
# is what this script actually implements in its case dispatch), not
# "verify-all"; the resource-agent version attribute ("0.9") now agrees
# with the inner <version>1.0</version>; parameter descriptions tidied.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="LondisteTicker" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This is a LondisteTicker Resource Agent. It starts/stops/monitors
pgqadm.py's ticker process.
</longdesc>
<shortdesc lang="en">LondisteTicker resource agent</shortdesc>
<parameters>
<parameter name="configdir" unique="0">
<longdesc lang="en">
Directory where pgqadm.ini is located.
</longdesc>
<shortdesc lang="en">Configuration directory</shortdesc>
<content type="string" default="/etc/cluster" />
</parameter>
<parameter name="pidfile" unique="0">
<longdesc lang="en">
Pidfile PGQADM uses to indicate its started state.
</longdesc>
<shortdesc lang="en">Pidfile</shortdesc>
<content type="string" default="/etc/cluster/pgqadm.pid" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" timeout="20" interval="10" depth="0"
start-delay="5" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
# Swallow SIGTERM so that lrmd is the one that actually makes us exit.
sigterm_handler() {
	ocf_log info "They use TERM to bring us down. No such luck."
	return
}
trap sigterm_handler TERM
# Print a short usage synopsis for this resource agent.
dummy_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}
# Validation is not implemented; report that with the standard OCF code.
# (The original exited with the misspelled $OC_ERR_UNIMPLEMENTED, which
# is unset, so the intended status was never returned.)
dummy_validate() {
	exit $OCF_ERR_UNIMPLEMENTED
}
# Check whether the PGQADM ticker daemon recorded in $PIDFILE is alive.
# Sets the global PID as a side effect (dummy_stop relies on it).
# Returns OCF_SUCCESS, OCF_NOT_RUNNING, or OCF_ERR_GENERIC.
#
# Fixes vs. the original: an empty pidfile no longer becomes PID=0 via
# $(( )) (which mapped to OCF_ERR_GENERIC and, with on_fail="fence",
# fenced the node); expansions are quoted; grep|wc -l replaced by a
# quiet grep.
dummy_monitor() {
	PID=""
	if [ -f "$PIDFILE" ]; then
		PID=`cat "$PIDFILE" 2>/dev/null`
	fi
	# No pidfile, or a pidfile with no pid recorded yet: not running.
	if [ -z "$PID" ]; then
		return $OCF_NOT_RUNNING
	fi
	# Garbage in the pidfile is a hard error — we cannot check it.
	case "$PID" in
		*[!0-9]*) return $OCF_ERR_GENERIC ;;
	esac
	PGQPROCFILE="/proc/$PID/cmdline"
	# Pidfile exists but the process is gone: the daemon died uncleanly.
	if [ ! -f "$PGQPROCFILE" ]; then
		return $OCF_ERR_GENERIC
	fi
	# Guard against the pid having been recycled by an unrelated process.
	if ! grep -ia pgqadm "$PGQPROCFILE" >/dev/null 2>&1; then
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
# Start the PGQADM ticker daemon unless it is already running.
# Returns OCF_SUCCESS when running or successfully launched,
# OCF_ERR_GENERIC when the monitor reports an error state.
dummy_start() {
	dummy_monitor
	MONRET=$?
	if [ "$MONRET" = "$OCF_SUCCESS" ]; then
		# Already running: starting again must still report success.
		return $OCF_SUCCESS
	fi
	if [ "$MONRET" = "$OCF_NOT_RUNNING" ]; then
		# pgqadm.py daemonizes itself and writes the pidfile once up.
		pgqadm.py "$CONFIGDIR/pgqadm.ini" ticker &
		return $OCF_SUCCESS
	fi
	# Monitor reported an error (e.g. dead process behind a pidfile):
	# refuse to start on top of an unknown state.
	return $OCF_ERR_GENERIC
}
# Stop the pgqadm ticker and wait until its pidfile disappears.
# Always returns OCF_SUCCESS (stop of a stopped resource must succeed).
dummy_stop() {
	dummy_monitor
	if [ $? = "$OCF_SUCCESS" ]; then
		# dummy_monitor left the ticker's PID in the global PID;
		# only send a signal when we actually found a process.
		if [ -n "$PID" ]; then
			kill -TERM "$PID"
		fi
	fi
	# pgqadm removes its pidfile on shutdown; poll until it is gone.
	# NOTE(review): this loops forever if the process never exits —
	# the stop-action timeout in the CIB is the real upper bound.
	USLEEP="`which usleep`"
	while [ -f "$PIDFILE" ]; do
		if [ -x "$USLEEP" ]; then
			# 100000 us = 0.1 s. The original passed 20 (microseconds),
			# which was effectively a busy loop.
			$USLEEP 100000
			continue
		fi
		sleep 1
	done
	return $OCF_SUCCESS
}
# Resource parameters with defaults; OCF_RESKEY_* variables are injected
# by the LRM from the CIB resource configuration.
CONFIGDIR=${OCF_RESKEY_configdir:-/etc/cluster}
PIDFILE=${OCF_RESKEY_pidfile:-/etc/cluster/pgqadm.pid}
# Dispatch the requested OCF action; __OCF_ACTION is provided by the
# sourced ocf-shellfuncs library.
case $__OCF_ACTION in
meta-data) meta_data
;;
start) dummy_start
;;
stop) dummy_stop
;;
monitor) dummy_monitor
;;
validate-all) dummy_validate;;
usage|help) dummy_usage
exit $OCF_SUCCESS
;;
*) dummy_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
# Log the action's result and propagate its status back to the LRM.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
#!/bin/sh
#
#
# SlaveMigration OCF RA. Sets up the slave PostgreSQL pg_hba.conf
# when migration to/from lardb04
#
# based on
#
# Dummy OCF RA. Does nothing but wait a few seconds, can be
# configured to fail occassionally.
#
# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
# Source the OCF shell function library, trying the heartbeat-provided
# locations in order; bail out as misconfigured when none is found.
# (Flattened from a triple-nested if/else; paths quoted so an OCF_ROOT
# containing spaces cannot break the tests.)
if [ -f "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs" ]; then
	. "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
elif [ -f /usr/lib/heartbeat/ocf-shellfuncs ]; then
	. /usr/lib/heartbeat/ocf-shellfuncs
elif [ -f /usr/lib64/heartbeat/ocf-shellfuncs ]; then
	. /usr/lib64/heartbeat/ocf-shellfuncs
else
	exit $OCF_ERR_CONFIGURED
fi
#######################################################################
# Print the OCF resource-agent metadata XML for SlaveMigration on stdout.
# FIX: corrected the pgport description typo ("This is post" -> "the port").
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SlaveMigration" version="0.9">
<version>1.0</version>
<longdesc lang="en">
This is the SlaveMigration Resource Agent. It sets up the slave PostgreSQL
authentication so pgqadm/londiste cannot incidentally start replicating from
the master PostgreSQL server.
</longdesc>
<shortdesc lang="en">SlaveMigration resource agent</shortdesc>
<parameters>
<parameter name="masterip" unique="0" required="1">
<longdesc lang="en">
This is the space-separated list of IPs the master server lives at.
</longdesc>
<shortdesc lang="en">IP addresses of the master </shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="masterhostname" unique="0" required="0">
<longdesc lang="en">
This is the short form of the hostname of the master server
</longdesc>
<shortdesc lang="en">Master short hostname</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="slavehostname" unique="0" required="1">
<longdesc lang="en">
This is the short form of the hostname of the slave server
</longdesc>
<shortdesc lang="en">Slave short hostname</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="psql" unique="0" required="0">
<longdesc lang="en">
Path to psql command.
</longdesc>
<shortdesc lang="en">psql</shortdesc>
<content type="string" default="/usr/bin/psql" />
</parameter>
<parameter name="pgport" unique="0">
<longdesc lang="en">
This is the port PostgreSQL listens on.
</longdesc>
<shortdesc lang="en">PostgreSQL service port</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="pgdata" unique="0" required="1">
<longdesc lang="en">
Path to PostgreSQL data directory.
</longdesc>
<shortdesc lang="en">pgdata</shortdesc>
<content type="string" default="/var/lib/pgsql/data" />
</parameter>
<parameter name="pghba_ok" unique="0" required="1">
<longdesc lang="en">
Path to normal pg_hba.conf
</longdesc>
<shortdesc lang="en">pghba_ok</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="pghba_failed" unique="0" required="1">
<longdesc lang="en">
Path to pg_hba.conf for fenced case to disallow connection of the master server.
</longdesc>
<shortdesc lang="en">pghba_failed</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" timeout="20" interval="10" depth="0"
start-delay="1" />
<action name="reload" timeout="90" />
<action name="migrate_to" timeout="100" />
<action name="migrate_from" timeout="90" />
<action name="meta-data" timeout="5" />
<action name="verify-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
# don't exit on TERM, to test that lrmd makes sure that we do exit
# Deliberately swallow SIGTERM so we can verify that lrmd escalates and
# forces the agent to exit anyway.
sigterm_handler() {
	ocf_log info "They use TERM to bring us down. No such luck."
	return
}
trap sigterm_handler TERM
# Print a short usage synopsis for this resource agent on stdout.
dummy_usage() {
	printf 'usage: %s {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}\nExpects to have a fully populated OCF RA-compliant environment set.\n' "$0"
}
# Parameter validation is not implemented; report that per the OCF spec.
# BUG FIX: the original referenced $OC_ERR_UNIMPLEMENTED (typo, always
# unset), making the return status effectively random.
dummy_validate() {
	return $OCF_ERR_UNIMPLEMENTED
}
# Report resource state. The lock file is the only state this agent keeps:
# present means the SlaveMigration resource is started.
dummy_monitor() {
	if [ ! -f /var/lock/subsys/SlaveMigration ]; then
		return $OCF_NOT_RUNNING
	fi
	return $OCF_SUCCESS
}
# Start action: on the slave host, switch PostgreSQL to the restrictive
# pg_hba.conf so the (possibly fenced) master cannot connect, reload the
# server and terminate any sessions the master still holds.
# BUG FIX: the mail-wrapped original split the ln -sf command over two
# lines, so the symlink target ran as a separate (failing) command.
slave_start() {
	dummy_monitor
	if [ $? = "$OCF_SUCCESS" ]; then
		# Already started: start must be idempotent.
		return $OCF_SUCCESS
	fi
	touch /var/lock/subsys/SlaveMigration
	check_for_slavehostname
	check_for_masterip
	if [ "`hostname -s`" = "$OCF_RESKEY_slavehostname" ]; then
		check_for_pg_hbas
		ln -sf "$OCF_RESKEY_pghba_failed" "${OCF_RESKEY_pgdata}/pg_hba.conf"
		reload_pg_conf
		slave_kill_pg_from_master
	fi
	return $OCF_SUCCESS
}
# Stop action: restore the normal pg_hba.conf on the slave host, reload
# PostgreSQL, and remove the lock file. Always returns OCF_SUCCESS.
slave_stop() {
	dummy_monitor
	# Quoted comparison: an unset status variable would otherwise make
	# the [ ] test itself fail with a syntax error.
	if [ $? = "$OCF_NOT_RUNNING" ]; then
		# Stopping an already-stopped resource must succeed (OCF contract).
		return $OCF_SUCCESS
	fi
	check_for_slavehostname
	if [ "`hostname -s`" = "$OCF_RESKEY_slavehostname" ]; then
		check_for_pg_hbas
		ln -sf "$OCF_RESKEY_pghba_ok" "${OCF_RESKEY_pgdata}/pg_hba.conf"
		reload_pg_conf
	fi
	rm -f /var/lock/subsys/SlaveMigration
	return $OCF_SUCCESS
}
# Terminate every PostgreSQL backend whose client connection originates
# from one of the master's IP addresses.
# BUG FIX: in the mail-wrapped original the debugging comment spilled onto
# a second, uncommented line, which the shell then tried to execute; the
# SQL string also carried an embedded newline. Both are rejoined here.
slave_kill_pg_from_master() {
	PGPARAM="-A -t -U postgres -h localhost"
	if [ "$OCF_RESKEY_pgport" != "" ]; then
		PGPARAM="$PGPARAM -p $OCF_RESKEY_pgport"
	fi
	# masterip is deliberately unquoted: it is a space-separated list.
	for ip in $OCF_RESKEY_masterip ; do
		# NOTE(review): pg_stat_activity.procpid was renamed to pid in
		# PostgreSQL 9.2; this agent targets pre-9.2 servers.
		$OCF_RESKEY_psql $PGPARAM -c "select procpid from pg_stat_activity where client_addr='${ip}'" | \
		while read pid ; do
			kill -TERM "$pid"
		done
	done
}
# Ask PostgreSQL to re-read its configuration files (pg_hba.conf included)
# via SELECT pg_reload_conf().
# BUG FIX: mail wrapping pushed "2>/dev/null" onto its own line, so psql's
# stderr was not suppressed and a stray empty command ran after it.
reload_pg_conf() {
	PGPARAM="-U postgres -h localhost"
	if [ "$OCF_RESKEY_pgport" != "" ]; then
		PGPARAM="$PGPARAM -p $OCF_RESKEY_pgport"
	fi
	$OCF_RESKEY_psql $PGPARAM -c "select pg_reload_conf()" 1>/dev/null 2>/dev/null
}
# Abort the whole action unless the mandatory slavehostname parameter is set.
check_for_slavehostname() {
	[ -n "$OCF_RESKEY_slavehostname" ] && return
	ocf_log debug "${OCF_RESOURCE_INSTANCE} No slave hostname given"
	exit $OCF_ERR_GENERIC
}
# Abort the whole action unless the masterhostname parameter is set.
# BUG FIX: the log message was mail-wrapped mid-string, embedding a
# newline in it; rejoined to one line.
check_for_masterhostname() {
	if [ "$OCF_RESKEY_masterhostname" = "" ]; then
		ocf_log debug "${OCF_RESOURCE_INSTANCE} No master hostname given"
		exit $OCF_ERR_GENERIC
	fi
}
# Abort the whole action unless the mandatory masterip parameter is set.
check_for_masterip() {
	[ -n "$OCF_RESKEY_masterip" ] && return
	ocf_log debug "${OCF_RESOURCE_INSTANCE} No master IP given"
	exit $OCF_ERR_GENERIC
}
# Validate the pg_hba-related parameters: both pg_hba.conf variants must be
# given and exist, and PGDATA must be a directory; exits on any failure.
# BUG FIX: four ocf_log messages were mail-wrapped mid-string (embedded
# newlines); rejoined to single lines. The bare echo lines are debug
# markers naming the offending parameter and are kept as-is.
check_for_pg_hbas() {
	if [ "$OCF_RESKEY_pghba_ok" = "" ]; then
		echo OCF_RESKEY_pghba_ok
		ocf_log debug "${OCF_RESOURCE_INSTANCE} No normal pg_hba.conf given"
		exit $OCF_ERR_GENERIC
	fi
	if [ ! -f "$OCF_RESKEY_pghba_ok" ]; then
		echo -- -f OCF_RESKEY_pghba_ok
		ocf_log debug "${OCF_RESOURCE_INSTANCE} pg_hba.conf file for normal operation not exists"
		exit $OCF_ERR_GENERIC
	fi
	if [ "$OCF_RESKEY_pghba_failed" = "" ]; then
		echo OCF_RESKEY_pghba_failed
		ocf_log debug "${OCF_RESOURCE_INSTANCE} No failed pg_hba.conf given"
		exit $OCF_ERR_GENERIC
	fi
	if [ ! -f "$OCF_RESKEY_pghba_failed" ]; then
		echo -- -f OCF_RESKEY_pghba_failed
		ocf_log debug "${OCF_RESOURCE_INSTANCE} pg_hba.conf file for fenced operation not exists"
		exit $OCF_ERR_GENERIC
	fi
	if [ ! -d "$OCF_RESKEY_pgdata" ]; then
		echo -- -d OCF_RESKEY_pgdata
		ocf_log debug "${OCF_RESOURCE_INSTANCE} PGDATA directory not exists"
		exit $OCF_ERR_GENERIC
	fi
}
# Default psql path when the CRM configuration does not provide one.
: ${OCF_RESKEY_psql=/usr/bin/psql}
# Dispatch the requested OCF action; __OCF_ACTION comes from ocf-shellfuncs.
# BUG FIXES: the reload branch called dummy_start, which does not exist in
# this script (slave_start is the intended action); the migrate log lines
# were mail-wrapped mid-string; the migrate_from message said "to" while
# reporting the node the resource migrated *from*.
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) slave_start
;;
stop) slave_stop
;;
monitor) dummy_monitor
;;
migrate_to) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_to}."
slave_stop
;;
migrate_from) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrated_from}."
slave_start
;;
reload) ocf_log err "Reloading..."
slave_start
;;
validate-all) dummy_validate;;
usage|help) dummy_usage
exit $OCF_SUCCESS
;;
*) dummy_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
# Log the action's result and propagate its status back to the LRM.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems