Hi,

I've written a new RA based on what I learnt from the ping and nginx RAs
for monitoring frontend-proxy-stacks.

It is attached here for your consideration - and indeed - critique.  I
am hopeful that it makes it into the pacemaker distribution some time.

Regards,
  Nicolai
#!/bin/sh
#
#       High-Availability httpd daemon monitoring OCF agent
# 
# http_ping
#
# Description:  monitors http servers (no start, no stop).
#
# Author:       Nicolai Langfeldt, Broadnet AS
#
#               Started out as nginx agent.  Heavily repurposed.
#
# Nginx RA lists these authors: 
#               Alan Robertson
#               Dejan Muhamedagic

#
# Support:      linux...@lists.linux-ha.org
#
# License:      GNU General Public License (GPL)
#
# Copyright:
#       Some parts (C) 2012 Broadnet AS
#       Some other parts (C) 2002-2010 International Business Machines
# 
#
# Patches are being accepted ;-)
#
# Requires *curl*, wget and GET are not sane/flexible enough.
#
# Usage example:
#
#  N-node proxy cluster.  Pacemaker manages production virtual IP
#  (vip).  HAproxy started by init script on all N nodes.  HAproxy is
#  used several times in the frontend stack and is needed on all nodes
#  at all times for load distribution between the proxies.
#     
#  Production VIP must never be started on a node where HAproxy is not
#  running but can run on any node where HAproxy does run.
#
#  My solution: Create this monitoring agent inspired by the ping and
#  nginx agents and use it the same way as the ping agent to control
#  where the VIP agent can be run.

# NOTE: This agent will not start or stop the resource.  It is assumed
#  that the resource is managed by init script and warnings about
#  failures are sent by something else (like nagios).

# 1. Configure status URL in haproxy using a randomized URL to hide
#    the status page from random probers (I wanted the status to be
#    available over the network too).  "pwgen" is useful for generating
#    a random url.
#
#      listen httpsservice 0.0.0.0:80
#           ...
#           stats uri /phei1SaeIevoh4eM
#
# 2. Check if working by directing a browser there
#
# 3. Configure pacemaker
# 
#      primitive vip ocf:heartbeat:IPaddr \
#         params ip="192.168.5.8"
#
#      primitive happing ocf:pacemaker:http_ping \
#         params name="happing" testurl="http://localhost/phei1SaeIevoh4eM" \
#         op monitor interval="1s" depth="0"
#
#      clone happingall happing \
#         meta target-role="Started"
#
#      location locVip vip \
#         rule $id="locVipRule" -INF: not_defined happing
#
#    If your frontend runs for example a
#    haproxy/nginx/varnish/whatever mix: set up http pings for all of
#    the ones that _have_ to be running and combine in the location
#    rule like this:
#
#      location locVip vip \
#         rule $id="locVipRule" -INF: not_defined happing or not_defined nxping
#
# 4. Use crm_mon -A to monitor the vip and the happing token.  Document that the
#    token is supposed to be defined on all nodes during normal operation.
#
#
# OCF parameters:
#  OCF_RESKEY_testurl
#  OCF_RESKEY_bindaddr
#  OCF_RESKEY_testregex
#  OCF_RESKEY_name
#  OCF_RESKEY_timeout
#  OCF_RESKEY_dampen
#  OCF_RESKEY_multiplier
#  OCF_RESKEY_curlopts
#  OCF_RESKEY_auth
#  OCF_RESKEY_curl
#

# Where the OCF tree lives.  Both values may be preset in the environment;
# OCF_ROOT is also replaced when empty, OCF_FUNCTIONS_DIR only when unset.
: "${OCF_ROOT:=/usr/lib/ocf}"
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"

# Fallback values for the optional instance parameters.
# OCF_RESKEY_testurl has no default.

# What to probe and how to judge the answer
: "${OCF_RESKEY_bindaddr:=lo}"
: "${OCF_RESKEY_testregex:=}"
: "${OCF_RESKEY_timeout:=1s}"

# Node attribute that is published
: "${OCF_RESKEY_name:=httpping}"
: "${OCF_RESKEY_multiplier:=1000}"
: "${OCF_RESKEY_dampen:=5s}"

# How the http client is invoked
: "${OCF_RESKEY_curl:=curl}"
: "${OCF_RESKEY_curlopts:=}"
: "${OCF_RESKEY_auth:=}"

# Load the shared OCF shell library.  The path is quoted so a functions
# directory containing whitespace does not split into several words.
# NOTE(review): ocf_log and the OCF_* exit codes used below are presumably
# provided by this file - confirm against the installed resource-agents.
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
HA_VARRUNDIR=${HA_VARRUN}

# This kind of check/recalculation should be provided by ocf-shellfuncs

#
# http_ping_timeout_seconds SPEC
#
# Print SPEC converted to whole seconds.  SPEC is a number optionally
# followed by a unit: ms/msec, m/min, h/hr; anything else (including
# "s", "sec" or no unit at all) is taken as seconds.
#
#  - Only the first run of digits is used, with leading zeros stripped so
#    the shell does not read the number as octal.
#  - Milliseconds are rounded UP, so "500ms" becomes 1 rather than 0; the
#    value ends up in curl's --max-time, where 0 means "no timeout".
#  - When SPEC contains no digits at all, 1 is printed (the value of the
#    agent's default "1s") instead of an empty string, which would break
#    the arithmetic done on the timeout afterwards.
#
http_ping_timeout_seconds() {
    local spec="$1" digits

    digits=$(printf '%s\n' "$spec" |
             sed -n 's/^[^0-9]*0*\([0-9][0-9]*\).*$/\1/p')
    if [ -z "$digits" ]; then
        echo 1
        return 0
    fi

    case $spec in
        *[0-9]ms|*[0-9]msec) echo $(( ($digits + 999) / 1000 ));;
        *[0-9]m|*[0-9]min)   echo $(( $digits * 60 ));;
        *[0-9]h|*[0-9]hr)    echo $(( $digits * 60 * 60 ));;
        *)                   echo "$digits";;
    esac
}

OCF_RESKEY_timeout=$(http_ping_timeout_seconds "${OCF_RESKEY_timeout}")

#
# http_ping_reduce_timeout SECONDS
#
# Print SECONDS reduced by 10% (integer arithmetic), but never less than
# one second: the result is handed to curl's --max-time, where zero means
# "no timeout at all".  An empty or non-numeric argument is treated as 0
# and therefore also yields 1.
#
# With integer division, SECONDS * 9 / 10 is already strictly smaller than
# SECONDS for every value of 2 or more, so only the results below 1 need
# special treatment.
#
http_ping_reduce_timeout() {
    local full="$1" reduced

    case $full in
        ''|*[!0-9]*) full=0;;
    esac

    reduced=$(( $full * 9 / 10 ))
    if [ "$reduced" -lt 1 ]; then
        reduced=1
    fi
    echo "$reduced"
}

# Reduce timeout by 10%
OCF_RESKEY_timeout=$(http_ping_reduce_timeout "$OCF_RESKEY_timeout")

#######################################################################
#
#       Configuration options - usually you don't need to change these
#
#######################################################################

# default options for http clients
# NB: We _always_ test a local resource, so it should be
# safe to connect from the local interface.

# Options given to curl on every request: -S show errors, -s no progress
# meter, -k skip TLS certificate verification; bind to the configured
# interface and give up after the (already reduced) timeout.
# Kept on a single line so the value contains no embedded newline.
# It is expanded unquoted on purpose so that it splits into separate words.
CURLOPTS="-Ssk --interface ${OCF_RESKEY_bindaddr} --max-time ${OCF_RESKEY_timeout} ${OCF_RESKEY_curlopts}"

#
#       End of Configuration options
#######################################################################

# Name this script was invoked as, without the directory part; used in log
# messages.  Parameter expansion avoids a fork and, unlike the unquoted
# `basename $0`, copes with whitespace in the path.
CMD=${0##*/}

#
# usage EXITCODE
#
# Print a summary of the supported actions on stdout and terminate the
# script with EXITCODE.
#
usage() {
  cat <<EOM
usage: $0 action

action:
        start   "start" http_ping agent(or rather, if it's running report it as such)

        stop    "stop" http_ping agent

        status  human readable web server status

        monitor return TRUE if the http server appears to be working.
                A testurl must be given and this URL must be configured 
                and working.

        meta-data       show meta data message

        validate-all    validate the instance parameters
EOM
  exit $1
}

#
# curl_func ARGS...
#
# Run the http client ($OCF_RESKEY_curl) with ARGS and return its exit
# status.
#
# When OCF_RESKEY_auth is set, the credentials are not put on the command
# line; they are written as a "-u ..." line to curl's standard input,
# which curl reads as a config file because of "-K -".
# printf is used instead of echo so that backslashes in the credentials
# are passed through untouched on every /bin/sh.
#
# $OCF_RESKEY_curl is expanded unquoted, exactly as before, so a value
# consisting of a command plus extra words keeps working.
#
curl_func() {
    if [ -z "$OCF_RESKEY_auth" ]; then
        $OCF_RESKEY_curl "$@"
    else
        printf '%s %s\n' -u "$OCF_RESKEY_auth" |
            $OCF_RESKEY_curl -K - "$@"
    fi
}


#
# silent_status
#
# Fetch $OCF_RESKEY_testurl once and decide whether the server is healthy.
# Returns 0 when it is, non-zero otherwise.
#
#  - Without OCF_RESKEY_testregex the body is discarded and only the HTTP
#    status code that curl reports through --write-out is inspected:
#    exactly 200 is success, any other code returns 1, and a failing curl
#    returns $OCF_ERR_GENERIC.
#  - With OCF_RESKEY_testregex the body is piped through a case-insensitive
#    extended-regex grep and grep's exit status is returned.
#
# Sets the globals HTTP_CODE (status-code branch only) and curlexit.
#
silent_status() {

    if [ -z "$OCF_RESKEY_testregex" ]; then
        # $CURLOPTS is deliberately unquoted: it holds several options.
        HTTP_CODE=$(curl_func -o/dev/null $CURLOPTS \
                              --write-out '%{http_code}\n' \
                              "$OCF_RESKEY_testurl" 2>/dev/null)
        curlexit=$?
        if [ "$curlexit" -ne 0 ]; then
            curlexit=$OCF_ERR_GENERIC
            return $curlexit
        fi
        # No regex to look for, so the status code decides.
        if [ "$HTTP_CODE" = "200" ]; then
            return 0
        fi
        return 1
    fi

    # -e keeps a pattern that starts with "-" from being read as an
    # option; the pattern is used exactly as configured.
    curl_func -o- $CURLOPTS "$OCF_RESKEY_testurl" |
        grep -Eiq -e "$OCF_RESKEY_testregex"
    curlexit=$?

    return $curlexit

}


#
# start
#
# Nothing is really started.  The URL is tested once and the node
# attribute is set (test passed) or deleted (test failed) right away.
# Always returns $OCF_SUCCESS, whatever the outcome of the test.
#
start() {
    if silent_status; then
        attrd_updater -U "$OCF_RESKEY_multiplier" \
                      -n "$OCF_RESKEY_name" \
                      -d "$OCF_RESKEY_dampen"
        ocf_log info "start: test worked, set token."
    else
        attrd_updater -D -n "$OCF_RESKEY_name" -d "$OCF_RESKEY_dampen"
        ocf_log info "start: test failed, deleting token."
    fi

    return $OCF_SUCCESS
}


#
# stop
#
# Nothing is really stopped.  The node attribute is deleted and
# $OCF_SUCCESS is returned.
# The expansions are quoted so that an unusual attribute name or dampen
# value cannot be split into several arguments.
#
stop() {
    ocf_log info "http_ping stoping"
    attrd_updater -D -n "$OCF_RESKEY_name" -d "$OCF_RESKEY_dampen"
    return $OCF_SUCCESS
}


#
# status
#
# Test the URL once and log, at info level, whether the test worked or
# failed.  The node attribute is left alone and $OCF_SUCCESS is returned
# in both cases.
#
status() {
    silent_status
    rc=$?

    if [ "$rc" -eq 0 ]; then
        ocf_log info "test ($OCF_RESKEY_testurl) worked"
    else
        ocf_log info "test ($OCF_RESKEY_testurl) failed"
    fi

    return $OCF_SUCCESS
}


#
# monitor
#
# Test the URL once.  On failure the node attribute is deleted, on success
# it is set to $OCF_RESKEY_multiplier.
# The monitor action itself always succeeds ($OCF_SUCCESS); the health of
# the server is reported only through the presence of the attribute.
#
monitor() {
  if ! silent_status; then
    ocf_log info "$CMD not running"
    attrd_updater -D -n "$OCF_RESKEY_name" -d "$OCF_RESKEY_dampen"
    return $OCF_SUCCESS # $OCF_ERR_GENERIC
  fi

  attrd_updater -q -U "$OCF_RESKEY_multiplier" \
                -n "$OCF_RESKEY_name" \
                -d "$OCF_RESKEY_dampen"
  return $OCF_SUCCESS
}

#
# metadata
#
# Print the agent's OCF meta-data (XML) on stdout and exit with
# $OCF_SUCCESS.  Every OCF_RESKEY_* parameter the script reads is listed,
# including curlopts, auth and curl, and testurl is marked as required
# because validate_all refuses to run without it.
#
# The here-document is unquoted, so the text must not contain "$" or
# backquotes.
#
metadata(){
        cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="http_ping">
<version>1.0</version>
<longdesc lang="en">
This is the resource _monitor_ agent for any httpd by polling a status
page.

It provides only one level of testing, get a URL and optionaly look
for a regular expression.  The HTTP GET should be on this side of
instant, the default timeout is one second.  We allow a monitoring
interval down to one second.
</longdesc>
<shortdesc lang="en">Monitors a http server</shortdesc>

<parameters>

<parameter name="testurl" required="1">
<longdesc lang="en">
URL to test.  There is no default.  You will need to configure a
status or "ping" url in your http server.
</longdesc>
<shortdesc lang="en">test url</shortdesc>
<content type="string" />
</parameter>

<parameter name="testregex">
<longdesc lang="en">
Regular expression (egrep) to match in the output of testurl.  Case
insensitive.  If no testregex is given then the HTTP status code is
used.  It must be 200 otherwise the test fails.

If you want the test to succeed as long as the server responds in any
way set testregex to ".".

</longdesc>
<shortdesc lang="en">monitor regular expression</shortdesc>
<content type="string" default=""/>
</parameter>

<parameter name="bindaddr">
<longdesc lang="en">
By default curl is run with "--interface lo".  If you can't reach the
web server from the loopback (URL containing "localhost") specify the
interface name or address to bind to with this option.  Try
'bindaddr="0.0.0.0"' if the URL is not a localhost URL.
</longdesc>
<shortdesc lang="en">network bind</shortdesc>
<content type="string" default="lo"/>
</parameter>

<parameter name="name" unique="0">
<longdesc lang="en">
The name of the attribute to set.  This is the name to be used in the
constraints.
</longdesc>
<shortdesc lang="en">Attribute name</shortdesc>
<content type="string" default="httpping"/>
</parameter>

<parameter name="multiplier" unique="0">
<longdesc lang="en">
The number by which to set if the httpd is up.
</longdesc>
<shortdesc lang="en">Value multiplier</shortdesc>
<content type="integer" default="1000"/>
</parameter>

<parameter name="timeout" unique="0">
<longdesc lang="en">
How long (in seconds) to wait before declaring a test lost
</longdesc>
<shortdesc lang="en">test timeout in seconds</shortdesc>
<content type="integer" default="1s"/>
</parameter>

<parameter name="dampen" unique="0">
<longdesc lang="en">
Amount of time to wait (dampen) before setting any new value.
</longdesc>
<shortdesc lang="en">Dampening interval</shortdesc>
<content type="integer" default="5s"/>
</parameter>

<parameter name="curlopts" unique="0">
<longdesc lang="en">
Additional command line options that are appended to the curl command
line on every request.
</longdesc>
<shortdesc lang="en">Extra curl options</shortdesc>
<content type="string" default=""/>
</parameter>

<parameter name="auth" unique="0">
<longdesc lang="en">
Credentials for the test URL, handed to curl as the argument of its -u
option.  They are passed to curl on standard input, not on the command
line.  Leave empty for no authentication.
</longdesc>
<shortdesc lang="en">Credentials for curl -u</shortdesc>
<content type="string" default=""/>
</parameter>

<parameter name="curl" unique="0">
<longdesc lang="en">
Name of, or path to, the curl binary to run.
</longdesc>
<shortdesc lang="en">curl binary</shortdesc>
<content type="string" default="curl"/>
</parameter>

</parameters>

<actions>
<action name="start"   timeout="1s" />
<action name="stop"    timeout="1s" />
<action name="status"  timeout="1s" />
<action name="monitor" timeout="1s" depth="0" interval="1s" />
<action name="meta-data"  timeout="5" />
<action name="validate-all"  timeout="5" />
</actions>
</resource-agent>
END

   exit $OCF_SUCCESS
}

# #####################################################################

#
# validate_all
#
# Check the instance parameters; exits the script on the first problem and
# returns 0 when everything is fine.  Reads the global STATUSURL (a copy of
# OCF_RESKEY_testurl made by the main section).
#
# Exit codes:
#   $OCF_ERR_CONFIGURED  no test URL configured.  The previous code used
#                        OCF_ERR_PARAM, which is not one of the OCF return
#                        codes; being unset it turned into a bare "exit",
#                        i.e. exit status 0.
#   $OCF_ERR_ARGS        URL is not of the form http(s)://host/path
#   $OCF_ERR_INSTALLED   "$OCF_RESKEY_curl --help" cannot be run
#
validate_all() {
  if [ -z "$STATUSURL" ]; then
    ocf_log err "No testurl given!"
    exit $OCF_ERR_CONFIGURED
  fi

  case "$STATUSURL" in
      http://*/*|https://*/*) ;;
      *) ocf_log err "Invalid STATUSURL $STATUSURL"
         exit $OCF_ERR_ARGS ;;
  esac

  # $OCF_RESKEY_curl stays unquoted, matching the way curl_func runs it.
  if ! $OCF_RESKEY_curl --help >/dev/null 2>&1; then
      ocf_log err "curl ($OCF_RESKEY_curl) binary not found! Please verify that you've installed it"
      exit $OCF_ERR_INSTALLED
  fi

}

# ########################### MAIN ###########################
#
# Exactly one argument, the action name, is accepted; anything else prints
# the usage text and exits with $OCF_ERR_ARGS.
#
# start, stop, status and monitor validate the parameters first and then
# run the function of the same name.  The function's own return code
# becomes the exit status; all four currently return $OCF_SUCCESS.
# The function is called directly: no eval is needed because the case
# pattern already restricts COMMAND to those four fixed names.
#
# "usage" and "help" are matched with a leading wildcard so that "--help"
# and similar spellings are accepted as well.

if [ $# -ne 1 ]; then
    usage $OCF_ERR_ARGS
fi
COMMAND=$1

STATUSURL="$OCF_RESKEY_testurl"

case $COMMAND in
  meta-data)                    metadata; exit 0;;
  validate-all)                 validate_all; exit 0;;
  start|stop|status|monitor)    validate_all; $COMMAND; exit $?;;
  *usage|*help)                 usage $OCF_SUCCESS;;
  *)                            usage $OCF_ERR_UNIMPLEMENTED;;
esac

# Not expected to be reached: every arm above ends in an exit.
ocf_log err "$0: Running off end of script?!"

exit $OCF_ERR_GENERIC
_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org

Reply via email to