# Hey Emacs, this is a -*- shell-script -*- !!!

# utility functions for ctdb event scripts

PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH

# Fall back to the standard locations unless the environment already
# provides non-empty values for them.
if [ -z "$CTDB_VARDIR" ] ; then
    export CTDB_VARDIR="/var/ctdb"
fi
if [ -z "$CTDB_ETCDIR" ] ; then
    export CTDB_ETCDIR="/etc"
fi

#######################################
# pull in a system config file, if any
# Source the configuration file named by $1.
#
# With no argument the name defaults to $service_config, then to
# $service_name, and that configuration is loaded via loadconfig().
# For any name other than "ctdb" the "ctdb" configuration is loaded
# first.  The file itself is looked up, in order, in
# $CTDB_ETCDIR/sysconfig, $CTDB_ETCDIR/default and $CTDB_BASE/sysconfig;
# the first one found is sourced.
_loadconfig() {

    if [ -z "$1" ] ; then
	_loadconfig_name="${service_config:-${service_name}}"
	if [ -n "$_loadconfig_name" ] ; then
	    loadconfig "$_loadconfig_name"
	fi
    elif [ "$1" != "ctdb" ] ; then
	loadconfig "ctdb"
    fi

    # Paths are quoted so that directories containing whitespace work.
    if [ -f "$CTDB_ETCDIR/sysconfig/$1" ]; then
	. "$CTDB_ETCDIR/sysconfig/$1"
    elif [ -f "$CTDB_ETCDIR/default/$1" ]; then
	. "$CTDB_ETCDIR/default/$1"
    elif [ -f "$CTDB_BASE/sysconfig/$1" ]; then
	. "$CTDB_BASE/sysconfig/$1"
    fi
}

# Public entry point for loading configuration.  It only forwards its
# arguments to _loadconfig().
loadconfig ()
{
    _loadconfig "$@"
}

##############################################################
# make sure CTDB_CURRENT_DEBUGLEVEL is set to the desired debug level
# (integer)
#
# If it is already set then do nothing, since it might have been set
# via a file in rc.local.d/.  If it is not set then set it by sourcing
# /var/ctdb/eventscript_debuglevel. If this file does not exist then
# create it using output from "ctdb getdebug".  If the optional 1st arg
# is "create" then don't source an existing file but create a new one
# instead - this is useful for creating the file just once in each
# event run in 00.ctdb.  If there's a problem getting the debug level
# from ctdb then it is silently set to 0 - no use spamming logs if our
# debug code is broken...
ctdb_set_current_debuglevel ()
{
    # Respect a level that has already been set.
    if [ -n "$CTDB_CURRENT_DEBUGLEVEL" ] ; then
	return 0
    fi

    _f="$CTDB_VARDIR/eventscript_debuglevel"

    # (Re)generate the cache file when asked to, or when there is no
    # readable one yet.
    if [ "$1" = "create" ] || [ ! -r "$_f" ] ; then
	_t=$(ctdb getdebug -Y 2>/dev/null)
	# The level is the last colon-separated field: drop the
	# trailing ':' and then everything up to the last remaining ':'.
	_t="${_t%:}"
	_t="${_t##*:}"
	# An empty result falls back to 0.
	echo "export CTDB_CURRENT_DEBUGLEVEL=\"${_t:-0}\"" >"$_f"
    fi

    . "$_f"
}

# Print debug output when CTDB_CURRENT_DEBUGLEVEL is at least 4.
#
# With arguments, they are echoed with a "DEBUG: " prefix.  Without
# arguments, and when stdin is not a terminal, every line of stdin is
# printed with that prefix.
debug ()
{
    # An unset or empty level counts as 0, so that "[" does not print a
    # syntax error when the level has not been initialised yet.
    if [ "${CTDB_CURRENT_DEBUGLEVEL:-0}" -ge 4 ] ; then
	# If there are arguments then echo them.  Otherwise expect to
	# use stdin, which allows us to pass lots of debug using a
	# here document.
	if [ -n "$1" ] ; then
	    echo "DEBUG: $*"
	elif ! tty -s ; then
	    sed -e 's@^@DEBUG: @'
	fi
    fi
}

##############################################################
# check number of args for different events
# Validate the argument count for the IP-related events.  $1 is the
# event name and is included in the count.  Exits with status 1 and an
# error message when the count is wrong; other events are not checked.
ctdb_check_args ()
{
    local _nargs _what

    case "$1" in
	takeip|releaseip)
	    _nargs=4
	    _what="interface, IP and maskbits"
	    ;;
	updateip)
	    _nargs=5
	    _what="old interface, new interface, IP and maskbits"
	    ;;
	*)
	    return 0
	    ;;
    esac

    if [ $# != $_nargs ] ; then
	echo "ERROR: must supply $_what"
	exit 1
    fi
}

##############################################################
# determine on what type of system (init style) we are running
detect_init_style ()
{
    # An explicit setting always wins; detection is skipped.
    if [ -n "$CTDB_INIT_STYLE" ] ; then
	return 0
    fi

    # Probe for the distribution-specific daemon launcher.  Anything
    # else is treated as "redhat".
    CTDB_INIT_STYLE="redhat"
    if [ -x /sbin/startproc ] ; then
	CTDB_INIT_STYLE="suse"
    elif [ -x /sbin/start-stop-daemon ] ; then
	CTDB_INIT_STYLE="debian"
    fi
}

######################################################
# simulate /sbin/service on platforms that don't have it
# _service() makes it easier to hook the service() function for
# testing.
# Run operation $2 (e.g. start, stop, restart) for service $1.
#
# /sbin/service is used when it is executable; otherwise the init
# script under $CTDB_ETCDIR/init.d or $CTDB_ETCDIR/rc.d/init.d is run
# directly.  $_nice is set by the callers service() and nice_service()
# and is used as a command prefix.
_service ()
{
  _service_name="$1"
  _op="$2"

  # do nothing, when no service was specified
  [ -z "$_service_name" ] && return

  # $_nice is deliberately left unquoted so that an empty value
  # disappears from the command line.  Script paths are quoted so that
  # unusual directory names cannot be split into several words.
  if [ -x /sbin/service ]; then
      $_nice /sbin/service "$_service_name" "$_op"
  elif [ -x "$CTDB_ETCDIR/init.d/$_service_name" ]; then
      $_nice "$CTDB_ETCDIR/init.d/$_service_name" "$_op"
  elif [ -x "$CTDB_ETCDIR/rc.d/init.d/$_service_name" ]; then
      $_nice "$CTDB_ETCDIR/rc.d/init.d/$_service_name" "$_op"
  fi
}

# Run a service operation without any command prefix.
service ()
{
    _nice=
    _service "$@"
}

######################################################
# simulate /sbin/service (niced) on platforms that don't have it
# Same as service(), but the command is prefixed with "nice".
nice_service ()
{
    _nice=nice
    _service "$@"
}

######################################################
# wrapper around /proc/ settings to allow them to be hooked
# for testing
# 1st arg is relative path under /proc/, 2nd arg is value to set
set_proc ()
{
    # $1: path relative to /proc/, $2: value to write there
    local _proc_file="/proc/$1"

    echo "$2" >"$_proc_file"
}

######################################################
# wrapper around getting file contents from /proc/ to allow
# this to be hooked for testing
# 1st arg is relative path under /proc/
get_proc ()
{
    # $1: path relative to /proc/; its contents go to stdout
    local _proc_file="/proc/$1"

    cat "$_proc_file"
}

######################################################
# Check that an RPC service is healthy -
# this includes allowing a certain number of failures
# before marking the NFS service unhealthy.
#
# usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
#
# each triple is a set of 3 arguments: an operator, a 
# fail count limit and an action string.
#
# For example:
#
# 	nfs_check_rpc_service "lockd" \
#	    -ge 15 "verbose restart unhealthy" \
#	    -eq 10 "restart:bs"
#
# says that if lockd is down for 15 iterations then do
# a verbose restart of lockd and mark the node unhealthy.
# Before this, after 10 iterations of failure, the
# service is restarted silently in the background.
# Order is important: the number of failures need to be
# specified in reverse order because processing stops
# after the first condition that is true.
######################################################
nfs_check_rpc_service ()
{
    # The first argument is the program name; the remaining arguments
    # are the (operator, limit, actions) triples.
    _prog_name="$1" ; shift

    # Defaults, adjusted per program below:
    #   _rpc_prog - name passed to ctdb_check_rpc
    #   _version  - RPC version passed to ctdb_check_rpc
    #   _restart  - restart command string (empty: built on demand)
    #   _opts     - options appended to the rpc.<prog> command line
    _version=1
    _rpc_prog="$_prog_name"
    _restart=""
    _opts=""
    case "$_prog_name" in
	knfsd)
	    _rpc_prog=nfs
	    _version=3
	    _restart="echo 'Trying to restart NFS service'"
	    _restart="${_restart}; startstop_nfs restart"
	    ;;
	mountd)
	    # Adds " -p <port>" only when MOUNTD_PORT is set.
	    _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
	    ;;
	rquotad)
	    # Adds " -p <port>" only when RQUOTAD_PORT is set.
	    _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
	    ;;
	lockd)
	    _rpc_prog=nlockmgr
	    _version=4
	    _restart="echo 'Trying to restart lock manager service'"
	    _restart="${_restart}; startstop_nfslock restart"
	    ;;
	statd)
	    _rpc_prog=status
	    # Each option is added only when its variable is set.
	    _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
	    _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
	    _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
	    ;;
	*)
	    echo "Internal error: unknown RPC program \"$_prog_name\"."
	    exit 1
    esac

    # Name of the per-program failure counter.
    _service_name="nfs_${_prog_name}"

    # Healthy: reset the failure counter and stop here.  The output of
    # the check is discarded but remains in $ctdb_check_rpc_out.
    if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
	ctdb_counter_init "$_service_name"
	return 0
    fi

    ctdb_counter_incr "$_service_name"

    # Walk the triples: $1 = operator, $2 = limit, $3 = action list.
    while [ -n "$3" ] ; do
	# In "quiet" mode ctdb_check_counter returns non-zero when the
	# counter satisfies "<count> <operator> <limit>".
	ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
	    # $3 is intentionally unquoted: it is a space-separated
	    # list of actions.
	    for _action in $3 ; do
		case "$_action" in
		    verbose)
			echo "$ctdb_check_rpc_out"
			;;
		    restart|restart:*)
			# No explicit command specified, construct rpc command.
			if [ -z "$_restart" ] ; then
			    _p="rpc.${_prog_name}"
			    _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
			    _restart="${_restart}; killall -q -9 $_p"
			    _restart="${_restart}; $_p $_opts"
			fi

			# Process restart flags...
			_flags="${_action#restart:}"
			# There may not have been a colon...
			[ "$_flags" != "$_action" ] || _flags=""
			# q=quiet - everything to /dev/null
			if [ "${_flags#*q}" != "$_flags" ] ; then
			    _restart="{ ${_restart} ; } >/dev/null 2>&1"
			fi
			# s=stealthy - last command to /dev/null
			if [ "${_flags#*s}" != "$_flags" ] ; then
			    _restart="${_restart} >/dev/null 2>&1"
			fi
			# b=background - the whole thing, easy and reliable
			if [ "${_flags#*b}" != "$_flags" ] ; then
			    _restart="{ ${_restart} ; } &"
			fi

			# Do it!
			# The command was assembled as a string above, so
			# it has to be run through eval.
			eval "${_restart}"
			;;
		    unhealthy)
			exit 1
			;;
		    *)
			echo "Internal error: unknown action \"$_action\"."
			exit 1
		esac
	    done

	    # Only process the first action group.
	    break
	}
	shift 3
    done
}

######################################################
# check that a rpc server is registered with portmap
# and responding to requests
# usage: ctdb_check_rpc SERVICE_NAME VERSION
######################################################
ctdb_check_rpc ()
{
    progname="$1"
    version="$2"

    # The rpcinfo output is kept in $ctdb_check_rpc_out for the caller.
    # $progname and $version are left unquoted so that an omitted
    # argument disappears from the command line.
    if ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
	return 0
    fi

    # Failure: prefix the captured output with an error line and print it.
    ctdb_check_rpc_out="ERROR: $progname failed RPC check:
$ctdb_check_rpc_out"
    echo "$ctdb_check_rpc_out"
    return 1
}

######################################################
# check a set of directories is available
# return 1 on a missing directory
# usage: ctdb_check_directories_probe   (directories are read from stdin, one per line)
######################################################
# Read directory names from stdin, one per line, and return 1 at the
# first one that is not an existing directory.  Names containing '%'
# are skipped.  On failure the offending name is left in $d for the
# caller.
ctdb_check_directories_probe() {
    # -r keeps backslashes in names intact.  The extra test makes sure
    # a last line without a trailing newline is still checked.
    while IFS="" read -r d || [ -n "$d" ] ; do
	case "$d" in
	    *%*)
		continue
		;;
	    *)
		[ -d "${d}/." ] || return 1
	esac
    done
}

######################################################
# check a set of directories is available
# usage: ctdb_check_directories [SERVICE_NAME]   (directories are read from stdin, one per line)
######################################################
ctdb_check_directories ()
{
    # The name used in the error message defaults to $service_name.
    n="${1:-${service_name}}"

    # The probe leaves the failing directory in $d.
    if ! ctdb_check_directories_probe ; then
	echo "ERROR: $n directory \"$d\" not available"
	exit 1
    fi
}

######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports <ports...>
######################################################

# This flag file is created when a service is initially started.  It
# is deleted the first time TCP port checks for that service succeed.
# Until then ctdb_check_tcp_ports() prints a more subtle "error"
# message if a port check fails.
# Set $_ctdb_service_started_file to the flag file for $service_name.
_ctdb_check_tcp_common ()
{
    _ctdb_service_started_file="${ctdb_fail_dir}/${service_name}.started"
}

# Create the "service just started" flag file, together with its
# parent directory if necessary.
ctdb_check_tcp_init ()
{
    _ctdb_check_tcp_common

    local _flag_dir="${_ctdb_service_started_file%/*}"

    mkdir -p "$_flag_dir"
    touch "$_ctdb_service_started_file"
}

ctdb_check_tcp_ports ()
{
    [ -n "$1" ] || {
	echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
	exit 1
    }

    # Default list of checkers, tried in order.  If any of these is
    # unsupported then CTDB_TCP_PORT_CHECKERS can be overridden in
    # /etc/sysconfig/ctdb or via a file in /etc/ctdb/rc.local.d/.
    if [ -z "$CTDB_TCP_PORT_CHECKERS" ] ; then
	CTDB_TCP_PORT_CHECKERS="ctdb nmap netstat"
    fi

    # Each checker is a function ctdb_check_tcp_ports_<name>.  It
    # returns 0 (all ports OK), 1 (a port failed, left in $_p) or 127
    # (checker unusable).  The first usable checker decides the result.
    for _c in $CTDB_TCP_PORT_CHECKERS ; do
	ctdb_check_tcp_ports_$_c "$@"
	case "$?" in
	    0)
		# The ports answer, so the "just started" flag is no
		# longer needed.
		_ctdb_check_tcp_common
		rm -f "$_ctdb_service_started_file"
		return 0
		;;
	    1)
		_ctdb_check_tcp_common
		if [ -f "$_ctdb_service_started_file" ] ; then
		    # Still in the start-up phase: report this less
		    # loudly.
		    echo "INFO: $service_name tcp port $_p is not responding"
		else
		    echo "ERROR: $service_name tcp port $_p is not responding"
		    debug <<EOF
$ctdb_check_tcp_ports_debug
EOF
		fi

		return 1
		;;
	    127)
		debug <<EOF
ctdb_check_ports - checker $_c not implemented
output from checker was:
$ctdb_check_tcp_ports_debug
EOF
		;;
	esac
    done

    echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""

    return 127
}

# Port checker based on "netstat -l -t -n".  Returns 0 when every port
# given as an argument shows up as LISTEN on 0.0.0.0 or ::, 1 at the
# first port that does not (left in $_p), and 127 when netstat could
# not be run.  Details are left in $ctdb_check_tcp_ports_debug.
ctdb_check_tcp_ports_netstat ()
{
    _cmd='netstat -l -t -n'
    _ns=$($_cmd 2>&1)
    if [ $? -eq 127 ] ; then
	# netstat probably not installed - unlikely?
	ctdb_check_tcp_ports_debug="$_ns"
	return 127
    fi

    for _p ; do  # process each function argument (port)
	# Accept a listener on either 0.0.0.0 or ::.
	_pat="[[:space:]](0\.0\.0\.0|::):${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
	if ! echo "$_ns" | grep -E -q "$_pat" ; then
	    # We didn't match the port, so flag an error.
	    ctdb_check_tcp_ports_debug="$_cmd shows this output:
$_ns"
	    return 1
	fi
    done

    return 0
}

# Port checker based on nmap against 127.0.0.1.  Returns 0 when every
# port given as an argument is reported open, 1 at the first one that
# is not (left in $_p), and 127 when nmap could not be run.  Details
# are left in $ctdb_check_tcp_ports_debug.
ctdb_check_tcp_ports_nmap ()
{
    # nmap wants a comma-separated list of ports
    _ports=""
    for _p ; do
	if [ -n "$_ports" ] ; then
	    _ports="${_ports},${_p}"
	else
	    _ports="${_p}"
	fi
    done

    _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"

    _nmap_out=$($_cmd 2>&1)
    if [ $? -eq 127 ] ; then
	# nmap probably not installed
	ctdb_check_tcp_ports_debug="$_nmap_out"
	return 127
    fi

    # Keep only what follows "Ports:" in the greppable output.
    _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')

    for _p ; do
	# Each open port looks something like this, possibly followed
	# by a comma:
	#  445/open/tcp//microsoft-ds///
	_t="$_p/open/tcp//"
	case "$_port_info" in
	    # The entry must start the string or follow a space, so
	    # that port 45 does not match the entry for port 445.
	    $_t*|*\ $_t*)
		continue
		;;
	esac

	# Nope, flag an error...
	ctdb_check_tcp_ports_debug="$_cmd shows this output:
$_nmap_out"
	return 1
    done

    return 0
}

# Use the new "ctdb checktcpport" command to check the port.
# This is very cheap.
# Port checker based on "ctdb checktcpport".  Returns 0 when every port
# given as an argument is in use, 1 at the first port the command
# managed to bind to (left in $_p), and 127 for any other exit status.
ctdb_check_tcp_ports_ctdb ()
{
    for _p ; do  # process each function argument (port)
	_cmd="ctdb checktcpport $_p"
	_out=$($_cmd 2>&1)
	_ret=$?

	# 98: couldn't bind, something already listening, next port...
	if [ "$_ret" = "98" ] ; then
	    continue
	fi

	# 0: the bind succeeded, so nothing is listening on the port.
	if [ "$_ret" = "0" ] ; then
	    ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
	    return 1
	fi

	# Anything else: assume not implemented.
	ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
$_out"
	return 127
    done

    return 0
}

######################################################
# check a unix socket
# usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
######################################################
ctdb_check_unix_socket ()
{
    socket_path="$1"

    # Nothing to check without a path.
    if [ -z "$socket_path" ] ; then
	return 0
    fi

    # Look for a listening unix socket whose line ends in the path.
    if netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$" ; then
	return 0
    fi

    echo "ERROR: $service_name socket $socket_path not found"
    return 1
}

######################################################
# check a command returns zero status
# usage: ctdb_check_command SERVICE_NAME <command>
######################################################
ctdb_check_command ()
{
  # NB: this overwrites the global $service_name with $1.
  service_name="$1"
  wait_cmd="$2"

  # Nothing to do without a command.
  if [ -z "$wait_cmd" ] ; then
      return 0
  fi

  # $wait_cmd is deliberately unquoted: it is a command line that is
  # split into words.  Its output is discarded.
  if ! $wait_cmd > /dev/null 2>&1 ; then
      echo "ERROR: $service_name - $wait_cmd returned error"
      exit 1
  fi
}

################################################
# kill off any TCP connections with the given IP
################################################
# Kill all ESTABLISHED TCP connections that involve IP address $1.
#
# A "ctdb killtcp" is sent for every connection; for destination ports
# other than 139 and 445 a second one is sent in the reverse direction.
# Afterwards the function waits for the connections to disappear from
# netstat and gives up after a few seconds.  Progress and problems are
# reported on stdout.
kill_tcp_connections() {
    _IP="$1"
    _failed=0

    _killcount=0
    connfile="$CTDB_VARDIR/state/connections.$_IP"
    # The connection list is staged in a file so that the loop below
    # runs in the current shell and can update the counters.  Make sure
    # the directory for that file exists.
    mkdir -p "${connfile%/*}"
    # Columns 4 and 5 of "netstat -tn" are the local and foreign
    # address.  The second command picks up IPv4-mapped (::ffff:)
    # addresses.
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src; do
	# Split "address:port" at the last colon.
	srcip="${src%:*}"
	srcport="${src##*:}"
	destip="${dest%:*}"
	destport="${dest##*:}"
	echo "Killing TCP connection $srcip:$srcport $destip:$destport"
	ctdb killtcp "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
	case "$destport" in
	  # we only do one-way killtcp for CIFS
	  139|445) : ;;
	  # for all others we do 2-way
	  *)
		ctdb killtcp "$destip:$destport" "$srcip:$srcport" >/dev/null 2>&1 || _failed=1
		;;
	esac
	_killcount=$((_killcount + 1))
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
	echo "Failed to send killtcp control"
	return;
    }
    [ "$_killcount" -gt 0 ] || {
	return;
    }
    # Wait for the connections to go away, polling once per second.
    _count=0
    while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
	sleep 1
	_count=$((_count + 1))
	[ "$_count" -gt 3 ] && {
	    echo "Timed out killing tcp connections for IP $_IP"
	    return;
	}
    done
    echo "killed $_killcount TCP connections to released IP $_IP"
}

##################################################################
# kill off the local end for any TCP connections with the given IP
##################################################################
# Kill the local end of all ESTABLISHED TCP connections that involve
# IP address $1.
#
# One "ctdb killtcp" is sent per connection.  Afterwards the function
# waits for the connections to disappear from netstat and gives up
# after a few seconds.  Progress and problems are reported on stdout.
kill_tcp_connections_local_only() {
    _IP="$1"
    _failed=0

    _killcount=0
    connfile="$CTDB_VARDIR/state/connections.$_IP"
    # The connection list is staged in a file so that the loop below
    # runs in the current shell and can update the counters.  Make sure
    # the directory for that file exists.
    mkdir -p "${connfile%/*}"
    # Columns 4 and 5 of "netstat -tn" are the local and foreign
    # address.  The second command picks up IPv4-mapped (::ffff:)
    # addresses.
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src; do
	# Split "address:port" at the last colon.
	srcip="${src%:*}"
	srcport="${src##*:}"
	destip="${dest%:*}"
	destport="${dest##*:}"
	echo "Killing TCP connection $srcip:$srcport $destip:$destport"
	ctdb killtcp "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
	_killcount=$((_killcount + 1))
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
	echo "Failed to send killtcp control"
	return;
    }
    [ "$_killcount" -gt 0 ] || {
	return;
    }
    # Wait for the connections to go away, polling once per second.
    _count=0
    while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
	sleep 1
	_count=$((_count + 1))
	[ "$_count" -gt 3 ] && {
	    echo "Timed out killing tcp connections for IP $_IP"
	    return;
	}
    done
    echo "killed $_killcount TCP connections to released IP $_IP"
}

##################################################################
# tickle any TCP connections with the given IP
##################################################################
# Send "ctdb tickle" in both directions for every ESTABLISHED TCP
# connection that involves IP address $1.  Progress and problems are
# reported on stdout.
tickle_tcp_connections() {
    _IP="$1"
    _failed=0

    connfile="$CTDB_VARDIR/state/connections.$_IP"
    # The connection list is staged in a file so that the loop below
    # runs in the current shell and can update $_failed.  Make sure the
    # directory for that file exists.
    mkdir -p "${connfile%/*}"
    # Columns 4 and 5 of "netstat -tn" are the local and foreign
    # address.  The second command picks up IPv4-mapped (::ffff:)
    # addresses.
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src; do
	# Split "address:port" at the last colon.
	srcip="${src%:*}"
	srcport="${src##*:}"
	destip="${dest%:*}"
	destport="${dest##*:}"
	echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
	ctdb tickle "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
	echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
	ctdb tickle "$destip:$destport" "$srcip:$srcport" >/dev/null 2>&1 || _failed=1
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
	echo "Failed to send tickle control"
	return;
    }
}

########################################################
# start/stop the nfs service on different platforms
########################################################
# $1 is one of start, stop or restart.  The platform is "sles" when an
# executable nfsserver init script exists and "rhel" when an executable
# nfslock init script exists (the latter wins if both do).  On any
# other platform the function prints an error and exits.
startstop_nfs ()
{
	PLATFORM="unknown"
	if [ -x $CTDB_ETCDIR/init.d/nfsserver ] ; then
		PLATFORM="sles"
	fi
	if [ -x $CTDB_ETCDIR/init.d/nfslock ] ; then
		PLATFORM="rhel"
	fi

	# Dispatch on platform and operation together.  Unknown
	# operations on a known platform are silently ignored.
	case "$PLATFORM:$1" in
	sles:start)
		service nfsserver start
		;;
	sles:stop)
		service nfsserver stop > /dev/null 2>&1
		;;
	sles:restart)
		set_proc "fs/nfsd/threads" 0
		service nfsserver stop > /dev/null 2>&1
		pkill -9 nfsd
		service nfsserver start
		;;
	rhel:start)
		service nfslock start
		service nfs start
		;;
	rhel:stop)
		service nfs stop
		service nfslock stop
		;;
	rhel:restart)
		set_proc "fs/nfsd/threads" 0
		service nfs stop > /dev/null 2>&1
		service nfslock stop > /dev/null 2>&1
		pkill -9 nfsd
		service nfslock start
		service nfs start
		;;
	unknown:*)
		echo "Unknown platform. NFS is not supported with ctdb"
		exit 1
		;;
	esac
}

########################################################
# start/stop the nfs lockmanager service on different platforms
########################################################
# $1 is one of start, stop or restart.  The platform is "sles" when an
# executable nfsserver init script exists and "rhel" when an executable
# nfslock init script exists (the latter wins if both do).  On any
# other platform the function prints an error and exits.
startstop_nfslock ()
{
	PLATFORM="unknown"
	if [ -x $CTDB_ETCDIR/init.d/nfsserver ] ; then
		PLATFORM="sles"
	fi
	if [ -x $CTDB_ETCDIR/init.d/nfslock ] ; then
		PLATFORM="rhel"
	fi

	# Dispatch on platform and operation together.  Unknown
	# operations on a known platform are silently ignored.
	case "$PLATFORM:$1" in
	# for sles there is no service for lockmanager
	# so we instead just shutdown/restart nfs
	sles:start)
		service nfsserver start
		;;
	sles:stop)
		service nfsserver stop > /dev/null 2>&1
		;;
	sles:restart)
		service nfsserver stop
		service nfsserver start
		;;
	rhel:start)
		service nfslock start
		;;
	rhel:stop)
		service nfslock stop > /dev/null 2>&1
		;;
	rhel:restart)
		service nfslock stop
		service nfslock start
		;;
	unknown:*)
		echo "Unknown platform. NFS locking is not supported with ctdb"
		exit 1
		;;
	esac
}

# Add IP $2/$3 to interface $1 by running
# "$CTDB_BASE/interface_modify.sh add" under a per-interface flock
# (30 second timeout).  Returns the status of that command, or the
# mkdir status if the state directory cannot be created.
add_ip_to_iface()
{
	local _iface=$1
	local _ip=$2
	local _maskbits=$3
	local _state_dir="$CTDB_VARDIR/state/interface_modify"
	local _lockfile="$_state_dir/$_iface.flock"
	local _readd_base="$_state_dir/$_iface.readd.d"
	local _ret

	mkdir -p "$_state_dir" || {
		_ret=$?
		echo "Failed to mkdir -p $_state_dir - $_ret"
		return $_ret
	}

	# The lock file has to exist before flock can open it.
	test -f "$_lockfile" || {
		touch "$_lockfile"
	}

	flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" add "$_iface" "$_ip" "$_maskbits" "$_readd_base"
}

# Delete IP $2/$3 from interface $1 by running
# "$CTDB_BASE/interface_modify.sh delete" under a per-interface flock
# (30 second timeout).  Returns the status of that command, or the
# mkdir status if the state directory cannot be created.
delete_ip_from_iface()
{
	local _iface=$1
	local _ip=$2
	local _maskbits=$3
	local _state_dir="$CTDB_VARDIR/state/interface_modify"
	local _lockfile="$_state_dir/$_iface.flock"
	local _readd_base="$_state_dir/$_iface.readd.d"
	local _ret

	mkdir -p "$_state_dir" || {
		_ret=$?
		echo "Failed to mkdir -p $_state_dir - $_ret"
		return $_ret
	}

	# The lock file has to exist before flock can open it.
	test -f "$_lockfile" || {
		touch "$_lockfile"
	}

	flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" delete "$_iface" "$_ip" "$_maskbits" "$_readd_base"
}

# Register readd script $4 for IP $2/$3 on interface $1 by running
# "$CTDB_BASE/interface_modify.sh readd_script" under a per-interface
# flock (30 second timeout).  Returns the status of that command, or
# the mkdir status if the state directory cannot be created.
setup_iface_ip_readd_script()
{
	local _iface=$1
	local _ip=$2
	local _maskbits=$3
	local _readd_script=$4
	local _state_dir="$CTDB_VARDIR/state/interface_modify"
	local _lockfile="$_state_dir/$_iface.flock"
	local _readd_base="$_state_dir/$_iface.readd.d"
	local _ret

	mkdir -p "$_state_dir" || {
		_ret=$?
		echo "Failed to mkdir -p $_state_dir - $_ret"
		return $_ret
	}

	# The lock file has to exist before flock can open it.
	test -f "$_lockfile" || {
		touch "$_lockfile"
	}

	flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" readd_script "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$_readd_script"
}

########################################################
# some simple logic for counting events - per eventscript
# usage: ctdb_counter_init
#        ctdb_counter_incr
#        ctdb_check_counter_limit <limit>
# ctdb_check_counter_limit succeeds when count >= <limit>
########################################################
# Set $_service_name (defaults to $service_name) and $_counter_file,
# and make sure the directory holding the counter file exists.
_ctdb_counter_common ()
{
    _service_name="${1:-${service_name}}"
    _counter_file="${ctdb_fail_dir}/${_service_name}"

    # Parent directory of the counter file (dirname).
    mkdir -p "${_counter_file%/*}"
}
# Reset the failure counter by truncating the counter file.
ctdb_counter_init ()
{
    _ctdb_counter_common "$1"

    : >"$_counter_file"
}
# Increment the failure counter.  The count is the size of the counter
# file, so one byte is appended.
ctdb_counter_incr ()
{
    _ctdb_counter_common "$1"

    # unary counting!
    printf '1' >> "$_counter_file"
}
# Compare the failure counter of $service_name with limit $1 (default
# $service_fail_limit).  At or above the limit an error is printed and
# the script exits with status 1.  Below it a warning is printed for a
# non-zero count, unless $2 is non-empty.
ctdb_check_counter_limit ()
{
    _ctdb_counter_common

    _limit="${1:-${service_fail_limit}}"
    _quiet="$2"

    # unary counting!
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)

    if [ $_size -ge $_limit ] ; then
	echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
	exit 1
    fi

    if [ $_size -gt 0 ] && [ -z "$_quiet" ] ; then
	echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
    fi
}
# Return 1 when the failure counter of $service_name equals $1,
# 0 otherwise.
ctdb_check_counter_equal ()
{
    _ctdb_counter_common

    _limit=$1

    # unary counting!
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)

    [ $_size -eq $_limit ] && return 1
    return 0
}
# Test a failure counter against a limit.
#
# usage: ctdb_check_counter [MSG [OP [LIMIT [SERVICE_NAME]]]]
#
#   MSG          - "error" (default): print an error and exit 1 when
#                  the test is true.  Anything else: stay silent and
#                  return 1 instead.
#   OP           - integer operator understood by test (default -ge)
#   LIMIT        - value to compare with (default $service_fail_limit)
#   SERVICE_NAME - counter to check (default $service_name)
#
# Returns 0 when "<count> OP LIMIT" is false.
ctdb_check_counter () {
    _msg="${1:-error}"  # "error"  - anything else is silent on fail
    _op="${2:--ge}"  # an integer operator supported by test
    _limit="${3:-${service_fail_limit}}"
    # Take the service name straight from $4.  A "shift 3" followed by
    # $1 fails when fewer than 3 arguments are given, and the message
    # type would then be used as the service name.
    _ctdb_counter_common "$4"

    # unary counting!
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
    if [ "$_size" "$_op" "$_limit" ] ; then
	if [ "$_msg" = "error" ] ; then
	    echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
	    exit 1
	else
	    return 1
	fi
    fi
}

########################################################

# Status files and failure counters both live under $CTDB_VARDIR.
ctdb_status_dir="${CTDB_VARDIR}/status"
ctdb_fail_dir="${CTDB_VARDIR}/failcount"

# Set $service_state_dir for service $1 (default $service_name) and
# create the directory.  Exits with status 1 if it cannot be created.
ctdb_setup_service_state_dir ()
{
    service_state_dir="${CTDB_VARDIR}/state/${1:-${service_name}}"

    if ! mkdir -p "$service_state_dir" ; then
	echo "Error creating state dir \"$service_state_dir\""
	exit 1
    fi
}

########################################################
# Managed status history, for auto-start/stop

ctdb_managed_dir="${CTDB_VARDIR}/managed_history"

# Set $_service_name (defaults to $service_name) and the matching
# history file $_ctdb_managed_file.
_ctdb_managed_common ()
{
    _service_name="${1:-${service_name}}"
    _ctdb_managed_file="${ctdb_managed_dir}/${_service_name}"
}

# Record that the service is managed by creating its history file.
ctdb_service_managed ()
{
    _ctdb_managed_common "$@"

    # The history directory may not exist yet.
    mkdir -p "$ctdb_managed_dir"
    touch "$_ctdb_managed_file"
}

# Record that the service is no longer managed by removing its history
# file.
ctdb_service_unmanaged ()
{
    _ctdb_managed_common "$@"

    rm -f "$_ctdb_managed_file"
}

# Succeed when the history file of the service exists.
is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common "$@"

    test -f "$_ctdb_managed_file"
}

########################################################
# Check and set status

# Print a one-line report.  $1 is the node state to mention and $2 the
# file whose contents describe the problem.
log_status_cat ()
{
    # The file name is quoted so that paths containing whitespace or
    # glob characters are read as a single file.
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}

# Report the status recorded for $script_name: returns 1 (and logs the
# reason) when an "unhealthy" file is readable, 2 when a "banned" file
# is readable, and 0 otherwise.  "unhealthy" takes precedence.
ctdb_checkstatus ()
{
    local _status_base="$ctdb_status_dir/$script_name"

    if [ -r "$_status_base/unhealthy" ] ; then
	log_status_cat "unhealthy" "$_status_base/unhealthy"
	return 1
    fi

    if [ -r "$_status_base/banned" ] ; then
	log_status_cat "banned" "$_status_base/banned"
	return 2
    fi

    return 0
}

# Record or clear the status of $script_name.  For "unhealthy" or
# "banned" the contents of file $2 are stored as the reason.  Any other
# value of $1 clears both states.
ctdb_setstatus ()
{
    local _status_base="$ctdb_status_dir/$script_name"

    case "$1" in
	unhealthy|banned)
	    mkdir -p "$_status_base"
	    cat "$2" >"$_status_base/$1"
	    ;;
	*)
	    rm -f "$_status_base/banned"
	    rm -f "$_status_base/unhealthy"
	    ;;
    esac
}

##################################################################
# Reconfigure a service on demand

# Set $_d to the status directory of service $1 (default
# $service_name), create it, and set $_ctdb_service_reconfigure_flag
# to the flag file inside it.
_ctdb_service_reconfigure_common ()
{
    _d="${ctdb_status_dir}/${1:-${service_name}}"
    _ctdb_service_reconfigure_flag="${_d}/reconfigure"
    mkdir -p "$_d"
}

# Succeed when the reconfigure flag of the service exists.
ctdb_service_needs_reconfigure ()
{
    _ctdb_service_reconfigure_common "$@"

    test -e "$_ctdb_service_reconfigure_flag"
}

# Schedule a reconfigure by creating (or truncating) the flag file of
# the service.
ctdb_service_set_reconfigure ()
{
    _ctdb_service_reconfigure_common "$@"

    : >"$_ctdb_service_reconfigure_flag"
}

# Cancel a scheduled reconfigure by removing the flag file of the
# service.
ctdb_service_unset_reconfigure ()
{
    _ctdb_service_reconfigure_common "$@"

    rm -f "$_ctdb_service_reconfigure_flag"
}

# Reconfigure a service: clear the reconfigure flag, run
# service_reconfigure() and, when that succeeds, reset the failure
# counter.  A failure status from service_reconfigure() is returned
# unchanged.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"$@\"..."
    ctdb_service_unset_reconfigure "$@"

    if service_reconfigure "$@" ; then
	ctdb_counter_init "$@"
    else
	return $?
    fi
}

# Default service_reconfigure() function.
service_reconfigure ()
{
    # The default way to reconfigure is to restart the service named
    # by $1, falling back to $service_name.
    service "${1:-${service_name}}" restart
}

# Try to register the current event as the one holding the reconfigure
# lock of a service.  The lock file holds the event name on its first
# line and PIDs on the lines after it.  Returns 0 when the lock was
# taken and 1 when another live process is recorded in it.
ctdb_reconfigure_try_lock ()
{
    
    # Sets $_d to the status directory of the service.
    _ctdb_service_reconfigure_common "$@"
    _lock="${_d}/reconfigure_lock"
    # Make sure the lock file exists so it can be opened for reading.
    touch "$_lock"

    # The subshell has the lock file as stdin, so its "exit" only ends
    # the subshell and becomes the return status of this function.
    (
	# Lock file descriptor 0, i.e. the lock file itself.
	flock 0
	# This is overkill but will work if we need to extend this to
	# allow certain events to run multiple times in parallel
	# (e.g. takeip) and write multiple PIDs to the file.
	read _locker_event 
	if [ -n "$_locker_event" ] ; then
	    while read _pid ; do
		# A recorded PID other than our own that still answers
		# "kill -0" means the lock is held.
		if [ -n "$_pid" -a "$_pid" != $$ ] && \
		    kill -0 "$_pid" 2>/dev/null ; then
		    exit 1
		fi
	    done
	fi

	# Nobody else holds the lock: record our event name and PID.
	printf "%s\n%s\n" "$event_name" $$ >"$_lock"
	exit 0
    ) <"$_lock"
}

# Print the error output that "ctdb scriptstatus" last recorded for the
# monitor event of $script_name, then exit with the recorded code.
# This function never returns.
ctdb_replay_monitor_status ()
{
    echo "Replaying previous status for this script due to reconfigure..."
    # Leading colon (':') is missing in some versions...
    _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")
    # Output looks like this:
    # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
    # This is the cheapest way of getting fields in the middle.
    # The line is split on ':' inside the subshell, and the unquoted
    # result is split again on whitespace by "set --".
    set -- $(IFS=":" ; echo $_out)
    _code="$3"
    _status="$4"
    # The error output field can include colons so we'll try to
    # preserve them.  The weak checking at the beginning tries to make
    # this work for both broken (no leading ':') and fixed output.
    _out="${_out%:}"
    # Strip everything up to and including the 4 fields that follow the
    # script name.
    _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"
    case "$_status" in
	OK) : ;;  # Do nothing special.
	TIMEDOUT)
	    # Recast this as an error, since we can't exit with the
	    # correct negative number.
	    _code=1
	    _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
	    ;;
	DISABLED)
	    # Recast this as an OK, since we can't exit with the
	    # correct negative number.
	    _code=0
	    _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
	    ;;
	*) : ;;  # Must be ERROR, do nothing special.
    esac
    echo "$_err_out"
    exit $_code
}

# Run any scheduled reconfigure for a service (default $service_name),
# taking care not to collide with another event doing the same.  Only
# the monitor, ipreallocated and reconfigure events are handled.  For
# all other events the function returns 0 straight away.
ctdb_service_check_reconfigure ()
{
    [ -n "$1" ] || set -- "$service_name"

    # We only care about some events in this function.
    case "$event_name" in
	monitor|ipreallocated|reconfigure) : ;;
	*) return 0 ;;
    esac

    if ! ctdb_reconfigure_try_lock "$@" ; then
	# Somebody else is running an event we don't want to collide
	# with.  We proceed with caution.
	case "$event_name" in
	    reconfigure)
		# Tell whoever called us to retry.
		exit 2
		;;
	    ipreallocated)
		# Defer any scheduled reconfigure and just run the
		# rest of the ipreallocated event, as per the
		# eventscript.  There's an assumption here that the
		# event doesn't depend on any scheduled reconfigure.
		return 0
		;;
	    monitor)
		# There is most likely a reconfigure in progress so
		# the service is possibly unstable.  Any scheduled
		# reconfigure is deferred and the previous monitor
		# status is replayed, since that's the best
		# information we have.
		ctdb_replay_monitor_status
		;;
	esac
	return
    fi

    # No events covered by this function are running, so there is
    # nothing to collide with.
    case "$event_name" in
	reconfigure)
	    # Run in a subshell so that an exit inside the reconfigure
	    # still leaves us in control of the exit status.
	    (ctdb_service_reconfigure "$@")
	    exit $?
	    ;;
	ipreallocated)
	    if ctdb_service_needs_reconfigure "$@" ; then
		ctdb_service_reconfigure "$@"
	    fi
	    ;;
	monitor)
	    if ctdb_service_needs_reconfigure "$@" ; then
		ctdb_service_reconfigure "$@"
		# The reconfigure might not have left the service
		# stable yet, so replay the previous status.
		ctdb_replay_monitor_status
	    fi
	    ;;
    esac
}

##################################################################
# Does CTDB manage this service? - and associated auto-start/stop

# Backward-compatibility helper: when $1 is "yes" and $2 is the service
# currently being looked up ($_service_name), append $2 to
# $CTDB_MANAGED_SERVICES.
ctdb_compat_managed_service ()
{
    if [ "$1" = "yes" ] && [ "$2" = "$_service_name" ] ; then
	CTDB_MANAGED_SERVICES="${CTDB_MANAGED_SERVICES} $2"
    fi
}

# Succeed when service $1 (default $service_name) is listed in
# $CTDB_MANAGED_SERVICES, either directly or after the old-style
# CTDB_MANAGES_* variables have been folded into that list.
is_ctdb_managed_service ()
{
    _service_name="${1:-${service_name}}"

    # Padding with spaces lets whole words be matched.
    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
	*" "${_service_name}" "*) return 0 ;;
    esac

    # No match: update $CTDB_MANAGED_SERVICES from the old-style
    # variables for backward compatibility and try again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "apache2"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"

    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
	*" "${_service_name}" "*) return 0 ;;
    esac
    return 1
}

# Automatically start or stop a service (default $service_name) when
# its managed state has changed.  This only acts during the monitor
# event and only when CTDB_SERVICE_AUTOSTARTSTOP is "yes".  When a
# start or stop is performed the script exits with the status of that
# operation; otherwise the function returns 0.
ctdb_start_stop_service ()
{
    # Do nothing unless configured to...
    [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0

    _service_name="${1:-${service_name}}"

    [ "$event_name" = "monitor" ] || return 0

    if is_ctdb_managed_service "$_service_name" ; then
	# Managed now: start it unless it was already managed before.
	if is_ctdb_previously_managed_service "$_service_name" ; then
	    return 0
	fi
	echo "Starting service \"$_service_name\" - now managed"
	ctdb_service_start "$_service_name"
	exit $?
    fi

    # Not managed now: stop it if it was managed before.
    is_ctdb_previously_managed_service "$_service_name" || return 0
    echo "Stopping service \"$_service_name\" - no longer managed"
    ctdb_service_stop "$_service_name"
    exit $?
}

# Start a service: mark it managed, run service_start() and, when that
# succeeds, reset the failure counter and set up the TCP "just started"
# flag.  A failure status from service_start() is returned unchanged.
ctdb_service_start ()
{
    # The service is marked managed if we've ever tried to start it.
    ctdb_service_managed "$@"

    # All arguments are passed on unchanged: if no argument was given
    # then service_start needs to know.
    if service_start "$@" ; then
	ctdb_counter_init "$@"
	ctdb_check_tcp_init
    else
	return $?
    fi
}

ctdb_service_stop ()
{
    # ctdb_service_unmanaged runs first; its result is not checked.
    ctdb_service_unmanaged "$@"

    # The arguments are forwarded untouched and the status of
    # service_stop becomes the status of this function.
    service_stop "$@"
    return $?
}

# Default service_start() and service_stop() functions.
 
# These may be overridden in an eventscript.  When overriding, the
# following convention must be followed.  If these functions are
# called with no arguments then they may use internal logic to
# determine whether the service is managed and, therefore, whether
# they should take any action.  However, if the service name is
# specified as an argument then an attempt must be made to start or
# stop the service.  This is because the auto-start/stop code calls
# them with the service name as an argument.
service_start ()
{
    # With no (or an empty) argument, act on the global $service_name.
    [ -n "$1" ] || set -- "$service_name"

    service "$1" start
}

service_stop ()
{
    # With no (or an empty) argument, act on the global $service_name.
    [ -n "$1" ] || set -- "$service_name"

    service "$1" stop
}

##################################################################

ctdb_standard_event_handler ()
{
    # Handle the events common to all scripts.  "status" and
    # "setstatus" terminate the script with the status of the helper
    # they call; any other event falls through and returns 0.
    if [ "$1" = "status" ] ; then
	ctdb_checkstatus
	exit
    elif [ "$1" = "setstatus" ] ; then
	# Drop the event name; the rest are arguments for ctdb_setstatus.
	shift
	ctdb_setstatus "$@"
	exit
    fi
}

# Print the network address for a host address and prefix length.
#   $1 - IPv4 host address in dotted-quad form
#   $2 - number of mask bits
# Output: dotted-quad network address on stdout
ipv4_host_addr_to_net_addr()
{
	local HOST=$1
	local MASKBITS=$2
	local o1 o2 o3 o4 extra

	# Split the dotted quad; o1 is the leftmost (most significant) octet.
	IFS=. read -r o1 o2 o3 o4 extra <<EOF
$HOST
EOF

	# Pack the octets into one integer.
	local addr=$(( $o4 + $o3 * 256 + $o2 * (256 ** 2) + $o1 * (256 ** 3) ))

	# 32-bit value with the top $MASKBITS bits set.
	local ones=$(( 2**32 - 1 ))
	local mask=$(( ( $ones * (2**(32 - $MASKBITS)) ) & $ones ))

	local net=$(( $addr & $mask ))

	# Unpack, most significant octet first.
	local pos dotted=""
	for pos in 24 16 8 0 ; do
		dotted="${dotted}${dotted:+.}$(( ($net >> $pos) & 255 ))"
	done

	echo "$dotted"
}

# Print the dotted-quad netmask for a prefix length.
#   $1 - number of mask bits
# Output: dotted-quad netmask on stdout
ipv4_maskbits_to_net_mask()
{
	local MASKBITS=$1

	# 32-bit value with the top $MASKBITS bits set.
	local ones=$(( 2**32 - 1 ))
	local mask=$(( ( $ones * (2**(32 - $MASKBITS)) ) & $ones ))

	# Emit the four octets, most significant first.
	local pos dotted=""
	for pos in 24 16 8 0 ; do
		dotted="${dotted}${dotted:+.}$(( ($mask >> $pos) & 255 ))"
	done

	echo "$dotted"
}

# Check whether $1 is a syntactically valid dotted-quad IPv4 address.
#   $1 - candidate address
# Returns 0 if $1 consists of exactly four dot-separated decimal
# numbers, each in the range 0-255 (leading zeros are tolerated and
# read as decimal); returns 1 otherwise.  Nothing is printed.
#
# The check is done entirely in the shell.  An empty string, or an
# octet with too many digits for an integer comparison, is rejected
# explicitly rather than relying on "test -gt", which fails with an
# error (and so flags nothing) on such operands.
ipv4_is_valid_addr()
{
	local ADDR=$1
	local rest="$ADDR."	# trailing dot so every octet is dot-terminated
	local octet
	local count=0

	while [ -n "$rest" ] ; do
		octet="${rest%%.*}"
		rest="${rest#*.}"

		count=$(( count + 1 ))
		[ "$count" -le 4 ] || return 1

		# Each octet must be a non-empty string of digits.
		case "$octet" in
		    ""|*[!0-9]*) return 1 ;;
		esac

		# Strip leading zeros, then bound the length before the
		# numeric comparison so it cannot overflow.
		octet="${octet#"${octet%%[!0]*}"}"
		[ "${#octet}" -le 3 ] || return 1
		[ "${octet:-0}" -le 255 ] || return 1
	done

	[ "$count" -eq 4 ]
}

# iptables doesn't like being re-entered, so flock-wrap it.
iptables()
{
	# Run the real /sbin/iptables under a lock file in $CTDB_VARDIR,
	# waiting at most 30 seconds for the lock.  The lock path is
	# quoted so that a $CTDB_VARDIR containing whitespace stays a
	# single argument.  All arguments are passed through unchanged.
	flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
}

########################################################
# tickle handling
########################################################

# Directory holding the per-port scratch files written by
# update_tickles().  Create it up front if it is not there yet.
tickledir="${CTDB_VARDIR}/state/tickles"
[ -d "$tickledir" ] || mkdir -p "$tickledir"

# Synchronise the tickle list registered with ctdb against the TCP
# connections currently established to this node's public IPs.
#   $1 - local TCP port of interest
# Uses the global $tickledir for two scratch files named after the
# port.  For every established connection (as reported by "netstat
# -tn") to one of our public IPs on that port that has no tickle, a
# tickle is added; for every tickle without a matching connection, the
# tickle is deleted.
update_tickles ()
{
	_port="$1"

	mkdir -p "$tickledir" # Just in case

	# Who am I?
	_pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}

	# What public IPs do I hold?  (Field 2 is the IP, field 3 the
	# PNN of the node hosting it.)
	_ips=$(ctdb -Y ip | awk -F: -v pnn="$_pnn" '$3 == pnn {print $2}')

	# With no public IPs there are no tickles to add or remove.
	# Stop here rather than building the pattern "()" below, an
	# empty regexp group that not every awk may accept.
	[ -n "$_ips" ] || return 0

	# IPs as a regexp choice.  $_ips is deliberately unquoted so the
	# newline-separated list is joined with single spaces.  Dots are
	# escaped twice because "awk -v" strips one level of backslashes.
	_ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

	# Record connections to our public IPs in a temporary file
	_my_connections="${tickledir}/${_port}.connections"
	rm -f "$_my_connections"
	netstat -tn |
	awk -v destpat="^${_ipschoice}:${_port}\$" \
	  '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
	sort >"$_my_connections"

	# Record our current tickles in a temporary file, in the same
	# "src:port dst:port" format.  The first output line of each
	# "gettickles" call is skipped as a header.
	_my_tickles="${tickledir}/${_port}.tickles"
	rm -f "$_my_tickles"
	for _i in $_ips ; do
		ctdb -Y gettickles "$_i" "$_port" |
		awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
	done |
	sort >"$_my_tickles"

	# Add tickles for connections that we haven't already got tickles for
	comm -23 "$_my_connections" "$_my_tickles" |
	while read -r _src _dst ; do
		ctdb addtickle "$_src" "$_dst"
	done

	# Remove tickles for connections that are no longer there
	comm -13 "$_my_connections" "$_my_tickles" |
	while read -r _src _dst ; do
		ctdb deltickle "$_src" "$_dst"
	done

	rm -f "$_my_connections" "$_my_tickles"
}

########################################################
# load a site local config file
########################################################

# Source the site-local file named by $CTDB_RC_LOCAL, if that variable
# is set and the file is executable.
if [ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] ; then
	. "$CTDB_RC_LOCAL"
fi

# Source $CTDB_BASE/rc.local if it is executable.  Paths are quoted so
# that a $CTDB_BASE containing whitespace or glob characters is used
# literally.
if [ -x "$CTDB_BASE/rc.local" ] ; then
	. "$CTDB_BASE/rc.local"
fi

# Source every executable entry in $CTDB_BASE/rc.local.d, in glob
# (sorted) order.  Only the trailing "*" is left unquoted so that it
# alone is expanded as a pattern.
if [ -d "$CTDB_BASE/rc.local.d" ] ; then
	for i in "$CTDB_BASE"/rc.local.d/* ; do
		[ -x "$i" ] && . "$i"
	done
fi

# We'll call this here to ensure $CTDB_CURRENT_DEBUGLEVEL is set.
# This gives us a chance to override the debug level using a file in
# $CTDB_BASE/rc.local.d/.
ctdb_set_current_debuglevel

# Per-script defaults, derived from how the sourcing script was
# invoked.  They are plain globals, so a script can reassign them
# after sourcing this file.
script_name="${0##*/}"       # basename
service_name="$script_name"  # default is just the script name
# NOTE(review): presumably the number of consecutive failures tolerated
# before a service is reported as failed - confirm against its users.
service_fail_limit=1
# First argument of the sourcing script; ctdb_start_stop_service
# compares it against "monitor".
event_name="$1"