From: Ronnie Sahlberg Date: Wed, 17 Nov 2010 02:50:56 +0000 (+1100) Subject: add a new support function ctdb_check_counter_equal() X-Git-Url: http://git.samba.org/?p=sahlberg%2Fctdb.git;a=commitdiff_plain;h=d0b790de08b9c9226ac9848b434a051d99a41dd8 add a new support function ctdb_check_counter_equal() update nfs to try to restart the service after 10 consecutive failures and to flag the node unhealthy after 15 add similar function to mountd --- diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs index 038adbb0..8889cadc 100755 --- a/config/events.d/60.nfs +++ b/config/events.d/60.nfs @@ -78,15 +78,20 @@ case "$1" in ( service_name="nfs_knfsd" ctdb_counter_incr - ctdb_check_counter_limit 10 quiet >/dev/null + + ctdb_check_counter_equal 10 || { + echo "Trying to restart NFS service" + startstop_nfs restart >/dev/null 2>&1 & + exit 0 + } + + ctdb_check_counter_limit 15 quiet >/dev/null ) || { echo "$ctdb_check_rpc_out" echo "Trying to restart NFS service" startstop_nfs restart exit 1 } - # we haven't hit the failure limit so restart quietly - startstop_nfs restart >/dev/null 2>&1 & fi } @@ -107,7 +112,25 @@ case "$1" in # mount needs special handling since it is sometimes not started # correctly on RHEL5 - ctdb_check_rpc "MOUNTD" 100005 1 || { + if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then + (service_name="nfs_mountd"; ctdb_counter_init) + else + ( + service_name="nfs_mountd" + ctdb_counter_incr + + ctdb_check_counter_equal 5 || { + p="rpc.mountd" + cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}" + echo "Trying to restart MOUNTD [${cmd}]" + killall -q -9 $p + $cmd & + exit 0 + } + + ctdb_check_counter_limit 10 quiet >/dev/null + ) || { + echo "$ctdb_check_rpc_out" p="rpc.mountd" cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}" echo "Trying to restart MOUNTD [${cmd}]" @@ -115,6 +138,9 @@ case "$1" in $cmd & exit 1 } + fi + + # rquotad needs special handling since it is sometimes not started # correctly on RHEL5 # this is not a critical service so we dont flag the node as unhealthy diff --git a/config/functions b/config/functions index 610085b6..4acfc4ff 100755 --- a/config/functions +++ b/config/functions @@ -571,6 +571,19 @@ ctdb_check_counter_limit () { echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet" fi } +ctdb_check_counter_equal () { + _ctdb_counter_common + + _limit=$1 + + # unary counting! + _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0) + if [ $_size -eq $_limit ] ; then + return 1 + fi + return 0 +} + ######################################################## ctdb_spool_dir="/var/spool/ctdb"