From: Ronnie Sahlberg
Date: Tue, 21 Dec 2010 23:09:35 +0000 (+1100)
Subject: 60.nfs
X-Git-Url: http://git.samba.org/?p=sahlberg%2Fctdb.git;a=commitdiff_plain;h=f0593e8571f22fa47dc5065263c43c7c744eb117

60.nfs

Try to restart LOCKD after 10 failures and flag the node as unhealthy after 15 failures
---

diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index a8fe2434..87955df9 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -107,12 +107,29 @@ case "$1" in
 	} || exit $?
 
 	# check that lockd responds to rpc requests
-	ctdb_check_rpc "LOCKD" 100021 1 || {
-		echo "Trying to restart lock manager service"
-		startstop_nfs restart
-		startstop_nfslock restart
-		exit 1
-	}
+	if ctdb_check_rpc "LOCKD" 100021 1 >/dev/null ; then
+		(service_name="lockd"; ctdb_counter_init)
+	else
+		(
+			service_name="lockd"
+			ctdb_counter_incr
+
+			ctdb_check_counter_equal 10 || {
+				echo "Trying to restart NFS lock service"
+				startstop_nfs restart >/dev/null 2>&1 &
+				startstop_nfslock restart >/dev/null 2>&1 &
+				exit 0
+			}
+
+			ctdb_check_counter_limit 15 quiet >/dev/null
+		) || {
+			echo "$ctdb_check_rpc_out"
+			echo "Trying to restart NFS lock service"
+			startstop_nfs restart
+			startstop_nfslock restart
+			exit 1
+		}
+	fi
 
 	# mount needs special handling since it is sometimes not started
 	# correctly on RHEL5