add a new support function ctdb_check_counter_equal()

author Ronnie Sahlberg <ronniesahlberg@gmail.com>

Wed, 17 Nov 2010 02:50:56 +0000 (13:50 +1100)

committer Ronnie Sahlberg <ronniesahlberg@gmail.com>

Wed, 17 Nov 2010 02:50:56 +0000 (13:50 +1100)
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 17 Nov 2010 02:50:56 +0000 (13:50 +1100)
committer Ronnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 17 Nov 2010 02:50:56 +0000 (13:50 +1100)
diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs

index 038adbb09409b34010f80219b1a211ec5b92e37e..8889cadcc26d1984d646d3072c871310f06bf778 100755 (executable)
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -78,15 +78,20 @@ case "$1" in
                 (
                         service_name="nfs_knfsd"
                         ctdb_counter_incr
-                       ctdb_check_counter_limit 10 quiet >/dev/null
+
+                       ctdb_check_counter_equal 10 || {
+                               echo "Trying to restart NFS service"
+                               startstop_nfs restart >/dev/null 2>&1 &
+                               exit 0
+                       }
+
+                       ctdb_check_counter_limit 15 quiet >/dev/null
                 ) || {
                         echo "$ctdb_check_rpc_out"
                         echo "Trying to restart NFS service"
                         startstop_nfs restart
                         exit 1
                 }
-               # we haven't hit the failure limit so restart quietly
-               startstop_nfs restart >/dev/null 2>&1 &
             fi
         }
  
@@ -107,7 +112,25 @@ case "$1" in
  
         # mount needs special handling since it is sometimes not started
         # correctly on RHEL5
-       ctdb_check_rpc "MOUNTD" 100005 1 || {
+       if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then
+               (service_name="nfs_mountd"; ctdb_counter_init)
+       else
+       (
+               service_name="nfs_mountd"
+               ctdb_counter_incr
+
+               ctdb_check_counter_equal 5 || {
+                       p="rpc.mountd"
+                       cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+                       echo "Trying to restart MOUNTD [${cmd}]"
+                       killall -q -9 $p
+                       $cmd &
+                       exit 0
+               }
+
+               ctdb_check_counter_limit 10 quiet >/dev/null
+       ) || {
+               echo "$ctdb_check_rpc_out"
                 p="rpc.mountd"
                 cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
                 echo "Trying to restart MOUNTD [${cmd}]"
@@ -115,6 +138,9 @@ case "$1" in
                 $cmd &
                 exit 1
         }
+       fi
+
+
         # rquotad needs special handling since it is sometimes not started
         # correctly on RHEL5
         # this is not a critical service so we dont flag the node as unhealthy
diff --git a/config/functions b/config/functions

index 610085b67700feb1faad5c715df79cb506ff4cde..4acfc4ffab3f4347dce18f23be6a2e1f0e8c4be9 100755 (executable)
--- a/config/functions
+++ b/config/functions
@@ -571,6 +571,19 @@ ctdb_check_counter_limit () {
         echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
      fi
  }
+ctdb_check_counter_equal () {
+    _ctdb_counter_common
+    # $1 is the count to test for: return 1 on an exact match, 0 otherwise.
+    _limit=$1
+    # NOTE(review): $_counter_file is presumably set by _ctdb_counter_common.
+    # unary counting: the file's size in bytes is the count (0 if stat fails)
+    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+    if [ $_size -eq $_limit ] ; then
+       return 1
+    fi
+    return 0
+}
+
  ########################################################
  
  ctdb_spool_dir="/var/spool/ctdb"
author	Ronnie Sahlberg <ronniesahlberg@gmail.com>
	Wed, 17 Nov 2010 02:50:56 +0000 (13:50 +1100)
committer	Ronnie Sahlberg <ronniesahlberg@gmail.com>
	Wed, 17 Nov 2010 02:50:56 +0000 (13:50 +1100)
config/events.d/60.nfs		patch \| blob \| history
config/functions		patch \| blob \| history