60.nfs
[sahlberg/ctdb.git] / config / events.d / 60.nfs
index 15c2b899c1b5411d2b329f6521e633e7717244d9..87955df92ccd4595210943f734d83a1f0afe657c 100755 (executable)
@@ -2,10 +2,11 @@
 # script to manage nfs in a clustered environment
 
 start_nfs() {
-       /bin/mkdir -p $CTDB_BASE/state/nfs
-       /bin/mkdir -p $CTDB_BASE/state/statd/ip
+       /bin/mkdir -p $CTDB_VARDIR/state/nfs
+       /bin/mkdir -p $CTDB_VARDIR/state/statd/ip
        startstop_nfs stop
        startstop_nfs start
+       echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
 }
 
 . $CTDB_BASE/functions
@@ -16,9 +17,11 @@ service_stop="startstop_nfs stop"
 
 loadconfig
 
+[ "$NFS_SERVER_MODE" != "GANESHA" ] || exit 0
+
 ctdb_start_stop_service
 
-echo XX 60.nfs   $@
+is_ctdb_managed_service || exit 0
 
 case "$1" in 
      init)
@@ -26,7 +29,8 @@ case "$1" in
        ;;
      startup)
        ctdb_service_start
-       touch $CTDB_BASE/state/statd/update-trigger
+       mkdir -p $CTDB_VARDIR/state/statd
+       touch $CTDB_VARDIR/state/statd/update-trigger
        ;;
 
      shutdown)
@@ -47,26 +51,52 @@ case "$1" in
            exit 0
        fi
 
+       update_tickles 2049
+
        # check that statd responds to rpc requests
        # if statd is not running we try to restart it
-       rpcinfo -u localhost 100024 1 > /dev/null || {
-               RPCSTATDOPTS=""
-               [ -n "$STATD_HOSTNAME" ] && RPCSTATDOPTS="$RPCSTATDOPTS -n $STATD_HOSTNAME"
-               [ -n "$STATD_PORT" ] && RPCSTATDOPTS="$RPCSTATDOPTS -p $STATD_PORT"
-               [ -n "$STATD_OUTGOING_PORT" ] && RPCSTATDOPTS="$RPCSTATDOPTS -o $STATD_OUTGOING_PORT"
-               rpc.statd $RPCSTATDOPTS 
-               echo "ERROR: STATD is not responding. Trying to restart it. [rpc.statd $RPCSTATDOPTS]"
-       }
+       if ctdb_check_rpc "STATD" 100024 1 >/dev/null ; then
+               (service_name="nfs_statd"; ctdb_counter_init)
+       else
+               p="rpc.statd" ; cmd="$p"
+               cmd="${cmd}${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
+               cmd="${cmd}${STATD_PORT:+ -p }${STATD_PORT}"
+               cmd="${cmd}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
+               (
+                       service_name="nfs_statd"
+                       ctdb_counter_incr
+                       ctdb_check_counter_limit 10 quiet >/dev/null
+               ) || {
+                       echo "$ctdb_check_rpc_out"
+                       echo "Trying to restart STATD [$cmd]"
+               }
+               $cmd
+       fi
 
 
        # check that NFS responds to rpc requests
        [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" = "yes" ] || {
-           (ctdb_check_rpc "NFS" 100003 3)
-           [ $? = "0" ] || {
-               echo "Trying to restart NFS service"
-               startstop_nfs restart
-               exit 1
-           }
+           if ctdb_check_rpc "NFS" 100003 3 >/dev/null ; then
+               (service_name="nfs_knfsd"; ctdb_counter_init)
+           else
+               (
+                       service_name="nfs_knfsd"
+                       ctdb_counter_incr
+
+                       ctdb_check_counter_equal 10 || {
+                               echo "Trying to restart NFS service"
+                               startstop_nfs restart >/dev/null 2>&1 &
+                               exit 0
+                       }
+
+                       ctdb_check_counter_limit 15 quiet >/dev/null
+               ) || {
+                       echo "$ctdb_check_rpc_out"
+                       echo "Trying to restart NFS service"
+                       startstop_nfs restart
+                       exit 1
+               }
+           fi
        }
 
        # and that its directories are available
@@ -77,36 +107,79 @@ case "$1" in
        } || exit $?
 
        # check that lockd responds to rpc requests
-       ctdb_check_rpc "lockd" 100021 1
+       if ctdb_check_rpc "LOCKD" 100021 1 >/dev/null ; then
+               (service_name="lockd"; ctdb_counter_init)
+       else
+               (
+                       service_name="lockd"
+                       ctdb_counter_incr
+
+                       ctdb_check_counter_equal 10 || {
+                               echo "Trying to restart NFS lock service"
+                               startstop_nfs restart >/dev/null 2>&1 &
+                               startstop_nfslock restart  >/dev/null 2>&1 &
+                               exit 0
+                       }
+
+                       ctdb_check_counter_limit 15 quiet >/dev/null
+       ) || {
+                       echo "$ctdb_check_rpc_out"
+                       echo "Trying to restart NFS lock service"
+                       startstop_nfs restart
+                       startstop_nfslock restart
+                       exit 1
+               }
+       fi
 
        # mountd needs special handling since it is sometimes not started
        # correctly on RHEL5
-       rpcinfo -u localhost 100005 1 > /dev/null || {
-               echo "ERROR: MOUNTD is not running. Trying to restart it."
-               RPCMOUNTDOPTS=""
-               [ -n "$MOUNTD_PORT" ] && RPCMOUNTDOPTS="$RPCMOUNTDOPTS -p $MOUNTD_PORT"
-               killall -q -9 rpc.mountd
-               rpc.mountd $RPCMOUNTDOPTS &
+       if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then
+               (service_name="nfs_mountd"; ctdb_counter_init)
+       else
+       (
+               service_name="nfs_mountd"
+               ctdb_counter_incr
+
+               ctdb_check_counter_equal 5 || {
+                       p="rpc.mountd"
+                       cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+                       echo "Trying to restart MOUNTD [${cmd}]"
+                       killall -q -9 $p
+                       $cmd &
+                       exit 0
+               }
+
+               ctdb_check_counter_limit 10 quiet >/dev/null
+       ) || {
+               echo "$ctdb_check_rpc_out"
+               p="rpc.mountd"
+               cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+               echo "Trying to restart MOUNTD [${cmd}]"
+               killall -q -9 $p
+               $cmd &
                exit 1
        }
+       fi
+
+
        # rquotad needs special handling since it is sometimes not started
        # correctly on RHEL5
        # this is not a critical service so we don't flag the node as unhealthy
-       rpcinfo -u localhost 100011 1 > /dev/null || {
-               echo "ERROR: RQUOTAD is not running. Trying to restart it."
-               RPCRQUOTADOPTS=""
-               [ -n "$RQUOTAD_PORT" ] && RPCRQUOTADOPTS="$RPCRQUOTADOPTS -p $RQUOTAD_PORT"
-               killall -q -9 rpc.rquotad
-               rpc.rquotad $RPCRQUOTADOPTS &
+       ctdb_check_rpc "RQUOTAD" 100011 1 || {
+               p="rpc.rquotad"
+               cmd="${p}${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
+               echo "Trying to restart RQUOTAD [${cmd}]"
+               killall -q -9 $p
+               $cmd &
        }
 
        # once every 60 seconds, update the statd state database that records
        # which clients need notifications
-       LAST_UPDATE=`stat --printf="%Y" $CTDB_BASE/state/statd/update-trigger`
+       LAST_UPDATE=`stat --printf="%Y" $CTDB_VARDIR/state/statd/update-trigger 2>/dev/null`
        CURRENT_TIME=`date +"%s"`
-       expr "$CURRENT_TIME" ">" "(" "$LAST_UPDATE" "+" "60" ")" >/dev/null 2>/dev/null
-       [ $? = "0" ] && {
-           touch $CTDB_BASE/state/statd/update-trigger
+       [ $CURRENT_TIME -ge $(($LAST_UPDATE + 60)) ] && {
+           mkdir -p $CTDB_VARDIR/state/statd
+           touch $CTDB_VARDIR/state/statd/update-trigger
            $CTDB_BASE/statd-callout updatelocal &
            $CTDB_BASE/statd-callout updateremote &
        }