# script to manage nfs in a clustered environment
# Bring up kernel NFS on this cluster node.
# Creates the CTDB state directories used for statd client tracking,
# then bounces the platform NFS service so it starts from a known
# state, and finally tunes TCP TIME_WAIT recycling for fast client
# reconnects after an IP failover.
# Globals read: CTDB_VARDIR (CTDB state directory root)
start_nfs() {
	/bin/mkdir -p $CTDB_VARDIR/state/nfs
	/bin/mkdir -p $CTDB_VARDIR/state/statd/ip
	# restart rather than plain start so a half-running NFS is reset
	startstop_nfs stop
	startstop_nfs start
	# NOTE(review): tcp_tw_recycle breaks clients behind NAT and was
	# removed from Linux in 4.12 -- confirm this knob is still wanted.
	echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
}
# Pull in the CTDB eventscript helpers (ctdb_service_*, ctdb_check_rpc,
# loadconfig, update_tickles, ...).
. $CTDB_BASE/functions

# Service identity and the hooks used by ctdb_start_stop_service.
service_name="nfs"
service_start="start_nfs"
service_stop="startstop_nfs stop"

loadconfig

# This script manages kernel NFS only; when NFS-Ganesha is configured
# a separate eventscript is expected to manage the NFS service.
[ "$NFS_SERVER_MODE" != "GANESHA" ] || exit 0

ctdb_start_stop_service

# Nothing to do unless CTDB is configured to manage NFS on this node.
is_ctdb_managed_service || exit 0

# Dispatch on the CTDB event name passed as $1.
case "$1" in
     init)
	# read statd from persistent database
	;;

     startup)
	ctdb_service_start
	# seed the statd update trigger so the 60-second update timer in
	# the monitor event has a timestamp to compare against
	mkdir -p $CTDB_VARDIR/state/statd
	touch $CTDB_VARDIR/state/statd/update-trigger
	;;

     shutdown)
	# NOTE(review): the patch context for this arm was truncated;
	# reconstructed by symmetry with the startup arm -- verify
	# against the upstream eventscript.
	ctdb_service_stop
	;;

     takeip)
	# defer actual reconfiguration to the next monitor event
	ctdb_service_set_reconfigure
	;;

     releaseip)
	ctdb_service_set_reconfigure
	;;

     monitor)
	# NOTE(review): context between 'monitor)' and 'exit 0 / fi' was
	# truncated in this hunk; reconstructed from the reconfigure logic
	# the patch removed from the old 'recovered)' arm -- verify
	# against the upstream eventscript.
	if ctdb_service_needs_reconfigure ; then
	    ctdb_service_reconfigure
	    exit 0
	fi

	# keep TCP tickles for the NFS port registered with ctdbd
	update_tickles 2049

	# check that statd responds to rpc requests
	# if statd is not running we try to restart it
	if ctdb_check_rpc "STATD" 100024 1 >/dev/null ; then
	    # healthy: reset the per-service failure counter (subshell so
	    # service_name of this script is not clobbered)
	    (service_name="nfs_statd"; ctdb_counter_init)
	else
	    # build the restart command; each option is appended only when
	    # the corresponding config variable is set
	    p="rpc.statd" ; cmd="$p"
	    cmd="${cmd}${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
	    cmd="${cmd}${STATD_PORT:+ -p }${STATD_PORT}"
	    cmd="${cmd}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
	    (
		service_name="nfs_statd"
		ctdb_counter_incr
		# only complain once the failure count exceeds the limit
		ctdb_check_counter_limit 10 quiet >/dev/null
	    ) || {
		echo "$ctdb_check_rpc_out"
		echo "Trying to restart STATD [$cmd]"
	    }
	    $cmd
	fi

	# check that NFS responds to rpc requests
	[ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" = "yes" ] || {
	    if ctdb_check_rpc "NFS" 100003 3 >/dev/null ; then
		(service_name="nfs_knfsd"; ctdb_counter_init)
	    else
		(
		    service_name="nfs_knfsd"
		    ctdb_counter_incr

		    # at exactly 10 consecutive failures, try a background
		    # restart without yet flagging the node unhealthy
		    ctdb_check_counter_equal 10 || {
			echo "Trying to restart NFS service"
			startstop_nfs restart >/dev/null 2>&1 &
			exit 0
		    }

		    ctdb_check_counter_limit 15 quiet >/dev/null
		) || {
		    # past the hard limit: restart in the foreground and
		    # report unhealthy
		    echo "$ctdb_check_rpc_out"
		    echo "Trying to restart NFS service"
		    startstop_nfs restart
		    exit 1
		}
	    fi
	}

	# and that its directories are available
	[ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
	    # strip the trailing export-options column, keep the path
	    exportfs | grep -v '^#' | grep '^/' |
	    sed -e 's/[[:space:]]\+[^[:space:]]*$//' |
	    ctdb_check_directories
	} || exit $?

	# check that lockd responds to rpc requests
	if ctdb_check_rpc "LOCKD" 100021 1 >/dev/null ; then
	    (service_name="lockd"; ctdb_counter_init)
	else
	    (
		service_name="lockd"
		ctdb_counter_incr

		ctdb_check_counter_equal 10 || {
		    echo "Trying to restart NFS lock service"
		    startstop_nfs restart >/dev/null 2>&1 &
		    startstop_nfslock restart >/dev/null 2>&1 &
		    exit 0
		}

		ctdb_check_counter_limit 15 quiet >/dev/null
	    ) || {
		echo "$ctdb_check_rpc_out"
		echo "Trying to restart NFS lock service"
		startstop_nfs restart
		startstop_nfslock restart
		exit 1
	    }
	fi

	# mount needs special handling since it is sometimes not started
	# correctly on RHEL5
	if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then
	    (service_name="nfs_mountd"; ctdb_counter_init)
	else
	    (
		service_name="nfs_mountd"
		ctdb_counter_incr

		ctdb_check_counter_equal 5 || {
		    p="rpc.mountd"
		    cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
		    echo "Trying to restart MOUNTD [${cmd}]"
		    killall -q -9 $p
		    $cmd &
		    exit 0
		}

		ctdb_check_counter_limit 10 quiet >/dev/null
	    ) || {
		echo "$ctdb_check_rpc_out"
		p="rpc.mountd"
		cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
		echo "Trying to restart MOUNTD [${cmd}]"
		killall -q -9 $p
		$cmd &
		exit 1
	    }
	fi

	# rquotad needs special handling since it is sometimes not started
	# correctly on RHEL5
	# this is not a critical service so we dont flag the node as unhealthy
	ctdb_check_rpc "RQUOTAD" 100011 1 || {
	    p="rpc.rquotad"
	    cmd="${p}${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
	    echo "Trying to restart RQUOTAD [${cmd}]"
	    killall -q -9 $p
	    $cmd &
	}

	# once every 60 seconds, update the statd state database for which
	# clients need notifications
	LAST_UPDATE=`stat --printf="%Y" $CTDB_VARDIR/state/statd/update-trigger 2>/dev/null`
	CURRENT_TIME=`date +"%s"`
	# default to 0 when the trigger file is missing so the update runs
	[ $CURRENT_TIME -ge $((${LAST_UPDATE:-0} + 60)) ] && {
	    mkdir -p $CTDB_VARDIR/state/statd
	    touch $CTDB_VARDIR/state/statd/update-trigger
	    $CTDB_BASE/statd-callout updatelocal &
	    $CTDB_BASE/statd-callout updateremote &
	}
	;;

     ipreallocated)
	# if the ips have been reallocated, we must restart the lockmanager
	# across all nodes and ping all statd listeners
	[ -x $CTDB_BASE/statd-callout ] && {
	    $CTDB_BASE/statd-callout notify &
	} >/dev/null 2>&1
	;;

     *)
	ctdb_standard_event_handler "$@"
	;;
esac