# check that statd responds to rpc requests
# if statd is not running we try to restart it
#
# Removed version (lines marked -): probe RPC program 100024 version 1 on
# localhost with rpcinfo. When the probe fails, build the option string from
# STATD_HOSTNAME (-n), STATD_PORT (-p) and STATD_OUTGOING_PORT (-o), start
# rpc.statd and print an ERROR line; the line is printed on every failed
# probe.
- rpcinfo -u localhost 100024 1 > /dev/null || {
- RPCSTATDOPTS=""
- [ -n "$STATD_HOSTNAME" ] && RPCSTATDOPTS="$RPCSTATDOPTS -n $STATD_HOSTNAME"
- [ -n "$STATD_PORT" ] && RPCSTATDOPTS="$RPCSTATDOPTS -p $STATD_PORT"
- [ -n "$STATD_OUTGOING_PORT" ] && RPCSTATDOPTS="$RPCSTATDOPTS -o $STATD_OUTGOING_PORT"
- rpc.statd $RPCSTATDOPTS
- echo "ERROR: STATD is not responding. Trying to restart it. [rpc.statd $RPCSTATDOPTS]"
- }
# Added version (lines marked +): probe through ctdb_check_rpc with its
# stdout discarded. On success ctdb_counter_init is run with
# service_name set to nfs_statd; the parentheses confine that assignment to
# a subshell.
# NOTE(review): ctdb_check_rpc and the ctdb_counter_* / ctdb_check_counter_limit
# helpers are defined outside this excerpt - presumably they keep a
# per-service failure count; confirm against their definitions.
+ if ctdb_check_rpc "STATD" 100024 1 >/dev/null ; then
+ (service_name="nfs_statd"; ctdb_counter_init)
+ else
# Build the restart command. Each ${VAR:+ -x }${VAR} pair appends the option
# and its value only when VAR is set and non-empty.
+ p="rpc.statd" ; cmd="$p"
+ cmd="${cmd}${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
+ cmd="${cmd}${STATD_PORT:+ -p }${STATD_PORT}"
+ cmd="${cmd}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
# Increment the counter, then check it against the limit of 10. The subshell
# status is that of ctdb_check_counter_limit, and the messages in the braces
# are printed only when that status is non-zero.
+ (
+ service_name="nfs_statd"
+ ctdb_counter_incr
+ ctdb_check_counter_limit 10 quiet >/dev/null
+ ) || {
# NOTE(review): ctdb_check_rpc_out is presumably filled in by the
# ctdb_check_rpc call above - confirm.
+ echo "$ctdb_check_rpc_out"
+ echo "Trying to restart STATD [$cmd]"
+ }
# The restart runs on every failed probe, whether or not the messages above
# were printed. cmd is expanded unquoted so the shell splits it into the
# program name and its options.
+ $cmd
+ fi
# check that NFS responds to rpc requests
#
# The whole check is skipped when CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK is set to
# yes. RPC program 100003 version 3 is probed through ctdb_check_rpc.
[ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" = "yes" ] || {
# Removed version (lines marked -): every failed probe printed a message,
# restarted NFS in the foreground and exited with status 1.
- (ctdb_check_rpc "NFS" 100003 3)
- [ $? = "0" ] || {
- echo "Trying to restart NFS service"
- startstop_nfs restart
- exit 1
- }
# Added version (lines marked +): the probe output is discarded. On success
# ctdb_counter_init is run with service_name set to nfs_knfsd inside a
# subshell.
# NOTE(review): the ctdb_counter_* helpers are defined outside this excerpt -
# presumably a per-service failure count; confirm.
+ if ctdb_check_rpc "NFS" 100003 3 >/dev/null ; then
+ (service_name="nfs_knfsd"; ctdb_counter_init)
+ else
# Increment the counter and check it against the limit of 10. When the
# subshell returns non-zero, take the loud path: print the saved probe
# output and a message, restart NFS in the foreground and exit 1.
+ (
+ service_name="nfs_knfsd"
+ ctdb_counter_incr
+ ctdb_check_counter_limit 10 quiet >/dev/null
+ ) || {
+ echo "$ctdb_check_rpc_out"
+ echo "Trying to restart NFS service"
+ startstop_nfs restart
+ exit 1
+ }
# we haven't hit the failure limit so restart quietly
# (stdout and stderr are discarded and the restart runs in the background,
# so the script continues without waiting and without exiting here)
+ startstop_nfs restart >/dev/null 2>&1 &
+ fi
}
# and that its directories are available
} || exit $?
# check that lockd responds to rpc requests
#
# RPC program 100021 version 1 is probed through ctdb_check_rpc. The change
# drops the subshell and the separate exit-status test, chaining the restart
# block directly onto the probe with ||; the label passed to ctdb_check_rpc
# also changes from lockd to LOCKD.
# On a failed probe a message is printed and both startstop_nfs and
# startstop_nfslock are called with restart.
# NOTE(review): the end of this brace group is not part of this excerpt.
- (ctdb_check_rpc "lockd" 100021 1)
- [ $? = "0" ] || {
+ ctdb_check_rpc "LOCKD" 100021 1 || {
echo "Trying to restart lock manager service"
startstop_nfs restart
startstop_nfslock restart
# mount needs special handling since it is sometimes not started
# correctly on RHEL5
#
# Removed version (lines marked -): probe RPC program 100005 version 1 on
# localhost with rpcinfo; on failure print an ERROR line, add -p MOUNTD_PORT
# when that variable is non-empty, force-kill rpc.mountd and start a new one
# in the background.
- rpcinfo -u localhost 100005 1 > /dev/null || {
- echo "ERROR: MOUNTD is not running. Trying to restart it."
- RPCMOUNTDOPTS=""
- [ -n "$MOUNTD_PORT" ] && RPCMOUNTDOPTS="$RPCMOUNTDOPTS -p $MOUNTD_PORT"
- killall -q -9 rpc.mountd
- rpc.mountd $RPCMOUNTDOPTS &
# Added version (lines marked +): same probe through ctdb_check_rpc. The
# command line is built in one step; ${MOUNTD_PORT:+ -p }${MOUNTD_PORT}
# appends the port option only when MOUNTD_PORT is set and non-empty, and
# the full command is included in the printed message.
+ ctdb_check_rpc "MOUNTD" 100005 1 || {
+ p="rpc.mountd"
+ cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+ echo "Trying to restart MOUNTD [${cmd}]"
# Force-kill any running instance (signal 9), then start the replacement in
# the background; cmd is expanded unquoted so it splits into program and
# options.
+ killall -q -9 $p
+ $cmd &
# Unchanged by the diff: a failed mountd probe ends the script with status 1.
exit 1
}
# rquotad needs special handling since it is sometimes not started
# correctly on RHEL5
# this is not a critical service so we do not flag the node as unhealthy
# (accordingly, neither version of this block contains an exit)
#
# Removed version (lines marked -): probe RPC program 100011 version 1 on
# localhost with rpcinfo; on failure print an ERROR line, add -p RQUOTAD_PORT
# when that variable is non-empty, force-kill rpc.rquotad and start a new one
# in the background.
- rpcinfo -u localhost 100011 1 > /dev/null || {
- echo "ERROR: RQUOTAD is not running. Trying to restart it."
- RPCRQUOTADOPTS=""
- [ -n "$RQUOTAD_PORT" ] && RPCRQUOTADOPTS="$RPCRQUOTADOPTS -p $RQUOTAD_PORT"
- killall -q -9 rpc.rquotad
- rpc.rquotad $RPCRQUOTADOPTS &
# Added version (lines marked +): same probe through ctdb_check_rpc. The
# command line is built in one step; ${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}
# appends the port option only when RQUOTAD_PORT is set and non-empty, and
# the full command is included in the printed message.
+ ctdb_check_rpc "RQUOTAD" 100011 1 || {
+ p="rpc.rquotad"
+ cmd="${p}${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
+ echo "Trying to restart RQUOTAD [${cmd}]"
# Force-kill any running instance (signal 9), then start the replacement in
# the background; cmd is expanded unquoted so it splits into program and
# options.
+ killall -q -9 $p
+ $cmd &
}
# once every 60 seconds, update the statd state database for which