config/events.d/60.nfs

#!/bin/sh
# Script to manage NFS in a clustered environment.

   4 start_nfs() {
   5         /bin/mkdir -p $CTDB_VARDIR/state/nfs
   6         /bin/mkdir -p $CTDB_VARDIR/state/statd/ip
   7         startstop_nfs stop
   8         startstop_nfs start
   9         echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
  10 }
  11
  12 . $CTDB_BASE/functions
  13
  14 service_name="nfs"
  15 service_start="start_nfs"
  16 service_stop="startstop_nfs stop"
  17 service_reconfigure="startstop_nfs restart"
  18
  19 nfs_check_thread_count ()
  20 {
  21     [ "$CTDB_MONITOR_NFS_THREAD_COUNT" = "yes" ] || return 0
  22
  23     # If $RPCNFSDCOUNT/$USE_KERNEL_NFSD_NUMBER isn't set then we could
  24     # guess the default from the initscript.  However, let's just
  25     # assume that those using the default don't care about the number
  26     # of threads and that they have switched on this feature in error.
  27     _configured_threads="${RPCNFSDCOUNT:-${USE_KERNEL_NFSD_NUMBER}}"
  28     [ -n "$_configured_threads" ] || return 0
  29
  30     # nfsd should be running the configured number of threads.  If
  31     # there are a different number of threads then tell nfsd the
  32     # correct number.
  33     _running_threads=$(get_proc "fs/nfsd/threads")
  34     # Intentionally not arithmetic comparison - avoids extra errors
  35     # when get_proc() fails...
  36     if [ "$_running_threads" != "$_configured_threads" ] ; then
  37         echo "Attempting to correct number of nfsd threads from ${_running_threads} to ${_configured_threads}"
  38         set_proc "fs/nfsd/threads" "$_configured_threads"
  39     fi
  40 }
  41
  42 loadconfig
  43
  44 [ "$NFS_SERVER_MODE" != "GANESHA" ] || exit 0
  45
  46 ctdb_start_stop_service
  47
  48 is_ctdb_managed_service || exit 0
  49
  50 case "$1" in
  51      init)
  52         # read statd from persistent database
  53         ;;
  54      startup)
  55         ctdb_service_start
  56         mkdir -p $CTDB_VARDIR/state/statd
  57         touch $CTDB_VARDIR/state/statd/update-trigger
  58         ;;
  59
  60      shutdown)
  61         ctdb_service_stop
  62         ;;
  63
  64      takeip)
  65         ctdb_service_set_reconfigure
  66         ;;
  67
  68      releaseip)
  69         ctdb_service_set_reconfigure
  70         ;;
  71
  72       monitor)
  73         if ctdb_service_needs_reconfigure ; then
  74             ctdb_service_reconfigure
  75             exit 0
  76         fi
  77
  78         update_tickles 2049
  79
  80         # check that statd responds to rpc requests
  81         # if statd is not running we try to restart it
  82         # we only do this IF we have a rpc.statd command.
  83         # For platforms where rpc.statd does not exist, we skip
  84         # the check completely
  85         p="rpc.statd"
  86         which $p >/dev/null 2>/dev/null && {
  87                 if ctdb_check_rpc "STATD" 100024 1 >/dev/null ; then
  88                         (service_name="nfs_statd"; ctdb_counter_init)
  89                 else
  90                         cmd="$p"
  91                         cmd="${cmd}${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
  92                         cmd="${cmd}${STATD_PORT:+ -p }${STATD_PORT}"
  93                         cmd="${cmd}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
  94                         (
  95                                 service_name="nfs_statd"
  96                                 ctdb_counter_incr
  97                                 ctdb_check_counter_limit 10 quiet >/dev/null
  98                         ) || {
  99                                 echo "$ctdb_check_rpc_out"
 100                                 echo "Trying to restart STATD [$cmd]"
 101                                 $cmd
 102                         }
 103                 fi
 104         }
 105
 106         # check that NFS responds to rpc requests
 107         if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
 108             if ctdb_check_rpc "NFS" 100003 3 >/dev/null ; then
 109                 ctdb_counter_init "nfs_knfsd"
 110             else
 111                 ctdb_counter_incr "nfs_knfsd"
 112
 113                 if ! ctdb_check_counter "quiet" % 10 "nfs_knfsd" ; then
 114                     echo "Trying to restart NFS service"
 115                     startstop_nfs restart >/dev/null 2>&1 &
 116                 fi
 117
 118                 ctdb_check_counter "error" -ge 2 "nfs_knfsd"
 119             fi
 120         fi
 121
 122         # check that lockd responds to rpc requests
 123         if ctdb_check_rpc "LOCKD" 100021 1 >/dev/null ; then
 124                 (service_name="lockd"; ctdb_counter_init)
 125         else
 126                 (
 127                         service_name="lockd"
 128                         ctdb_counter_incr
 129
 130                         ctdb_check_counter_equal 10 || {
 131                                 echo "Trying to restart NFS lock service"
 132                                 startstop_nfs restart >/dev/null 2>&1 &
 133                                 startstop_nfslock restart  >/dev/null 2>&1 &
 134                                 exit 0
 135                         }
 136
 137                         ctdb_check_counter_limit 15 quiet >/dev/null
 138         ) || {
 139                         echo "$ctdb_check_rpc_out"
 140                         echo "Trying to restart NFS lock service"
 141                         startstop_nfs restart
 142                         startstop_nfslock restart
 143                         exit 1
 144                 }
 145         fi
 146
 147         # mount needs special handling since it is sometimes not started
 148         # correctly on RHEL5
 149         if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then
 150                 (service_name="nfs_mountd"; ctdb_counter_init)
 151         else
 152         (
 153                 service_name="nfs_mountd"
 154                 ctdb_counter_incr
 155
 156                 ctdb_check_counter_equal 5 || {
 157                         p="rpc.mountd"
 158                         cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
 159                         echo "Trying to restart MOUNTD [${cmd}]"
 160                         killall -q -9 $p
 161                         $cmd &
 162                         exit 0
 163                 }
 164
 165                 ctdb_check_counter_limit 10 quiet >/dev/null
 166         ) || {
 167                 echo "$ctdb_check_rpc_out"
 168                 p="rpc.mountd"
 169                 cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
 170                 echo "Trying to restart MOUNTD [${cmd}]"
 171                 killall -q -9 $p
 172                 $cmd &
 173                 exit 1
 174         }
 175         fi
 176
 177
 178         # rquotad needs special handling since it is sometimes not started
 179         # correctly on RHEL5
 180         # this is not a critical service so we dont flag the node as unhealthy
 181         ctdb_check_rpc "RQUOTAD" 100011 1 || {
 182                 p="rpc.rquotad"
 183                 cmd="${p}${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
 184                 echo "Trying to restart RQUOTAD [${cmd}]"
 185                 killall -q -9 $p
 186                 $cmd &
 187         }
 188
 189         nfs_check_thread_count
 190
 191         # once every 600 seconds, update the statd state database for which
 192         # clients need notifications
 193         LAST_UPDATE=`stat --printf="%Y" $CTDB_VARDIR/state/statd/update-trigger 2>/dev/null`
 194         CURRENT_TIME=`date +"%s"`
 195         [ $CURRENT_TIME -ge $(($LAST_UPDATE + 600)) ] && {
 196             mkdir -p $CTDB_VARDIR/state/statd
 197             touch $CTDB_VARDIR/state/statd/update-trigger
 198             $CTDB_BASE/statd-callout updatelocal &
 199             $CTDB_BASE/statd-callout updateremote &
 200         }
 201         ;;
 202
 203     ipreallocated)
 204         # if the ips have been reallocated, we must restart the lockmanager
 205         # across all nodes and ping all statd listeners
 206         [ -x $CTDB_BASE/statd-callout ] && {
 207                 $CTDB_BASE/statd-callout notify &
 208         } >/dev/null 2>&1
 209         ;;
 210     *)
 211         ctdb_standard_event_handler "$@"
 212         ;;
 213 esac
 214
 215 exit 0