ac22b7b5e08826ac5bc0e45cb5be21d5ea230f91
[ctdb.git] / config / events.d / 60.nfs
1 #!/bin/sh
2 # script to manage nfs in a clustered environment
3
# Start the NFS service from a clean state.
#
# Creates the NFS and statd state directories under $CTDB_VARDIR, then
# calls startstop_nfs (defined outside this block) with "stop" followed
# by "start", and finally enables the tcp_tw_recycle sysctl when the
# running kernel still provides it.
#
# Globals:  CTDB_VARDIR (read)
# Returns:  0 when the sysctl is absent or not writable; otherwise the
#           status of the sysctl write.
start_nfs() {
        # Quoted so that a $CTDB_VARDIR containing whitespace is not split.
        /bin/mkdir -p "$CTDB_VARDIR/state/nfs"
        /bin/mkdir -p "$CTDB_VARDIR/state/statd/ip"
        startstop_nfs stop
        startstop_nfs start
        # net.ipv4.tcp_tw_recycle was removed in Linux 4.12.  Only write
        # to it when it exists and is writable, so that a missing sysctl
        # does not make the whole service start report failure.
        if [ -w /proc/sys/net/ipv4/tcp_tw_recycle ] ; then
                echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
        fi
}
11
# Pull in the shared helper functions that the rest of this script calls.
# The path is quoted so a $CTDB_BASE containing whitespace still resolves.
. "$CTDB_BASE/functions"

# Description of the service handled by this script: its name and the
# commands used to start, stop and reconfigure it.
# NOTE(review): presumably consumed by the ctdb_service_* helpers from
# the functions file sourced above - confirm there.
service_name="nfs"
service_start="start_nfs"
service_stop="startstop_nfs stop"
service_reconfigure="startstop_nfs restart"
18
# Make sure nfsd is running the configured number of threads.
#
# Does nothing unless $CTDB_MONITOR_NFS_THREAD_COUNT is "yes" and a
# thread count is configured via $RPCNFSDCOUNT (preferred) or
# $USE_KERNEL_NFSD_NUMBER.  When the value read from "fs/nfsd/threads"
# differs from the configured one, a message is printed and the
# configured value is written back through set_proc.
nfs_check_thread_count ()
{
    if [ "$CTDB_MONITOR_NFS_THREAD_COUNT" != "yes" ] ; then
        return 0
    fi

    # No attempt is made to guess the initscript default: with neither
    # variable set we assume the admin does not care about the thread
    # count and enabled this check by mistake.
    _configured_threads="${RPCNFSDCOUNT:-${USE_KERNEL_NFSD_NUMBER}}"
    if [ -z "$_configured_threads" ] ; then
        return 0
    fi

    _running_threads=$(get_proc "fs/nfsd/threads")

    # Deliberately a string comparison rather than an arithmetic one:
    # it avoids extra errors when get_proc() fails.
    if [ "$_running_threads" = "$_configured_threads" ] ; then
        return 0
    fi

    echo "Attempting to correct number of nfsd threads from ${_running_threads} to ${_configured_threads}"
    set_proc "fs/nfsd/threads" "$_configured_threads"
}
41
# NOTE(review): loadconfig presumably populates the configuration
# variables tested below - confirm against the sourced functions file.
loadconfig

# This script does nothing when the NFS server mode is GANESHA.
if [ "$NFS_SERVER_MODE" = "GANESHA" ] ; then
    exit 0
fi

ctdb_start_stop_service

# Skip all event handling when this service is not managed by CTDB.
if ! is_ctdb_managed_service ; then
    exit 0
fi
49
# Dispatch on the event name passed as the first argument.
case "$1" in
     init)
        # read statd from persistent database
        ;;

     startup)
        ctdb_service_start
        mkdir -p "$CTDB_VARDIR/state/statd"
        touch "$CTDB_VARDIR/state/statd/update-trigger"
        ;;

     shutdown)
        ctdb_service_stop
        ;;

     takeip|releaseip)
        # Any change in the set of hosted addresses flags the service
        # for reconfiguration; the next monitor event acts on the flag.
        ctdb_service_set_reconfigure
        ;;

     monitor)
        if ctdb_service_needs_reconfigure ; then
            ctdb_service_reconfigure
            exit 0
        fi

        update_tickles 2049

        # check that statd responds to rpc requests
        # if statd is not running we try to restart it
        # we only do this IF we have a rpc.statd command.
        # For platforms where rpc.statd does not exist, we skip
        # the check completely
        p="rpc.statd"
        if command -v "$p" >/dev/null 2>&1 ; then
                if ctdb_check_rpc "STATD" 100024 1 >/dev/null ; then
                        (service_name="nfs_statd"; ctdb_counter_init)
                else
                        # Build the restart command, adding each option
                        # only when its variable is set and non-empty.
                        cmd="$p"
                        cmd="${cmd}${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
                        cmd="${cmd}${STATD_PORT:+ -p }${STATD_PORT}"
                        cmd="${cmd}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
                        (
                                service_name="nfs_statd"
                                ctdb_counter_incr
                                ctdb_check_counter_limit 10 quiet >/dev/null
                        ) || {
                                echo "$ctdb_check_rpc_out"
                                echo "Trying to restart STATD [$cmd]"
                                # Intentionally unquoted: $cmd holds the
                                # program name plus its options.
                                $cmd
                        }
                fi
        fi

        # check that NFS responds to rpc requests
        if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
            if ctdb_check_rpc "NFS" 100003 3 >/dev/null ; then
                ctdb_counter_init "nfs_knfsd"
            else
                ctdb_counter_incr "nfs_knfsd"

                if ! ctdb_check_counter "quiet" % 10 "nfs_knfsd" ; then
                    echo "Trying to restart NFS service"
                    startstop_nfs restart >/dev/null 2>&1 &
                fi

                ctdb_check_counter "error" -ge 2 "nfs_knfsd"
            fi
        fi

        # check that lockd responds to rpc requests
        if ctdb_check_rpc "LOCKD" 100021 1 >/dev/null ; then
                (service_name="lockd"; ctdb_counter_init)
        else
                (
                        service_name="lockd"
                        ctdb_counter_incr

                        ctdb_check_counter_equal 10 || {
                                echo "Trying to restart NFS lock service"
                                startstop_nfs restart >/dev/null 2>&1 &
                                startstop_nfslock restart  >/dev/null 2>&1 &
                                # Leaves only the subshell, with success,
                                # so the failure handler below is skipped.
                                exit 0
                        }

                        ctdb_check_counter_limit 15 quiet >/dev/null
                ) || {
                        echo "$ctdb_check_rpc_out"
                        echo "Trying to restart NFS lock service"
                        startstop_nfs restart
                        startstop_nfslock restart
                        exit 1
                }
        fi

        # mount needs special handling since it is sometimes not started
        # correctly on RHEL5
        if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then
                (service_name="nfs_mountd"; ctdb_counter_init)
        else
                (
                        service_name="nfs_mountd"
                        ctdb_counter_incr

                        ctdb_check_counter_equal 5 || {
                                p="rpc.mountd"
                                cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
                                echo "Trying to restart MOUNTD [${cmd}]"
                                killall -q -9 "$p"
                                $cmd &
                                # Leaves only the subshell, with success.
                                exit 0
                        }

                        ctdb_check_counter_limit 10 quiet >/dev/null
                ) || {
                        echo "$ctdb_check_rpc_out"
                        p="rpc.mountd"
                        cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
                        echo "Trying to restart MOUNTD [${cmd}]"
                        killall -q -9 "$p"
                        $cmd &
                        exit 1
                }
        fi


        # rquotad needs special handling since it is sometimes not started
        # correctly on RHEL5
        # this is not a critical service so we don't flag the node as unhealthy
        if ! ctdb_check_rpc "RQUOTAD" 100011 1 ; then
                p="rpc.rquotad"
                cmd="${p}${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
                echo "Trying to restart RQUOTAD [${cmd}]"
                killall -q -9 "$p"
                $cmd &
        fi

        nfs_check_thread_count

        # once every 600 seconds, update the statd state database for which
        # clients need notifications
        LAST_UPDATE=$(stat --printf="%Y" "$CTDB_VARDIR/state/statd/update-trigger" 2>/dev/null)
        CURRENT_TIME=$(date +"%s")
        # A missing trigger file leaves LAST_UPDATE empty; treat that as
        # 0 explicitly so the update below is considered due.
        if [ "$CURRENT_TIME" -ge $((${LAST_UPDATE:-0} + 600)) ] ; then
            mkdir -p "$CTDB_VARDIR/state/statd"
            touch "$CTDB_VARDIR/state/statd/update-trigger"
            "$CTDB_BASE/statd-callout" updatelocal &
            "$CTDB_BASE/statd-callout" updateremote &
        fi
        ;;

    ipreallocated)
        # if the ips have been reallocated, we must restart the lockmanager
        # across all nodes and ping all statd listeners
        if [ -x "$CTDB_BASE/statd-callout" ] ; then
                "$CTDB_BASE/statd-callout" notify >/dev/null 2>&1 &
        fi
        ;;

    *)
        ctdb_standard_event_handler "$@"
        ;;
esac

exit 0