Try to restart LOCKD if "service nfslock start" fails.
[sahlberg/ctdb.git] / config / events.d / 60.nfs
1 #!/bin/sh
2 # script to manage nfs in a clustered environment
3
# Create the state directories this script relies on, then bring NFS up
# from a known-clean state by stopping it before starting it.
# Globals:   CTDB_BASE (read), STATD_SHARED_DIRECTORY (read)
# Arguments: none
# Returns:   status of the final "startstop_nfs start" call
start_nfs() {
        # The paths are quoted so that a directory name containing
        # whitespace or glob characters is created as ONE directory
        # instead of being word-split into several unrelated ones.
        /bin/mkdir -p "$CTDB_BASE/state/nfs"
        /bin/mkdir -p "$CTDB_BASE/state/statd/ip"
        /bin/mkdir -p "$STATD_SHARED_DIRECTORY"

        # stop first, then start: a restart that does not depend on the
        # service currently being down
        startstop_nfs stop
        startstop_nfs start
}
11
# Run "$CTDB_BASE/statd-callout notify" in the background, if that helper
# exists and is executable.
# Original author's intent: always restart the lock manager so that a
# cluster-wide grace period begins whenever IP addresses have changed.
# Globals:   CTDB_BASE (read)
# Arguments: none
# Outputs:   nothing; the helper's stdout and stderr are discarded
# Returns:   0 once the helper has been launched; the (non-zero) status
#            of the -x test when the helper is missing or not executable
reconfigure_nfs() {
        # Quoted so that a base directory containing whitespace neither
        # breaks the test ("too many arguments") nor the invocation.
        # The "&&" form is kept on purpose: it preserves the original
        # return status for callers.
        [ -x "$CTDB_BASE/statd-callout" ] && {
                "$CTDB_BASE/statd-callout" notify &
        } >/dev/null 2>&1
}
20
# Load the shared helper library. The path is quoted so that a CTDB_BASE
# containing whitespace is still sourced as a single file name.
# NOTE(review): loadconfig, startstop_nfs and the ctdb_* helpers used in
# this script are not defined in this file; presumably they come from
# this library - confirm.
. "$CTDB_BASE/functions"

# Describe this service: its name and the commands that start, stop and
# reconfigure it.
# NOTE(review): presumably consumed by the ctdb_service_* helpers -
# confirm against the functions library.
service_name="nfs"
service_start="start_nfs"
service_stop="startstop_nfs stop"
service_reconfigure="reconfigure_nfs"

loadconfig

# Without a shared statd directory there is nothing for this script to
# do, so leave successfully before touching any service.
[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0

ctdb_start_stop_service
33
# Dispatch on the event name passed as $1.
#   $3 is used by takeip/releaseip as the name of a per-address marker
#   file under $CTDB_BASE/state/statd/ip (presumably the public IP
#   address - confirm against the caller).
# Exit status: 0 unless a monitor check fails (see the monitor arm).
case "$1" in
     startup)
        ctdb_service_start
        ;;

     shutdown)
        ctdb_service_stop
        ;;

     takeip)
        ctdb_service_set_reconfigure
        # marker path quoted: must stay a single word whatever $3 holds
        touch "$CTDB_BASE/state/statd/ip/$3"
        ;;

     releaseip)
        ctdb_service_set_reconfigure
        /bin/rm -f "$CTDB_BASE/state/statd/ip/$3"
        ;;

     recovered)
        # if we have taken or released any ips we must
        # restart the lock manager so that we enter a clusterwide grace period
        if ctdb_service_needs_reconfigure ; then
            ctdb_service_reconfigure
        fi
        ;;

     monitor)
        # a pending reconfigure takes priority; the health checks below
        # are skipped for this round
        if ctdb_service_needs_reconfigure ; then
            ctdb_service_reconfigure
            exit 0
        fi

        # check that statd responds to rpc requests
        # if statd is not running we try to restart it
        rpcinfo -u localhost 100024 1 > /dev/null || {
                RPCSTATDOPTS=""
                [ -n "$STATD_HOSTNAME" ] && RPCSTATDOPTS="$RPCSTATDOPTS -n $STATD_HOSTNAME"
                [ -n "$STATD_PORT" ] && RPCSTATDOPTS="$RPCSTATDOPTS -p $STATD_PORT"
                [ -n "$STATD_OUTGOING_PORT" ] && RPCSTATDOPTS="$RPCSTATDOPTS -o $STATD_OUTGOING_PORT"
                # deliberately unquoted: the option string must be split
                # into separate arguments
                # shellcheck disable=SC2086
                rpc.statd $RPCSTATDOPTS
                echo "ERROR: STATD is not responding. Trying to restart it. [rpc.statd $RPCSTATDOPTS]"
        }

        # check that NFS responds to rpc requests
        # (the check runs in a subshell, so an exit inside the helper
        # cannot terminate this script before the restart is attempted)
        if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
            if ! (ctdb_check_rpc "NFS" 100003 3) ; then
                echo "Trying to restart NFS service"
                startstop_nfs restart
                exit 1
            fi
        fi

        # and that its directories are available
        # (strip the trailing client column from each exportfs line so
        # that only the exported path is handed to the checker)
        if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
            exportfs | grep -v '^#' | grep '^/' |
            sed -e 's/[[:space:]]*[^[:space:]]*$//' |
            ctdb_check_directories || exit $?
        fi

        # check that lockd responds to rpc requests
        if ! (ctdb_check_rpc "lockd" 100021 1) ; then
            echo "Trying to restart lock manager service"
            startstop_nfslock restart
            exit 1
        fi

        echo "$STATD_SHARED_DIRECTORY" | ctdb_check_directories "statd" || \
            exit $?

        # mount needs special handling since it is sometimes not started
        # correctly on RHEL5
        rpcinfo -u localhost 100005 1 > /dev/null || {
                echo "ERROR: MOUNTD is not running. Trying to restart it."
                RPCMOUNTDOPTS=""
                [ -n "$MOUNTD_PORT" ] && RPCMOUNTDOPTS="$RPCMOUNTDOPTS -p $MOUNTD_PORT"
                killall -q -9 rpc.mountd
                # deliberately unquoted: options must word-split
                # shellcheck disable=SC2086
                rpc.mountd $RPCMOUNTDOPTS &
                exit 1
        }

        # rquotad needs special handling since it is sometimes not started
        # correctly on RHEL5
        # this is not a critical service so we don't flag the node as
        # unhealthy (no "exit 1" here, unlike the mountd check above)
        rpcinfo -u localhost 100011 1 > /dev/null || {
                echo "ERROR: RQUOTAD is not running. Trying to restart it."
                RPCRQUOTADOPTS=""
                [ -n "$RQUOTAD_PORT" ] && RPCRQUOTADOPTS="$RPCRQUOTADOPTS -p $RQUOTAD_PORT"
                killall -q -9 rpc.rquotad
                # deliberately unquoted: options must word-split
                # shellcheck disable=SC2086
                rpc.rquotad $RPCRQUOTADOPTS &
        }
        ;;

    *)
        # every other event is handed to the shared default handler
        ctdb_standard_event_handler "$@"
        ;;
esac

exit 0