eventscript: Fix link creation failure if the link already exists but the target path...
[ctdb.git] / config / events.d / 60.ganesha
#!/bin/sh
# script to manage nfs in a clustered environment

# Work out CTDB_BASE (the parent of the directory containing this script)
# unless the caller already supplied it.  The assignment is kept separate
# from the export and every expansion is quoted so that a path containing
# whitespace is not split.
if [ -z "$CTDB_BASE" ] ; then
    CTDB_BASE=$(cd -P "$(dirname "$0")" ; dirname "$PWD")
    export CTDB_BASE
fi

# Pull in the shared helper functions used throughout this script.
. "$CTDB_BASE/functions"

# Paths used by this script:
#   GANRECDIR   - replaced by a symlink into the cluster filesystem
#                 (see create_ganesha_recdirs)
#   GANRECDIR2  - sub-directory of GANRECDIR where takeip/releaseip events
#                 drop marker files
#   GPFS_STATE  - command queried by get_cluster_fs_state
#   GANRECDIR3  - directory inspected by monitor_ganesha_nfsd
GANRECDIR="/var/lib/nfs/ganesha"
GANRECDIR2="/var/lib/nfs/ganesha/recevents"
GPFS_STATE="/usr/lpp/mmfs/bin/mmgetstate"
GANRECDIR3="/var/lib/nfs/ganesha_local"
13
14
# Start the service: force a stop first so that a start always begins
# from a clean state, then switch on the tcp_tw_recycle proc setting.
# Returns the status of the final set_proc call.
service_start() {
    startstop_ganesha stop
    startstop_ganesha start

    set_proc "sys/net/ipv4/tcp_tw_recycle" 1
}
21
# Stop the service.  Returns whatever startstop_ganesha returns.
service_stop() {
    startstop_ganesha stop
}
26
service_reconfigure ()
{
    # if the ips have been reallocated, we must restart ganesha
    # across all nodes and ping all statd listeners
    #
    # Returns non-zero (without doing anything) when statd-callout is not
    # an executable file; otherwise launches "statd-callout notify" in the
    # background with all of its output discarded and returns 0.
    # The path is quoted so that a CTDB_BASE containing whitespace works.
    [ -x "$CTDB_BASE/statd-callout" ] || return 1

    "$CTDB_BASE/statd-callout" notify >/dev/null 2>&1 &
}
35
# Load the "nfs" configuration via the helper from the functions file.
loadconfig "nfs"

# Fall back to GPFS when no cluster filesystem type has been configured.
if [ -z "$CTDB_CLUSTER_FILESYSTEM_TYPE" ] ; then
    CTDB_CLUSTER_FILESYSTEM_TYPE="gpfs"
fi

service_name="nfs-ganesha-${CTDB_CLUSTER_FILESYSTEM_TYPE}"

# Nothing to do unless the NFS server mode is "ganesha"
# (CTDB_NFS_SERVER_MODE takes precedence over NFS_SERVER_MODE).
if [ "${CTDB_NFS_SERVER_MODE:-${NFS_SERVER_MODE}}" != "ganesha" ] ; then
    exit 0
fi

ctdb_setup_service_state_dir
ctdb_start_stop_service

# Leave quietly when this service is not managed by ctdb.
if ! is_ctdb_managed_service ; then
    exit 0
fi

ctdb_service_check_reconfigure
52
# Print the state of the cluster filesystem on stdout.
#
# Only "gpfs" is handled: the third column of every line after the first
# three lines of $GPFS_STATE output is collected.  Any other filesystem
# type is reported through die.
get_cluster_fs_state() {
    if [ "$CTDB_CLUSTER_FILESYSTEM_TYPE" = "gpfs" ] ; then
        STATE=$($GPFS_STATE | awk 'NR > 3 {printf "%-6s", $3}')
        # Deliberately unquoted: word splitting strips the padding that
        # the "%-6s" format adds to short state names.
        echo $STATE
    else
        die "File system $CTDB_CLUSTER_FILESYSTEM_TYPE not supported"
    fi
}
65
create_ganesha_recdirs ()
{
    # Make $GANRECDIR a symlink to the ".ganesha" directory on the first
    # (sorted) mounted filesystem of type $CTDB_CLUSTER_FILESYSTEM_TYPE,
    # then create $GANRECDIR2 and $GANRECDIR3.
    #
    # Exits the script with status 1 when no such filesystem is mounted.
    # A failure to create the link is only reported ("ln failed"); the
    # function carries on regardless.

    # Query the mount table once and reuse the result.
    _mounts=$(mount -t "$CTDB_CLUSTER_FILESYSTEM_TYPE")
    if [ -z "$_mounts" ]; then
        echo "startup $CTDB_CLUSTER_FILESYSTEM_TYPE not ready"
        exit 1
    fi
    MNTPT=$(printf '%s\n' "$_mounts" | sort | awk '{print $3}' | head -n 1)
    _link_dst="$MNTPT/.ganesha"
    mkdir -p "$_link_dst"

    if [ -L "$GANRECDIR" ]; then
        # An existing link is only kept when it already points at the
        # wanted target.  A stale link is removed first because "ln -s"
        # onto a link that resolves to a directory would create the new
        # link inside that directory instead of replacing it.
        if [ "$(readlink "$GANRECDIR")" != "$_link_dst" ]; then
            rm -f "$GANRECDIR"
        fi
    elif [ -e "$GANRECDIR" ]; then
        # A real file or directory is in the way: remove it.
        rm -rf "$GANRECDIR"
    fi

    # Not an "else": this also re-creates a link that was removed above.
    if [ ! -L "$GANRECDIR" ]; then
        if ! ln -s "$_link_dst" "$GANRECDIR" ; then
            echo "ln failed"
        fi
    fi

    mkdir -p "$GANRECDIR2"
    mkdir -p "$GANRECDIR3"
}
90
monitor_ganesha_nfsd ()
{
    # Health check for the ganesha daemon, in two parts:
    #  1. process check - the PID in $PIDFILE must belong to the ganesha
    #     binary; if not, and the cluster filesystem is "active", try a
    #     restart and bump the "<service>_process" failure counter.
    #  2. progress check - based on the files in $GANRECDIR3: restart when
    #     enough "red" files have piled up or the newest file is too old.
    #     Skipped when CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK is "yes".
    # service_name is modified; on return it holds the "_service" name.
    create_ganesha_recdirs
    service_name=${service_name}_process

    PIDFILE="/var/run/ganesha.pid"
    CUR_STATE=$(get_cluster_fs_state)
    GANESHA="/usr/bin/$CTDB_CLUSTER_FILESYSTEM_TYPE.ganesha.nfsd"
    if { read PID < "$PIDFILE" && \
        grep "$GANESHA" "/proc/$PID/cmdline" ; } >/dev/null 2>&1 ; then
        ctdb_counter_init "$service_name"
    else
        # CUR_STATE is quoted: an empty or multi-word state must simply
        # compare unequal instead of making the test itself fail.
        if [ "$CUR_STATE" = "active" ]; then
            echo "Trying fast restart of NFS service"
            startstop_ganesha restart
            ctdb_counter_incr "$service_name"
            ctdb_check_counter "error" "-ge" "6" "$service_name"
        fi
    fi

    service_name="nfs-ganesha-${CTDB_CLUSTER_FILESYSTEM_TYPE}_service"
    # check that NFS is posting forward progress
    if [ "$CUR_STATE" = "active" ] && \
        [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
        MAXREDS=2       # restart once this many "red" files exist
        MAXSTALL=120    # restart once the newest file is this many seconds old
        RESTART=0

        NUMREDS=$(ls "$GANRECDIR3" | grep -c "red")
        # Text before the first "_" in the name of the most recently
        # modified file; used below as a time in seconds since the epoch.
        LASTONE=$(ls -t "$GANRECDIR3" | sed 's/_/ /' | awk 'NR == 1 {print $1}')
        # Beware of startup
        if [ -z "$LASTONE" ] ; then
            LASTONE=$(date +"%s")
        fi
        TNOW=$(date +"%s")
        TSTALL=$(($TNOW - $LASTONE))
        if [ "$NUMREDS" -ge "$MAXREDS" ] ; then
            echo "restarting because of $NUMREDS red conditions"
            RESTART=1
            ctdb_counter_incr "$service_name"
            ctdb_check_counter "error" "-ge" "6" "$service_name"
        fi
        if [ "$TSTALL" -ge "$MAXSTALL" ] ; then
            echo "restarting because of $TSTALL second stall"
            RESTART=1
            ctdb_counter_incr "$service_name"
            ctdb_check_counter "error" "-ge" "6" "$service_name"
        fi
        if [ "$RESTART" -gt 0 ] ; then
            startstop_ganesha restart
        else
            ctdb_counter_init "$service_name"
        fi
    fi
}
145
146 ############################################################
147
# Dispatch on the event name passed as the first argument.
case "$1" in
     init)
        # read statd from persistent database
        ;;
     startup)
        create_ganesha_recdirs
        ctdb_service_start
        ;;

     shutdown)
        ctdb_service_stop
        ;;

     takeip|releaseip)
        # Both events are handled identically: when a second argument is
        # present (and the filesystem is gpfs) drop a marker file in
        # $GANRECDIR2 named
        #   <event>_<epoch seconds>_<node number>_<$3>_<$4>_<$2>
        # and then flag the service for reconfiguration.
        # NOTE(review): $2/$3/$4 are presumably interface, IP address and
        # netmask as supplied by the caller - confirm.
        if [ -n "$2" ] ; then
            case "$CTDB_CLUSTER_FILESYSTEM_TYPE" in
                gpfs)
                    NNUM=$(/usr/lpp/mmfs/bin/mmlsconfig myNodeConfigNumber | awk '{print $2}')
                    TDATE=$(date +"%s")
                    TOUCHTGT="${1}_${TDATE}_${NNUM}_${3}_${4}_${2}"
                    touch "$GANRECDIR2/$TOUCHTGT"
                    ;;
            esac
        fi
        ctdb_service_set_reconfigure
        ;;

     monitor)
        update_tickles 2049

        # check that statd responds to rpc requests
        # if statd is not running we try to restart it
        # we only do this IF we have a rpc.statd command.
        # For platforms where rpc.statd does not exist, we skip
        # the check completely
        # ("command -v" is used for the lookup because, unlike "which",
        # it is always available in a POSIX shell.)
        p="rpc.statd"
        command -v "$p" >/dev/null 2>&1 && \
            nfs_check_rpc_service "statd" \
                -ge 6 "verbose unhealthy" \
                -eq 4 "verbose restart" \
                -eq 2 "restart:b"

        if [ "$CTDB_SKIP_GANESHA_NFSD_CHECK" != "yes" ] ; then
            monitor_ganesha_nfsd
        fi

        # rquotad is sometimes not started correctly on RHEL5
        # not a critical service so we dont flag the node as unhealthy
        nfs_check_rpc_service "rquotad" \
            -gt 0 "verbose restart:b"

        # Check that directories for shares actually exist.
        # The text between the first pair of double quotes on every line
        # containing "Path" in the exports file is fed to
        # ctdb_check_directories; a failure there ends the script with
        # that status.
        [ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
            grep Path "/etc/ganesha/$CTDB_CLUSTER_FILESYSTEM_TYPE.ganesha.exports.conf" |
            cut -f2 -d\" | ctdb_check_directories
        } || exit $?

        # once every 60 seconds, update the statd state database for which
        # clients need notifications
        nfs_statd_update 60
        ;;

     *)
        # Anything else goes to the generic handler.
        ctdb_standard_event_handler "$@"
        ;;
esac

exit 0