eventscripts: 60.nfs uses nfs_check_rpc_services() to check NFS RPC services
author Martin Schwenke <martin@meltin.net>
Tue, 23 Apr 2013 02:17:31 +0000 (12:17 +1000)
committer Martin Schwenke <martin@meltin.net>
Tue, 7 May 2013 02:55:09 +0000 (12:55 +1000)
* New directory nfs-rpc-checks.d/ replaces hardcoded rules in 60.nfs

* Installation and packaging additions to handle nfs-rpc-checks.d/

* Unit test updates, including deleting one test that only
  sanity-checked the test infrastructure

* Test infrastructure changes to use nfs-rpc-checks.d/

Note that this removes support for $CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK in
60.nfs.  To get the equivalent behaviour, edit 20.nfsd.check and
remove or comment out all lines.

Signed-off-by: Martin Schwenke <martin@meltin.net>
14 files changed:
Makefile.in
config/events.d/60.nfs
config/nfs-rpc-checks.d/10.statd.check [new file with mode: 0644]
config/nfs-rpc-checks.d/20.nfsd.check [new file with mode: 0644]
config/nfs-rpc-checks.d/30.lockd.check [new file with mode: 0644]
config/nfs-rpc-checks.d/40.mountd.check [new file with mode: 0644]
config/nfs-rpc-checks.d/50.rquotad.check [new file with mode: 0644]
packaging/RPM/ctdb.spec.in
tests/INSTALL
tests/eventscripts/60.nfs.monitor.100.sh [deleted file]
tests/eventscripts/60.nfs.monitor.112.sh
tests/eventscripts/etc-ctdb/nfs-rpc-checks.d [new symlink]
tests/eventscripts/etc-ctdb/rc.local.nfs.monitor.get-limits [deleted file]
tests/eventscripts/scripts/local.sh

index 87a3bd32a8015a9745f6170d68ae2f21ce910ca5..7ffc3dc750d3b0f4d7e3446dd5ebcc14b20fd85c 100755 (executable)
@@ -317,6 +317,7 @@ install: all manpages $(PMDA_INSTALL)
        mkdir -p $(DESTDIR)$(includedir)
        mkdir -p $(DESTDIR)$(etcdir)/ctdb
        mkdir -p $(DESTDIR)$(etcdir)/ctdb/events.d
+       mkdir -p $(DESTDIR)$(etcdir)/ctdb/nfs-rpc-checks.d
        mkdir -p $(DESTDIR)$(docdir)/ctdb
        ${INSTALLCMD} -m 644 ctdb.pc $(DESTDIR)$(libdir)/pkgconfig
        ${INSTALLCMD} -m 755 bin/ctdb $(DESTDIR)$(bindir)
@@ -355,6 +356,11 @@ install: all manpages $(PMDA_INSTALL)
        ${INSTALLCMD} -m 755 config/events.d/62.cnfs $(DESTDIR)$(etcdir)/ctdb/events.d
        ${INSTALLCMD} -m 755 config/events.d/70.iscsi $(DESTDIR)$(etcdir)/ctdb/events.d
        ${INSTALLCMD} -m 755 config/events.d/91.lvs $(DESTDIR)$(etcdir)/ctdb/events.d
+       ${INSTALLCMD} -m 644 config/nfs-rpc-checks.d/10.statd.check $(DESTDIR)$(etcdir)/ctdb/nfs-rpc-checks.d/
+       ${INSTALLCMD} -m 644 config/nfs-rpc-checks.d/20.nfsd.check $(DESTDIR)$(etcdir)/ctdb/nfs-rpc-checks.d/
+       ${INSTALLCMD} -m 644 config/nfs-rpc-checks.d/30.lockd.check $(DESTDIR)$(etcdir)/ctdb/nfs-rpc-checks.d/
+       ${INSTALLCMD} -m 644 config/nfs-rpc-checks.d/40.mountd.check $(DESTDIR)$(etcdir)/ctdb/nfs-rpc-checks.d/
+       ${INSTALLCMD} -m 644 config/nfs-rpc-checks.d/50.rquotad.check $(DESTDIR)$(etcdir)/ctdb/nfs-rpc-checks.d/
        ${INSTALLCMD} -m 755 tools/ctdb_diagnostics $(DESTDIR)$(bindir)
        ${INSTALLCMD} -m 755 tools/onnode $(DESTDIR)$(bindir)
        if [ -f doc/ctdb.1 ];then ${INSTALLCMD} -d $(DESTDIR)$(mandir)/man1; fi
index ac82b56dd9a8e2d71068196af699f1d605f1ac9b..46456bc44232e59c0c93877e4138103dce5d4239 100755 (executable)
@@ -77,34 +77,7 @@ case "$1" in
 
        update_tickles 2049
 
-       # check that statd responds to rpc requests
-       nfs_check_rpc_service "statd" \
-           -ge 6 "verbose unhealthy" \
-           -eq 4 "verbose restart" \
-           -eq 2 "restart:bs"
-
-       # check that NFS responds to rpc requests
-       if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
-           nfs_check_rpc_service "knfsd" \
-               -ge 6 "verbose unhealthy" \
-               -eq 4 "verbose restart" \
-               -eq 2 "restart:bs"
-       fi
-
-       # check that lockd responds to rpc requests
-       nfs_check_rpc_service "lockd" \
-           -ge 15 "verbose restart:b unhealthy" \
-           -eq 10 "restart:bs"
-
-       # mountd is sometimes not started correctly on RHEL5
-       nfs_check_rpc_service "mountd" \
-           -ge 10 "verbose restart:b unhealthy" \
-           -eq 5 "restart:b"
-
-       # rquotad is sometimes not started correctly on RHEL5
-       # not a critical service so we dont flag the node as unhealthy
-       nfs_check_rpc_service "rquotad" \
-           -gt 0 "verbose restart:b"
+       nfs_check_rpc_services
 
        # once every 600 seconds, update the statd state database for which
        # clients need notifications
diff --git a/config/nfs-rpc-checks.d/10.statd.check b/config/nfs-rpc-checks.d/10.statd.check
new file mode 100644 (file)
index 0000000..dd5e15d
--- /dev/null
@@ -0,0 +1,3 @@
+-ge 6 verbose unhealthy
+-eq 4 verbose restart
+-eq 2 restart:bs
diff --git a/config/nfs-rpc-checks.d/20.nfsd.check b/config/nfs-rpc-checks.d/20.nfsd.check
new file mode 100644 (file)
index 0000000..dd5e15d
--- /dev/null
@@ -0,0 +1,3 @@
+-ge 6 verbose unhealthy
+-eq 4 verbose restart
+-eq 2 restart:bs
diff --git a/config/nfs-rpc-checks.d/30.lockd.check b/config/nfs-rpc-checks.d/30.lockd.check
new file mode 100644 (file)
index 0000000..4bda6c3
--- /dev/null
@@ -0,0 +1,2 @@
+-ge 15 verbose restart:b unhealthy
+-eq 10 restart:bs
diff --git a/config/nfs-rpc-checks.d/40.mountd.check b/config/nfs-rpc-checks.d/40.mountd.check
new file mode 100644 (file)
index 0000000..6b4f801
--- /dev/null
@@ -0,0 +1,2 @@
+-ge 10 verbose restart:b unhealthy
+-eq 5 restart:b
diff --git a/config/nfs-rpc-checks.d/50.rquotad.check b/config/nfs-rpc-checks.d/50.rquotad.check
new file mode 100644 (file)
index 0000000..1ebb828
--- /dev/null
@@ -0,0 +1 @@
+-gt 0 verbose restart:b
index cb860443e531209a6d964759b80a4b3edc68e0ae..83261c5483997bf956ce48e31df176a25975bdbf 100644 (file)
@@ -156,6 +156,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_sysconfdir}/ctdb/events.d/62.cnfs
 %{_sysconfdir}/ctdb/events.d/70.iscsi
 %{_sysconfdir}/ctdb/events.d/91.lvs
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/10.statd.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/20.nfsd.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/30.lockd.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/40.mountd.check
+%config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/50.rquotad.check
 %{_sysconfdir}/ctdb/statd-callout
 %{_sbindir}/ctdbd
 %{_bindir}/ctdb
index 8c3f777ea4e72e70e919abdb14f0152054d0bbb6..5581989619b542c3f1c8e6c874e321c0e5bedea8 100755 (executable)
@@ -71,7 +71,7 @@ for d in $data_subdirs ; do
 done
 # Some of the unit tests have relative symlinks back to in-tree bits
 # and pieces.  These links will be broken!
-for i in "events.d" "functions" ; do
+for i in "events.d" "functions" "nfs-rpc-checks.d" ; do
     ln -sf "${etcdir}/ctdb/${i}" "${ctdb_datadir}/eventscripts/etc-ctdb/${i}"
 done
 # test_wrap needs to set TEST_BIN_DIR
diff --git a/tests/eventscripts/60.nfs.monitor.100.sh b/tests/eventscripts/60.nfs.monitor.100.sh
deleted file mode 100755 (executable)
index e846d82..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/sh
-
-. "${TEST_SCRIPTS_DIR}/unit.sh"
-
-define_test "get RPC service fail limits/actions"
-
-setup_nfs
-
-set -e
-
-rm -f "$rpc_fail_limits_file"
-CTDB_RC_LOCAL="$CTDB_BASE/rc.local.nfs.monitor.get-limits" \
-    "${CTDB_BASE}/events.d/60.nfs" "monitor" >"$rpc_fail_limits_file"
-
-services="knfsd|mountd|rquotad|lockd|statd"
-
-echo "Doing rough check of file format..."
-
-! grep -v -E "^(${services}) " "$rpc_fail_limits_file"
index 666a38aaaf62e2e43a79674e06f18810ef048db6..c5c39b26e6792222534a88dbfd4bf17c0c313f4c 100755 (executable)
@@ -10,6 +10,6 @@ setup_nfs
 rpc_services_down "nfs"
 
 iterate_test 6 'ok_null' \
-    2 'rpc_set_service_failure_response "knfsd"' \
-    4 'rpc_set_service_failure_response "knfsd"' \
-    6 'rpc_set_service_failure_response "knfsd"'
+    2 'rpc_set_service_failure_response "nfsd"' \
+    4 'rpc_set_service_failure_response "nfsd"' \
+    6 'rpc_set_service_failure_response "nfsd"'
diff --git a/tests/eventscripts/etc-ctdb/nfs-rpc-checks.d b/tests/eventscripts/etc-ctdb/nfs-rpc-checks.d
new file mode 120000 (symlink)
index 0000000..991b966
--- /dev/null
@@ -0,0 +1 @@
+../../../config/nfs-rpc-checks.d
\ No newline at end of file
diff --git a/tests/eventscripts/etc-ctdb/rc.local.nfs.monitor.get-limits b/tests/eventscripts/etc-ctdb/rc.local.nfs.monitor.get-limits
deleted file mode 100755 (executable)
index 96e4cff..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-# Hey Emacs, this is a -*- shell-script -*- !!!  :-)
-
-# This scripts nobble the 60.nfs monitor event so that it prints out
-# the service fail limits for each RPC service.
-
-CTDB_INIT_STYLE="redhat"
-PATH="${EVENTSCRIPTS_PATH}:$PATH"
-
-service () { : ; }
-
-update_tickles () { : ; }
-
-ctdb_setup_service_state_dir "nfs"
-
-CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK="no"
-CTDB_NFS_SKIP_SHARE_CHECK="yes"
-
-# Ugly but necessary - if this file was touched less then 60 seconds
-# ago then this skips some code.
-touch "$service_state_dir/update-trigger"
-
-nfs_check_rpc_service ()
-{
-    echo "$*"
-}
index 5ce6b7316114e385f681d3b2bfbce111b6dd368a..3f558309251a71cfd27bb927f5272e9d02b87220 100644 (file)
@@ -554,23 +554,10 @@ setup_nfs ()
     export FAKE_RPCINFO_SERVICES=""
 
     export CTDB_NFS_SKIP_SHARE_CHECK="no"
-    export CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK="no"
 
     # Reset the failcounts for nfs services.
     eventscript_call eval rm -f '$ctdb_fail_dir/nfs_*'
 
-    rpc_fail_limits_file="${EVENTSCRIPTS_TESTS_VAR_DIR}/rpc_fail_limits"
-
-    # Force this file to exist so tests can be individually run.
-    if [ ! -f "$rpc_fail_limits_file" ] ; then
-       # This is gross... but is needed to fake through the nfs monitor event.
-       eventscript_call ctdb_service_managed
-       service "nfs" force-started  # might not be enough
-       CTDB_RC_LOCAL="$CTDB_BASE/rc.local.nfs.monitor.get-limits" \
-           CTDB_MANAGES_NFS="yes" \
-           "${CTDB_BASE}/events.d/60.nfs" "monitor" >"$rpc_fail_limits_file"
-    fi
-    
     if [ "$1" != "down" ] ; then
        debug "Setting up NFS environment: all RPC services up, NFS managed by CTDB"
 
@@ -648,31 +635,14 @@ rpc_set_service_failure_response ()
     # Default
     ok_null
 
-    _ts=$(sed -n -e "s@^${_progname} @@p" "$rpc_fail_limits_file")
-
-    while [ -n "$_ts" ] ; do
-       # Get the triple: operator, fail limit and actions.
-       _op="${_ts%% *}" ; _ts="${_ts#* }"
-       _li="${_ts%% *}" ; _ts="${_ts#* }"
-       # We've lost some of the quoting but we can simulate
-       # because we know an operator is always the first in a
-       # triple.
-       _actions=""
-       while [ -n "$_ts" ] ; do
-           # If this is an operator then we've got all of the
-           # actions.
-           case "$_ts" in
-               -*) break ;;
-           esac
-
-           _actions="${_actions}${_actions:+ }${_ts%% *}"
-           # Special case for end of list.
-           if [ "$_ts" != "${_ts#* }" ] ; then
-               _ts="${_ts#* }"
-           else
-               _ts=""
-           fi
-       done
+    _file=$(ls "${CTDB_BASE}/nfs-rpc-checks.d/"[0-9][0-9]."${_progname}.check")
+    [ -r "$_file" ] || die "RPC check file \"$_file\" does not exist or is not unique"
+
+    while read _op _li _actions ; do
+       # Skip comments
+       case "$_op" in
+           \#*) continue ;;
+       esac
 
        if [ "$_numfails" "$_op" "$_li" ] ; then
            _out=""
@@ -683,7 +653,7 @@ rpc_set_service_failure_response ()
                        _ver=1
                        _pn="$_progname"
                        case "$_progname" in
-                           knfsd) _ver=3 ; _pn="nfs" ;;
+                           nfsd) _ver=3 ; _pn="nfs" ;;
                            lockd) _ver=4 ; _pn="nlockmgr" ;;
                            statd) _pn="status" ;;
                        esac
@@ -701,13 +671,13 @@ program $_pn version $_ver is not available"
                                ;;
                        esac
                        case "${_progname}${_action#restart}" in
-                           knfsd)
+                           nfsd)
                                _t="\
 Trying to restart NFS service
 Starting nfslock: OK
 Starting nfs: OK"
                                ;;
-                           knfsd:bs)
+                           nfsd:bs)
                                _t="Trying to restart NFS service"
                                ;;
                            lockd|lockd:b)
@@ -731,7 +701,7 @@ Starting nfslock: OK"
            required_result $_rc "$_out"
            return
        fi
-    done
+    done <"$_file"
 }
 
 ######################################################################