ctdb-scripts: Dump stack traces of smbd processes after shutdown
[samba.git] / ctdb / tests / eventscripts / scripts / local.sh
index 85894a41b826bde99b9303c01bdb9636f0373a0e..ea900ebcd0d8c99e32ef48af9915c9c46b843bb6 100644 (file)
@@ -9,28 +9,36 @@ EVENTSCRIPTS_PATH=""
 
 if [ -d "${TEST_SUBDIR}/stubs" ] ; then
     EVENTSCRIPTS_PATH="${TEST_SUBDIR}/stubs"
+    case "$EVENTSCRIPTS_PATH" in
+       /*) : ;;
+       *) EVENTSCRIPTS_PATH="${PWD}/${EVENTSCRIPTS_PATH}" ;;
+    esac
+    export CTDB_HELPER_BINDIR="$EVENTSCRIPTS_PATH"
 fi
 
 export EVENTSCRIPTS_PATH
 
 PATH="${EVENTSCRIPTS_PATH}:${PATH}"
 
+export CTDB="ctdb"
+
 export EVENTSCRIPTS_TESTS_VAR_DIR="${TEST_VAR_DIR}/unit_eventscripts"
 if [ -d "$EVENTSCRIPTS_TESTS_VAR_DIR" -a \
     "$EVENTSCRIPTS_TESTS_VAR_DIR" != "/unit_eventscripts" ] ; then
     rm -r "$EVENTSCRIPTS_TESTS_VAR_DIR"
 fi
 mkdir -p "$EVENTSCRIPTS_TESTS_VAR_DIR"
-export CTDB_VARDIR="$EVENTSCRIPTS_TESTS_VAR_DIR/ctdb"
+export CTDB_SCRIPT_VARDIR="$EVENTSCRIPTS_TESTS_VAR_DIR/script-state"
 
-export CTDB_LOGFILE="${EVENTSCRIPTS_TESTS_VAR_DIR}/log.ctdb"
-touch "$CTDB_LOGFILE" || die "Unable to create CTDB_LOGFILE=$CTDB_LOGFILE"
+export CTDB_LOGGING="file:${EVENTSCRIPTS_TESTS_VAR_DIR}/log.ctdb"
+touch "${CTDB_LOGGING#file:}" || \
+    die "Unable to setup logging for \"$CTDB_LOGGING\""
 
-if [ -d "${TEST_SUBDIR}/etc" ] ; then    
+if [ -d "${TEST_SUBDIR}/etc" ] ; then
     cp -a "${TEST_SUBDIR}/etc" "$EVENTSCRIPTS_TESTS_VAR_DIR"
-    export CTDB_ETCDIR="${EVENTSCRIPTS_TESTS_VAR_DIR}/etc"
+    export CTDB_SYS_ETCDIR="${EVENTSCRIPTS_TESTS_VAR_DIR}/etc"
 else
-    die "Unable to setup \$CTDB_ETCDIR"
+    die "Unable to setup \$CTDB_SYS_ETCDIR"
 fi
 
 if [ -d "${TEST_SUBDIR}/etc-ctdb" ] ; then
@@ -74,17 +82,6 @@ else
     debug () { : ; }
 fi
 
-eventscripts_tests_cleanup_hooks=""
-
-# This loses quoting!
-eventscripts_test_add_cleanup ()
-{
-    eventscripts_tests_cleanup_hooks="${eventscripts_tests_cleanup_hooks}${eventscripts_tests_cleanup_hooks:+ ; }$*"
-}
-
-trap 'eval $eventscripts_tests_cleanup_hooks' 0
-
-
 ######################################################################
 
 # General setup fakery
@@ -118,7 +115,10 @@ setup_generic ()
 
 
     export CTDB_DBDIR="${EVENTSCRIPTS_TESTS_VAR_DIR}/db"
-    mkdir -p "${CTDB_DBDIR}/persistent"
+    export CTDB_DBDIR_PERSISTENT="${CTDB_DBDIR}/persistent"
+    export CTDB_DBDIR_STATE="${CTDB_DBDIR}/state"
+    mkdir -p "$CTDB_DBDIR_PERSISTENT"
+    mkdir -p "$CTDB_DBDIR_STATE"
 
     export FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
     export FAKE_TDB_IS_OK
@@ -259,16 +259,43 @@ eventscript_call ()
     )
 }
 
-# Set output for ctdb command.  Option 1st argument is return code.
-ctdb_set_output ()
+# For now this creates the same public addresses each time.  However,
+# it could be made more flexible.
+#
+# Creates a new temporary file in $EVENTSCRIPTS_TESTS_VAR_DIR, exports
+# its name as CTDB_PUBLIC_ADDRESSES and fills it with a fixed list of
+# addresses: 6 on dev123 and 3 on dev456.  Takes no arguments.
+setup_public_addresses ()
 {
-    _out="$EVENTSCRIPTS_TESTS_VAR_DIR/ctdb.out"
-    cat >"$_out"
+    # Remove the file left by a previous call, but only if it lives
+    # directly in the test variable directory.
+    if [ -f "$CTDB_PUBLIC_ADDRESSES" -a \
+           "${CTDB_PUBLIC_ADDRESSES%/*}" = "$EVENTSCRIPTS_TESTS_VAR_DIR" ] ; then
+       rm "$CTDB_PUBLIC_ADDRESSES"
+    fi
 
-    _rc="$EVENTSCRIPTS_TESTS_VAR_DIR/ctdb.rc"
-    echo "${1:-0}" >"$_rc"
+    export CTDB_PUBLIC_ADDRESSES=$(mktemp \
+                                      --tmpdir="$EVENTSCRIPTS_TESTS_VAR_DIR" \
+                                      "public-addresses-XXXXXXXX")
+
+    echo "Setting up CTDB_PUBLIC_ADDRESSES=${CTDB_PUBLIC_ADDRESSES}"
+    cat >"$CTDB_PUBLIC_ADDRESSES" <<EOF
+10.0.0.1/24 dev123
+10.0.0.2/24 dev123
+10.0.0.3/24 dev123
+10.0.0.4/24 dev123
+10.0.0.5/24 dev123
+10.0.0.6/24 dev123
+10.0.1.1/24 dev456
+10.0.1.2/24 dev456
+10.0.1.3/24 dev456
+EOF
+}
 
-    eventscripts_test_add_cleanup "rm -f $_out $_rc"
+# Need to cope with ctdb_get_pnn().  If a test changes PNN then it
+# needs to be using a different state directory, otherwise the wrong
+# PNN can already be cached in the state directory.
+#
+# - 1st argument is the PNN to fake; it is exported as FAKE_CTDB_PNN.
+#
+# CTDB_SCRIPT_VARDIR is re-exported as a per-PNN subdirectory of
+# $EVENTSCRIPTS_TESTS_VAR_DIR/script-state, which is created if needed.
+ctdb_set_pnn ()
+{
+    export FAKE_CTDB_PNN="$1"
+    echo "Setting up PNN ${FAKE_CTDB_PNN}"
+
+    export CTDB_SCRIPT_VARDIR="$EVENTSCRIPTS_TESTS_VAR_DIR/script-state/${FAKE_CTDB_PNN}"
+    mkdir -p "$CTDB_SCRIPT_VARDIR"
 }
 
 setup_ctdb ()
@@ -278,23 +305,15 @@ setup_ctdb ()
     export FAKE_CTDB_NUMNODES="${1:-3}"
     echo "Setting up CTDB with ${FAKE_CTDB_NUMNODES} fake nodes"
 
-    export FAKE_CTDB_PNN="${2:-0}"
-    echo "Setting up CTDB with PNN ${FAKE_CTDB_PNN}"
-
-    export CTDB_PUBLIC_ADDRESSES="${CTDB_BASE}/public_addresses"
-    if [ -n "$3" ] ; then
-       echo "Setting up CTDB_PUBLIC_ADDRESSES: $3"
-       CTDB_PUBLIC_ADDRESSES=$(mktemp)
-       for _i in $3 ; do
-           _ip="${_i%@*}"
-           _ifaces="${_i#*@}"
-           echo "${_ip} ${_ifaces}" >>"$CTDB_PUBLIC_ADDRESSES"
-       done
-       eventscripts_test_add_cleanup "rm -f $CTDB_PUBLIC_ADDRESSES"
-    fi
+    ctdb_set_pnn "${2:-0}"
+
+    setup_public_addresses
 
     export FAKE_CTDB_STATE="$EVENTSCRIPTS_TESTS_VAR_DIR/fake-ctdb"
 
+    export FAKE_CTDB_EXTRA_CONFIG="$EVENTSCRIPTS_TESTS_VAR_DIR/fake-config.sh"
+    rm -f "$FAKE_CTDB_EXTRA_CONFIG"
+
     export FAKE_CTDB_IFACES_DOWN="$FAKE_CTDB_STATE/ifaces-down"
     mkdir -p "$FAKE_CTDB_IFACES_DOWN"
     rm -f "$FAKE_CTDB_IFACES_DOWN"/*
@@ -304,29 +323,45 @@ setup_ctdb ()
     rm -f "$FAKE_CTDB_SCRIPTSTATUS"/*
 
     export CTDB_PARTIALLY_ONLINE_INTERFACES
+
+    export FAKE_CTDB_TUNABLES_OK="MonitorInterval TDBMutexEnabled DatabaseHashSize"
+    export FAKE_CTDB_TUNABLES_OBSOLETE="EventScriptUnhealthyOnTimeout"
+}
+
+# Write standard input to $FAKE_CTDB_EXTRA_CONFIG, replacing any
+# previous content of that file.
+setup_config ()
+{
+    cat >"$FAKE_CTDB_EXTRA_CONFIG"
+}
+
+# Check that a value looks like a percentage.
+#
+# - 1st argument is the value to check.  It is accepted (return 0) if
+#   it is 1 or 2 decimal digits or exactly "100".
+#
+# - 2nd argument is optional.  If non-empty it is appended to the
+#   warning to say where the bad value came from.
+#
+# For any other value a warning is printed to stdout and 1 is returned.
+#
+# NOTE(review): as written the warning reads
+#   ... is an invalid percentage" in <2nd arg>"
+# i.e. the opening quote precedes " in ".  Confirm that this matches
+# the message the tests are expected to compare against.
+validate_percentage ()
+{
+    case "$1" in
+       [0-9]|[0-9][0-9]|100) return 0 ;;
+       *) echo "WARNING: ${1} is an invalid percentage${2:+\" in }${2}${2:+\"}"
+          return 1
+    esac
 }
 
 setup_memcheck ()
 {
+    _mem_usage="${1:-10}" # Default is 10%
+    _swap_usage="${2:-0}" # Default is  0%
+
     setup_ctdb
 
-    _swap_total="5857276"
+    _swap_total=5857276
+    _swap_free=$(( (100 - $_swap_usage) * $_swap_total / 100 ))
 
-    if [ "$1" = "bad" ] ; then
-       _swap_free="   4352"
-       _mem_cached=" 112"
-       _mem_free=" 468"
-    else
-       _swap_free="$_swap_total"
-       _mem_cached="1112"
-       _mem_free="1468"
-    fi
+    _mem_total=3940712
+    _mem_free=225268
+    _mem_buffers=146120
+    _mem_cached=$(( $_mem_total * (100 - $_mem_usage) / 100 - $_mem_free - $_mem_buffers ))
 
     export FAKE_PROC_MEMINFO="\
-MemTotal:        3940712 kB
-MemFree:          225268 kB
-Buffers:          146120 kB
-Cached:          1139348 kB
+MemTotal:        ${_mem_total} kB
+MemFree:          ${_mem_free} kB
+Buffers:          ${_mem_buffers} kB
+Cached:          ${_mem_cached} kB
 SwapCached:        56016 kB
 Active:          2422104 kB
 Inactive:        1019928 kB
@@ -340,21 +375,24 @@ SwapTotal:       ${_swap_total} kB
 SwapFree:        ${_swap_free} kB
 ..."
 
-    export FAKE_FREE_M="\
-             total       used       free     shared    buffers     cached
-Mem:          3848       3634        213          0        142       ${_mem_cached}
--/+ buffers/cache:       2379       ${_mem_free}
-Swap:         5719        246       5473"
+    export CTDB_MONITOR_MEMORY_USAGE
+    export CTDB_MONITOR_SWAP_USAGE
+}
 
-    export CTDB_MONITOR_FREE_MEMORY
-    export CTDB_MONITOR_FREE_MEMORY_WARN
-    export CTDB_CHECK_SWAP_IS_NOT_USED
+# Set up the environment for filesystem usage checks.
+#
+# - 1st argument is the percentage usage to export as FAKE_FS_USE
+#   (default 10).
+#
+# Also runs setup_ctdb and marks CTDB_MONITOR_FILESYSTEM_USAGE for
+# export so that a value assigned by the test reaches the script.
+setup_fscheck ()
+{
+    export FAKE_FS_USE="${1:-10}"  # Default is 10% usage
+
+    # Causes some variables to be exported
+    setup_ctdb
+
+    export CTDB_MONITOR_FILESYSTEM_USAGE
 }
 
+# Print, on a single line, the 2nd '|'-separated field of every line
+# after the first in the output of "ctdb ifaces -X".
 ctdb_get_interfaces ()
 {
     # The echo/subshell forces all the output onto 1 line.
-    echo $(ctdb ifaces -Y | awk -F: 'FNR > 1 {print $2}')
+    echo $(ctdb ifaces -X | awk -F'|' 'FNR > 1 {print $2}')
 }
 
 ctdb_get_1_interface ()
@@ -377,10 +415,10 @@ ctdb_get_all_public_addresses ()
 # Each line is suitable for passing to takeip/releaseip
 ctdb_get_my_public_addresses ()
 {
-    ctdb ip -v -Y | {
+    ctdb ip -v -X | {
        read _x # skip header line
 
-       while IFS=":" read _x _ip _x _iface _x ; do
+       while IFS="|" read _x _ip _x _iface _x ; do
            [ -n "$_iface" ] || continue
            while IFS="/$IFS" read _i _maskbits _x ; do
                if [ "$_ip" = "$_i" ] ; then
@@ -449,7 +487,7 @@ create_policy_routing_config ()
     fi |
     while read _dev _ip _bits ; do
        _net=$(ipv4_host_addr_to_net "$_ip" "$_bits")
-       _gw="${_net%.*}.1" # a dumb, calculated default
+       _gw="${_net%.*}.254" # a dumb, calculated default
 
        echo "$_ip $_net"
 
@@ -478,7 +516,7 @@ check_routes ()
     fi | {
        while read _dev _ip _bits ; do
            _net=$(ipv4_host_addr_to_net "$_ip" "$_bits")
-           _gw="${_net%.*}.1" # a dumb, calculated default
+           _gw="${_net%.*}.254" # a dumb, calculated default
 
            _policy_rules="${_policy_rules}
 ${CTDB_PER_IP_ROUTING_RULE_PREF}:      from $_ip lookup ctdb.$_ip "
@@ -500,7 +538,255 @@ default via $_gw dev $_dev "
 EOF
 
        simple_test_command dump_routes
-    }
+    } || test_fail
+}
+
+######################################################################
+
+# Set up the environment for LVS tests.
+#
+# - 1st argument is exported as CTDB_LVS_PUBLIC_IP.
+#
+# - 2nd argument is exported as CTDB_LVS_PUBLIC_IFACE.
+#
+# - Standard input contains 1 node per line: an IP address optionally
+#   followed by "master" or "slave-only".  The node list is written to
+#   a temporary file exported as CTDB_LVS_NODES.  The 0-based line
+#   number of a "master" line is stored in FAKE_CTDB_LVS_MASTER.
+#
+# If either argument is empty the function returns 0 without reading
+# standard input or creating the nodes file.
+#
+# The output of "ipvsadm -l -n" at setup time is kept in the global
+# $lvs_header.
+setup_ctdb_lvs ()
+{
+       lvs_state_dir="${EVENTSCRIPTS_TESTS_VAR_DIR}/lvs"
+       mkdir -p "$lvs_state_dir"
+
+       export FAKE_LVS_STATE_DIR="${lvs_state_dir}/state"
+       # Use -p so that calling this function again while the state
+       # directory still exists does not fail.
+       mkdir -p "$FAKE_LVS_STATE_DIR"
+
+       lvs_header=$(ipvsadm -l -n)
+
+       export CTDB_LVS_PUBLIC_IP="$1"
+       export CTDB_LVS_PUBLIC_IFACE="$2"
+
+       [ -n "$CTDB_LVS_PUBLIC_IP" ] || return 0
+       [ -n "$CTDB_LVS_PUBLIC_IFACE" ] || return 0
+
+       export CTDB_LVS_NODES=$(mktemp --tmpdir="$lvs_state_dir")
+       export FAKE_CTDB_LVS_MASTER=""
+
+       # Read from stdin
+       _pnn=0
+       while read _ip _opts ; do
+               case "$_opts" in
+               master)
+                       FAKE_CTDB_LVS_MASTER="$_pnn"
+                       echo "$_ip"
+                       ;;
+               slave-only)
+                       printf "%s\tslave-only\n" "$_ip"
+                       ;;
+               *)
+                       echo "$_ip"
+                       ;;
+               esac
+               _pnn=$(($_pnn + 1))
+       done >"$CTDB_LVS_NODES"
+}
+
+# Check the output of "ipvsadm -l -n".
+#
+# - If the 1st argument is "NULL" then only $lvs_header is expected.
+#
+# - Otherwise the expected output is $lvs_header followed by whatever
+#   is read from standard input.
+#
+# In both cases the expected return code is 0.
+check_ipvsadm ()
+{
+       if [ "$1" = "NULL" ] ; then
+               required_result 0 <<EOF
+$lvs_header
+EOF
+       else
+               required_result 0 <<EOF
+$lvs_header
+$(cat)
+EOF
+       fi
+
+       simple_test_command ipvsadm -l -n
+}
+
+# Check the output of "ip addr show dev lo".
+#
+# - If the 1st argument is "NULL" then only the bare loopback
+#   interface is expected.
+#
+# - Otherwise the 1st argument is the scope with which
+#   $CTDB_LVS_PUBLIC_IP is expected to be configured on lo as a /32.
+#
+# In both cases the expected return code is 0.
+check_lvs_ip ()
+{
+       _scope="$1"
+
+       if [ "$_scope" = "NULL" ] ; then
+               required_result 0 <<EOF
+1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
+    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
+EOF
+       else
+               required_result 0 <<EOF
+1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
+    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
+    inet ${CTDB_LVS_PUBLIC_IP}/32 scope ${_scope} lo
+       valid_lft forever preferred_lft forever
+EOF
+       fi
+
+       simple_test_command ip addr show dev lo
+}
+
+######################################################################
+
+# Format key/value pairs as a database dump.
+#
+# Standard input contains 1 pair per line: the key is the 1st word and
+# the value is the rest of the line.  For each pair a record is
+# printed containing the key and value, each with its length as
+# counted by "wc -c", and fixed "dmaster: 0" and "rsn: 1" lines.  A
+# final line reports the number of records.
+ctdb_catdb_format_pairs ()
+{
+    _count=0
+
+    while read _k _v ; do
+       # printf is used instead of "echo -n" because the handling of
+       # -n and of backslashes by echo differs between shells, which
+       # would give wrong lengths.
+       _kn=$(printf '%s' "$_k" | wc -c)
+       _vn=$(printf '%s' "$_v" | wc -c)
+       cat <<EOF
+key(${_kn}) = "${_k}"
+dmaster: 0
+rsn: 1
+data(${_vn}) = "${_v}"
+
+EOF
+       _count=$(($_count + 1))
+    done
+
+    echo "Dumped ${_count} records"
+}
+
+# Check the output of "ctdb catdb ctdb.tdb".
+#
+# Arguments are client IP addresses.  For each address printed by
+# ctdb_get_my_public_addresses (2nd field) and each client IP, a
+# record with key "statd-state@<server IP>@<client IP>" and value
+# $FAKE_DATE_OUTPUT is expected.  With no arguments no records are
+# expected.
+#
+# The formatted records are piped to ok as the expected output.
+# test_fail is called if the final stage of the pipeline fails.
+check_ctdb_tdb_statd_state ()
+{
+    ctdb_get_my_public_addresses |
+    while read _x _sip _x ; do
+       for _cip ; do
+           echo "statd-state@${_sip}@${_cip}" "$FAKE_DATE_OUTPUT"
+       done
+    done |
+    ctdb_catdb_format_pairs | {
+       ok
+       simple_test_command ctdb catdb ctdb.tdb
+    } || test_fail
+}
+
+# Run the "notify" event and check the resulting output.
+#
+# Arguments are client IP addresses.  For each address printed by
+# ctdb_get_my_public_addresses (2nd field) and each client IP, 4 lines
+# are expected: the server given as the IP and as $NFS_HOSTNAME, each
+# with an even and an odd state value.
+#
+# The even state value is the current time in seconds rounded down to
+# an even number and the odd one is that plus 1.
+#
+# nfs_load_config is run first; NFS_HOSTNAME is presumably set by the
+# configuration file that it sources.
+check_statd_callout_smnotify ()
+{
+    _state_even=$(( $(date '+%s') / 2 * 2))
+    _state_odd=$(($_state_even + 1))
+
+    nfs_load_config
+
+    ctdb_get_my_public_addresses |
+    while read _x _sip _x ; do
+       for _cip ; do
+           cat <<EOF
+--client=${_cip} --ip=${_sip} --server=${_sip} --stateval=${_state_even}
+--client=${_cip} --ip=${_sip} --server=${NFS_HOSTNAME} --stateval=${_state_even}
+--client=${_cip} --ip=${_sip} --server=${_sip} --stateval=${_state_odd}
+--client=${_cip} --ip=${_sip} --server=${NFS_HOSTNAME} --stateval=${_state_odd}
+EOF
+       done
+    done | {
+       ok
+       simple_test_event "notify"
+    } || test_fail
+}
+
+######################################################################
+
+# Set up the environment for NAT gateway tests.
+#
+# Standard input contains 1 node per line: an IP address optionally
+# followed by "master" or "slave-only".  The node list is written to a
+# temporary file exported as CTDB_NATGW_NODES.  The IP address of a
+# "master" line is exported as FAKE_CTDB_NATGW_MASTER.
+#
+# CTDB_NATGW_PRIVATE_NETWORK is derived from the first line of the
+# nodes file.  The remaining CTDB_NATGW_* variables are exported with
+# fixed values.
+setup_ctdb_natgw ()
+{
+       debug "Setting up NAT gateway"
+
+       natgw_config_dir="${TEST_VAR_DIR}/natgw_config"
+       mkdir -p "$natgw_config_dir"
+
+       # These will accumulate, 1 per test... but will be cleaned up at
+       # the end.
+       export CTDB_NATGW_NODES=$(mktemp --tmpdir="$natgw_config_dir")
+
+       # Read from stdin
+       while read _ip _opts ; do
+               case "$_opts" in
+               master)
+                       export FAKE_CTDB_NATGW_MASTER="$_ip"
+                       echo "$_ip"
+                       ;;
+               slave-only)
+                       printf "%s\tslave-only\n" "$_ip"
+                       ;;
+               *)
+                       echo "$_ip"
+                       ;;
+               esac
+       done >"$CTDB_NATGW_NODES"
+
+       # Assume all of the nodes are on a /24 network and have IPv4
+       # addresses:
+       read _ip <"$CTDB_NATGW_NODES"
+       export CTDB_NATGW_PRIVATE_NETWORK="${_ip%.*}.0/24"
+
+       # These are fixed.  Probably don't use the same network for the
+       # private node IPs.  To unset the default gateway just set it to
+       # "".  :-)
+       export CTDB_NATGW_PUBLIC_IP="10.1.1.121/24"
+       export CTDB_NATGW_PUBLIC_IFACE="eth1"
+       export CTDB_NATGW_DEFAULT_GATEWAY="10.1.1.254"
+       export CTDB_NATGW_SLAVE_ONLY=""
+}
+
+# Set the expected output to an interface listing in which
+# $CTDB_NATGW_PUBLIC_IFACE carries $CTDB_NATGW_PUBLIC_IP.
+#
+# The MAC address is built from the first 12 hex digits of the md5sum
+# of the interface name.
+ok_natgw_master_ip_addr_show ()
+{
+    _mac=$(echo "$CTDB_NATGW_PUBLIC_IFACE" | md5sum | sed -r -e 's@(..)(..)(..)(..)(..)(..).*@\1:\2:\3:\4:\5:\6@')
+
+    # This is based on CTDB_NATGW_PUBLIC_IP
+    _brd="10.1.1.255"
+
+ok <<EOF
+1: ${CTDB_NATGW_PUBLIC_IFACE}: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
+    link/ether ${_mac} brd ff:ff:ff:ff:ff:ff
+    inet ${CTDB_NATGW_PUBLIC_IP} brd ${_brd} scope global ${CTDB_NATGW_PUBLIC_IFACE}
+       valid_lft forever preferred_lft forever
+EOF
+}
+
+# Set the expected output to an interface listing in which
+# $CTDB_NATGW_PUBLIC_IFACE has no IP address configured.
+#
+# The MAC address is built from the first 12 hex digits of the md5sum
+# of the interface name.
+ok_natgw_slave_ip_addr_show ()
+{
+    _mac=$(echo "$CTDB_NATGW_PUBLIC_IFACE" | md5sum | sed -r -e 's@(..)(..)(..)(..)(..)(..).*@\1:\2:\3:\4:\5:\6@')
+ok <<EOF
+1: ${CTDB_NATGW_PUBLIC_IFACE}: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
+    link/ether ${_mac} brd ff:ff:ff:ff:ff:ff
+EOF
+}
+
+# Set the expected output to the sorted list of static routes for the
+# NAT gateway master.
+#
+# Each word of $CTDB_NATGW_STATIC_ROUTES is either "net@gateway" or
+# just "net", in which case $CTDB_NATGW_DEFAULT_GATEWAY is used.  An
+# entry that ends up with an empty gateway produces no route.
+ok_natgw_master_static_routes ()
+{
+    _nl="
+"
+    _t=""
+    for _i in $CTDB_NATGW_STATIC_ROUTES ; do
+       # This is intentionally different to the code in 11.natgw ;-)
+       case "$_i" in
+           *@*)
+               _net=$(echo "$_i" | sed -e 's|@.*||')
+               _gw=$(echo "$_i" | sed -e 's|.*@||')
+               ;;
+           *)
+               _net="$_i"
+               _gw="$CTDB_NATGW_DEFAULT_GATEWAY"
+       esac
+
+       [ -n "$_gw" ] || continue
+       # Separate entries with a newline, without a leading one
+       _t="${_t}${_t:+${_nl}}"
+       _t="${_t}${_net} via ${_gw} dev ethXXX  metric 10 "
+    done
+    _t=$(echo "$_t" | sort)
+    ok "$_t"
+}
+
+# Set the expected output to the sorted list of static routes for a
+# NAT gateway slave.
+#
+# The network part of each word of $CTDB_NATGW_STATIC_ROUTES (anything
+# from "@" onwards is dropped) is routed via $FAKE_CTDB_NATGW_MASTER.
+ok_natgw_slave_static_routes ()
+{
+    _nl="
+"
+    _t=""
+    for _i in $CTDB_NATGW_STATIC_ROUTES ; do
+       # This is intentionally different to the code in 11.natgw ;-)
+       _net=$(echo "$_i" | sed -e 's|@.*||')
+
+       # The interface for the private network isn't specified as
+       # part of the NATGW configuration and isn't part of the
+       # command to add the route.  It is implicitly added by "ip
+       # route" but our stub doesn't do this and adds "ethXXX".
+       _t="${_t}${_t:+${_nl}}"
+       _t="${_t}${_net} via ${FAKE_CTDB_NATGW_MASTER} dev ethXXX  metric 10 "
+    done
+    _t=$(echo "$_t" | sort)
+    ok "$_t"
 }
 
 ######################################################################
@@ -551,10 +837,23 @@ setup_samba ()
        export FAKE_TCP_LISTEN=""
        export FAKE_WBINFO_FAIL="yes"
     fi
+}
 
-    # This is ugly but if this file isn't removed before each test
-    # then configuration changes between tests don't stick.
-    rm -f "$CTDB_VARDIR/state/samba/smb.conf.cache"
+# Set up fake smbd threads.
+#
+# Arguments are PIDs.  All of them are exported, space-separated, as
+# FAKE_SMBD_THREAD_PIDS.  The stack traces produced by
+# program_stack_trace for at most the first 5 PIDs are joined with
+# newlines and stored in SAMBA_STACK_TRACES, which is assigned but not
+# exported here.
+samba_setup_fake_threads ()
+{
+       export FAKE_SMBD_THREAD_PIDS="$*"
+
+       _nl="
+"
+       _out=""
+       _count=0
+       for _pid ; do
+               # Only the first 5 PIDs contribute a stack trace
+               [ "$_count" -lt 5 ] || break
+               _t=$(program_stack_trace "smbd" $_pid)
+               _out="${_out:+${_out}${_nl}}${_t}"
+               _count=$((_count + 1))
+       done
+       SAMBA_STACK_TRACES="$_out"
 }
 
 setup_winbind ()
@@ -609,8 +908,10 @@ setup_nfs ()
 
     export CTDB_NFS_SKIP_SHARE_CHECK="no"
 
-    export CTDB_MONITOR_NFS_THREAD_COUNT RPCNFSDCOUNT FAKE_NFSD_THREAD_PIDS
-    export CTDB_NFS_DUMP_STUCK_THREADS
+    export RPCNFSDCOUNT
+
+    # This doesn't even need to exist
+    export CTDB_NFS_EXPORTS_FILE="$EVENTSCRIPTS_TESTS_VAR_DIR/etc-exports"
 
     # Reset the failcounts for nfs services.
     eventscript_call eval rm -f '$ctdb_fail_dir/nfs_*'
@@ -619,34 +920,43 @@ setup_nfs ()
        debug "Setting up NFS environment: all RPC services up, NFS managed by CTDB"
 
        eventscript_call ctdb_service_managed
-       service "nfs" force-started  # might not be enough
+       service "nfs" force-started
+       service "nfslock" force-started
 
        export CTDB_MANAGED_SERVICES="foo nfs bar"
 
-       rpc_services_up "nfs" "mountd" "rquotad" "nlockmgr" "status"
+       rpc_services_up \
+           "portmapper" "nfs" "mountd" "rquotad" "nlockmgr" "status"
+
+       nfs_setup_fake_threads "nfsd"
+       nfs_setup_fake_threads "rpc.foobar"  # Just set the variable to empty
     else
        debug "Setting up NFS environment: all RPC services down, NFS not managed by CTDB"
 
        eventscript_call ctdb_service_unmanaged
-       service "nfs" force-stopped  # might not be enough
-       eventscript_call startstop_nfs stop
+       service "nfs" force-stopped
+       service "nfslock" force-stopped
 
        export CTDB_MANAGED_SERVICES="foo bar"
        unset CTDB_MANAGES_NFS
     fi
+
+    # This is really nasty.  However, when we test NFS we don't
+    # actually test statd-callout. If we leave it there then left
+    # over, backgrounded instances of statd-callout will do horrible
+    # things with the "ctdb ip" stub and cause the actual
+    # statd-callout tests that follow to fail.
+    rm "${CTDB_BASE}/statd-callout"
 }
 
+# Set up the NFS environment as for setup_nfs, passing on all
+# arguments, then point CTDB_NFS_CALLOUT at the nfs-ganesha-callout
+# script in $CTDB_BASE.  Unless the 1st argument is "down",
+# CTDB_MANAGES_NFS is set to "yes".  The share check is always skipped.
 setup_nfs_ganesha ()
 {
     setup_nfs "$@"
-    export CTDB_NFS_SERVER_MODE="ganesha"
+    export CTDB_NFS_CALLOUT="${CTDB_BASE}/nfs-ganesha-callout"
     if [ "$1" != "down" ] ; then
        export CTDB_MANAGES_NFS="yes"
     fi
 
-    # We do not support testing the Ganesha-nfsd-specific part of the
-    # eventscript.
-    export CTDB_SKIP_GANESHA_NFSD_CHECK="yes"
     export CTDB_NFS_SKIP_SHARE_CHECK="yes"
 }
 
@@ -663,11 +973,12 @@ rpc_services_up ()
     for _i ; do
        debug "Marking RPC service \"${_i}\" as available"
        case "$_i" in
-           nfs)      _t="2:3" ;;
-           mountd)   _t="1:3" ;;
-           rquotad)  _t="1:2" ;;
-           nlockmgr) _t="3:4" ;;
-           status)   _t="1:1" ;;
+           portmapper) _t="2:4" ;;
+           nfs)        _t="2:3" ;;
+           mountd)     _t="1:3" ;;
+           rquotad)    _t="1:2" ;;
+           nlockmgr)   _t="3:4" ;;
+           status)     _t="1:1" ;;
            *) die "Internal error - unsupported RPC service \"${_i}\"" ;;
        esac
 
@@ -675,6 +986,78 @@ rpc_services_up ()
     done
 }
 
+
+# Source the first readable NFS configuration file found under
+# $CTDB_SYS_ETCDIR, trying sysconfig/nfs, default/nfs and
+# ctdb/sysconfig/nfs in that order.  If none is readable then nothing
+# is sourced.  The file is sourced into the current shell, so any
+# variables it sets are visible to the caller.
+nfs_load_config ()
+{
+    _etc="$CTDB_SYS_ETCDIR" # shortcut for readability
+    for _c in "$_etc/sysconfig/nfs" "$_etc/default/nfs" "$_etc/ctdb/sysconfig/nfs" ; do
+       if [ -r "$_c" ] ; then
+           . "$_c"
+           break
+       fi
+    done
+}
+
+# Set up fake threads for an NFS-related program.
+#
+# - 1st argument is the program name.
+#
+# - Remaining arguments are the PIDs of the fake threads.
+#
+# For "nfsd" a new temporary directory is created and exported as
+# PROCFS_PATH, the number of PIDs is written to fs/nfsd/threads below
+# it and the PIDs are exported as FAKE_NFSD_THREAD_PIDS.  For any
+# other program the PIDs are exported as FAKE_RPC_THREAD_PIDS.
+nfs_setup_fake_threads ()
+{
+    _prog="$1" ; shift
+
+    case "$_prog" in
+       nfsd)
+           export PROCFS_PATH=$(mktemp -d --tmpdir="$EVENTSCRIPTS_TESTS_VAR_DIR")
+           _threads="${PROCFS_PATH}/fs/nfsd/threads"
+           # Quote the command substitution so that a directory name
+           # containing whitespace is not split into several words.
+           mkdir -p "$(dirname "$_threads")"
+           echo $# >"$_threads"
+           export FAKE_NFSD_THREAD_PIDS="$*"
+           ;;
+       *)
+           export FAKE_RPC_THREAD_PIDS="$*"
+           ;;
+    esac
+}
+
+# Print a fake 2 line stack trace.
+#
+# - 1st argument is the program name.
+#
+# - 2nd argument is the PID.
+program_stack_trace ()
+{
+       _prog="$1"
+       _pid="$2"
+
+       cat <<EOF
+Stack trace for ${_prog}[${_pid}]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_${_pid}/stack+0x0/0xff
+EOF
+}
+
+# Print fake stack traces for the configured fake threads.
+#
+# - 1st argument is the program name to use in the traces.
+#
+# - 2nd argument is the maximum number of traces to print (default 1).
+#
+# The PIDs come from $FAKE_NFSD_THREAD_PIDS or, if that is unset or
+# empty, from $FAKE_RPC_THREAD_PIDS, regardless of the program name.
+program_stack_traces ()
+{
+    _prog="$1"
+    _max="${2:-1}"
+
+    _count=1
+    for _pid in ${FAKE_NFSD_THREAD_PIDS:-$FAKE_RPC_THREAD_PIDS} ; do
+       [ $_count -le $_max ] || break
+
+       program_stack_trace "$_prog" "$_pid"
+       _count=$(($_count + 1))
+    done
+}
+
+# Print the output expected from running a service start command.
+#
+# - 1st argument is the whole command as a single string.
+#
+# Only "$CTDB_NFS_CALLOUT start nlockmgr" and "$CTDB_NFS_CALLOUT start
+# nfs" are known.  Nothing is printed for any other command.
+#
+# NOTE(review): the leading "&" on each line presumably marks output
+# from a backgrounded command - confirm against the result filter.
+# $CTDB_NFS_CALLOUT is unquoted in the patterns, so any glob
+# characters in it are treated as pattern characters.
+guess_output ()
+{
+    case "$1" in
+       $CTDB_NFS_CALLOUT\ start\ nlockmgr)
+           echo "&Starting nfslock: OK"
+           ;;
+       $CTDB_NFS_CALLOUT\ start\ nfs)
+           cat <<EOF
+&Starting nfslock: OK
+&Starting nfs: OK
+EOF
+           ;;
+       *)
+           : # Nothing
+    esac
+}
+
 # Set the required result for a particular RPC program having failed
 # for a certain number of iterations.  This is probably still a work
 # in progress.  Note that we could hook aggressively
@@ -685,107 +1068,110 @@ rpc_services_up ()
 # function being incomplete.
+#
+# - 1st argument is the name of the RPC service.  It selects the check
+#   file NN.<service>.check in $CTDB_NFS_CHECKS_DIR (default
+#   $CTDB_BASE/nfs-checks.d).
+#
+# - 2nd argument is the number of failures so far (default 1).  If it
+#   is 0 then the null result set by ok_null is left in place.
+#
+# The check file is sourced in a subshell.  The required return code
+# and output are passed back via 2 temporary files, which are removed
+# before returning.
 rpc_set_service_failure_response ()
 {
-    _progname="$1"
-    # The number of failures defaults to the iteration number.  This
-    # will be true when we fail from the 1st iteration... but we need
-    # the flexibility to set the number of failures.
-    _numfails="${2:-${iteration}}"
+    _rpc_service="$1"
+    _numfails="${2:-1}" # default 1
 
-    _etc="$CTDB_ETCDIR" # shortcut for readability
-    for _c in "$_etc/sysconfig/nfs" "$_etc/default/nfs" "$_etc/ctdb/sysconfig/nfs" ; do
-       if [ -r "$_c" ] ; then
-           . "$_c"
-           break
-       fi
-    done
+    # Default
+    ok_null
+    if [ $_numfails -eq 0 ] ; then
+       return
+    fi
+
+    nfs_load_config
 
     # A handy newline.  :-)
     _nl="
 "
 
-    # Default
-    ok_null
+    _dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"
 
-    _file=$(ls "${CTDB_BASE}/nfs-rpc-checks.d/"[0-9][0-9]."${_progname}.check")
+    _file=$(ls "$_dir"/[0-9][0-9]."${_rpc_service}.check")
     [ -r "$_file" ] || die "RPC check file \"$_file\" does not exist or is not unique"
 
-    while read _op _li _actions ; do
-       # Skip comments
-       case "$_op" in
-           \#*) continue ;;
-       esac
+    _out=$(mktemp --tmpdir="$EVENTSCRIPTS_TESTS_VAR_DIR")
+    _rc_file=$(mktemp --tmpdir="$EVENTSCRIPTS_TESTS_VAR_DIR")
 
-       _hit=false
-       if [ "$_op" != "%" ] ; then
-           if [ $_numfails $_op $_li ] ; then
-               _hit=true
-           fi
+    (
+       # Subshell to restrict the scope of variables...
+
+       # Defaults
+       family="tcp"
+       version=""
+       unhealthy_after=1
+       restart_every=0
+       service_stop_cmd=""
+       service_start_cmd=""
+       service_check_cmd=""
+       service_debug_cmd=""
+
+       # Don't bother syntax checking, eventscript does that...
+       . "$_file"
+
+       # Just use the first version, or use default.  This is dumb but
+       # handles all the cases that we care about now...
+       if [ -n "$version" ] ; then
+           _ver="${version%% *}"
        else
-           if [ $(($_numfails $_op $_li)) -eq 0 ] ; then
-               _hit=true
-           fi
+           case "$_rpc_service" in
+               portmapper) _ver="" ;;
+               *)          _ver=1  ;;
+           esac
        fi
-       if $_hit ; then
-           _out=""
-           _rc=0
-           for _action in $_actions ; do
-               case "$_action" in
-                   verbose)
-                       _ver=1
-                       _pn="$_progname"
-                       case "$_progname" in
-                           nfsd) _ver=3 ; _pn="nfs" ;;
-                           lockd) _ver=4 ; _pn="nlockmgr" ;;
-                           statd) _pn="status" ;;
-                       esac
-                       _out="\
-ERROR: $_pn failed RPC check:
+       _rpc_check_out="\
+$_rpc_service failed RPC check:
 rpcinfo: RPC: Program not registered
-program $_pn version $_ver is not available"
-                       ;;
-                   restart*)
-                       _p="rpc.${_progname}"
-                       case "$_action" in
-                           *:b) _bg="&" ;;
-                           *)   _bg=""  ;;
-                       esac
-                       case "$_progname" in
-                           nfsd)
-                               _t="\
-Trying to restart NFS service"
-
-                               if [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] ; then
-                                   for _pid in $FAKE_NFSD_THREAD_PIDS ; do
-                                       _t="\
-$_t
-${_bg}Stack trace for stuck nfsd thread [${_pid}]:
-${_bg}[<ffffffff87654321>] fake_stack_trace_for_pid_${_pid}/stack+0x0/0xff"
-                                   done
-                               fi
-
-                               _t="\
-${_t}
-${_bg}Starting nfslock: OK
-${_bg}Starting nfs: OK"
-                               ;;
-                           lockd)
-                               _t="\
-Trying to restart lock manager service
-${_bg}Starting nfslock: OK"
-                               ;;
-                           *)
-                               _t="Trying to restart $_progname [${_p}]"
-                       esac
-                       _out="${_out}${_out:+${_nl}}${_t}"
-                       ;;
-                   unhealthy)
-                       _rc=1
-               esac
-           done
-           required_result $_rc "$_out"
-           return
+program $_rpc_service${_ver:+ version }${_ver} is not available"
+
+       # Unhealthy once the failure count reaches $unhealthy_after
+       if [ $unhealthy_after -gt 0 -a $_numfails -ge $unhealthy_after ] ; then
+           _unhealthy=true
+           echo 1 >"$_rc_file"
+           echo "ERROR: ${_rpc_check_out}" >>"$_out"
+       else
+           _unhealthy=false
+           echo 0 >"$_rc_file"
        fi
-    done <"$_file"
+
+       # A restart is expected on every $restart_every-th failure
+       if [ $restart_every -gt 0 ] && \
+                  [ $(($_numfails % $restart_every)) -eq 0 ] ; then
+           if ! $_unhealthy ; then
+               echo "WARNING: ${_rpc_check_out}" >>"$_out"
+           fi
+
+           echo "Trying to restart service \"${_rpc_service}\"..." >>"$_out"
+
+           if [ -n "$service_debug_cmd" ] ; then
+               # Redirect stdout to the file first and then point
+               # stderr at it.  In the opposite order stderr would go
+               # to the caller's stdout instead of the file.
+               $service_debug_cmd >>"$_out" 2>&1
+           fi
+
+           guess_output "$service_start_cmd" >>"$_out"
+       fi
+    )
+
+    read _rc <"$_rc_file"
+    required_result $_rc <"$_out"
+
+    rm -f "$_out" "$_rc_file"
+}
+
+######################################################################
+
+# Recovery lock fakery
+
+# Wait until no process has a command line matching
+# "${script_dir}/${script}", checking about once per second and
+# printing a message before each wait.  FAKE_SLEEP_REALLY=yes is set
+# only inside the subshell; presumably it makes a stubbed sleep really
+# sleep.
+cleanup_reclock ()
+{
+       _pattern="${script_dir}/${script}"
+       while pgrep -f "$_pattern" >/dev/null ; do
+               echo "Waiting for backgrounded ${script} to exit..."
+               (FAKE_SLEEP_REALLY=yes sleep 1)
+       done
+}
+
+# Create a temporary file in $EVENTSCRIPTS_TESTS_VAR_DIR, export its
+# name as CTDB_RECOVERY_LOCK and register cleanup_reclock via
+# test_cleanup.
+setup_reclock ()
+{
+       CTDB_RECOVERY_LOCK=$(mktemp --tmpdir="$EVENTSCRIPTS_TESTS_VAR_DIR")
+       export CTDB_RECOVERY_LOCK
+
+       test_cleanup cleanup_reclock
 }
 
 ######################################################################
@@ -867,30 +1253,33 @@ define_test ()
     # Remaining format should be NN.service.event.NNN or NN.service.NNN:
     _num="${_f##*.}"
     _f="${_f%.*}"
+
     case "$_f" in
-       *.*.*)
+       [0-9][0-9].*.*)
            script="${_f%.*}"
            event="${_f##*.}"
+           script_dir="${CTDB_BASE}/events.d"
            ;;
-       *.*)
+       [0-9][0-9].*)
            script="$_f"
            unset event
+           script_dir="${CTDB_BASE}/events.d"
+           ;;
+       *.*)
+           script="${_f%.*}"
+           event="${_f##*.}"
+           script_dir="${CTDB_BASE}"
            ;;
        *)
-           die "Internal error - unknown testcase filename format"
+           script="${_f%.*}"
+           unset event
+           script_dir="${CTDB_BASE}"
     esac
 
-    printf "%-17s %-10s %-4s - %s\n\n" "$script" "$event" "$_num" "$desc"
-}
+    [ -r "${script_dir}/${script}" ] || \
+       die "Internal error - unable to find script \"${script_dir}/${script}\""
 
-_extra_header ()
-{
-    cat <<EOF
-CTDB_BASE="$CTDB_BASE"
-CTDB_ETCDIR="$CTDB_ETCDIR"
-ctdb client is "$(which ctdb)"
-ip command is "$(which ip)"
-EOF
+    printf "%-17s %-10s %-4s - %s\n\n" "$script" "$event" "$_num" "$desc"
 }
 
 # Run an eventscript once.  The test passes if the return code and
@@ -902,18 +1291,29 @@ simple_test ()
 {
     [ -n "$event" ] || die 'simple_test: $event not set'
 
-    _extra_header=$(_extra_header)
+    args="$@"
 
-    echo "Running eventscript \"$script $event${1:+ }$*\""
-    _shell=""
-    if $TEST_COMMAND_TRACE ; then
-       _shell="sh -x"
-    else
-       _shell="sh"
-    fi
-    _out=$($_shell "${CTDB_BASE}/events.d/$script" "$event" "$@" 2>&1)
+    test_header ()
+    {
+       echo "Running script \"$script $event${args:+ }$args\""
+    }
+
+    extra_header ()
+    {
+       cat <<EOF
+
+##################################################
+CTDB_BASE="$CTDB_BASE"
+CTDB_SYS_ETCDIR="$CTDB_SYS_ETCDIR"
+ctdb client is "$(which ctdb)"
+ip command is "$(which ip)"
+EOF
+    }
+
+    script_test "${script_dir}/${script}" "$event" "$@"
 
-    result_check "$_extra_header"
+    reset_test_header
+    reset_extra_header
 }
 
 simple_test_event ()
@@ -929,101 +1329,82 @@ simple_test_event ()
 
+# Run the command given by the arguments via unit_test, passing all
+# arguments through unchanged.
 simple_test_command ()
 {
-    # If something has previously failed then don't continue.
-    : ${_passed:=true}
-    $_passed || return 1
-
-    echo "=================================================="
-    echo "Running command \"$*\""
-    _out=$("$@" 2>&1)
-
-    result_check
+    unit_test "$@"
 }
 
-check_ctdb_logfile ()
-{
-    # If something has previously failed then don't continue.
-    : ${_passed:=true}
-    $_passed || return 1
-
-    echo "=================================================="
-    echo "Checking CTDB_LOGFILE=\"${CTDB_LOGFILE}\""
-    _out=$(cat "$CTDB_LOGFILE" 2>&1)
-
-    result_check
-}
-
-# Run an eventscript iteratively.
+# Run an NFS eventscript iteratively.
+#
 # - 1st argument is the number of iterations.
-# - 2nd argument is something to eval to do setup for every iteration.
-#   The easiest thing to do here is to define a function and pass it
-#   here.
+#
+# - 2nd argument is the NFS/RPC service being tested
+#
+#   rpcinfo (or $service_check_cmd) is used on each iteration to test
+#   the availability of the service
+#
+#   If this is not set or null then no RPC service is checked and the
+#   required output is not reset on each iteration.  This is useful in
+#   baseline tests to confirm that the eventscript and test
+#   infrastructure is working correctly.
+#
 # - Subsequent arguments come in pairs: an iteration number and
-#   something to eval for that iteration.  Each time an iteration
+#   something to eval before that iteration.  Each time an iteration
 #   number is matched the associated argument is given to eval after
 #   the default setup is done.  The iteration numbers need to be given
 #   in ascending order.
 #
-# Some optional args can be given *before* these, surrounded by extra
-# "--" args.  These args are passed to the eventscript.  Quoting is
-# lost.
+#   These arguments can allow a service to be started or stopped
+#   before a particular iteration.
 #
-# One use of the 2nd and further arguments is to call
-# required_result() to change what is expected of a particular
-# iteration.
-iterate_test ()
+# Each iteration runs simple_test in a subshell.  Its output is only
+# printed if $TEST_VERBOSE is true or the iteration fails.  On the
+# first failing iteration the calling shell exits with that
+# iteration's return code.
+#
+# NOTE(review): if the 2nd argument is passed as an empty string then
+# only 1 argument is shifted, so the empty string is left in place as
+# the first iteration number - confirm that callers omit it instead.
+nfs_iterate_test ()
 {
-    [ -n "$event" ] || die 'simple_test: $event not set'
-
-    args=""
-    if [ "$1" = "--" ] ; then
-       shift
-       while [ "$1" != "--" ] ; do
-           args="${args}${args:+ }$1"
-           shift
-       done
+    _repeats="$1"
+    _rpc_service="$2"
+    if [ -n "$2" ] ; then
+       shift 2
+    else
        shift
     fi
 
-    _repeats="$1"
-    _setup_default="$2"
-    shift 2
-
+    # NOTE(review): $args is no longer assigned in this function, so
+    # whatever value the caller's environment holds is printed here.
     echo "Running $_repeats iterations of \"$script $event\" $args"
 
-    _result=true
-
-    for iteration in $(seq 1 $_repeats) ; do
-       # This is inefficient because the iteration-specific setup
-       # might completely replace the default one.  However, running
-       # the default is good because it allows you to revert to a
-       # particular result without needing to specify it explicitly.
-       eval $_setup_default
-       if [ $iteration = "$1" ] ; then
-           eval $2
+    _iterate_failcount=0
+    for _iteration in $(seq 1 $_repeats) ; do
+       # This is not a numerical comparison because $1 will often not
+       # be set.
+       if [ "$_iteration" = "$1" ] ; then
+           debug "##################################################"
+           eval "$2"
+           debug "##################################################"
            shift 2
        fi
+       if [ -n "$_rpc_service" ] ; then
+           # Check whether the service is currently available
+           _ok=false
+           if [ -n "$service_check_cmd" ] ; then
+               if eval "$service_check_cmd" ; then
+                   _ok=true
+               fi
+           else
+               if rpcinfo -T tcp localhost "$_rpc_service" >/dev/null 2>&1 ; then
+                   _ok=true
+               fi
+           fi
 
-       _shell=""
-       if $TEST_COMMAND_TRACE ; then
-           _shell="sh -x"
-       else
-           _shell="sh"
+           # Count consecutive failures and set the required result
+           # for this iteration to match.
+           if $_ok ; then
+               _iterate_failcount=0
+           else
+               _iterate_failcount=$(($_iterate_failcount + 1))
+           fi
+           rpc_set_service_failure_response "$_rpc_service" $_iterate_failcount
        fi
-       _out=$($_shell "${CTDB_BASE}/events.d/$script" "$event" $args 2>&1)
-       _rc=$?
-
-       _fout=$(echo "$_out" | result_filter)
-
-       if [ "$_fout" = "$required_output" -a $_rc = $required_rc ] ; then
-           _passed=true
-       else
-           _passed=false
-           _result=false
+       _out=$(simple_test 2>&1)
+       _ret=$?
+       if "$TEST_VERBOSE" || [ $_ret -ne 0 ] ; then
+           echo "##################################################"
+           echo "Iteration ${_iteration}:"
+           echo "$_out"
+       fi
+       if [ $_ret -ne 0 ] ; then
+           exit $_ret
        fi
-
-       result_print "$_passed" "$_out" "$_rc" "Iteration $iteration"
     done
-
-    result_footer "$_result" "$(_extra_header)"
 }