It is hard to diagnose failures in the NFS tickle test because there's
no way of telling if the test node doesn't have the tickle or if it
didn't get propagated.
Factor out check_tickles() into local.bash and give it some
parameters.
Have the NFS test call it first to ensure the tickle has been
registered on the test node. Then use the new function
check_tickles_all() to ensure the tickle has been propagated to all
nodes. Give this a bit of extra time (twice TickleUpdateInterval)
just in case we're racing with the update.
Add a useful comment to the CIFS test so that I stop asking myself how
the test could ever have worked reliably. :-)
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Pair-programmed-with: Amitay Isaacs <amitay@gmail.com>
(Imported from commit
bafb9151ccb5722df36f9ba168716f4f4fa01cdc)
Expected results:
-* CTDB should correctly record the socket in the nfs-tickles directory
- and should send a reset packet when the node is disabled.
+* CTDB should correctly record the socket and should send a reset
+ packet when the node is disabled.
EOF
}
#echo "Monitor interval on node $test_node is $monitor_interval seconds."
select_test_node_and_ips
+try_command_on_node $test_node "$CTDB listnodes | wc -l"
+numnodes="$out"
test_port=2049
wait_for_monitor_event $test_node
-echo "Sleeping until tickles are synchronised across nodes..."
-try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
-sleep_for "${out#*= }"
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
-try_command_on_node -v any "ctdb -Y gettickles $test_ip $test_port"
+echo "Getting TickleUpdateInterval..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+# Strip any leading "NAME = " prefix from the getvar output so that
+# update_interval holds just the number used in the arithmetic below.
+# If there is no such prefix the value is left unchanged.
+update_interval="${out#*= }"
-if [ "${out/${src_socket}/}" != "$out" ] ; then
- echo "GOOD: NFS connection tracked OK."
-else
- echo "BAD: Socket not tracked in NFS tickles."
- testfailures=1
-fi
+echo "Wait until NFS connection is tracked by CTDB on all nodes..."
+wait_until $(($update_interval * 2)) \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket
tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
# This should happen as soon as connection is up... but unless we wait
# we sometimes beat the registration.
-check_tickles ()
-{
- try_command_on_node 0 ctdb gettickles $test_ip -n $test_node
- # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
- [ "${out/SRC: ${src_socket} /}" != "$out" ]
-}
-
echo "Checking if CIFS connection is tracked by CTDB..."
-wait_until 10 check_tickles
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
echo "$out"
if [ "${out/SRC: ${src_socket} /}" != "$out" ] ; then
tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+# The test node is only being disabled so the tickling is done from
+# the test node. We don't need to wait until the tickles are
+# transferred to another node.
echo "Disabling node $test_node"
try_command_on_node 1 $CTDB disable -n $test_node
wait_until_node_has_status $test_node disabled
wait_until 5 get_src_socket "$@"
}
+#######################################
+
+# Check whether a node lists the given connection in its tickles.
+#
+# Arguments:
+#  $1 - node on which to run "ctdb gettickles"
+#  $2 - IP address passed to "ctdb gettickles"
+#  $3 - port passed to "ctdb gettickles"
+#  $4 - source socket to look for in the "SRC: " entries
+# Globals: reads $out after calling try_command_on_node
+# Returns: 0 if $out contains "SRC: <source socket> ", non-zero otherwise
+check_tickles ()
+{
+ local node="$1"
+ local test_ip="$2"
+ local test_port="$3"
+ local src_socket="$4"
+ try_command_on_node "$node" ctdb gettickles "$test_ip" "$test_port"
+ # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
+ # The socket is quoted so it is matched literally, not as a glob pattern.
+ [[ "$out" == *"SRC: ${src_socket} "* ]]
+}
+
+# Check whether the given connection is listed in the tickles of every node.
+#
+# Arguments:
+#  $1 - number of nodes expected to report the connection
+#  $2 - IP address passed to "ctdb gettickles"
+#  $3 - port passed to "ctdb gettickles"
+#  $4 - source socket to look for in the "SRC: " entries
+# Globals: reads $out after calling try_command_on_node
+# Returns: 0 if the number of matching output lines equals $1
+check_tickles_all ()
+{
+ local numnodes="$1"
+ local test_ip="$2"
+ local test_port="$3"
+ local src_socket="$4"
+
+ try_command_on_node all ctdb gettickles "$test_ip" "$test_port"
+ # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
+ # Fixed-string match: nothing in the socket is interpreted as a regex.
+ # grep -c exits non-zero when the count is 0, which is not an error
+ # here, hence the "|| true".
+ local count
+ count=$(grep -F -c -- "SRC: ${src_socket} " <<<"$out" || true)
+ [ "$count" -eq "$numnodes" ]
+}
+
+
+
#######################################
# filename will be in $tcpdump_filename, pid in $tcpdump_pid