ctdb-scripts: Compact server-end TCP connection killing output
author Martin Schwenke <martin@meltin.net>
Fri, 9 Jun 2017 04:34:56 +0000 (14:34 +1000)
committer Martin Schwenke <martins@samba.org>
Tue, 13 Jun 2017 07:12:19 +0000 (09:12 +0200)
When thousands of connections are being killed the logs are flooded
with information about connections that should be killed.  When some
connections are not killed then the number not killed is printed.
This is the wrong way around!  When debugging "fail-back" problems, it
is important to know details of connections that were *not* killed.
It is almost never important to know the full list of all connections
that were *supposed* to be killed.

Instead, print a summary showing how many connections of the total
were killed.  If any were not killed then print a list of remaining
connections.

Update unit tests: add infrastructure for fake TCP connections, update
existing test cases, add new test cases.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/config/functions
ctdb/tests/eventscripts/10.interface.releaseip.010.sh
ctdb/tests/eventscripts/10.interface.releaseip.011.sh
ctdb/tests/eventscripts/10.interface.releaseip.012.sh [new file with mode: 0755]
ctdb/tests/eventscripts/10.interface.releaseip.013.sh [new file with mode: 0755]
ctdb/tests/eventscripts/scripts/local.sh
ctdb/tests/eventscripts/stubs/ss

index f4539685137857258b46901aae1eb437f0198089..2c630bd11bee8fb5e683eab3022cd1e4747ffaa2 100755 (executable)
@@ -414,7 +414,6 @@ kill_tcp_connections ()
                139|445) __oneway=true ;;
            esac
 
-           echo "Killing TCP connection $_src $_dst"
            _connections="${_connections}${_nl}${_src} ${_dst}"
            if ! $__oneway ; then
                _connections="${_connections}${_nl}${_dst} ${_src}"
@@ -433,15 +432,22 @@ kill_tcp_connections ()
                return
        }
 
-       _remaining=$(get_tcp_connections_for_ip "$_ip" | wc -l)
-
-       if [ "$_remaining" -eq 0 ] ; then
-               echo "Killed $_killcount TCP connections to released IP $_ip"
-               return
+       _connections=$(get_tcp_connections_for_ip "$_ip")
+       if [ -z "$_connections" ] ; then
+               _remaining=0
+       else
+               _remaining=$(echo "$_connections" | wc -l)
        fi
 
-       _t="${_remaining}/${_killcount}"
-       echo "Failed to kill TCP connections for IP $_ip (${_t} remaining)"
+       _actually_killed=$((_killcount - _remaining))
+
+       _t="${_actually_killed}/${_killcount}"
+       echo "Killed ${_t} TCP connections to released IP $_ip"
+
+       if [ -n "$_connections" ] ; then
+               echo "Remaining connections:"
+               echo "$_connections" | sed -e 's|^|  |'
+       fi
     }
 }
 
index b6d9c7a8bd6f913431152b2d1ed21530e3cd0cda..095e85c9b7c20ed4e19dfdd187be4a8901bf0142 100755 (executable)
@@ -8,25 +8,15 @@ setup_ctdb
 
 ctdb_get_1_public_address |
 while read dev ip bits ; do
-    ip addr add "${ip}/${bits}" dev "$dev"
+       ip addr add "${ip}/${bits}" dev "$dev"
 
-    # Setup 10 fake connections...
-    count=10
-    out=""
-    nl="
-"
-    i=0
-    while [ $i -lt $count ] ; do
-       echo "${ip}:445 10.254.254.1:1230${i}"
-       # Expected output for killing this connection
-       out="${out}${out:+${nl}}Killing TCP connection 10.254.254.1:1230${i} ${ip}:445"
-       i=$(($i + 1))
-    done >"$FAKE_NETSTAT_TCP_ESTABLISHED_FILE"
+       count=10
+       setup_tcp_connections $count \
+                             "$ip" 445 10.254.254.0 12300
 
-    ok <<EOF
-$out
-Killed $count TCP connections to released IP $ip
+       ok <<EOF
+Killed ${count}/${count} TCP connections to released IP $ip
 EOF
 
-    simple_test $dev $ip $bits
+       simple_test $dev $ip $bits
 done
index 6203b97e2f5285aa4623b9ddbb725286e08cf877..c129346db8eeda5e7cf04b254f48adecd53f3109 100755 (executable)
@@ -8,31 +8,20 @@ setup_ctdb
 
 ctdb_get_1_public_address |
 while read dev ip bits ; do
-    ip addr add "${ip}/${bits}" dev "$dev"
-
-    # Setup 10 fake connections...
-    count=10
-    out=""
-    nl="
-"
-    i=0
-    while [ $i -lt $count ] ; do
-       echo "${ip}:445 10.254.254.1:1230${i}"
-       # Expected output for killing this connection
-       out="${out}${out:+${nl}}Killing TCP connection 10.254.254.1:1230${i} ${ip}:445"
-       i=$(($i + 1))
-    done >"$FAKE_NETSTAT_TCP_ESTABLISHED_FILE"
-
-    # Note that the fake TCP killing done by the "ctdb killtcp" stub
-    # can only kill conections in the file, so killing this connection
-    # will never succeed so it will look like a time out.
-    FAKE_NETSTAT_TCP_ESTABLISHED="${ip}:445|10.254.254.1:43210"
-
-    ok <<EOF
-Killing TCP connection 10.254.254.1:43210 ${ip}:445
-$out
-Failed to kill TCP connections for IP 10.0.0.3 (1/11 remaining)
+       ip addr add "${ip}/${bits}" dev "$dev"
+
+       count=10
+       setup_tcp_connections $count \
+                             "$ip" 445 10.254.254.0 12300
+
+       setup_tcp_connections_unkillable 1 \
+                                        "$ip" 445 10.254.254.0 43210
+
+       ok <<EOF
+Killed 10/11 TCP connections to released IP 10.0.0.3
+Remaining connections:
+  10.0.0.3:445 10.254.254.1:43211
 EOF
 
-    simple_test $dev $ip $bits
+       simple_test $dev $ip $bits
 done
diff --git a/ctdb/tests/eventscripts/10.interface.releaseip.012.sh b/ctdb/tests/eventscripts/10.interface.releaseip.012.sh
new file mode 100755 (executable)
index 0000000..53971fe
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed, 3 fail"
+
+setup_ctdb
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+       ip addr add "${ip}/${bits}" dev "$dev"
+
+       count=10
+
+       setup_tcp_connections $count \
+                             "$ip" 445 10.254.254.0 12300
+
+       setup_tcp_connections_unkillable 3 \
+                                        "$ip" 445 10.254.254.0 43210
+
+       ok <<EOF
+Killed 10/13 TCP connections to released IP 10.0.0.3
+Remaining connections:
+  10.0.0.3:445 10.254.254.1:43211
+  10.0.0.3:445 10.254.254.2:43212
+  10.0.0.3:445 10.254.254.3:43213
+EOF
+
+       simple_test $dev $ip $bits
+done
diff --git a/ctdb/tests/eventscripts/10.interface.releaseip.013.sh b/ctdb/tests/eventscripts/10.interface.releaseip.013.sh
new file mode 100755 (executable)
index 0000000..91393d2
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, all 10 connections kills fail"
+
+setup_ctdb
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+       ip addr add "${ip}/${bits}" dev "$dev"
+
+       setup_tcp_connections 0
+
+       count=10
+       setup_tcp_connections_unkillable $count \
+                                        "$ip" 445 10.254.254.0 43210
+
+       ok <<EOF
+Killed 0/$count TCP connections to released IP 10.0.0.3
+Remaining connections:
+  10.0.0.3:445 10.254.254.1:43211
+  10.0.0.3:445 10.254.254.2:43212
+  10.0.0.3:445 10.254.254.3:43213
+  10.0.0.3:445 10.254.254.4:43214
+  10.0.0.3:445 10.254.254.5:43215
+  10.0.0.3:445 10.254.254.6:43216
+  10.0.0.3:445 10.254.254.7:43217
+  10.0.0.3:445 10.254.254.8:43218
+  10.0.0.3:445 10.254.254.9:43219
+  10.0.0.3:445 10.254.254.10:43220
+EOF
+
+    simple_test $dev $ip $bits
+done
index 5a638dfa70796bece95c621c1a792b816c52a958..0e16f5b1b1d3db8835d1e31a7349e08545eb6324 100644 (file)
@@ -124,8 +124,7 @@ setup_generic ()
     export FAKE_TDB_IS_OK
     export FAKE_DATE_OUTPUT
 
-    export FAKE_NETSTAT_TCP_ESTABLISHED FAKE_TCP_LISTEN FAKE_NETSTAT_UNIX_LISTEN
-    export FAKE_NETSTAT_TCP_ESTABLISHED_FILE=$(mktemp --tmpdir="$EVENTSCRIPTS_TESTS_VAR_DIR")
+    export FAKE_TCP_LISTEN FAKE_NETSTAT_UNIX_LISTEN
 }
 
 tcp_port_down ()
@@ -136,6 +135,42 @@ tcp_port_down ()
     done
 }
 
+_tcp_connections ()  # Print $1 fake connections, one "server:port client:port" line each
+{
+       _count="$1"  # number of lines to print
+       _sip="$2"  # server IP, repeated unchanged on every line
+       _sport="$3"  # server port, repeated unchanged on every line
+       _cip_base="$4"  # dotted client address whose last component is incremented
+       _cport_base="$5"  # client port, incremented in step with the address
+
+       _cip_prefix="${_cip_base%.*}"  # everything before the final "."
+       _cip_suffix="${_cip_base##*.}"  # everything after the final "."
+
+       for _i in $(seq 1 $_count) ; do  # _i starts at 1, so the bases themselves are never printed
+               _cip_last=$((_cip_suffix + _i))
+               _cip="${_cip_prefix}.${_cip_last}"
+               _cport=$((_cport_base + _i))
+               echo "${_sip}:${_sport} ${_cip}:${_cport}"
+       done
+}
+
+setup_tcp_connections ()  # Write fake connections to a temp file and export its path
+{
+       _t=$(mktemp --tmpdir="$EVENTSCRIPTS_TESTS_VAR_DIR")  # plain path, no stray leading "="
+       export FAKE_NETSTAT_TCP_ESTABLISHED_FILE="$_t"  # explicit NAME=value assignment
+       _tcp_connections "$@" >"$FAKE_NETSTAT_TCP_ESTABLISHED_FILE"  # all arguments are forwarded as-is
+}
+
+setup_tcp_connections_unkillable ()  # Export fake connections via a variable instead of a file
+{
+       # These connections are listed by the "ss" stub but are not
+       # killed by the "ctdb killtcp" stub.  So killing these
+       # connections will never succeed... and will look like a time
+       # out.
+       _t=$(_tcp_connections "$@" | sed -e 's/ /|/g')  # each "src dst" line becomes "src|dst"
+       export FAKE_NETSTAT_TCP_ESTABLISHED="$_t"  # entries stay newline-separated, one per connection
+}
+
 shares_missing ()
 {
     _fmt="$1" ; shift
index e8d804481df34b5d92e1afb3e25939b6e0341711..1a3db0d04679e4b946527590c6677be633b41370 100755 (executable)
@@ -77,6 +77,10 @@ ss_tcp_established ()
            echo 0 0 "$src" "$dst"
        fi
     done
+
+    if [ -z "$FAKE_NETSTAT_TCP_ESTABLISHED_FILE" ] ; then
+           return
+    fi
     while read src dst ; do
        if filter_socket "$srcs" "$sports" "$src" ; then
            echo 0 0 "$src" "$dst"