eventscripts: Add optional counter name argument to some counter functions
[ctdb.git] / config / functions
index 4acfc4ffab3f4347dce18f23be6a2e1f0e8c4be9..2acac25510fba976734fd4e83bdc5c246ce1b99f 100755 (executable)
@@ -29,6 +29,26 @@ loadconfig () {
     _loadconfig "$@"
 }
 
+##############################################################
+# check the number of args for IP events ($1 = event name, counted in $#)
+ctdb_check_args ()
+{
+    case "$1" in
+       takeip|releaseip)
+           if [ $# != 4 ]; then  # event + interface + IP + maskbits
+               echo "ERROR: must supply interface, IP and maskbits"
+               exit 1
+           fi
+           ;;
+       updateip)
+           if [ $# != 5 ]; then  # event + old iface + new iface + IP + maskbits
+               echo "ERROR: must supply old interface, new interface, IP and maskbits"
+               exit 1
+           fi
+           ;;
+    esac  # any other event is accepted without a check
+}
+
 ##############################################################
 # determine on what type of system (init style) we are running
 detect_init_style() {
@@ -139,6 +159,24 @@ ctdb_wait_tcp_ports() {
 }
 
 
+######################################################
+# wrapper around /proc/ settings to allow them to be hooked
+# for testing
+# 1st arg is the path relative to /proc/, 2nd arg is the value to write
+set_proc ()
+{
+    echo "$2" >"/proc/$1"  # truncating write, not an append
+}
+
+######################################################
+# wrapper around getting file contents from /proc/ to allow
+# this to be hooked for testing
+# 1st arg is the path relative to /proc/; contents are written to stdout
+get_proc ()
+{
+    cat "/proc/$1"  # the exit status of cat is the function's status
+}
+
 ######################################################
 # check that a rpc server is registered with portmap
 # and responding to requests
@@ -191,16 +229,164 @@ ctdb_check_directories() {
 # check a set of tcp ports
 # usage: ctdb_check_tcp_ports <ports...>
 ######################################################
-ctdb_check_tcp_ports() {
 
-    for p ; do
-       if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
-            if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
-               echo "ERROR: $service_name tcp port $p is not responding"
+# Flag file: created when a service is initially started and deleted
+# the first time TCP port checks for that service succeed.  Until then
+# ctdb_check_tcp_ports() reports a failed port check as "INFO:" rather
+# than "ERROR:".  This helper sets $_ctdb_service_started_file to its path.
+_ctdb_check_tcp_common ()
+{
+    _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
+}
+
+ctdb_check_tcp_init ()  # create the "service just started" flag file
+{
+    _ctdb_check_tcp_common  # sets $_ctdb_service_started_file
+    mkdir -p "${_ctdb_service_started_file%/*}" # parent directory (dirname)
+    touch "$_ctdb_service_started_file"
+}
+
+ctdb_check_tcp_ports()
+{
+    # Returns 0 if every port is listening, 1 if one is not (a message
+    # is printed), 127 if no checker in CTDB_TCP_PORT_CHECKERS works.
+    if [ -z "$1" ] ; then
+       echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
+       exit 1
+    fi
+
+    # Default the checker list if CTDB_TCP_PORT_CHECKERS is unset.  If
+    # any default is unsupported, override the variable in
+    # /etc/sysconfig/ctdb or via a file in /etc/ctdb/rc.local.d/.
+    : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
+
+    for _c in $CTDB_TCP_PORT_CHECKERS ; do
+       ctdb_check_tcp_ports_$_c "$@"
+       case "$?" in
+           0)
+               _ctdb_check_tcp_common
+               rm -f "$_ctdb_service_started_file"
+               return 0
+               ;;
+           1)  # the checker leaves the failing port in $_p
+               _ctdb_check_tcp_common
+               if [ ! -f "$_ctdb_service_started_file" ] ; then
+                   echo "ERROR: $service_name tcp port $_p is not responding"
+                   cat <<EOF
+$ctdb_check_tcp_ports_debug
+EOF
+               else
+                   echo "INFO: $service_name tcp port $_p is not responding"
+               fi
+
                return 1
-            fi
-       fi
+               ;;
+           127)
+               : # Not implemented
+               ;;
+           *)
+               : ;; # Unexpected status - try the next checker as well
+       esac
+    done
+
+    echo "INTERNAL ERROR: ctdb_check_tcp_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
+    return 127
+}
+
+ctdb_check_tcp_ports_netstat ()  # 0 = all ports listening, 1 = one is not, 127 = netstat could not be run
+{
+    _cmd='netstat -l -t -n'
+    _ns=$($_cmd 2>&1)
+    if [ $? -eq 127 ] ; then
+       # 127 is the shell's "command not found" status - netstat not installed?
+       ctdb_check_tcp_ports_debug="$_ns"
+       return 127
+    fi
+
+    for _p ; do  # process each function argument (port)
+       for _a in '0\.0\.0\.0' '::' ; do  # IPv4 and IPv6 wildcard addresses
+           _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
+           if echo "$_ns" | grep -E -q "$_pat" ; then
+               # We matched the port, so process next port
+               continue 2
+           fi
+       done
+
+       # Neither address matched this port, so flag an error.
+       ctdb_check_tcp_ports_debug="$_cmd shows this output:
+$_ns"
+       return 1
+    done
+
+    return 0
+}
+
+ctdb_check_tcp_ports_nmap ()  # 0 = all ports open, 1 = one is not, 127 = nmap could not be run
+{
+    # nmap wants a comma-separated list of ports
+    _ports=""
+    for _p ; do
+       _ports="${_ports}${_ports:+,}${_p}"  # comma only once _ports is non-empty
+    done
+
+    _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"  # -oG - : grepable output on stdout
+
+    _nmap_out=$($_cmd 2>&1)
+    if [ $? -eq 127 ] ; then
+       # 127 is the shell's "command not found" status - nmap probably not installed
+       ctdb_check_tcp_ports_debug="$_nmap_out"
+       return 127
+    fi
+
+    # keep only what follows "Ports:" and one whitespace character
+    _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
+
+    for _p ; do
+       # looking for something like this:
+       #  445/open/tcp//microsoft-ds///
+       # possibly followed by a comma
+       _t="$_p/open/tcp//"
+       case "$_port_info" in
+           # The info we're after must be either at the beginning of
+           # the string or it must follow a space.
+            $_t*|*\ $_t*) : ;;
+           *)
+               # Nope, flag an error...
+               ctdb_check_tcp_ports_debug="$_cmd shows this output:
+$_nmap_out"
+               return 1
+       esac
     done
+
+    return 0
+}
+
+# Use the new "ctdb checktcpport" command to check each port (cheap).
+# Returns 0 if all ports are in use, 1 if one could be bound, else 127.
+ctdb_check_tcp_ports_ctdb ()
+{
+    for _p ; do  # process each function argument (port)
+       _cmd="ctdb checktcpport $_p"
+       _out=$($_cmd 2>&1)
+       _ret=$?
+       case "$_ret" in
+           0)  # bind succeeded, so nothing is listening on the port
+               ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
+               return 1
+               ;;
+           98)  # NOTE(review): presumably EADDRINUSE - confirm
+               # Couldn't bind, something already listening, next port...
+               continue
+               ;;
+           *)
+               ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
+$_out"
+               # assume not implemented
+               return 127
+       esac
+    done
+
+    return 0
 }
 
 ######################################################
@@ -375,7 +561,11 @@ startstop_nfs() {
                        service nfsserver stop > /dev/null 2>&1
                        ;;
                restart)
-                       service nfsserver restart
+                       echo 0 >/proc/fs/nfsd/threads
+                       service nfsserver stop > /dev/null 2>&1
+                       pkill -9 nfsd
+                       nfs_dump_some_threads
+                       service nfsserver start
                        ;;
                esac
                ;;
@@ -390,8 +580,13 @@ startstop_nfs() {
                        service nfslock stop > /dev/null 2>&1
                        ;;
                restart)
-                       service nfslock restart
-                       service nfs restart
+                       echo 0 >/proc/fs/nfsd/threads
+                       service nfs stop > /dev/null 2>&1
+                       service nfslock stop > /dev/null 2>&1
+                       pkill -9 nfsd
+                       nfs_dump_some_threads
+                       service nfslock start
+                       service nfs start
                        ;;
                esac
                ;;
@@ -402,6 +597,28 @@ startstop_nfs() {
        esac
 }
 
+# Dump at most $CTDB_NFS_DUMP_STUCK_THREADS nfsd thread backtraces.
+nfs_dump_some_threads ()
+{
+    [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] || return 0  # unset/empty: disabled
+
+    # Optimisation to avoid running an unnecessary pidof
+    [ "$CTDB_NFS_DUMP_STUCK_THREADS" -gt 0 ] || return 0
+
+    _count=0  # number of stack traces printed so far
+    for _pid in $(pidof nfsd) ; do
+       [ "$_count" -lt "$CTDB_NFS_DUMP_STUCK_THREADS" ] || break  # limit reached
+
+       # Do this first to avoid racing with thread exit
+       _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
+       if [ -n "$_stack" ] ; then
+           echo "Stack trace for stuck nfsd thread [${_pid}]:"
+           echo "$_stack"
+           _count=$(($_count + 1))
+       fi
+    done
+}
+
 ########################################################
 # start/stop the nfs lockmanager service on different platforms
 ########################################################
@@ -542,16 +759,17 @@ setup_iface_ip_readd_script()
 # ctdb_check_counter_limit succeeds when count >= <limit>
 ########################################################
 _ctdb_counter_common () {
-    _counter_file="$ctdb_fail_dir/$service_name"
+    _service_name="${1:-${service_name}}"  # optional $1 overrides $service_name
+    _counter_file="$ctdb_fail_dir/$_service_name"  # one counter file per name
     mkdir -p "${_counter_file%/*}" # dirname
 }
 ctdb_counter_init () {
-    _ctdb_counter_common
+    _ctdb_counter_common "$1"  # $1: optional counter name, sets $_counter_file
 
     >"$_counter_file"
 }
 ctdb_counter_incr () {
-    _ctdb_counter_common
+    _ctdb_counter_common "$1"
 
     # unary counting!
     echo -n 1 >> "$_counter_file"
@@ -565,10 +783,10 @@ ctdb_check_counter_limit () {
     # unary counting!
     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
     if [ $_size -ge $_limit ] ; then
-       echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
+       echo "ERROR: more than $_limit consecutive failures for $_service_name, marking cluster unhealthy"
        exit 1
     elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
-       echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
+       echo "WARNING: less than $_limit consecutive failures ($_size) for $_service_name, not unhealthy yet"
     fi
 }
 ctdb_check_counter_equal () {
@@ -644,6 +862,7 @@ ctdb_service_unset_reconfigure ()
 
 ctdb_service_reconfigure ()
 {
+    echo "Reconfiguring service \"$service_name\"..."
     if [ -n "$service_reconfigure" ] ; then
        eval $service_reconfigure
     else
@@ -662,51 +881,62 @@ ctdb_compat_managed_service ()
 
 is_ctdb_managed_service ()
 {
+    _service_name="${1:-${service_name}}"  # optional $1 overrides $service_name
+
     t=" $CTDB_MANAGED_SERVICES "
 
     ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
     ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
     ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
-    ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
+    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
     ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
     ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
     ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
     ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
+    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"  # same variable as "nfs"
 
-    # Returns 0 if "<space>$service_name<space>" appears in $t
-    [ "${t#* ${service_name} }" != "${t}" ]
+    # Returns 0 if "<space>$_service_name<space>" appears in $t
+    [ "${t#* ${_service_name} }" != "${t}" ]  # prefix strip changes $t only on a match
 }
 
 ctdb_start_stop_service ()
 {
-    _active="$ctdb_active_dir/$service_name"
+    # Do nothing unless CTDB_SERVICE_AUTOSTARTSTOP is set to "yes"
+    [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0
+
+    _service_name="${1:-${service_name}}"  # optional $1 overrides $service_name
+
+    [ "$event_name" = "monitor" ] || return 0  # only act during the "monitor" event
 
-    if is_ctdb_managed_service ; then
+    _active="$ctdb_active_dir/$_service_name"  # marker: touched after start, removed after stop
+    if is_ctdb_managed_service "$_service_name"; then
        if ! [ -e "$_active" ] ; then
-           echo "Starting service $service_name"
+           echo "Starting service $_service_name"
            ctdb_service_start || exit $?
            mkdir -p "$ctdb_active_dir"
            touch "$_active"
            exit 0
        fi
-    elif ! is_ctdb_managed_service ; then
+    else
        if [ -e "$_active" ] ; then
-           echo "Stopping service $service_name"
+           echo "Stopping service $_service_name"
+           CTDB_AUTOSTOPPING="$_service_name"  # NOTE(review): presumably read by the stop hook - confirm
            ctdb_service_stop || exit $?
            rm -f "$_active"
+           exit 0  # exit only when a stop actually happened
        fi
-       exit 0
     fi
 }
 
 ctdb_service_start ()
 {
     if [ -n "$service_start" ] ; then
-       eval $service_start
+       eval $service_start || return $?  # propagate a failed custom start command
     else
-       service "$service_name" start
+       service "$service_name" start || return $?  # propagate a failed start
     fi
     ctdb_counter_init
+    ctdb_check_tcp_init  # create the "just started" flag used by ctdb_check_tcp_ports
 }
 
 ctdb_service_stop ()
@@ -733,70 +963,6 @@ ctdb_standard_event_handler ()
     esac
 }
 
-ipv4_host_addr_to_net_addr()
-{
-       local HOST=$1
-       local MASKBITS=$2
-
-       local HOST0=$(echo $HOST | awk -F . '{print $4}')
-       local HOST1=$(echo $HOST | awk -F . '{print $3}')
-       local HOST2=$(echo $HOST | awk -F . '{print $2}')
-       local HOST3=$(echo $HOST | awk -F . '{print $1}')
-
-       local HOST_NUM=$(( $HOST0 + $HOST1 * 256 + $HOST2 * (256 ** 2) + $HOST3 * (256 ** 3) ))
-
-       local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
-
-       local NET_NUM=$(( $HOST_NUM & $MASK_NUM))
-
-       local NET0=$(( $NET_NUM & 255 ))
-       local NET1=$(( ($NET_NUM & (255 * 256)) / 256 ))
-       local NET2=$(( ($NET_NUM & (255 * 256**2)) / 256**2 ))
-       local NET3=$(( ($NET_NUM & (255 * 256**3)) / 256**3 ))
-
-       echo "$NET3.$NET2.$NET1.$NET0"
-}
-
-ipv4_maskbits_to_net_mask()
-{
-       local MASKBITS=$1
-
-       local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
-
-       local MASK0=$(( $MASK_NUM & 255 ))
-       local MASK1=$(( ($MASK_NUM & (255 * 256)) / 256 ))
-       local MASK2=$(( ($MASK_NUM & (255 * 256**2)) / 256**2 ))
-       local MASK3=$(( ($MASK_NUM & (255 * 256**3)) / 256**3 ))
-
-       echo "$MASK3.$MASK2.$MASK1.$MASK0"
-}
-
-ipv4_is_valid_addr()
-{
-       local ADDR=$1
-       local fail=0
-
-       local N=`echo $ADDR | sed -e 's/[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*//'`
-       test -n "$N" && fail=1
-
-       local ADDR0=$(echo $ADDR | awk -F . '{print $4}')
-       local ADDR1=$(echo $ADDR | awk -F . '{print $3}')
-       local ADDR2=$(echo $ADDR | awk -F . '{print $2}')
-       local ADDR3=$(echo $ADDR | awk -F . '{print $1}')
-
-       test "$ADDR0" -gt 255 && fail=1
-       test "$ADDR1" -gt 255 && fail=1
-       test "$ADDR2" -gt 255 && fail=1
-       test "$ADDR3" -gt 255 && fail=1
-
-       test x"$fail" != x"0" && {
-               #echo "IPv4: '$ADDR' is not a valid address"
-               return 1;
-       }
-
-       return 0;
-}
-
 # iptables doesn't like being re-entered, so flock-wrap it.
 iptables()
 {
@@ -875,3 +1041,4 @@ update_tickles ()
 script_name="${0##*/}"       # basename
 service_name="$script_name"  # default is just the script name
 service_fail_limit=1
+event_name="$1"