#######################################
# pull in a system config file, if any
-loadconfig() {
+_loadconfig() {
if [ -z "$1" ] ; then
foo="${service_config:-${service_name}}"
fi
}
+# Public entry point for loading configuration: passes all of its
+# arguments through unchanged to _loadconfig, which does the work.
+# NOTE(review): presumably kept as a separate thin wrapper so that
+# loadconfig can be redefined/hooked without touching _loadconfig -
+# confirm against callers.
+loadconfig () {
+ _loadconfig "$@"
+}
+
+##############################################################
+# Validate the number of arguments supplied for an IP event.
+# Called with the complete argument list; the 1st arg is the event
+# name and is included in the count:
+#   takeip, releaseip - event + interface, IP and maskbits (4 args)
+#   updateip          - event + old interface, new interface, IP
+#                       and maskbits (5 args)
+# Any other event is accepted without checking.  On a mismatch an
+# error message is printed and the script exits with status 1.
+ctdb_check_args ()
+{
+    if [ "$1" = "takeip" ] || [ "$1" = "releaseip" ] ; then
+        [ $# = 4 ] || {
+            echo "ERROR: must supply interface, IP and maskbits"
+            exit 1
+        }
+    elif [ "$1" = "updateip" ] ; then
+        [ $# = 5 ] || {
+            echo "ERROR: must supply old interface, new interface, IP and maskbits"
+            exit 1
+        }
+    fi
+}
+
##############################################################
# determine on what type of system (init style) we are running
detect_init_style() {
}
+######################################################
+# Write a value to a file under /proc/.  Writes go through this
+# wrapper so that they can be hooked for testing.
+# 1st arg is the path relative to /proc/, 2nd arg is the value to set.
+# The value is written with echo, so a trailing newline is appended.
+set_proc ()
+{
+ echo "$2" >"/proc/$1"
+}
+
+######################################################
+# Print the contents of a file under /proc/ on stdout.  Reads go
+# through this wrapper so that they can be hooked for testing.
+# 1st arg is the path relative to /proc/.
+# The exit status is that of cat.
+get_proc ()
+{
+ cat "/proc/$1"
+}
+
######################################################
# check that a rpc server is registered with portmap
# and responding to requests
progname="$1"
prognum="$2"
version="$3"
- rpcinfo -u localhost $prognum $version > /dev/null || {
- echo "ERROR: $progname not responding to rpc requests"
- exit 1
- }
+
+ ctdb_check_rpc_out=$(rpcinfo -u localhost $prognum $version 2>&1)
+ if [ $? -ne 0 ] ; then
+ ctdb_check_rpc_out="ERROR: $progname failed RPC check:
+$ctdb_check_rpc_out"
+ echo "$ctdb_check_rpc_out"
+ return 1
+ fi
}
######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports <ports...>
######################################################
-ctdb_check_tcp_ports() {
- for p ; do
- if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
- if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
- echo "ERROR: $service_name tcp port $p is not responding"
+# This flag file is created when a service is initially started.  It
+# is deleted the first time TCP port checks for that service succeed.
+# Until then ctdb_check_tcp_ports() prints a more subtle "error"
+# message if a port check fails.
+#
+# This helper only computes the path of the flag file: it sets
+# $_ctdb_service_started_file to "<service_name>.started" under
+# $ctdb_fail_dir.  It does not create or remove anything.
+_ctdb_check_tcp_common ()
+{
+ _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
+}
+
+# Create the "service started" flag file for the current
+# $service_name, creating its parent directory first if needed.
+ctdb_check_tcp_init ()
+{
+ _ctdb_check_tcp_common
+ mkdir -p "${_ctdb_service_started_file%/*}" # dirname
+ touch "$_ctdb_service_started_file"
+}
+
+ctdb_check_tcp_ports()
+{
+ if [ -z "$1" ] ; then
+ echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
+ exit 1
+ fi
+
+ # Set default value for CTDB_TCP_PORT_CHECKERS if unset.
+ # If any of these defaults are unsupported then this variable can
+ # be overridden in /etc/sysconfig/ctdb or via a file in
+ # /etc/ctdb/rc.local.d/.
+ : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
+
+ for _c in $CTDB_TCP_PORT_CHECKERS ; do
+ ctdb_check_tcp_ports_$_c "$@"
+ case "$?" in
+ 0)
+ _ctdb_check_tcp_common
+ rm -f "$_ctdb_service_started_file"
+ return 0
+ ;;
+ 1)
+ _ctdb_check_tcp_common
+ if [ ! -f "$_ctdb_service_started_file" ] ; then
+ echo "ERROR: $service_name tcp port $_p is not responding"
+ cat <<EOF
+$ctdb_check_tcp_ports_debug
+EOF
+ else
+ echo "INFO: $service_name tcp port $_p is not responding"
+ fi
+
return 1
- fi
- fi
+ ;;
+ 127)
+ : # Not implemented
+ ;;
+ *)
+
+ esac
+ done
+
+ echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
+
+ return 127
+}
+
+# Check TCP ports using the output of "netstat -l -t -n".
+# Arguments are the ports to check.  A port passes if netstat shows
+# it in LISTEN state on either 0.0.0.0 or ::.
+# Returns:
+#   0   - every port is listening
+#   1   - the first port found not listening; that port is left in
+#         $_p and the netstat output in $ctdb_check_tcp_ports_debug
+#   127 - running netstat exited with status 127; its output is left
+#         in $ctdb_check_tcp_ports_debug
+ctdb_check_tcp_ports_netstat ()
+{
+ _cmd='netstat -l -t -n'
+ _ns=$($_cmd 2>&1)
+ if [ $? -eq 127 ] ; then
+ # netstat probably not installed - unlikely?
+ ctdb_check_tcp_ports_debug="$_ns"
+ return 127
+ fi
+
+ for _p ; do # process each function argument (port)
+ # Try the IPv4 wildcard address first, then the IPv6 one
+ for _a in '0\.0\.0\.0' '::' ; do
+ _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
+ if echo "$_ns" | grep -E -q "$_pat" ; then
+ # We matched the port, so process next port
+ # ("continue 2" resumes the outer, per-port loop)
+ continue 2
+ fi
+ done
+
+ # We didn't match the port, so flag an error.
+ ctdb_check_tcp_ports_debug="$_cmd shows this output:
+$_ns"
+ return 1
done
+
+ return 0
+}
+
+# Check TCP ports by scanning 127.0.0.1 with nmap.
+# Arguments are the ports to check.
+# Returns:
+#   0   - nmap reports every port as open
+#   1   - the first port not reported as open; that port is left in
+#         $_p and the nmap output in $ctdb_check_tcp_ports_debug
+#   127 - running nmap exited with status 127 (taken to mean that
+#         nmap is not installed); its output is left in
+#         $ctdb_check_tcp_ports_debug
+ctdb_check_tcp_ports_nmap ()
+{
+    # nmap takes the ports as a single comma-separated list
+    _ports=""
+    for _p ; do
+        if [ -n "$_ports" ] ; then
+            _ports="${_ports},${_p}"
+        else
+            _ports="${_p}"
+        fi
+    done
+
+    _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
+
+    _nmap_out=$($_cmd 2>&1)
+    case $? in
+        127)
+            # nmap probably not installed
+            ctdb_check_tcp_ports_debug="$_nmap_out"
+            return 127
+            ;;
+    esac
+
+    # Keep only what follows "Ports:" on the lines that contain it
+    _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
+
+    for _p ; do
+        # An open port looks something like this, possibly followed
+        # by a comma:
+        #   445/open/tcp//microsoft-ds///
+        _t="$_p/open/tcp//"
+        case "$_port_info" in
+            # The entry must be at the very start of the string or
+            # directly after a space.
+            $_t*|*\ $_t*)
+                continue
+                ;;
+        esac
+
+        # Not reported as open, so flag an error...
+        ctdb_check_tcp_ports_debug="$_cmd shows this output:
+$_nmap_out"
+        return 1
+    done
+
+    return 0
+}
+
+# Use the new "ctdb checktcpport" command to check the port.
+# This is very cheap.
+# Arguments are the ports to check.  The sense of the command's exit
+# status is inverted: it tries to bind to the port, so success means
+# that nothing is listening there.
+# Returns:
+#   0   - "ctdb checktcpport" exited with 98 for every port
+#   1   - the command exited with 0 for a port (it was able to bind)
+#   127 - the command exited with any other status, which is assumed
+#         to mean "not implemented"
+# For non-zero returns the details are left in
+# $ctdb_check_tcp_ports_debug and the port in $_p.
+ctdb_check_tcp_ports_ctdb ()
+{
+ for _p ; do # process each function argument (port)
+ _cmd="ctdb checktcpport $_p"
+ _out=$($_cmd 2>&1)
+ _ret=$?
+ case "$_ret" in
+ 0)
+ ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
+ return 1
+ ;;
+ 98)
+ # Couldn't bind, something already listening, next port...
+ # NOTE(review): 98 is presumably EADDRINUSE - confirm.
+ continue
+ ;;
+ *)
+ ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
+$_out"
+ # assume not implemented
+ return 127
+ esac
+ done
+
+ return 0
}
######################################################
_failed=0
_killcount=0
- connfile="$CTDB_BASE/state/connections.$_IP"
+ connfile="$CTDB_VARDIR/state/connections.$_IP"
netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
_failed=0
_killcount=0
- connfile="$CTDB_BASE/state/connections.$_IP"
+ connfile="$CTDB_VARDIR/state/connections.$_IP"
netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
_failed=0
_killcount=0
- connfile="$CTDB_BASE/state/connections.$_IP"
+ connfile="$CTDB_VARDIR/state/connections.$_IP"
netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
service nfsserver stop > /dev/null 2>&1
;;
restart)
- service nfsserver restart
+ echo 0 >/proc/fs/nfsd/threads
+ service nfsserver stop > /dev/null 2>&1
+ pkill -9 nfsd
+ nfs_dump_some_threads
+ service nfsserver start
;;
esac
;;
service nfslock stop > /dev/null 2>&1
;;
restart)
- service nfslock restart
- service nfs restart
+ echo 0 >/proc/fs/nfsd/threads
+ service nfs stop > /dev/null 2>&1
+ service nfslock stop > /dev/null 2>&1
+ pkill -9 nfsd
+ nfs_dump_some_threads
+ service nfslock start
+ service nfs start
;;
esac
;;
esac
}
+# Dump stack traces for at most $CTDB_NFS_DUMP_STUCK_THREADS nfsd
+# threads.
+# Does nothing (and returns 0) if that variable is unset, empty or
+# not greater than 0.  Each stack is read from <pid>/stack via
+# get_proc.  Threads whose stack cannot be read, or is empty, are
+# skipped and do not count towards the limit.
+nfs_dump_some_threads ()
+{
+    [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] || return 0
+
+    # Optimisation to avoid running an unnecessary pidof
+    [ "$CTDB_NFS_DUMP_STUCK_THREADS" -gt 0 ] || return 0
+
+    _count=0
+    for _pid in $(pidof nfsd) ; do
+        # _count is the number of stacks already dumped, so this has
+        # to be a strict comparison; "-le" would allow one stack more
+        # than the configured limit.
+        [ "$_count" -lt "$CTDB_NFS_DUMP_STUCK_THREADS" ] || break
+
+        # Do this first to avoid racing with thread exit
+        _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
+        if [ -n "$_stack" ] ; then
+            echo "Stack trace for stuck nfsd thread [${_pid}]:"
+            echo "$_stack"
+            _count=$(($_count + 1))
+        fi
+    done
+}
+
########################################################
# start/stop the nfs lockmanager service on different platforms
########################################################
stop)
service nfsserver stop > /dev/null 2>&1
;;
+ restart)
+ service nfsserver stop
+ service nfsserver start
+ ;;
esac
;;
rhel)
stop)
service nfslock stop > /dev/null 2>&1
;;
+ restart)
+ service nfslock stop
+ service nfslock start
+ ;;
esac
;;
*)
local _iface=$1
local _ip=$2
local _maskbits=$3
- local _state_dir="$CTDB_BASE/state/interface_modify"
+ local _state_dir="$CTDB_VARDIR/state/interface_modify"
local _lockfile="$_state_dir/$_iface.flock"
local _readd_base="$_state_dir/$_iface.readd.d"
local _iface=$1
local _ip=$2
local _maskbits=$3
- local _state_dir="$CTDB_BASE/state/interface_modify"
+ local _state_dir="$CTDB_VARDIR/state/interface_modify"
local _lockfile="$_state_dir/$_iface.flock"
local _readd_base="$_state_dir/$_iface.readd.d"
local _ip=$2
local _maskbits=$3
local _readd_script=$4
- local _state_dir="$CTDB_BASE/state/interface_modify"
+ local _state_dir="$CTDB_VARDIR/state/interface_modify"
local _lockfile="$_state_dir/$_iface.flock"
local _readd_base="$_state_dir/$_iface.readd.d"
# ctdb_check_counter_limit succeeds when count >= <limit>
########################################################
_ctdb_counter_common () {
- _counter_file="$ctdb_fail_dir/$service_name"
+ _service_name="${1:-${service_name}}"
+ _counter_file="$ctdb_fail_dir/$_service_name"
mkdir -p "${_counter_file%/*}" # dirname
}
ctdb_counter_init () {
- _ctdb_counter_common
+ _ctdb_counter_common "$1"
>"$_counter_file"
}
ctdb_counter_incr () {
- _ctdb_counter_common
+ _ctdb_counter_common "$1"
# unary counting!
echo -n 1 >> "$_counter_file"
# unary counting!
_size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
if [ $_size -ge $_limit ] ; then
- echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
+ echo "ERROR: more than $_limit consecutive failures for $_service_name, marking cluster unhealthy"
exit 1
elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
- echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
+ echo "WARNING: less than $_limit consecutive failures ($_size) for $_service_name, not unhealthy yet"
+ fi
+}
+# Compare the failure counter of the current $service_name with
+# <limit> (1st arg).  The counter is unary: its value is the size in
+# bytes of the counter file, taken as 0 if stat fails.
+# NOTE: the return value is 1 when the count equals <limit> and 0
+# otherwise.
+# NOTE(review): _ctdb_counter_common is called without an argument,
+# so unlike ctdb_counter_init/ctdb_counter_incr this always uses
+# $service_name - confirm this is intended.
+ctdb_check_counter_equal () {
+ _ctdb_counter_common
+
+ _limit=$1
+
+ # unary counting!
+ _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+ if [ $_size -eq $_limit ] ; then
+ return 1
fi
+ return 0
}
+
########################################################
ctdb_spool_dir="/var/spool/ctdb"
ctdb_service_reconfigure ()
{
+ echo "Reconfiguring service \"$service_name\"..."
if [ -n "$service_reconfigure" ] ; then
eval $service_reconfigure
else
is_ctdb_managed_service ()
{
+ _service_name="${1:-${service_name}}"
+
t=" $CTDB_MANAGED_SERVICES "
ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp"
- ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
+ ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
+ ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs-ganesha-gpfs"
- # Returns 0 if "<space>$service_name<space>" appears in $t
- [ "${t#* ${service_name} }" != "${t}" ]
+ # Returns 0 if "<space>$_service_name<space>" appears in $t
+ [ "${t#* ${_service_name} }" != "${t}" ]
}
ctdb_start_stop_service ()
{
- _active="$ctdb_active_dir/$service_name"
+ # Do nothing unless configured to...
+ [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0
+
+ _service_name="${1:-${service_name}}"
- if is_ctdb_managed_service ; then
+ [ "$event_name" = "monitor" ] || return 0
+
+ _active="$ctdb_active_dir/$_service_name"
+ if is_ctdb_managed_service "$_service_name"; then
if ! [ -e "$_active" ] ; then
- echo "Starting service $service_name"
+ echo "Starting service $_service_name"
ctdb_service_start || exit $?
mkdir -p "$ctdb_active_dir"
touch "$_active"
exit 0
fi
- elif ! is_ctdb_managed_service ; then
+ else
if [ -e "$_active" ] ; then
- echo "Stopping service $service_name"
+ echo "Stopping service $_service_name"
+ CTDB_AUTOSTOPPING="$_service_name"
ctdb_service_stop || exit $?
rm -f "$_active"
+ exit 0
fi
- exit 0
fi
}
ctdb_service_start ()
{
if [ -n "$service_start" ] ; then
- eval $service_start
+ eval $service_start || return $?
else
- service "$service_name" start
+ service "$service_name" start || return $?
fi
ctdb_counter_init
+ ctdb_check_tcp_init
}
ctdb_service_stop ()
esac
}
-ipv4_host_addr_to_net_addr()
+# iptables doesn't like being re-entered, so flock-wrap it.
+iptables()
{
- local HOST=$1
- local MASKBITS=$2
-
- local HOST0=$(echo $HOST | awk -F . '{print $4}')
- local HOST1=$(echo $HOST | awk -F . '{print $3}')
- local HOST2=$(echo $HOST | awk -F . '{print $2}')
- local HOST3=$(echo $HOST | awk -F . '{print $1}')
-
- local HOST_NUM=$(( $HOST0 + $HOST1 * 256 + $HOST2 * (256 ** 2) + $HOST3 * (256 ** 3) ))
-
- local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
-
- local NET_NUM=$(( $HOST_NUM & $MASK_NUM))
-
- local NET0=$(( $NET_NUM & 255 ))
- local NET1=$(( ($NET_NUM & (255 * 256)) / 256 ))
- local NET2=$(( ($NET_NUM & (255 * 256**2)) / 256**2 ))
- local NET3=$(( ($NET_NUM & (255 * 256**3)) / 256**3 ))
-
- echo "$NET3.$NET2.$NET1.$NET0"
+ flock -w 30 /var/ctdb/iptables-ctdb.flock /sbin/iptables "$@"
}
-ipv4_maskbits_to_net_mask()
-{
- local MASKBITS=$1
-
- local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
-
- local MASK0=$(( $MASK_NUM & 255 ))
- local MASK1=$(( ($MASK_NUM & (255 * 256)) / 256 ))
- local MASK2=$(( ($MASK_NUM & (255 * 256**2)) / 256**2 ))
- local MASK3=$(( ($MASK_NUM & (255 * 256**3)) / 256**3 ))
+########################################################
+# tickle handling
+########################################################
- echo "$MASK3.$MASK2.$MASK1.$MASK0"
-}
+# Temporary directory for tickles.
+tickledir="$CTDB_VARDIR/state/tickles"
+mkdir -p "$tickledir"
-ipv4_is_valid_addr()
+update_tickles ()
{
- local ADDR=$1
- local fail=0
-
- local N=`echo $ADDR | sed -e 's/[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*//'`
- test -n "$N" && fail=1
-
- local ADDR0=$(echo $ADDR | awk -F . '{print $4}')
- local ADDR1=$(echo $ADDR | awk -F . '{print $3}')
- local ADDR2=$(echo $ADDR | awk -F . '{print $2}')
- local ADDR3=$(echo $ADDR | awk -F . '{print $1}')
-
- test "$ADDR0" -gt 255 && fail=1
- test "$ADDR1" -gt 255 && fail=1
- test "$ADDR2" -gt 255 && fail=1
- test "$ADDR3" -gt 255 && fail=1
-
- test x"$fail" != x"0" && {
- #echo "IPv4: '$ADDR' is not a valid address"
- return 1;
- }
+ _port="$1"
+
+ mkdir -p "$tickledir" # Just in case
+
+ # Who am I?
+ _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
+
+ # What public IPs do I hold?
+ _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
+
+ # IPs as a regexp choice
+ _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
+
+ # Record connections to our public IPs in a temporary file
+ _my_connections="${tickledir}/${_port}.connections"
+ rm -f "$_my_connections"
+ netstat -tn |
+ awk -v destpat="^${_ipschoice}:${_port}\$" \
+ '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
+ sort >"$_my_connections"
+
+ # Record our current tickles in a temporary file
+ _my_tickles="${tickledir}/${_port}.tickles"
+ rm -f "$_my_tickles"
+ for _i in $_ips ; do
+ ctdb -Y gettickles $_i $_port |
+ awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
+ done |
+ sort >"$_my_tickles"
+
+ # Add tickles for connections that we haven't already got tickles for
+ comm -23 "$_my_connections" "$_my_tickles" |
+ while read _src _dst ; do
+ ctdb addtickle $_src $_dst
+ done
- return 0;
-}
+ # Remove tickles for connections that are no longer there
+ comm -13 "$_my_connections" "$_my_tickles" |
+ while read _src _dst ; do
+ ctdb deltickle $_src $_dst
+ done
-# iptables doesn't like being re-entered, so flock-wrap it.
-iptables()
-{
- flock -w 30 /var/ctdb/iptables-ctdb.flock /sbin/iptables "$@"
+ rm -f "$_my_connections" "$_my_tickles"
}
########################################################
script_name="${0##*/}" # basename
service_name="$script_name" # default is just the script name
service_fail_limit=1
+event_name="$1"