Eventscripts: Untested factorisations and introduction of status event.
author: Martin Schwenke <martin@meltin.net>
Fri, 13 Nov 2009 07:28:25 +0000 (18:28 +1100)
committer: Martin Schwenke <martin@meltin.net>
Fri, 13 Nov 2009 07:28:25 +0000 (18:28 +1100)
This is the first stage of an experimental change to eventscripts.
Ronnie and I did a few hours of factorisation of 40.vsftpd and applied
many of the changes to 41.httpd.  Other eventscripts were also
modified.

At this stage this is completely untested.

Signed-off-by: Martin Schwenke <martin@meltin.net>
(This used to be ctdb commit 364e70b763f0ccd7714d15723ad3ea4d7e2968a1)

ctdb/config/events.d/40.vsftpd
ctdb/config/events.d/41.httpd
ctdb/config/events.d/50.samba
ctdb/config/events.d/70.iscsi
ctdb/config/functions

index 315c150262a6d566621bc01807707c2efc791949..a87b07b1e663cf44a9005f6c19ec61d9b2925ad4 100755 (executable)
@@ -1,68 +1,61 @@
 #!/bin/sh
 # event strict to manage vsftpd in a cluster environment
 
+service_name="vsftpd"
+# make sure the service is stopped first
+service_start="service vsftpd stop > /dev/null 2>&1 ; service vsftpd start"
+service_stop="service vsftpd stop"
+service_reconfigure="service $service_name restart"
+service_fail_limit=2
+service_tcp_ports=21
+
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig vsftpd
 
-[ "$CTDB_MANAGES_VSFTPD" = "yes" ] || exit 0
+loadconfig
 
-cmd="$1"
-shift
+ctdb_start_stop_service
 
-# Count the number of monitor failures.  The cluster only becomes
-# unhealthy after 2 failures.
-VSFTPD_FAILS="fail-count"
-VSFTPD_LIMIT=2
+is_ctdb_managed_service || exit 0
 
 case $cmd in 
      startup)
-       /bin/mkdir -p $CTDB_BASE/state/vsftpd
-
-       # make sure the service is stopped first
-       service vsftpd stop > /dev/null 2>&1
-       service vsftpd start
-
-       ctdb_counter_init "$VSFTPD_FAILS"
+       ctdb_service_start
        ;;
 
      shutdown)
-       service vsftpd stop
-       ;;
-
-     takeip)
-       echo "restart" > $CTDB_BASE/state/vsftpd/restart
+       ctdb_service_stop
        ;;
 
-     releaseip)
-       echo "restart" > $CTDB_BASE/state/vsftpd/restart
+     takeip|releaseip)
+       ctdb_service_set_reconfigure
        ;;
 
      recovered)
        # if we have taken or released any ips we must 
        # restart vsftpd to ensure that all tcp connections are reset
-       [ -f $CTDB_BASE/state/vsftpd/restart ] && {
-               service vsftpd stop > /dev/null 2>&1
-               service vsftpd start
-               /bin/rm -f $CTDB_BASE/state/vsftpd/restart 2>/dev/null
-               ctdb_counter_init "$VSFTPD_FAILS"
-       } >/dev/null 2>&1
+       if ctdb_service_needs_reconfigure ; then
+           ctdb_service_reconfigure
+       fi
        ;;
 
      monitor)
-       # Subshell catches the "exit 1"
-       if (ctdb_check_tcp_ports "ftp" 21) ; then
-           ctdb_counter_init "$VSFTPD_FAILS"
-       else
-           ctdb_counter_incr "$VSFTPD_FAILS"
-           if ctdb_counter_limit "$VSFTPD_FAILS" $VSFTPD_LIMIT ; then
-               echo "ERROR: more than $VSFTPD_LIMIT consecutive failures, marking cluster unhealthy"
-               exit 1
+       if ctdb_service_needs_reconfigure ; then
+           ctdb_service_reconfigure
+           exit 0
+       fi
+
+       if [ -n "$service_tcp_ports" ] ; then
+           if ctdb_check_tcp_ports $service_tcp_ports ; then
+               ctdb_counter_init
            else
-               echo "WARNING: less than $VSFTPD_LIMIT consecutive failures, not unhealthy yet"
+               ctdb_counter_incr
+               ctdb_check_counter_limit
+               exit 0 # only count 1 failure per monitor event
            fi
-               
-       fi
+       fi      
+       ;;
+    status)
+       ctdb_checkstatus || exit $?
        ;;
 esac
 
index d6e7f9923584693415d387b961aac905e060786f..421f42a86526449fde565c614c8f9acf0f52113d 100755 (executable)
@@ -2,64 +2,57 @@
 # event script to manage httpd in a cluster environment
 
 . $CTDB_BASE/functions
-loadconfig ctdb
 
 detect_init_style
 
 case $CTDB_INIT_STYLE in
-       redhat)
-               CTDB_SERVICE_HTTP="httpd"
-               CTDB_CONFIG_HTTP="http"
-               ;;
-       suse)
-               CTDB_SERVICE_HTTP="apache2"
-               CTDB_CONFIG_HTTP="apache2"
-               ;;
-       debian)
-               CTDB_SERVICE_HTTP="apache2"
-               CTDB_CONFIG_HTTP="apache2"
-               ;;
-       *)
-               # should not happen.
-               # for now use red hat style as default
-               CTDB_SERVICE_HTTP="httpd"
-               CTDB_CONFIG_HTTP="http"
-               ;;
+    redhat)
+       service_name="httpd"
+       service_config="http"
+       ;;
+    suse|debian|*)
+       service_name="apache2"
+       service_config="apache2"
+       ;;
 esac
 
-loadconfig "${CTDB_CONFIG_HTTP}"
-
-[ "$CTDB_MANAGES_HTTPD" = "yes" ] || exit 0
-
-cmd="$1"
-shift
-
 # RHEL5 sometimes use a SIGKILL to terminate httpd, which then leaks
 # semaphores.  This is a hack to clean them up.
 cleanup_httpd_semaphore_leak() {
-    killall -q -0 "${CTDB_SERVICE_HTTP}" ||
+    killall -q -0 "$service_name" ||
     for i in $(ipcs -s | awk '$3 == "apache" { print $2 }') ; do
        ipcrm -s $i
     done
 }
 
+##########
+
+service_start="cleanup_httpd_semaphore_leak; service $service_name start"
+service_stop="service $service_name stop; killall -q -9 $service_name"
+service_reconfigure="service $service_name restart"
+
+loadconfig
+
+ctdb_start_stop_service
+
 case $cmd in
     startup)
-       cleanup_httpd_semaphore_leak
-       service "${CTDB_SERVICE_HTTP}" start
+       ctdb_service_start
        ;;
 
     shutdown)
-       service "${CTDB_SERVICE_HTTP}" stop
-       killall -q -9 "${CTDB_SERVICE_HTTP}"
+       ctdb_service_stop
        ;;
 
-     monitor)
-       ( ctdb_check_tcp_ports "http" 80 )
-       if [ $? -ne 0 ] ; then
+    monitor)
+       if ctdb_service_needs_reconfigure ; then
+           ctdb_service_reconfigure
+           exit 0
+       fi
+
+       if ! ctdb_check_tcp_ports 80 ; then
            echo "HTTPD is not running. Trying to restart HTTPD."
-           cleanup_httpd_semaphore_leak
-           service "${CTDB_SERVICE_HTTP}" start
+           ctdb_service_start
            exit 1
        fi
        ;;
index 814fb9aec45df487589e0af149ccdd03c262d8a7..bd4c5ff0063f920aedd88aec44eb704d13be3842 100755 (executable)
@@ -3,6 +3,8 @@
 
 PATH=/bin:/usr/bin:$PATH
 
+service_name="samba"
+
 . $CTDB_BASE/functions
 loadconfig ctdb
 loadconfig samba
@@ -245,7 +247,7 @@ case $cmd in
                [ -z "$smb_ports" ] && {
                        smb_ports=`testparm_cat --parameter-name="smb ports"`
                }
-               ctdb_check_tcp_ports "Samba" $smb_ports
+               ctdb_check_tcp_ports $smb_ports || exit $?
        }
 
        # check winbind is OK
index 426e412d5107752ab83a40d06a312801d3f8f33e..b32494bc37c0bf4417da371eeff87c971e3487f3 100755 (executable)
@@ -1,11 +1,9 @@
 #!/bin/sh
 # ctdb event script for TGTD based iSCSI
 
-PATH=/bin:/usr/bin:$PATH
+service_name="iscsi"
 
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig iscsi
 
 cmd="$1"
 shift
@@ -64,7 +62,7 @@ case $cmd in
 
      monitor)
        [ -f $CTDB_BASE/state/iscsi/iscsi_active ] && {
-               ctdb_check_tcp_ports "iscsi" 3260
+               ctdb_check_tcp_ports 3260 || exit $?
        }
        ;;
 esac
index 043051b23ec92768e9638e28933f93091e7b1cbd..2b2f6b32babafdaad25402cedba1783e4f72375d 100644 (file)
@@ -1,9 +1,17 @@
 # utility functions for ctdb event scripts
 
+PATH=/bin:/usr/bin:$PATH
+
 #######################################
 # pull in a system config file, if any
 loadconfig() {
     name="$1"
+
+    if [ -n "$name" ] ; then
+       loadconfig ctdb
+       loadconfig "${service_config:-${service_name}}"
+    fi
+
     if [ -f /etc/sysconfig/$name ]; then
        . /etc/sysconfig/$name
     elif [ -f /etc/default/$name ]; then
@@ -31,37 +39,25 @@ detect_init_style() {
 ######################################################
 # simulate /sbin/service on platforms that don't have it
 service() { 
-  service_name="$1"
-  op="$2"
+  _service_name="$1"
+  _op="$2"
 
   # do nothing, when no service was specified
-  test "x$service_name" = "x" && return
+  test "x$_service_name" = "x" && return
 
   if [ -x /sbin/service ]; then
-      /sbin/service "$service_name" "$op"
-  elif [ -x /etc/init.d/$service_name ]; then
-      /etc/init.d/$service_name "$op"
-  elif [ -x /etc/rc.d/init.d/$service_name ]; then
-      /etc/rc.d/init.d/$service_name "$op"
+      /sbin/service "$_service_name" "$_op"
+  elif [ -x /etc/init.d/$_service_name ]; then
+      /etc/init.d/$_service_name "$_op"
+  elif [ -x /etc/rc.d/init.d/$_service_name ]; then
+      /etc/rc.d/init.d/$_service_name "$_op"
   fi
 }
 
 ######################################################
 # simulate /sbin/service (niced) on platforms that don't have it
 nice_service() { 
-  service_name="$1"
-  op="$2"
-
-  # do nothing, when no service was specified
-  test "x$service_name" = "x" && return
-
-  if [ -x /sbin/service ]; then
-      nice /sbin/service "$service_name" "$op"
-  elif [ -x /etc/init.d/$service_name ]; then
-      nice /etc/init.d/$service_name "$op"
-  elif [ -x /etc/rc.d/init.d/$service_name ]; then
-      nice /etc/rc.d/init.d/$service_name "$op"
-  fi
+    nice service "$@"
 }
 
 ######################################################
@@ -110,13 +106,13 @@ ctdb_wait_tcp_ports() {
                  (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
              else 
                  echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
-                 return
+                 return 127
              fi
          done
          [ $all_ok -eq 1 ] || sleep 1
          ctdb status > /dev/null 2>&1 || {
                echo "ctdb daemon has died. Exiting tcp wait $service_name"
-               exit 1
+               return 1
          }
   done
   echo "Local tcp services for $service_name are up"
@@ -202,10 +198,7 @@ ctdb_check_directories() {
 # usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
 ######################################################
 ctdb_check_tcp_ports() {
-  service_name="$1"
-  shift
-  wait_ports="$*"
-  [ -z "$wait_ports" ] && return;
+  [ -z "$1" ] && return;
 
   # check availability of netcat or netstat first
   NETCAT=""
@@ -224,7 +217,7 @@ ctdb_check_tcp_ports() {
       NETCAT=/bin/nc
   fi
 
-  for p in $wait_ports; do
+  for p ; do
       all_ok=1
 
       if [ "x${NETCAT}" != "x" ]; then
@@ -507,31 +500,161 @@ remove_ip() {
 # ctdb_counter_limit succeeds when count >= <limit>
 ########################################################
 _ctdb_counter_common () {
-    _tag="$1"
     _eventscript="${0##*/}" # basename
-
-    _counter_file="$CTDB_BASE/state/${_eventscript}-${_tag}"
+    _counter_file="$ctdb_fail_dir/${service_name:-${_eventscript}}"
     mkdir -p "${_counter_file%/*}" # dirname
 }
 ctdb_counter_init () {
-    _ctdb_counter_common "$1"
+    _ctdb_counter_common
 
     echo -n > "$_counter_file"
 }
 ctdb_counter_incr () {
-    _ctdb_counter_common "$1"
+    _ctdb_counter_common
 
     # unary counting!
     echo -n 1 >> "$_counter_file"
 }
-ctdb_counter_limit () {
-    _ctdb_counter_common "$1"
-    _limit="$2"
+ctdb_check_counter_limit () {
+    _ctdb_counter_common
 
     # unary counting!
     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
-    [ $_size -ge $_limit ]
+    if [ $_size -ge $service_fail_limit ] ; then
+       echo "ERROR: more than $service_fail_limit consecutive failures, marking cluster unhealthy"
+       exit 1
+    else
+       echo "WARNING: less than $service_fail_limit consecutive failures, not unhealthy yet"
+    fi
+}
+########################################################
+
+ctdb_spool_dir="/var/spool/ctdb"
+ctdb_status_dir="$ctdb_spool_dir/status"
+ctdb_fail_dir="$ctdb_spool_dir/failcount"
+ctdb_active_dir="$ctdb_spool_dir/active"
+
+ctdb_checkstatus ()
+{
+    if [ -r "$ctdb_status_dir/$service_name/unhealthy" ] ; then
+       log_status_cat "unhealthy" "$ctdb_status_dir/$service_name/unhealthy"
+       return 1
+    elif [ -r "$ctdb_status_dir/$service_name/banned" ] ; then
+       log_status_cat "banned" "$ctdb_status_dir/$service_name/banned"
+       return 2
+    else
+       return 0
+    fi
+}
+
+ctdb_setstatus ()
+{
+    d="$ctdb_status_dir/$service_name"
+    case "$1" in
+       unhealthy|banned)
+           mkdir -p "$d"
+           cat "$2" >"$d/$1"
+           ;;
+       *)
+           for i in "banned" "unhealthy" ; do
+               rm -f "$d/$i"
+           done
+           ;;
+    esac
+}
+
+# Succeed (exit status 0) when a "reconfigure" marker exists for
+# $service_name under $ctdb_status_dir; fail otherwise.
+ctdb_service_needs_reconfigure ()
+{
+    [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
 }
+
+# Flag $service_name as needing reconfiguration by creating an empty
+# "reconfigure" marker in its status directory (created if missing).
+ctdb_service_set_reconfigure ()
+{
+    d="$ctdb_status_dir/$service_name"
+    mkdir -p "$d"
+    # Redirection with no command: creates (or truncates) the marker.
+    >"$d/reconfigure"
+}
+
+# Remove the "reconfigure" marker of $service_name; rm -f makes this
+# a no-op when the marker does not exist.
+ctdb_service_unset_reconfigure ()
+{
+    rm -f "$ctdb_status_dir/$service_name/reconfigure"
+}
+
+# Apply a pending reconfiguration to $service_name.
+# Runs the command line stored in $service_reconfigure when it is set,
+# otherwise "service $service_name restart"; afterwards the
+# reconfigure marker is removed and the failure counter is reset.
+ctdb_service_reconfigure ()
+{
+    if [ -n "$service_reconfigure" ] ; then
+       # The variable holds a shell command line, so it must be
+       # evaluated: a plain expansion would pass ";" and redirections
+       # as literal arguments instead of interpreting them.
+       eval "$service_reconfigure"
+    else
+       service "$service_name" restart
+    fi
+    ctdb_service_unset_reconfigure
+    ctdb_counter_init
+}
+
+# Backward-compatibility helper for is_ctdb_managed_service.
+#   $1 - value of a legacy CTDB_MANAGES_* setting
+#   $2 - service name to append to the list in $t when $1 is "yes"
+# Always returns 0.
+ctdb_compat_managed_service ()
+{
+    case "$1" in
+       yes)
+           t="$t $2 "
+           ;;
+    esac
+}
+
+is_ctdb_managed_service ()
+{
+    t=" $CTDB_MANAGED_SERVICES "
+
+    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
+    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
+    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
+    ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
+    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
+    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
+    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
+
+    # Returns 0 if "<space>$service_name<space>" appears in $t
+    [ "${t#* ${service_name} }" != "${t}" ]
+}
+
+# Bring the running state of $service_name in line with whether ctdb
+# manages it, tracked by a marker file in $ctdb_active_dir.
+#   - not managed: stop the service if the marker exists, remove the
+#     marker, then exit the calling script with status 0.
+#   - managed, no marker: start the service, create the marker, then
+#     exit the calling script with status 0.
+#   - managed, marker present: return so the caller handles the event.
+# A failing start or stop exits the script with that failure status.
+ctdb_start_stop_service ()
+{
+    _active="$ctdb_active_dir/$service_name"
+
+    if ! is_ctdb_managed_service ; then
+       if [ -e "$_active" ] ; then
+           echo "Stopping service $service_name"
+           ctdb_service_stop || exit $?
+           rm -f "$_active"
+       fi
+       exit 0
+    fi
+
+    if [ ! -e "$_active" ] ; then
+       echo "Starting service $service_name"
+       ctdb_service_start || exit $?
+       mkdir -p "$ctdb_active_dir"
+       touch "$_active"
+       exit 0
+    fi
+}
+
+# Start $service_name and reset its failure counter.
+# Runs the command line stored in $service_start when it is set,
+# otherwise "service $service_name start".  The return status is that
+# of ctdb_counter_init, the last command run.
+ctdb_service_start ()
+{
+    if [ -n "$service_start" ] ; then
+       # $service_start is a shell command line that may contain ";"
+       # and redirections (e.g. "service x stop > /dev/null 2>&1 ;
+       # service x start").  It has to go through eval; a plain
+       # expansion would hand those characters to the first command
+       # as literal arguments and never run the rest.
+       eval "$service_start"
+    else
+       service "$service_name" start
+    fi
+    ctdb_counter_init
+}
+
+# Stop $service_name.
+# Runs the command line stored in $service_stop when it is set,
+# otherwise "service $service_name stop".  The return status is that
+# of the last command run.
+ctdb_service_stop ()
+{
+    if [ -n "$service_stop" ] ; then
+       # $service_stop may chain several commands with ";" (e.g. a
+       # stop followed by killall); eval makes the shell parse the
+       # string as a command line instead of one command's arguments.
+       eval "$service_stop"
+    else
+       service "$service_name" stop
+    fi
+}
+
 ########################################################
 # load a site local config file
 ########################################################
@@ -546,4 +669,5 @@ ctdb_counter_limit () {
        done
 }
 
-
+ctdb_event="$1" ; shift
+cmd="$ctdb_event"