Eventscripts: Untested factorisations and introduction of status event.

author Martin Schwenke <martin@meltin.net>

Fri, 13 Nov 2009 07:28:25 +0000 (18:28 +1100)

committer Martin Schwenke <martin@meltin.net>

Fri, 13 Nov 2009 07:28:25 +0000 (18:28 +1100)
author Martin Schwenke <martin@meltin.net>
Fri, 13 Nov 2009 07:28:25 +0000 (18:28 +1100)
committer Martin Schwenke <martin@meltin.net>
Fri, 13 Nov 2009 07:28:25 +0000 (18:28 +1100)
diff --git a/ctdb/config/events.d/40.vsftpd b/ctdb/config/events.d/40.vsftpd

index 315c150262a6d566621bc01807707c2efc791949..a87b07b1e663cf44a9005f6c19ec61d9b2925ad4 100755 (executable)
--- a/ctdb/config/events.d/40.vsftpd
+++ b/ctdb/config/events.d/40.vsftpd
@@ -1,68 +1,61 @@
  #!/bin/sh
  # event strict to manage vsftpd in a cluster environment
  
+service_name="vsftpd"
+# make sure the service is stopped first
+service_start="service vsftpd stop > /dev/null 2>&1 ; service vsftpd start"
+service_stop="service vsftpd stop"
+service_reconfigure="service $service_name restart"
+service_fail_limit=2
+service_tcp_ports=21
+
  . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig vsftpd
  
-[ "$CTDB_MANAGES_VSFTPD" = "yes" ] || exit 0
+loadconfig
  
-cmd="$1"
-shift
+ctdb_start_stop_service
  
-# Count the number of monitor failures.  The cluster only becomes
-# unhealthy after 2 failures.
-VSFTPD_FAILS="fail-count"
-VSFTPD_LIMIT=2
+is_ctdb_managed_service || exit 0
  
  case $cmd in 
       startup)
-       /bin/mkdir -p $CTDB_BASE/state/vsftpd
-
-       # make sure the service is stopped first
-       service vsftpd stop > /dev/null 2>&1
-       service vsftpd start
-
-       ctdb_counter_init "$VSFTPD_FAILS"
+       ctdb_service_start
         ;;
  
       shutdown)
-       service vsftpd stop
-       ;;
-
-     takeip)
-       echo "restart" > $CTDB_BASE/state/vsftpd/restart
+       ctdb_service_stop
         ;;
  
-     releaseip)
-       echo "restart" > $CTDB_BASE/state/vsftpd/restart
+     takeip|releaseip)
+       ctdb_service_set_reconfigure
         ;;
  
       recovered)
         # if we have taken or released any ips we must 
         # restart vsftpd to ensure that all tcp connections are reset
-       [ -f $CTDB_BASE/state/vsftpd/restart ] && {
-               service vsftpd stop > /dev/null 2>&1
-               service vsftpd start
-               /bin/rm -f $CTDB_BASE/state/vsftpd/restart 2>/dev/null
-               ctdb_counter_init "$VSFTPD_FAILS"
-       } >/dev/null 2>&1
+       if ctdb_service_needs_reconfigure ; then
+           ctdb_service_reconfigure
+       fi
         ;;
  
       monitor)
-       # Subshell catches the "exit 1"
-       if (ctdb_check_tcp_ports "ftp" 21) ; then
-           ctdb_counter_init "$VSFTPD_FAILS"
-       else
-           ctdb_counter_incr "$VSFTPD_FAILS"
-           if ctdb_counter_limit "$VSFTPD_FAILS" $VSFTPD_LIMIT ; then
-               echo "ERROR: more than $VSFTPD_LIMIT consecutive failures, marking cluster unhealthy"
-               exit 1
+       if ctdb_service_needs_reconfigure ; then
+           ctdb_service_reconfigure
+           exit 0
+       fi
+
+       if [ -n "$service_tcp_ports" ] ; then
+           if ctdb_check_tcp_ports $service_tcp_ports ; then
+               ctdb_counter_init
             else
-               echo "WARNING: less than $VSFTPD_LIMIT consecutive failures, not unhealthy yet"
+               ctdb_counter_incr
+               ctdb_check_counter_limit
+               exit 0 # only count 1 failure per monitor event
             fi
-               
-       fi
+       fi      
+       ;;
+    status)
+       ctdb_checkstatus || exit $?
         ;;
  esac
  
diff --git a/ctdb/config/events.d/41.httpd b/ctdb/config/events.d/41.httpd

index d6e7f9923584693415d387b961aac905e060786f..421f42a86526449fde565c614c8f9acf0f52113d 100755 (executable)
--- a/ctdb/config/events.d/41.httpd
+++ b/ctdb/config/events.d/41.httpd
@@ -2,64 +2,57 @@
  # event script to manage httpd in a cluster environment
  
  . $CTDB_BASE/functions
-loadconfig ctdb
  
  detect_init_style
  
  case $CTDB_INIT_STYLE in
-       redhat)
-               CTDB_SERVICE_HTTP="httpd"
-               CTDB_CONFIG_HTTP="http"
-               ;;
-       suse)
-               CTDB_SERVICE_HTTP="apache2"
-               CTDB_CONFIG_HTTP="apache2"
-               ;;
-       debian)
-               CTDB_SERVICE_HTTP="apache2"
-               CTDB_CONFIG_HTTP="apache2"
-               ;;
-       *)
-               # should not happen.
-               # for now use red hat style as default
-               CTDB_SERVICE_HTTP="httpd"
-               CTDB_CONFIG_HTTP="http"
-               ;;
+    redhat)
+       service_name="httpd"
+       service_config="http"
+       ;;
+    suse|debian|*)
+       service_name="apache2"
+       service_config="apache2"
+       ;;
  esac
  
-loadconfig "${CTDB_CONFIG_HTTP}"
-
-[ "$CTDB_MANAGES_HTTPD" = "yes" ] || exit 0
-
-cmd="$1"
-shift
-
  # RHEL5 sometimes use a SIGKILL to terminate httpd, which then leaks
  # semaphores.  This is a hack to clean them up.
  cleanup_httpd_semaphore_leak() {
-    killall -q -0 "${CTDB_SERVICE_HTTP}" ||
+    killall -q -0 "$service_name" ||
      for i in $(ipcs -s | awk '$3 == "apache" { print $2 }') ; do
         ipcrm -s $i
      done
  }
  
+##########
+
+service_start="cleanup_httpd_semaphore_leak; service $service_name start"
+service_stop="service $service_name stop; killall -q -9 $service_name"
+service_reconfigure="service $service_name restart"
+
+loadconfig
+
+ctdb_start_stop_service
+
  case $cmd in
      startup)
-       cleanup_httpd_semaphore_leak
-       service "${CTDB_SERVICE_HTTP}" start
+       ctdb_service_start
         ;;
  
      shutdown)
-       service "${CTDB_SERVICE_HTTP}" stop
-       killall -q -9 "${CTDB_SERVICE_HTTP}"
+       ctdb_service_stop
         ;;
  
-     monitor)
-       ( ctdb_check_tcp_ports "http" 80 )
-       if [ $? -ne 0 ] ; then
+    monitor)
+       if ctdb_service_needs_reconfigure ; then
+           ctdb_service_reconfigure
+           exit 0
+       fi
+
+       if ! ctdb_check_tcp_ports 80 ; then
             echo "HTTPD is not running. Trying to restart HTTPD."
-           cleanup_httpd_semaphore_leak
-           service "${CTDB_SERVICE_HTTP}" start
+           ctdb_service_start
             exit 1
         fi
         ;;
diff --git a/ctdb/config/events.d/50.samba b/ctdb/config/events.d/50.samba

index 814fb9aec45df487589e0af149ccdd03c262d8a7..bd4c5ff0063f920aedd88aec44eb704d13be3842 100755 (executable)
--- a/ctdb/config/events.d/50.samba
+++ b/ctdb/config/events.d/50.samba
@@ -3,6 +3,8 @@
  
  PATH=/bin:/usr/bin:$PATH
  
+service_name="samba"
+
  . $CTDB_BASE/functions
  loadconfig ctdb
  loadconfig samba
@@ -245,7 +247,7 @@ case $cmd in
                 [ -z "$smb_ports" ] && {
                         smb_ports=`testparm_cat --parameter-name="smb ports"`
                 }
-               ctdb_check_tcp_ports "Samba" $smb_ports
+               ctdb_check_tcp_ports $smb_ports || exit $?
         }
  
         # check winbind is OK
diff --git a/ctdb/config/events.d/70.iscsi b/ctdb/config/events.d/70.iscsi

index 426e412d5107752ab83a40d06a312801d3f8f33e..b32494bc37c0bf4417da371eeff87c971e3487f3 100755 (executable)
--- a/ctdb/config/events.d/70.iscsi
+++ b/ctdb/config/events.d/70.iscsi
@@ -1,11 +1,9 @@
  #!/bin/sh
  # ctdb event script for TGTD based iSCSI
  
-PATH=/bin:/usr/bin:$PATH
+service_name="iscsi"
  
  . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig iscsi
  
  cmd="$1"
  shift
@@ -64,7 +62,7 @@ case $cmd in
  
       monitor)
         [ -f $CTDB_BASE/state/iscsi/iscsi_active ] && {
-               ctdb_check_tcp_ports "iscsi" 3260
+               ctdb_check_tcp_ports 3260 || exit $?
         }
         ;;
  esac
diff --git a/ctdb/config/functions b/ctdb/config/functions

index 043051b23ec92768e9638e28933f93091e7b1cbd..2b2f6b32babafdaad25402cedba1783e4f72375d 100644 (file)
--- a/ctdb/config/functions
+++ b/ctdb/config/functions
@@ -1,9 +1,17 @@
  # utility functions for ctdb event scripts
  
+PATH=/bin:/usr/bin:$PATH
+
  #######################################
  # pull in a system config file, if any
  loadconfig() {
      name="$1"
+
+    if [ -z "$name" ] ; then # no argument: load the ctdb config, then the service's own config
+       loadconfig ctdb # safe recursion: the argument is non-empty, so this branch is not re-entered
+       name="${service_config:-${service_name}}" # sourced by the lookup below; the recursive call clobbered the global $name
+    fi
+
      if [ -f /etc/sysconfig/$name ]; then
         . /etc/sysconfig/$name
      elif [ -f /etc/default/$name ]; then
@@ -31,37 +39,25 @@ detect_init_style() {
  ######################################################
  # simulate /sbin/service on platforms that don't have it
  service() { 
-  service_name="$1"
-  op="$2"
+  _service_name="$1"
+  _op="$2"
  
    # do nothing, when no service was specified
-  test "x$service_name" = "x" && return
+  test "x$_service_name" = "x" && return
  
    if [ -x /sbin/service ]; then
-      /sbin/service "$service_name" "$op"
-  elif [ -x /etc/init.d/$service_name ]; then
-      /etc/init.d/$service_name "$op"
-  elif [ -x /etc/rc.d/init.d/$service_name ]; then
-      /etc/rc.d/init.d/$service_name "$op"
+      /sbin/service "$_service_name" "$_op"
+  elif [ -x /etc/init.d/$_service_name ]; then
+      /etc/init.d/$_service_name "$_op"
+  elif [ -x /etc/rc.d/init.d/$_service_name ]; then
+      /etc/rc.d/init.d/$_service_name "$_op"
    fi
  }
  
  ######################################################
  # simulate /sbin/service (niced) on platforms that don't have it
  nice_service() { 
-  service_name="$1"
-  op="$2"
-
-  # do nothing, when no service was specified
-  test "x$service_name" = "x" && return
-
-  if [ -x /sbin/service ]; then
-      nice /sbin/service "$service_name" "$op"
-  elif [ -x /etc/init.d/$service_name ]; then
-      nice /etc/init.d/$service_name "$op"
-  elif [ -x /etc/rc.d/init.d/$service_name ]; then
-      nice /etc/rc.d/init.d/$service_name "$op"
-  fi
+    nice service "$@" # NOTE(review): nice(1) runs an external program, so this may bypass the service() shell function - confirm
  }
  
  ######################################################
@@ -110,13 +106,13 @@ ctdb_wait_tcp_ports() {
                   (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
               else 
                   echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
-                 return
+                 return 127
               fi
           done
           [ $all_ok -eq 1 ] || sleep 1
           ctdb status > /dev/null 2>&1 || {
                 echo "ctdb daemon has died. Exiting tcp wait $service_name"
-               exit 1
+               return 1
           }
    done
    echo "Local tcp services for $service_name are up"
@@ -202,10 +198,7 @@ ctdb_check_directories() {
  # usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
  ######################################################
  ctdb_check_tcp_ports() {
-  service_name="$1"
-  shift
-  wait_ports="$*"
-  [ -z "$wait_ports" ] && return;
+  [ -z "$1" ] && return;
  
    # check availability of netcat or netstat first
    NETCAT=""
@@ -224,7 +217,7 @@ ctdb_check_tcp_ports() {
        NETCAT=/bin/nc
    fi
  
-  for p in $wait_ports; do
+  for p ; do
        all_ok=1
  
        if [ "x${NETCAT}" != "x" ]; then
@@ -507,31 +500,161 @@ remove_ip() {
  # ctdb_counter_limit succeeds when count >= <limit>
  ########################################################
  _ctdb_counter_common () {
-    _tag="$1"
      _eventscript="${0##*/}" # basename
-
-    _counter_file="$CTDB_BASE/state/${_eventscript}-${_tag}"
+    _counter_file="$ctdb_fail_dir/${service_name:-${_eventscript}}"
      mkdir -p "${_counter_file%/*}" # dirname
  }
  ctdb_counter_init () {
-    _ctdb_counter_common "$1"
+    _ctdb_counter_common
  
      echo -n > "$_counter_file"
  }
  ctdb_counter_incr () {
-    _ctdb_counter_common "$1"
+    _ctdb_counter_common
  
      # unary counting!
      echo -n 1 >> "$_counter_file"
  }
-ctdb_counter_limit () {
-    _ctdb_counter_common "$1"
-    _limit="$2"
+ctdb_check_counter_limit () {
+    _ctdb_counter_common
  
      # unary counting!
      _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
-    [ $_size -ge $_limit ]
+    if [ $_size -ge $service_fail_limit ] ; then
+       echo "ERROR: more than $service_fail_limit consecutive failures, marking cluster unhealthy"
+       exit 1
+    else
+       echo "WARNING: less than $service_fail_limit consecutive failures, not unhealthy yet"
+    fi
+}
+########################################################
+
+ctdb_spool_dir="/var/spool/ctdb"
+ctdb_status_dir="$ctdb_spool_dir/status"
+ctdb_fail_dir="$ctdb_spool_dir/failcount"
+ctdb_active_dir="$ctdb_spool_dir/active"
+
+ctdb_checkstatus () # report the status recorded for $service_name: returns 0, 1 (unhealthy) or 2 (banned)
+{
+    if [ -r "$ctdb_status_dir/$service_name/unhealthy" ] ; then # "unhealthy" is checked before "banned"
+       log_status_cat "unhealthy" "$ctdb_status_dir/$service_name/unhealthy" # NOTE(review): log_status_cat is not defined in the visible hunks - confirm it exists
+       return 1
+    elif [ -r "$ctdb_status_dir/$service_name/banned" ] ; then
+       log_status_cat "banned" "$ctdb_status_dir/$service_name/banned"
+       return 2
+    else
+       return 0 # neither status file is readable
+    fi
+}
+
+ctdb_setstatus () # usage: ctdb_setstatus unhealthy|banned <file>, or any other first argument to clear both
+{
+    d="$ctdb_status_dir/$service_name" # note: d (and i below) are globals, not locals
+    case "$1" in
+       unhealthy|banned)
+           mkdir -p "$d"
+           cat "$2" >"$d/$1" # the contents of file $2 become the status file
+           ;;
+       *)
+           for i in "banned" "unhealthy" ; do # any other status removes both status files
+               rm -f "$d/$i"
+           done
+           ;;
+    esac
+}
+
+ctdb_service_needs_reconfigure () # succeeds if the reconfigure flag file exists for $service_name
+{
+    [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
+}
+
+ctdb_service_set_reconfigure () # create the reconfigure flag file for $service_name
+{
+    d="$ctdb_status_dir/$service_name" # note: d is a global, not a local
+    mkdir -p "$d"
+    >"$d/reconfigure" # bare redirection: creates (or truncates) the empty flag file
+}
+
+ctdb_service_unset_reconfigure () # remove the reconfigure flag file for $service_name, if any
+{
+    rm -f "$ctdb_status_dir/$service_name/reconfigure"
+}
+
+ctdb_service_reconfigure () # run $service_reconfigure (default: restart), then clear the flag and fail counter
+{
+    if [ -n "$service_reconfigure" ] ; then
+       eval "$service_reconfigure" # eval so that ';', redirections and quoting in the string are honoured
+    else
+       service "$service_name" restart
+    fi
+    ctdb_service_unset_reconfigure
+    ctdb_counter_init
+}
+
+ctdb_compat_managed_service () # usage: ctdb_compat_managed_service <value> <name>; helper for is_ctdb_managed_service
+{
+    if [ "$1" = "yes" ] ; then
+       t="$t $2 " # append <name> to the caller's global list $t, space-delimited
+    fi
+}
+
+is_ctdb_managed_service () # succeeds if $service_name is listed in $CTDB_MANAGED_SERVICES or enabled via CTDB_MANAGES_*
+{
+    t=" $CTDB_MANAGED_SERVICES " # padded with spaces so the whole-word match below works at both ends
+
+    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
+    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
+    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
+    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
+    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd apache2" # 41.httpd uses either name, depending on init style
+    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
+    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
+
+    # Returns 0 if "<space>$service_name<space>" appears in $t
+    [ "${t#* ${service_name} }" != "${t}" ]
+}
+
+ctdb_start_stop_service () # start or stop $service_name so that it matches its managed state; may exit the script
+{
+    _active="$ctdb_active_dir/$service_name" # flag file: created after a start here, removed after a stop here
+
+    if is_ctdb_managed_service ; then
+       if ! [ -e "$_active" ] ; then
+           echo "Starting service $service_name"
+           ctdb_service_start || exit $?
+           mkdir -p "$ctdb_active_dir"
+           touch "$_active"
+           exit 0 # the service was only just started: skip the rest of this event
+       fi
+    else # not managed: no need to evaluate is_ctdb_managed_service a second time
+       if [ -e "$_active" ] ; then
+           echo "Stopping service $service_name"
+           ctdb_service_stop || exit $?
+           rm -f "$_active"
+       fi
+       exit 0 # unmanaged services never process the event itself
+    fi
+}
+
+ctdb_service_start () # run $service_start (default: service start) and reset the fail counter; returns the start status on failure
+{
+    if [ -n "$service_start" ] ; then
+       eval "$service_start" || return $? # eval: the string may contain ';' and redirections (see 40.vsftpd, 41.httpd)
+    else
+       service "$service_name" start || return $? # propagate failure so that callers' "|| exit $?" can see it
+    fi
+    ctdb_counter_init
+}
+
+ctdb_service_stop () # run $service_stop (default: service stop); returns the status of the stop command
+{
+    if [ -n "$service_stop" ] ; then
+       eval "$service_stop" # eval: the string may contain ';' (41.httpd chains a killall after the stop)
+    else
+       service "$service_name" stop
+    fi
+}
+
  ########################################################
  # load a site local config file
  ########################################################
@@ -546,4 +669,5 @@ ctdb_counter_limit () {
         done
  }
  
-
+ctdb_event="$1" ; shift
+cmd="$ctdb_event"
author	Martin Schwenke <martin@meltin.net>
	Fri, 13 Nov 2009 07:28:25 +0000 (18:28 +1100)
committer	Martin Schwenke <martin@meltin.net>
	Fri, 13 Nov 2009 07:28:25 +0000 (18:28 +1100)
ctdb/config/events.d/40.vsftpd		patch \| blob \| history
ctdb/config/events.d/41.httpd		patch \| blob \| history
ctdb/config/events.d/50.samba		patch \| blob \| history
ctdb/config/events.d/70.iscsi		patch \| blob \| history
ctdb/config/functions		patch \| blob \| history