1 # Hey Emacs, this is a -*- shell-script -*- !!!
3 # utility functions for ctdb event scripts
5 [ -z "$CTDB_VARDIR" ] && {
6 if [ -d "/var/lib/ctdb" ] ; then
7 export CTDB_VARDIR="/var/lib/ctdb"
9 export CTDB_VARDIR="/var/ctdb"
12 [ -z "$CTDB_ETCDIR" ] && {
13 export CTDB_ETCDIR="/etc"
16 #######################################
17 # pull in a system config file, if any
21 foo="${service_config:-${service_name}}"
22 if [ -n "$foo" ] ; then
28 if [ "$1" != "ctdb" ] ; then
36 if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
37 . $CTDB_ETCDIR/sysconfig/$1
38 elif [ -f $CTDB_ETCDIR/default/$1 ]; then
39 . $CTDB_ETCDIR/default/$1
40 elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
41 . $CTDB_BASE/sysconfig/$1
44 if [ "$1" = "ctdb" ] ; then
45 _config="${CTDB_BASE}/ctdbd.conf"
46 if [ -r "$_config" ] ; then
56 ##############################################################
58 # CTDB_SCRIPT_DEBUGLEVEL can be overridden by setting it in a
62 if [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] ; then
63 # If there are arguments then echo them. Otherwise expect to
64 # use stdin, which allows us to pass lots of debug using a
87 # Log given message or stdin to either syslog or a CTDB log file
88 # $1 is the tag passed to logger if syslog is in use.
93 case "$CTDB_LOGGING" in
95 if [ -n "$CTDB_LOGGING" ] ; then
96 _file="${CTDB_LOGGING#file:}"
98 _file="/var/log/log.ctdb"
101 if [ -n "$*" ] ; then
109 # Handle all syslog:* variants here too. There's no tool to do
110 # the lossy things, so just use logger.
111 logger -t "ctdbd: ${_tag}" $*
116 # When things are run in the background in an eventscript then logging
117 # output might get lost. This is the "solution". :-)
118 background_with_logging ()
121 "$@" 2>&1 </dev/null |
122 script_log "${script_name}&"
128 ##############################################################
129 # check number of args for different events
135 echo "ERROR: must supply interface, IP and maskbits"
141 echo "ERROR: must supply old interface, new interface, IP and maskbits"
148 ##############################################################
149 # determine on what type of system (init style) we are running
152 # only do detection if not already set:
153 [ -z "$CTDB_INIT_STYLE" ] || return
155 if [ -x /sbin/startproc ]; then
156 CTDB_INIT_STYLE="suse"
157 elif [ -x /sbin/start-stop-daemon ]; then
158 CTDB_INIT_STYLE="debian"
160 CTDB_INIT_STYLE="redhat"
164 ######################################################
165 # simulate /sbin/service on platforms that don't have it
166 # _service() makes it easier to hook the service() function for
173 # do nothing, when no service was specified
174 [ -z "$_service_name" ] && return
176 if [ -x /sbin/service ]; then
177 $_nice /sbin/service "$_service_name" "$_op"
178 elif [ -x /usr/sbin/service ]; then
179 $_nice /usr/sbin/service "$_service_name" "$_op"
180 elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
181 $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
182 elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
183 $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
193 ######################################################
194 # simulate /sbin/service (niced) on platforms that don't have it
201 ######################################################
202 # Cached retrieval of PNN from local node. This never changes so why
203 # open a client connection to the server each time this is needed?
204 # This sets $pnn - this avoids an unnecessary subprocess.
207 _pnn_file="$CTDB_VARDIR/state/my-pnn"
208 if [ ! -f "$_pnn_file" ] ; then
209 ctdb pnn | sed -e 's@.*:@@' >"$_pnn_file"
212 read pnn <"$_pnn_file"
215 ######################################################
216 # wrapper around /proc/ settings to allow them to be hooked
218 # 1st arg is relative path under /proc/, 2nd arg is value to set
221 echo "$2" >"/proc/$1"
226 if [ -w "/proc/$1" ] ; then
231 ######################################################
232 # wrapper around getting file contents from /proc/ to allow
233 # this to be hooked for testing
234 # 1st arg is relative path under /proc/
240 ######################################################
241 # Print up to $_max kernel stack traces for processes named $_prog
242 program_stack_traces ()
248 for _pid in $(pidof "$_prog") ; do
249 [ $_count -le $_max ] || break
251 # Do this first to avoid racing with process exit
252 _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
253 if [ -n "$_stack" ] ; then
254 echo "Stack trace for ${_prog}[${_pid}]:"
256 _count=$(($_count + 1))
261 ######################################################
262 # Check the health of NFS services
264 # Use .check files in given directory.
265 # Default is "${CTDB_BASE}/nfs-checks.d/"
266 ######################################################
267 nfs_check_services ()
269 _dir="${1:-${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}}"
271 # Files must end with .check - avoids editor backups, RPM fu, ...
272 for _f in "$_dir"/[0-9][0-9].*.check ; do
274 _progname="${_t##*/[0-9][0-9].}"
276 nfs_check_service "$_progname" <"$_f"
280 ######################################################
281 # Check the health of an NFS service
283 # $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
285 # Reads variables from stdin
289 # * family - "tcp" or "udp" or space separated list
291 # * version - optional, RPC service version number
292 # default is to omit to check for any version
293 # * unhealthy_after - number of check fails before unhealthy
295 # * restart_every - number of check fails before restart
296 # default: 0, meaning no restart
297 # * service_stop_cmd - command to stop service
298 # default: no default, must be provided if
300 # * service_start_cmd - command to start service
301 # default: no default, must be provided if
303 # * service_debug_cmd - command to debug a service after trying to stop it;
304 # for example, it can be useful to print stack
305 # traces of threads that have not exited, since
306 # they may be stuck doing I/O;
307 # no default, see also function program_stack_traces()
309 # Quoting in values is not preserved
311 ######################################################
317 # Subshell to restrict scope variables...
328 # Eval line-by-line. Expands variable references in values.
329 # Also allows variable name checking, which seems useful.
330 while read _line ; do
332 \#*|"") : ;; # Ignore comments, blank lines
335 unhealthy_after=*|restart_every=*|\
336 service_stop_cmd=*|service_start_cmd=*|\
342 echo "ERROR: Unknown variable for ${_progname}: ${_line}"
347 _service_name="nfs_${_progname}"
349 if nfs_check_rpcinfo \
350 "$_progname" "$version" "$family" >/dev/null ; then
351 if [ $unhealthy_after -ne 1 -o $restart_every -ne 0 ] ; then
352 ctdb_counter_init "$_service_name"
357 ctdb_counter_incr "$_service_name"
358 _failcount=$(ctdb_counter_get "$_service_name")
361 if [ $unhealthy_after -gt 0 ] ; then
362 if [ $_failcount -ge $unhealthy_after ] ; then
364 echo "ERROR: $ctdb_check_rpc_out"
368 if [ $restart_every -gt 0 ] ; then
369 if [ $(($_failcount % $restart_every)) -eq 0 ] ; then
370 if ! $_unhealthy ; then
371 echo "WARNING: $ctdb_check_rpc_out"
377 if $_unhealthy ; then
385 # Uses: service_stop_cmd, service_start_cmd, service_debug_cmd
386 nfs_restart_service ()
388 if [ -z "$service_stop_cmd" -o -z "$service_start_cmd" ] ; then
389 die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
392 echo "Trying to restart service \"${_progname}\"..."
393 # Using eval means variables can contain semicolon separated commands
394 eval "$service_stop_cmd"
395 if [ -n "$service_debug_cmd" ] ; then
396 eval "$service_debug_cmd"
398 background_with_logging eval "$service_start_cmd"
401 ######################################################
402 # Check an RPC service with rpcinfo
403 ######################################################
406 _progname="$1" # passed to rpcinfo (looked up in /etc/rpc)
407 _version="$2" # optional, not passed if empty/unset
408 _family="${3:-tcp}" # optional, default is "tcp"
410 _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
412 if ! ctdb_check_rpc_out=$(rpcinfo -T $_family $_localhost \
413 $_progname $_version 2>&1) ; then
414 ctdb_check_rpc_out="$_progname failed RPC check:
416 echo "$ctdb_check_rpc_out"
423 _progname="$1" # passed to rpcinfo (looked up in /etc/rpc)
424 _versions="$2" # optional, space separated, not passed if empty/unset
425 _families="${3:-tcp}" # optional, space separated, default is "tcp"
427 for _family in $_families ; do
428 if [ -n "$_versions" ] ; then
429 for _version in $_versions ; do
430 ctdb_check_rpc $_progname $_version $_family || return $?
433 ctdb_check_rpc $_progname "" $_family || return $?
438 ######################################################
439 # Ensure $service_name is set
440 assert_service_name ()
442 [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
445 ######################################################
446 # check a set of directories is available
447 # return 1 on a missing directory
448 # directories are read from stdin
449 ######################################################
450 ctdb_check_directories_probe()
452 while IFS="" read d ; do
458 [ -d "${d}/." ] || return 1
463 ######################################################
464 # check a set of directories is available
465 # directories are read from stdin
466 ######################################################
467 ctdb_check_directories()
469 ctdb_check_directories_probe || {
470 echo "ERROR: $service_name directory \"$d\" not available"
475 ######################################################
476 # check a set of tcp ports
477 # usage: ctdb_check_tcp_ports <ports...>
478 ######################################################
480 # This flag file is created when a service is initially started. It
481 # is deleted the first time TCP port checks for that service succeed.
482 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
483 # message if a port check fails.
484 _ctdb_check_tcp_common ()
487 _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
490 ctdb_check_tcp_init ()
492 _ctdb_check_tcp_common
493 mkdir -p "${_ctdb_service_started_file%/*}" # dirname
494 touch "$_ctdb_service_started_file"
497 # Check whether something is listening on all of the given TCP ports
498 # using the "ctdb checktcpport" command.
499 ctdb_check_tcp_ports()
501 if [ -z "$1" ] ; then
502 echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
506 for _p ; do # process each function argument (port)
507 _cmd="ctdb checktcpport $_p"
512 _ctdb_check_tcp_common
513 if [ ! -f "$_ctdb_service_started_file" ] ; then
514 echo "ERROR: $service_name tcp port $_p is not responding"
515 debug "\"ctdb checktcpport $_p\" was able to bind to port"
517 echo "INFO: $service_name tcp port $_p is not responding"
523 # Couldn't bind, something already listening, next port...
527 echo "ERROR: unexpected error running \"ctdb checktcpport\""
529 ctdb checktcpport (exited with $_ret) with output:
536 # All ports listening
537 _ctdb_check_tcp_common
538 rm -f "$_ctdb_service_started_file"
542 ######################################################
543 # check a unix socket
544 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
545 ######################################################
546 ctdb_check_unix_socket() {
548 [ -z "$socket_path" ] && return
550 if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
551 echo "ERROR: $service_name socket $socket_path not found"
556 ######################################################
557 # check a command returns zero status
558 # usage: ctdb_check_command <command>
559 ######################################################
560 ctdb_check_command ()
562 _out=$("$@" 2>&1) || {
563 echo "ERROR: $* returned error"
569 ################################################
570 # kill off any TCP connections with the given IP
571 ################################################
572 kill_tcp_connections ()
577 if [ "$2" = "oneway" ] ; then
581 get_tcp_connections_for_ip "$_ip" | {
586 while read _dst _src; do
587 _destport="${_dst##*:}"
590 # we only do one-way killtcp for CIFS
591 139|445) __oneway=true ;;
594 echo "Killing TCP connection $_src $_dst"
595 _connections="${_connections}${_nl}${_src} ${_dst}"
596 if ! $__oneway ; then
597 _connections="${_connections}${_nl}${_dst} ${_src}"
600 _killcount=$(($_killcount + 1))
603 if [ $_killcount -eq 0 ] ; then
607 echo "$_connections" | ctdb killtcp || {
608 echo "Failed to send killtcp control"
614 _remaining=$(get_tcp_connections_for_ip $_ip | wc -l)
616 if [ $_remaining -eq 0 ] ; then
617 echo "Killed $_killcount TCP connections to released IP $_ip"
621 _count=$(($_count + 1))
622 if [ $_count -gt 3 ] ; then
623 echo "Timed out killing tcp connections for IP $_ip ($_remaining remaining)"
627 echo "Waiting for $_remaining connections to be killed for IP $_ip"
633 ##################################################################
634 # kill off the local end for any TCP connections with the given IP
635 ##################################################################
636 kill_tcp_connections_local_only ()
638 kill_tcp_connections "$1" "oneway"
641 ##################################################################
642 # tickle any TCP connections with the given IP
643 ##################################################################
644 tickle_tcp_connections ()
648 get_tcp_connections_for_ip "$_ip" |
652 while read dest src; do
653 echo "Tickle TCP connection $src $dest"
654 ctdb tickle $src $dest >/dev/null 2>&1 || _failed=true
655 echo "Tickle TCP connection $dest $src"
656 ctdb tickle $dest $src >/dev/null 2>&1 || _failed=true
660 echo "Failed to send tickle control"
665 get_tcp_connections_for_ip ()
669 netstat -tn | awk -v ip=$_ip \
670 'index($1, "tcp") == 1 && \
671 (index($4, ip ":") == 1 || index($4, "::ffff:" ip ":") == 1) \
672 && $6 == "ESTABLISHED" \
676 ##################################################################
677 # use statd-callout to update NFS lock info
678 ##################################################################
679 nfs_update_lock_info ()
681 if [ -x "$CTDB_BASE/statd-callout" ] ; then
682 "$CTDB_BASE/statd-callout" update
686 ########################################################
687 # start/stop the Ganesha nfs service
688 ########################################################
691 _service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
694 service "$_service_name" start
697 service "$_service_name" stop
700 service "$_service_name" stop
701 nfs_dump_some_threads "rpc.statd"
702 service "$_service_name" start
707 # Dump up to the configured number of nfsd thread backtraces.
708 nfs_dump_some_threads ()
712 _num="${CTDB_NFS_DUMP_STUCK_THREADS:-5}"
713 [ $_num -gt 0 ] || return 0
715 program_stack_traces "$_prog" $_num
718 ########################################################
726 # Ensure interface is up
727 ip link set "$_iface" up || \
728 die "Failed to bringup interface $_iface"
730 # Only need to define broadcast for IPv4
736 ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || {
737 echo "Failed to add $_ip/$_maskbits on dev $_iface"
741 # Wait 5 seconds for IPv6 addresses to stop being tentative...
742 if [ -z "$_bcast" ] ; then
743 for _x in $(seq 1 10) ; do
744 ip addr show to "${_ip}/128" | grep -q "tentative" || break
748 # If the address was a duplicate then it won't be on the
749 # interface so flag an error.
750 _t=$(ip addr show to "${_ip}/128")
753 echo "Failed to add $_ip/$_maskbits on dev $_iface"
756 *tentative*|*dadfailed*)
757 echo "Failed to add $_ip/$_maskbits on dev $_iface"
758 ip addr del "$_ip/$_maskbits" dev "$_iface"
765 delete_ip_from_iface()
771 # This could be set globally for all interfaces but it is probably
772 # better to avoid surprises, so limit it to the interfaces where CTDB
773 # has public IP addresses. There isn't anywhere else convenient
774 # to do this so just set it each time. This is much cheaper than
775 # remembering and re-adding secondaries.
776 set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1
778 ip addr del "$_ip/$_maskbits" dev "$_iface" || {
779 echo "Failed to del $_ip on dev $_iface"
784 # If the given IP is hosted then print 3 items: maskbits, iface and family
790 *:*) _family="inet6" ; _bits=128 ;;
791 *) _family="inet" ; _bits=32 ;;
794 ip addr show to "${_addr}/${_bits}" 2>/dev/null | \
795 awk -v family="${_family}" \
796 'NR == 1 { iface = $2; sub(":$", "", iface) ; \
797 sub("@.*", "", iface) } \
798 $1 ~ /inet/ { mask = $2; sub(".*/", "", mask); \
799 print mask, iface, family }'
804 _addr="${1%/*}" # Remove optional maskbits
806 set -- $(ip_maskbits_iface $_addr)
807 if [ -n "$1" ] ; then
810 echo "Removing public address $_addr/$_maskbits from device $_iface"
811 delete_ip_from_iface $_iface $_addr $_maskbits >/dev/null 2>&1
815 drop_all_public_ips ()
817 while read _ip _x ; do
819 done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
824 set_proc_maybe sys/net/ipv4/route/flush 1
825 set_proc_maybe sys/net/ipv6/route/flush 1
828 ########################################################
830 _ctdb_counter_common () {
831 _service_name="${1:-${service_name:-${script_name}}}"
832 _counter_file="$ctdb_fail_dir/$_service_name"
833 mkdir -p "${_counter_file%/*}" # dirname
835 ctdb_counter_init () {
836 _ctdb_counter_common "$1"
840 ctdb_counter_incr () {
841 _ctdb_counter_common "$1"
844 echo -n 1 >> "$_counter_file"
846 ctdb_counter_get () {
847 _ctdb_counter_common "$1"
849 stat -c "%s" "$_counter_file" 2>/dev/null || echo 0
851 ctdb_check_counter () {
852 _msg="${1:-error}" # "error" - anything else is silent on fail
853 _op="${2:--ge}" # an integer operator supported by test
854 _limit="${3:-${service_fail_limit}}"
857 _size=$(ctdb_counter_get "$1")
860 if [ "$_op" != "%" ] ; then
861 if [ $_size $_op $_limit ] ; then
865 if [ $(($_size $_op $_limit)) -eq 0 ] ; then
870 if [ "$_msg" = "error" ] ; then
871 echo "ERROR: $_size consecutive failures for $_service_name, marking node unhealthy"
879 ########################################################
881 ctdb_status_dir="$CTDB_VARDIR/state/service_status"
882 ctdb_fail_dir="$CTDB_VARDIR/state/failcount"
884 ctdb_setup_service_state_dir ()
886 service_state_dir="$CTDB_VARDIR/state/service_state/${1:-${service_name}}"
887 mkdir -p "$service_state_dir" || {
888 echo "Error creating state dir \"$service_state_dir\""
893 ########################################################
894 # Managed status history, for auto-start/stop
896 ctdb_managed_dir="$CTDB_VARDIR/state/managed_history"
898 _ctdb_managed_common ()
900 _ctdb_managed_file="$ctdb_managed_dir/$service_name"
903 ctdb_service_managed ()
906 mkdir -p "$ctdb_managed_dir"
907 touch "$_ctdb_managed_file"
910 ctdb_service_unmanaged ()
913 rm -f "$_ctdb_managed_file"
916 is_ctdb_previously_managed_service ()
919 [ -f "$_ctdb_managed_file" ]
922 ########################################################
923 # Check and set status
927 echo "node is \"$1\", \"${script_name}\" reports problem: $(cat $2)"
932 if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
933 log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
935 elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
936 log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
945 d="$ctdb_status_dir/$script_name"
952 for i in "banned" "unhealthy" ; do
959 ##################################################################
960 # Reconfigure a service on demand
962 _ctdb_service_reconfigure_common ()
964 _d="$ctdb_status_dir/${service_name}"
966 _ctdb_service_reconfigure_flag="$_d/reconfigure"
969 ctdb_service_needs_reconfigure ()
971 _ctdb_service_reconfigure_common
972 [ -e "$_ctdb_service_reconfigure_flag" ]
975 ctdb_service_set_reconfigure ()
977 _ctdb_service_reconfigure_common
978 >"$_ctdb_service_reconfigure_flag"
981 ctdb_service_unset_reconfigure ()
983 _ctdb_service_reconfigure_common
984 rm -f "$_ctdb_service_reconfigure_flag"
987 ctdb_service_reconfigure ()
989 echo "Reconfiguring service \"${service_name}\"..."
990 ctdb_service_unset_reconfigure
991 service_reconfigure || return $?
995 # Default service_reconfigure() function does nothing.
996 service_reconfigure ()
1001 ctdb_reconfigure_take_lock ()
1003 _ctdb_service_reconfigure_common
1004 _lock="${_d}/reconfigure_lock"
1005 mkdir -p "${_lock%/*}" # dirname
1010 # This is overkill but will work if we need to extend this to
1011 # allow certain events to run multiple times in parallel
1012 # (e.g. takeip) and write multiple PIDs to the file.
1014 if [ -n "$_locker_event" ] ; then
1015 while read _pid ; do
1016 if [ -n "$_pid" -a "$_pid" != $$ ] && \
1017 kill -0 "$_pid" 2>/dev/null ; then
1023 printf "%s\n%s\n" "$event_name" $$ >"$_lock"
1028 ctdb_reconfigure_release_lock ()
1030 _ctdb_service_reconfigure_common
1031 _lock="${_d}/reconfigure_lock"
1036 ctdb_replay_monitor_status ()
1038 echo "Replaying previous status for this script due to reconfigure..."
1039 # Leading separator ('|') is missing in some versions...
1040 _out=$(ctdb scriptstatus -X | grep -E "^\|?monitor\|${script_name}\|")
1041 # Output looks like this:
1042 # |monitor|60.nfs|1|ERROR|1314764004.030861|1314764004.035514|foo bar|
1043 # This is the cheapest way of getting fields in the middle.
1044 set -- $(IFS="|" ; echo $_out)
1047 # The error output field can include colons so we'll try to
1048 # preserve them. The weak checking at the beginning tries to make
1049 # this work for both broken (no leading '|') and fixed output.
1051 _err_out="${_out#*monitor|${script_name}|*|*|*|*|}"
1053 OK) : ;; # Do nothing special.
1055 # Recast this as an error, since we can't exit with the
1056 # correct negative number.
1058 _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
1061 # Recast this as an OK, since we can't exit with the
1062 # correct negative number.
1064 _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
1066 *) : ;; # Must be ERROR, do nothing special.
1068 if [ -n "$_err_out" ] ; then
1074 ctdb_service_check_reconfigure ()
1078 # We only care about some events in this function. For others we
1080 case "$event_name" in
1081 monitor|ipreallocated|reconfigure) : ;;
1085 if ctdb_reconfigure_take_lock ; then
1086 # No events covered by this function are running, so proceed
1088 case "$event_name" in
1090 (ctdb_service_reconfigure)
1094 if ctdb_service_needs_reconfigure ; then
1095 ctdb_service_reconfigure
1100 ctdb_reconfigure_release_lock
1102 # Somebody else is running an event we don't want to collide
1103 # with. We proceed with caution.
1104 case "$event_name" in
1106 # Tell whoever called us to retry.
1110 # Defer any scheduled reconfigure and just run the
1111 # rest of the ipreallocated event, as per the
1112 # eventscript. There's an assumption here that the
1113 # event doesn't depend on any scheduled reconfigure.
1114 # This is true in the current code.
1118 # There is most likely a reconfigure in progress so
1119 # the service is possibly unstable. As above, we
1120 # defer any scheduled reconfigure. We also replay
1121 # the previous monitor status since that's the best
1122 # information we have.
1123 ctdb_replay_monitor_status
1129 ##################################################################
1130 # Does CTDB manage this service? - and associated auto-start/stop
1132 ctdb_compat_managed_service ()
1134 if [ "$1" = "yes" -a "$2" = "$service_name" ] ; then
1135 CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
1139 is_ctdb_managed_service ()
1143 # $t is used just for readability and to allow more accurate
1144 # matching via leading/trailing spaces
1145 t=" $CTDB_MANAGED_SERVICES "
1147 # Return 0 if "<space>$service_name<space>" appears in $t
1148 if [ "${t#* ${service_name} }" != "${t}" ] ; then
1152 # If above didn't match then update $CTDB_MANAGED_SERVICES for
1153 # backward compatibility and try again.
1154 ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
1155 ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
1156 ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
1157 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "apache2"
1158 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
1159 ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
1160 ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
1161 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
1162 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs-ganesha-gpfs"
1164 t=" $CTDB_MANAGED_SERVICES "
1166 # Return 0 if "<space>$service_name<space>" appears in $t
1167 [ "${t#* ${service_name} }" != "${t}" ]
1170 ctdb_start_stop_service ()
1174 # Allow service-start/service-stop pseudo-events to start/stop
1175 # services when we're not auto-starting/stopping and we're not
1177 case "$event_name" in
1179 if is_ctdb_managed_service ; then
1180 die 'service-start event not permitted when service is managed'
1182 if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
1183 die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
1189 if is_ctdb_managed_service ; then
1190 die 'service-stop event not permitted when service is managed'
1192 if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
1193 die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
1200 # Do nothing unless configured to...
1201 [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0
1203 [ "$event_name" = "monitor" ] || return 0
1205 if is_ctdb_managed_service ; then
1206 if ! is_ctdb_previously_managed_service ; then
1207 echo "Starting service \"$service_name\" - now managed"
1208 background_with_logging ctdb_service_start
1212 if is_ctdb_previously_managed_service ; then
1213 echo "Stopping service \"$service_name\" - no longer managed"
1214 background_with_logging ctdb_service_stop
1220 ctdb_service_start ()
1222 # The service is marked managed if we've ever tried to start it.
1223 ctdb_service_managed
1225 service_start || return $?
1231 ctdb_service_stop ()
1233 ctdb_service_unmanaged
1237 # Default service_start() and service_stop() functions.
1239 # These may be overridden in an eventscript.
1242 service "$service_name" start
1247 service "$service_name" stop
1250 ##################################################################
1252 ctdb_standard_event_handler ()
1269 _family="$1" ; shift
1270 if [ "$_family" = "inet6" ] ; then
1271 _iptables_cmd="ip6tables"
1273 _iptables_cmd="iptables"
1276 # iptables doesn't like being re-entered, so flock-wrap it.
1277 flock -w 30 "${CTDB_VARDIR}/iptables-ctdb.flock" "$_iptables_cmd" "$@"
1280 # AIX (and perhaps others?) doesn't have mktemp
1281 if ! type mktemp >/dev/null 2>&1 ; then
1285 if [ "$1" = "-d" ] ; then
1289 _d="${TMPDIR:-/tmp}"
1290 _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \
1292 sed -e 's@\(..........\).*@\1@')
1293 _t="${_d}/tmp.${_hex10}"
1306 ########################################################
1308 ########################################################
1314 tickledir="$CTDB_VARDIR/state/tickles"
1315 mkdir -p "$tickledir"
1319 # What public IPs do I hold?
1320 _ips=$(ctdb -X ip | awk -F'|' -v pnn=$pnn '$3 == pnn {print $2}')
1322 # IPs as a regexp choice
1323 _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
1325 # Record connections to our public IPs in a temporary file
1326 _my_connections="${tickledir}/${_port}.connections"
1327 rm -f "$_my_connections"
1329 awk -v destpat="^${_ipschoice}:${_port}\$" \
1330 '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
1331 sort >"$_my_connections"
1333 # Record our current tickles in a temporary file
1334 _my_tickles="${tickledir}/${_port}.tickles"
1335 rm -f "$_my_tickles"
1336 for _i in $_ips ; do
1337 ctdb -X gettickles $_i $_port |
1338 awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
1340 sort >"$_my_tickles"
1342 # Add tickles for connections that we haven't already got tickles for
1343 comm -23 "$_my_connections" "$_my_tickles" |
1344 while read _src _dst ; do
1345 ctdb addtickle $_src $_dst
1348 # Remove tickles for connections that are no longer there
1349 comm -13 "$_my_connections" "$_my_tickles" |
1350 while read _src _dst ; do
1351 ctdb deltickle $_src $_dst
1354 rm -f "$_my_connections" "$_my_tickles"
1357 ########################################################
1358 # load a site local config file
1359 ########################################################
1361 [ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
1365 [ -x $CTDB_BASE/rc.local ] && {
1366 . $CTDB_BASE/rc.local
1369 [ -d $CTDB_BASE/rc.local.d ] && {
1370 for i in $CTDB_BASE/rc.local.d/* ; do
1371 [ -x "$i" ] && . "$i"
1375 script_name="${0##*/}" # basename
1376 service_fail_limit=1