ctdb/config/functions

   1 # Hey Emacs, this is a -*- shell-script -*- !!!
   2
   3 # utility functions for ctdb event scripts
   4
   5 [ -z "$CTDB_VARDIR" ] && {
   6     if [ -d "/var/lib/ctdb" ] ; then
   7         export CTDB_VARDIR="/var/lib/ctdb"
   8     else
   9         export CTDB_VARDIR="/var/ctdb"
  10     fi
  11 }
  12 [ -z "$CTDB_ETCDIR" ] && {
  13     export CTDB_ETCDIR="/etc"
  14 }
  15
  16 #######################################
  17 # pull in a system config file, if any
  18 _loadconfig() {
  19
  20     if [ -z "$1" ] ; then
  21         foo="${service_config:-${service_name}}"
  22         if [ -n "$foo" ] ; then
  23             loadconfig "$foo"
  24             return
  25         fi
  26     fi
  27
  28     if [ "$1" != "ctdb" ] ; then
  29         loadconfig "ctdb"
  30     fi
  31
  32     if [ -z "$1" ] ; then
  33         return
  34     fi
  35
  36     if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
  37         . $CTDB_ETCDIR/sysconfig/$1
  38     elif [ -f $CTDB_ETCDIR/default/$1 ]; then
  39         . $CTDB_ETCDIR/default/$1
  40     elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
  41         . $CTDB_BASE/sysconfig/$1
  42     fi
  43
  44     if [ "$1" = "ctdb" ] ; then
  45         _config="${CTDB_BASE}/ctdbd.conf"
  46         if [ -r "$_config" ] ; then
  47             . "$_config"
  48         fi
  49     fi
  50 }
  51
  52 loadconfig () {
  53     _loadconfig "$@"
  54 }
  55
  56 ##############################################################
  57
  58 # CTDB_SCRIPT_DEBUGLEVEL can be overwritten by setting it in a
  59 # configuration file.
  60 debug ()
  61 {
  62     if [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] ; then
  63         # If there are arguments then echo them.  Otherwise expect to
  64         # use stdin, which allows us to pass lots of debug using a
  65         # here document.
  66         if [ -n "$1" ] ; then
  67             echo "DEBUG: $*"
  68         elif ! tty -s ; then
  69             sed -e 's@^@DEBUG: @'
  70         fi
  71     fi
  72 }
  73
  74 die ()
  75 {
  76     _msg="$1"
  77     _rc="${2:-1}"
  78
  79     echo "$_msg"
  80     exit $_rc
  81 }
  82
  83 # Log given message or stdin to either syslog or a CTDB log file
  84 # $1 is the tag passed to logger if syslog is in use.
  85 script_log ()
  86 {
  87     _tag="$1" ; shift
  88
  89     if [ "$CTDB_SYSLOG" = "yes" ] ; then
  90         logger -t "ctdbd: ${_tag}" $*
  91     else
  92         {
  93             if [ -n "$*" ] ; then
  94                 echo "$*"
  95             else
  96                 cat
  97             fi
  98         } >>"${CTDB_LOGFILE:-/var/log/log.ctdb}"
  99     fi
 100 }
 101
 102 # When things are run in the background in an eventscript then logging
 103 # output might get lost.  This is the "solution".  :-)
 104 background_with_logging ()
 105 {
 106     (
 107         "$@" 2>&1 </dev/null |
 108         script_log "${script_name}&"
 109     )&
 110
 111     return 0
 112 }
 113
 114 ##############################################################
 115 # check number of args for different events
 116 ctdb_check_args ()
 117 {
 118     case "$1" in
 119         takeip|releaseip)
 120             if [ $# != 4 ]; then
 121                 echo "ERROR: must supply interface, IP and maskbits"
 122                 exit 1
 123             fi
 124             ;;
 125         updateip)
 126             if [ $# != 5 ]; then
 127                 echo "ERROR: must supply old interface, new interface, IP and maskbits"
 128                 exit 1
 129             fi
 130             ;;
 131     esac
 132 }
 133
 134 ##############################################################
 135 # determine on what type of system (init style) we are running
 136 detect_init_style() {
 137     # only do detection if not already set:
 138     test "x$CTDB_INIT_STYLE" != "x" && return
 139
 140     if [ -x /sbin/startproc ]; then
 141         CTDB_INIT_STYLE="suse"
 142     elif [ -x /sbin/start-stop-daemon ]; then
 143         CTDB_INIT_STYLE="debian"
 144     else
 145         CTDB_INIT_STYLE="redhat"
 146     fi
 147 }
 148
 149 ######################################################
 150 # simulate /sbin/service on platforms that don't have it
 151 # _service() makes it easier to hook the service() function for
 152 # testing.
 153 _service ()
 154 {
 155   _service_name="$1"
 156   _op="$2"
 157
 158   # do nothing, when no service was specified
 159   [ -z "$_service_name" ] && return
 160
 161   if [ -x /sbin/service ]; then
 162       $_nice /sbin/service "$_service_name" "$_op"
 163   elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
 164       $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
 165   elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
 166       $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
 167   fi
 168 }
 169
 170 service()
 171 {
 172     _nice=""
 173     _service "$@"
 174 }
 175
 176 ######################################################
 177 # simulate /sbin/service (niced) on platforms that don't have it
 178 nice_service()
 179 {
 180     _nice="nice"
 181     _service "$@"
 182 }
 183
 184 ######################################################
 185 # wrapper around /proc/ settings to allow them to be hooked
 186 # for testing
 187 # 1st arg is relative path under /proc/, 2nd arg is value to set
 188 set_proc ()
 189 {
 190     echo "$2" >"/proc/$1"
 191 }
 192
 193 ######################################################
 194 # wrapper around getting file contents from /proc/ to allow
 195 # this to be hooked for testing
 196 # 1st arg is relative path under /proc/
 197 get_proc ()
 198 {
 199     cat "/proc/$1"
 200 }
 201
 202 ######################################################
 203 # Check that an RPC service is healthy -
 204 # this includes allowing a certain number of failures
 205 # before marking the NFS service unhealthy.
 206 #
 207 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
 208 #
 209 # each triple is a set of 3 arguments: an operator, a
 210 # fail count limit and an action string.
 211 #
 212 # For example:
 213 #
 214 #       nfs_check_rpc_service "lockd" \
 215 #           -ge 15 "verbose restart unhealthy" \
#           -eq 10 "restart:b"
 217 #
 218 # says that if lockd is down for 15 iterations then do
 219 # a verbose restart of lockd and mark the node unhealthy.
 220 # Before this, after 10 iterations of failure, the
 221 # service is restarted silently in the background.
 222 # Order is important: the number of failures need to be
 223 # specified in reverse order because processing stops
 224 # after the first condition that is true.
 225 ######################################################
 226 nfs_check_rpc_service ()
 227 {
 228     _prog_name="$1" ; shift
 229
 230     if _nfs_check_rpc_common "$_prog_name" ; then
 231         return
 232     fi
 233
 234     while [ -n "$3" ] ; do
 235         if _nfs_check_rpc_action "$1" "$2" "$3" ; then
 236             break
 237         fi
 238         shift 3
 239     done
 240 }
 241
 242 # The new way of doing things...
 243 nfs_check_rpc_services ()
 244 {
 245     # Files must end with .check - avoids editor backups, RPM fu, ...
 246     for _f in "${CTDB_BASE}/nfs-rpc-checks.d/"[0-9][0-9].*.check ; do
 247         _t="${_f%.check}"
 248         _prog_name="${_t##*/[0-9][0-9].}"
 249
 250         if _nfs_check_rpc_common "$_prog_name" ; then
 251             # This RPC service is up, check next service...
 252             continue
 253         fi
 254
 255         # Check each line in the file in turn until one of the limit
 256         # checks is hit...
 257         while read _cmp _lim _rest ; do
 258             # Skip comments
 259             case "$_cmp" in
 260                 \#*) continue ;;
 261             esac
 262
 263             if _nfs_check_rpc_action "$_cmp" "$_lim" "$_rest" ; then
 264                 # Limit was hit on this line, no further checking...
 265                 break
 266             fi
 267         done <"$_f"
 268     done
 269 }
 270
 271 _nfs_check_rpc_common ()
 272 {
 273     _prog_name="$1"
 274
 275     # Some platforms don't have separate programs for all services.
 276     case "$_prog_name" in
 277         statd)
 278             which "rpc.${_prog_name}" >/dev/null 2>&1 || return 0
 279     esac
 280
 281     case "$_prog_name" in
 282         nfsd)
 283             _rpc_prog=nfs
 284             _version=3
 285             ;;
 286         mountd)
 287             _rpc_prog=mountd
 288             _version=1
 289             ;;
 290         rquotad)
 291             _rpc_prog=rquotad
 292             _version=1
 293             ;;
 294         lockd)
 295             _rpc_prog=nlockmgr
 296             _version=4
 297             ;;
 298         statd)
 299             _rpc_prog=status
 300             _version=1
 301             ;;
 302         *)
 303             echo "Internal error: unknown RPC program \"$_prog_name\"."
 304             exit 1
 305     esac
 306
 307     _service_name="nfs_${_prog_name}"
 308
 309     if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
 310         ctdb_counter_init "$_service_name"
 311         return 0
 312     fi
 313
 314     ctdb_counter_incr "$_service_name"
 315
 316     return 1
 317 }
 318
 319 _nfs_check_rpc_action ()
 320 {
 321     _cmp="$1"
 322     _limit="$2"
 323     _actions="$3"
 324
 325     if ctdb_check_counter "quiet" "$_cmp" "$_limit" "$_service_name" ; then
 326         return 1
 327     fi
 328
 329     for _action in $_actions ; do
 330         case "$_action" in
 331             verbose)
 332                 echo "$ctdb_check_rpc_out"
 333                 ;;
 334             restart)
 335                 _nfs_restart_rpc_service "$_prog_name"
 336                 ;;
 337             restart:b)
 338                 _nfs_restart_rpc_service "$_prog_name" true
 339                 ;;
 340             unhealthy)
 341                 exit 1
 342                 ;;
 343             *)
 344                 echo "Internal error: unknown action \"$_action\"."
 345                 exit 1
 346         esac
 347     done
 348
 349     return 0
 350 }
 351
 352 _nfs_restart_rpc_service ()
 353 {
 354     _prog_name="$1"
 355     _background="${2:-false}"
 356
 357     if $_background ; then
 358         _maybe_background="background_with_logging"
 359     else
 360         _maybe_background=""
 361     fi
 362
 363     _p="rpc.${_prog_name}"
 364
 365     case "$_prog_name" in
 366         nfsd)
 367             echo "Trying to restart NFS service"
 368             $_maybe_background startstop_nfs restart
 369             ;;
 370         mountd)
 371             echo "Trying to restart $_prog_name [${_p}]"
 372             killall -q -9 "$_p"
 373             $_maybe_background $_p ${MOUNTD_PORT:+-p} $MOUNTD_PORT
 374             ;;
 375         rquotad)
 376             echo "Trying to restart $_prog_name [${_p}]"
 377             killall -q -9 "$_p"
 378             $_maybe_background $_p ${RQUOTAD_PORT:+-p} $RQUOTAD_PORT
 379             ;;
 380         lockd)
 381             echo "Trying to restart lock manager service"
 382             $_maybe_background startstop_nfslock restart
 383             ;;
 384         statd)
 385             echo "Trying to restart $_prog_name [${_p}]"
 386             killall -q -9 "$_p"
 387             $_maybe_background $_p \
 388                 ${STATD_HOSTNAME:+-n} $STATD_HOSTNAME \
 389                 ${STATD_PORT:+-p} $STATD_PORT \
 390                 ${STATD_OUTGOING_PORT:+-o} $STATD_OUTGOING_PORT
 391             ;;
 392         *)
 393             echo "Internal error: unknown RPC program \"$_prog_name\"."
 394             exit 1
 395     esac
 396 }
 397
 398 ######################################################
 399 # check that a rpc server is registered with portmap
 400 # and responding to requests
 401 # usage: ctdb_check_rpc SERVICE_NAME VERSION
 402 ######################################################
 403 ctdb_check_rpc ()
 404 {
 405     progname="$1"
 406     version="$2"
 407
 408     _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
 409
 410     if ! ctdb_check_rpc_out=$(rpcinfo -u $_localhost $progname $version 2>&1) ; then
 411         ctdb_check_rpc_out="ERROR: $progname failed RPC check:
 412 $ctdb_check_rpc_out"
 413         echo "$ctdb_check_rpc_out"
 414         return 1
 415     fi
 416 }
 417
 418 ######################################################
 419 # Ensure $service_name is set
 420 assert_service_name ()
 421 {
 422     [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
 423 }
 424
 425 ######################################################
 426 # check a set of directories is available
 427 # return 1 on a missing directory
 428 # directories are read from stdin
 429 ######################################################
 430 ctdb_check_directories_probe()
 431 {
 432     while IFS="" read d ; do
 433         case "$d" in
 434             *%*)
 435                 continue
 436                 ;;
 437             *)
 438                 [ -d "${d}/." ] || return 1
 439         esac
 440     done
 441 }
 442
 443 ######################################################
 444 # check a set of directories is available
 445 # directories are read from stdin
 446 ######################################################
 447 ctdb_check_directories()
 448 {
 449     ctdb_check_directories_probe || {
 450         echo "ERROR: $service_name directory \"$d\" not available"
 451         exit 1
 452     }
 453 }
 454
 455 ######################################################
 456 # check a set of tcp ports
 457 # usage: ctdb_check_tcp_ports <ports...>
 458 ######################################################
 459
 460 # This flag file is created when a service is initially started.  It
 461 # is deleted the first time TCP port checks for that service succeed.
 462 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
 463 # message if a port check fails.
 464 _ctdb_check_tcp_common ()
 465 {
 466     assert_service_name
 467     _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
 468 }
 469
 470 ctdb_check_tcp_init ()
 471 {
 472     _ctdb_check_tcp_common
 473     mkdir -p "${_ctdb_service_started_file%/*}" # dirname
 474     touch "$_ctdb_service_started_file"
 475 }
 476
 477 ctdb_check_tcp_ports()
 478 {
 479     if [ -z "$1" ] ; then
 480         echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
 481         exit 1
 482     fi
 483
 484     # Set default value for CTDB_TCP_PORT_CHECKERS if unset.
 485     # If any of these defaults are unsupported then this variable can
 486     # be overridden in /etc/sysconfig/ctdb or via a file in
 487     # /etc/ctdb/rc.local.d/.
 488     : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
 489
 490     for _c in $CTDB_TCP_PORT_CHECKERS ; do
 491         ctdb_check_tcp_ports_$_c "$@"
 492         case "$?" in
 493             0)
 494                 _ctdb_check_tcp_common
 495                 rm -f "$_ctdb_service_started_file"
 496                 return 0
 497                 ;;
 498             1)
 499                 _ctdb_check_tcp_common
 500                 if [ ! -f "$_ctdb_service_started_file" ] ; then
 501                     echo "ERROR: $service_name tcp port $_p is not responding"
 502                     debug <<EOF
 503 $ctdb_check_tcp_ports_debug
 504 EOF
 505                 else
 506                     echo "INFO: $service_name tcp port $_p is not responding"
 507                 fi
 508
 509                 return 1
 510                 ;;
 511             127)
 512                 debug <<EOF
 513 ctdb_check_ports - checker $_c not implemented
 514 output from checker was:
 515 $ctdb_check_tcp_ports_debug
 516 EOF
 517                 ;;
 518             *)
 519
 520         esac
 521     done
 522
 523     echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
 524
 525     return 127
 526 }
 527
 528 ctdb_check_tcp_ports_netstat ()
 529 {
 530     _cmd='netstat -l -t -n'
 531     _ns=$($_cmd 2>&1)
 532     if [ $? -eq 127 ] ; then
 533         # netstat probably not installed - unlikely?
 534         ctdb_check_tcp_ports_debug="$_ns"
 535         return 127
 536     fi
 537
 538     for _p ; do  # process each function argument (port)
 539         for _a in '0\.0\.0\.0' '::' ; do
 540             _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
 541             if echo "$_ns" | grep -E -q "$_pat" ; then
 542                 # We matched the port, so process next port
 543                 continue 2
 544             fi
 545         done
 546
 547         # We didn't match the port, so flag an error.
 548         ctdb_check_tcp_ports_debug="$_cmd shows this output:
 549 $_ns"
 550         return 1
 551     done
 552
 553     return 0
 554 }
 555
 556 ctdb_check_tcp_ports_nmap ()
 557 {
 558     # nmap wants a comma-separated list of ports
 559     _ports=""
 560     for _p ; do
 561         _ports="${_ports}${_ports:+,}${_p}"
 562     done
 563
 564     _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
 565
 566     _nmap_out=$($_cmd 2>&1)
 567     if [ $? -eq 127 ] ; then
 568         # nmap probably not installed
 569         ctdb_check_tcp_ports_debug="$_nmap_out"
 570         return 127
 571     fi
 572
 573     # get the port-related output
 574     _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
 575
 576     for _p ; do
 577         # looking for something like this:
 578         #  445/open/tcp//microsoft-ds///
 579         # possibly followed by a comma
 580         _t="$_p/open/tcp//"
 581         case "$_port_info" in
 582             # The info we're after must be either at the beginning of
 583             # the string or it must follow a space.
 584             $_t*|*\ $_t*) : ;;
 585             *)
 586                 # Nope, flag an error...
 587                 ctdb_check_tcp_ports_debug="$_cmd shows this output:
 588 $_nmap_out"
 589                 return 1
 590         esac
 591     done
 592
 593     return 0
 594 }
 595
 596 # Use the new "ctdb checktcpport" command to check the port.
 597 # This is very cheap.
 598 ctdb_check_tcp_ports_ctdb ()
 599 {
 600     for _p ; do  # process each function argument (port)
 601         _cmd="ctdb checktcpport $_p"
 602         _out=$($_cmd 2>&1)
 603         _ret=$?
 604         case "$_ret" in
 605             0)
 606                 ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
 607                 return 1
 608                 ;;
 609             98)
 610                 # Couldn't bind, something already listening, next port...
 611                 continue
 612                 ;;
 613             *)
 614                 ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
 615 $_out"
 616                 # assume not implemented
 617                 return 127
 618         esac
 619     done
 620
 621     return 0
 622 }
 623
 624 ######################################################
 625 # check a unix socket
 626 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
 627 ######################################################
 628 ctdb_check_unix_socket() {
 629     socket_path="$1"
 630     [ -z "$socket_path" ] && return
 631
 632     if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
 633         echo "ERROR: $service_name socket $socket_path not found"
 634         return 1
 635     fi
 636 }
 637
 638 ######################################################
 639 # check a command returns zero status
 640 # usage: ctdb_check_command <command>
 641 ######################################################
 642 ctdb_check_command ()
 643 {
 644     _out=$("$@" 2>&1) || {
 645         echo "ERROR: $* returned error"
 646         echo "$_out" | debug
 647         exit 1
 648     }
 649 }
 650
 651 ################################################
 652 # kill off any TCP connections with the given IP
 653 ################################################
 654 kill_tcp_connections ()
 655 {
 656     _ip="$1"
 657
 658     _oneway=false
 659     if [ "$2" = "oneway" ] ; then
 660         _oneway=true
 661     fi
 662
 663     get_tcp_connections_for_ip "$_ip" | {
 664         _killcount=0
 665         _connections=""
 666         _nl="
 667 "
 668         while read _dst _src; do
 669             _destport="${_dst##*:}"
 670             __oneway=$_oneway
 671             case $_destport in
 672                 # we only do one-way killtcp for CIFS
 673                 139|445) __oneway=true ;;
 674             esac
 675
 676             echo "Killing TCP connection $_src $_dst"
 677             _connections="${_connections}${_nl}${_src} ${_dst}"
 678             if ! $__oneway ; then
 679                 _connections="${_connections}${_nl}${_dst} ${_src}"
 680             fi
 681
 682             _killcount=$(($_killcount + 1))
 683         done
 684
 685         if [ $_killcount -eq 0 ] ; then
 686             return
 687         fi
 688
 689         echo "$_connections" | ctdb killtcp || {
 690             echo "Failed to send killtcp control"
 691             return
 692         }
 693
 694         _count=0
 695         while : ; do
 696             _remaining=$(get_tcp_connections_for_ip $_ip | wc -l)
 697
 698             if [ $_remaining -eq 0 ] ; then
 699                 echo "Killed $_killcount TCP connections to released IP $_ip"
 700                 return
 701             fi
 702
 703             _count=$(($_count + 1))
 704             if [ $_count -gt 3 ] ; then
 705                 echo "Timed out killing tcp connections for IP $_ip"
 706                 return
 707             fi
 708
 709             echo "Waiting for $_remaining connections to be killed for IP $_ip"
 710             sleep 1
 711         done
 712     }
 713 }
 714
 715 ##################################################################
 716 # kill off the local end for any TCP connections with the given IP
 717 ##################################################################
 718 kill_tcp_connections_local_only ()
 719 {
 720     kill_tcp_connections "$1" "oneway"
 721 }
 722
 723 ##################################################################
 724 # tickle any TCP connections with the given IP
 725 ##################################################################
 726 tickle_tcp_connections ()
 727 {
 728     _ip="$1"
 729
 730     get_tcp_connections_for_ip "$_ip" |
 731     {
 732         _failed=false
 733
 734         while read dest src; do
 735             echo "Tickle TCP connection $src $dest"
 736             ctdb tickle $src $dest >/dev/null 2>&1 || _failed=true
 737             echo "Tickle TCP connection $dest $src"
 738             ctdb tickle $dest $src >/dev/null 2>&1 || _failed=true
 739         done
 740
 741         if $_failed ; then
 742             echo "Failed to send tickle control"
 743         fi
 744     }
 745 }
 746
 747 get_tcp_connections_for_ip ()
 748 {
 749     _ip="$1"
 750
 751     netstat -tn | awk -v ip=$_ip \
 752         'index($1, "tcp") == 1 && \
 753          (index($4, ip ":") == 1 || index($4, "::ffff:" ip ":") == 1) \
 754          && $6 == "ESTABLISHED" \
 755          {print $4" "$5}'
 756 }
 757
 758 ########################################################
 759 # start/stop the Ganesha nfs service
 760 ########################################################
 761 startstop_ganesha()
 762 {
 763     _service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
 764     case "$1" in
 765         start)
 766             service "$_service_name" start
 767             ;;
 768         stop)
 769             service "$_service_name" stop
 770             ;;
 771         restart)
 772             service "$_service_name" restart
 773             ;;
 774     esac
 775 }
 776
 777 ########################################################
 778 # start/stop the nfs service on different platforms
 779 ########################################################
 780 startstop_nfs() {
 781         PLATFORM="unknown"
 782         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 783                 PLATFORM="sles"
 784         }
 785         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 786                 PLATFORM="rhel"
 787         }
 788
 789         case $PLATFORM in
 790         sles)
 791                 case $1 in
 792                 start)
 793                         service nfsserver start
 794                         ;;
 795                 stop)
 796                         service nfsserver stop > /dev/null 2>&1
 797                         ;;
 798                 restart)
 799                         set_proc "fs/nfsd/threads" 0
 800                         service nfsserver stop > /dev/null 2>&1
 801                         pkill -9 nfsd
 802                         nfs_dump_some_threads
 803                         service nfsserver start
 804                         ;;
 805                 esac
 806                 ;;
 807         rhel)
 808                 case $1 in
 809                 start)
 810                         service nfslock start
 811                         service nfs start
 812                         ;;
 813                 stop)
 814                         service nfs stop
 815                         service nfslock stop
 816                         ;;
 817                 restart)
 818                         set_proc "fs/nfsd/threads" 0
 819                         service nfs stop > /dev/null 2>&1
 820                         service nfslock stop > /dev/null 2>&1
 821                         pkill -9 nfsd
 822                         nfs_dump_some_threads
 823                         service nfslock start
 824                         service nfs start
 825                         ;;
 826                 esac
 827                 ;;
 828         *)
 829                 echo "Unknown platform. NFS is not supported with ctdb"
 830                 exit 1
 831                 ;;
 832         esac
 833 }
 834
 835 # Dump up to the configured number of nfsd thread backtraces.
 836 nfs_dump_some_threads ()
 837 {
 838     [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] || return 0
 839
 840     # Optimisation to avoid running an unnecessary pidof
 841     [ $CTDB_NFS_DUMP_STUCK_THREADS -gt 0 ] || return 0
 842
 843     _count=0
 844     for _pid in $(pidof nfsd) ; do
 845         [ $_count -le $CTDB_NFS_DUMP_STUCK_THREADS ] || break
 846
 847         # Do this first to avoid racing with thread exit
 848         _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
 849         if [ -n "$_stack" ] ; then
 850             echo "Stack trace for stuck nfsd thread [${_pid}]:"
 851             echo "$_stack"
 852             _count=$(($_count + 1))
 853         fi
 854     done
 855 }
 856
 857 ########################################################
 858 # start/stop the nfs lockmanager service on different platforms
 859 ########################################################
 860 startstop_nfslock() {
 861         PLATFORM="unknown"
 862         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 863                 PLATFORM="sles"
 864         }
 865         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 866                 PLATFORM="rhel"
 867         }
 868
 869         case $PLATFORM in
 870         sles)
 871                 # for sles there is no service for lockmanager
 872                 # so we instead just shutdown/restart nfs
 873                 case $1 in
 874                 start)
 875                         service nfsserver start
 876                         ;;
 877                 stop)
 878                         service nfsserver stop > /dev/null 2>&1
 879                         ;;
 880                 restart)
 881                         service nfsserver stop > /dev/null 2>&1
 882                         service nfsserver start
 883                         ;;
 884                 esac
 885                 ;;
 886         rhel)
 887                 case $1 in
 888                 start)
 889                         service nfslock start
 890                         ;;
 891                 stop)
 892                         service nfslock stop > /dev/null 2>&1
 893                         ;;
 894                 restart)
 895                         service nfslock stop > /dev/null 2>&1
 896                         service nfslock start
 897                         ;;
 898                 esac
 899                 ;;
 900         *)
 901                 echo "Unknown platform. NFS locking is not supported with ctdb"
 902                 exit 1
 903                 ;;
 904         esac
 905 }
 906
 907 # Periodically update the statd database
 908 nfs_statd_update ()
 909 {
 910     _update_period="$1"
 911
 912     _statd_update_trigger="$service_state_dir/update-trigger"
 913     [ -f "$_statd_update_trigger" ] || touch "$_statd_update_trigger"
 914
 915     _last_update=$(stat --printf="%Y" "$_statd_update_trigger")
 916     _current_time=$(date +"%s")
 917     if [ $(( $_current_time - $_last_update)) -ge $_update_period ] ; then
 918         touch "$_statd_update_trigger"
 919         $CTDB_BASE/statd-callout updatelocal &
 920         $CTDB_BASE/statd-callout updateremote &
 921     fi
 922 }
 923
 924 add_ip_to_iface()
 925 {
 926     _iface=$1
 927     _ip=$2
 928     _maskbits=$3
 929
 930     _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
 931     mkdir -p "${_lockfile%/*}" # dirname
 932     [ -f "$_lockfile" ] || touch "$_lockfile"
 933
 934     (
 935         # Note: use of return/exit/die() below only gets us out of the
 936         # sub-shell, which is actually what we want.  That is, the
 937         # function should just return non-zero.
 938
 939         flock --timeout 30 0 || \
 940             die "add_ip_to_iface: unable to get lock for ${_iface}"
 941
 942         # Ensure interface is up
 943         ip link set "$_iface" up || \
 944             die "Failed to bringup interface $_iface"
 945
 946         ip addr add "$_ip/$_maskbits" brd + dev "$_iface" || \
 947             die "Failed to add $_ip/$_maskbits on dev $_iface"
 948     ) <"$_lockfile"
 949
 950     # Do nothing here - return above only gets us out of the subshell
 951     # and doing anything here will affect the return code.
 952 }
 953
 954 delete_ip_from_iface()
 955 {
 956     _iface=$1
 957     _ip=$2
 958     _maskbits=$3
 959
 960     _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
 961     mkdir -p "${_lockfile%/*}" # dirname
 962     [ -f "$_lockfile" ] || touch "$_lockfile"
 963
 964     (
 965         # Note: use of return/exit/die() below only gets us out of the
 966         # sub-shell, which is actually what we want.  That is, the
 967         # function should just return non-zero.
 968
 969         flock --timeout 30 0 || \
 970             die "delete_ip_from_iface: unable to get lock for ${_iface}"
 971
 972         _im="$_ip/$_maskbits"  # shorthand for readability
 973
 974         # "ip addr del" will delete all secondary IPs if this is the
 975         # primary.  To work around this _very_ annoying behaviour we
 976         # have to keep a record of the secondaries and re-add them
 977         # afterwards.  Yuck!
 978
 979         _secondaries=""
 980         if ip addr list dev "$_iface" primary | grep -Fq "inet $_im " ; then
 981             _secondaries=$(ip addr list dev "$_iface" secondary | \
 982                 awk '$1 == "inet" { print $2 }')
 983         fi
 984
 985         local _rc=0
 986         ip addr del "$_im" dev "$_iface" || {
 987             echo "Failed to del $_ip on dev $_iface"
 988             _rc=1
 989         }
 990
 991         if [ -n "$_secondaries" ] ; then
 992             for _i in $_secondaries; do
 993                 if ip addr list dev "$_iface" | grep -Fq "inet $_i" ; then
 994                     echo "Kept secondary $_i on dev $_iface"
 995                 else
 996                     echo "Re-adding secondary address $_i to dev $_iface"
 997                     ip addr add $_i brd + dev $_iface || {
 998                         echo "Failed to re-add address $_i to dev $_iface"
 999                         _rc=1
1000                     }
1001                 fi
1002             done
1003         fi
1004
1005         return $_rc
1006     ) <"$_lockfile"
1007
1008     # Do nothing here - return above only gets us out of the subshell
1009     # and doing anything here will affect the return code.
1010 }
1011
# If the given IP is hosted then print 2 items: maskbits and iface.
# Nothing is printed if "ip addr show" reports no matching "inet" line.
#
# Arguments:
#   $1 - IPv4 address (without maskbits)
ip_maskbits_iface ()
{
    _addr="$1"

    # The maskbits are whatever follows the last "/" in the address
    # field and the interface is the last field of the "inet" line.
    # sub() on a copy is used rather than gensub() because gensub() is
    # a gawk extension that other awk implementations do not provide.
    ip addr show to "${_addr}/32" 2>/dev/null | \
        awk '$1 == "inet" { bits = $2 ; sub(".*/", "", bits) ; print bits, $NF }'
}
1020
# Remove a public address from whichever interface currently hosts it.
# Does nothing (successfully) if the address is not hosted.
#
# Arguments:
#   $1 - IP address, optionally followed by "/maskbits" (ignored)
drop_ip ()
{
    _addr="${1%/*}"  # Remove optional maskbits

    # Replace the positional parameters with the "maskbits iface" pair
    # printed by ip_maskbits_iface() - empty if the IP is not hosted.
    set -- $(ip_maskbits_iface $_addr)

    # Guard: nothing to remove
    [ -n "$1" ] || return 0

    _maskbits="$1"
    _iface="$2"
    echo "Removing public address $_addr/$_maskbits from device $_iface"
    # Output of the deletion is discarded; only its status is returned
    delete_ip_from_iface $_iface $_addr $_maskbits >/dev/null 2>&1
}
1033
# Call drop_ip() for the first word of every line of the file named by
# $CTDB_PUBLIC_ADDRESSES.  If that variable is unset or empty then
# /dev/null is read instead, so nothing is dropped.
drop_all_public_ips ()
{
    # Anything after the first word of a line is read into $_x and
    # ignored.
    while read _ip _x
    do
        drop_ip "$_ip"
    done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
}
1040
1041 ########################################################
1042 # Simple counters
# Common setup for the counter functions.
#
# Sets $_service_name to the first non-empty value out of: $1,
# $service_name, $script_name.  Sets $_counter_file to the matching
# file under $ctdb_fail_dir and makes sure its directory exists.
_ctdb_counter_common () {
    _service_name="${1:-}"
    if [ -z "$_service_name" ] ; then
        _service_name="${service_name:-${script_name}}"
    fi

    _counter_file="${ctdb_fail_dir}/${_service_name}"
    mkdir -p "${_counter_file%/*}" # dirname
}
# Reset a failure counter to zero.
#
# Arguments:
#   $1 - optional service name, passed on to _ctdb_counter_common()
ctdb_counter_init () {
    _ctdb_counter_common "$1"

    # The counter is unary (one byte per count), so an empty file
    # means zero.
    : >"$_counter_file"
}
# Increment a failure counter by one.
#
# Arguments:
#   $1 - optional service name, passed on to _ctdb_counter_common()
ctdb_counter_incr () {
    _ctdb_counter_common "$1"

    # unary counting!  Exactly one byte must be appended per call.
    # printf is used because "echo -n" is not portable: where -n is
    # not supported it would append "-n 1" plus a newline and inflate
    # the count read back by ctdb_check_counter().
    printf 1 >> "$_counter_file"
}
# Compare a failure counter against a limit.
#
# Arguments (all optional):
#   $1 - "error" (default) to print a message and exit 1 when the
#        check hits; anything else makes a hit silently return 1
#   $2 - an integer operator supported by test (default "-ge"), or
#        "%" to hit whenever the count is a multiple of the limit
#   $3 - the limit (default $service_fail_limit)
#   $4 - service name, passed on to _ctdb_counter_common()
#
# Returns 0 if the check does not hit.
ctdb_check_counter () {
    _msg="${1:-error}"  # "error"  - anything else is silent on fail
    _op="${2:--ge}"  # an integer operator supported by test
    _limit="${3:-${service_fail_limit}}"
    # The 4th argument is used directly instead of "shift 3" + "$1".
    # With fewer than 3 arguments "shift 3" either leaves the
    # positional parameters untouched (so the message would be taken
    # as the service name) or aborts the shell, depending on the shell.
    _ctdb_counter_common "$4"

    # unary counting!  The count is the size of the file in bytes; a
    # missing file counts as 0.
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
    _hit=false
    if [ "$_op" != "%" ] ; then
        if [ "$_size" "$_op" "$_limit" ] ; then
            _hit=true
        fi
    else
        if [ $(($_size $_op $_limit)) -eq 0 ] ; then
            _hit=true
        fi
    fi
    if $_hit ; then
        if [ "$_msg" = "error" ] ; then
            echo "ERROR: $_size consecutive failures for $_service_name, marking node unhealthy"
            exit 1
        else
            return 1
        fi
    fi
}
1087
1088 ########################################################
1089
# Directory used for per-script status files and reconfigure flags.
ctdb_status_dir="${CTDB_VARDIR}/status"
# Directory holding the unary failure counter files.
ctdb_fail_dir="${CTDB_VARDIR}/failcount"
1092
# Set $service_state_dir to a per-service directory under
# $CTDB_VARDIR/state and create it.  Exits with status 1 if the
# directory cannot be created.
#
# Arguments:
#   $1 - optional service name (default $service_name)
ctdb_setup_service_state_dir ()
{
    service_state_dir="${CTDB_VARDIR}/state/${1:-${service_name}}"

    if ! mkdir -p "$service_state_dir" ; then
        echo "Error creating state dir \"$service_state_dir\""
        exit 1
    fi
}
1101
1102 ########################################################
1103 # Managed status history, for auto-start/stop
1104
# Directory of marker files, one per service that has been managed.
ctdb_managed_dir="${CTDB_VARDIR}/managed_history"

# Set $_ctdb_managed_file to the marker file for $service_name.
_ctdb_managed_common ()
{
    _ctdb_managed_file="${ctdb_managed_dir}/${service_name}"
}

# Record that $service_name is managed by creating its marker file.
ctdb_service_managed ()
{
    mkdir -p "$ctdb_managed_dir"
    _ctdb_managed_common
    touch "$_ctdb_managed_file"
}

# Forget that $service_name was managed by removing its marker file.
ctdb_service_unmanaged ()
{
    _ctdb_managed_common
    rm -f "${_ctdb_managed_file}"
}

# Succeed if the marker file for $service_name exists.
is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common
    test -f "$_ctdb_managed_file"
}
1130
1131 ########################################################
1132 # Check and set status
1133
# Print a one-line report of a problem status for this script.
#
# Arguments:
#   $1 - node state to report (e.g. "unhealthy" or "banned")
#   $2 - file whose contents describe the problem
log_status_cat ()
{
    # "$2" is quoted so that a path containing whitespace or glob
    # characters is still read as a single file.
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}
1138
# Report any status recorded for this script under $ctdb_status_dir.
#
# Returns:
#   1 - a readable "unhealthy" file exists (checked first)
#   2 - a readable "banned" file exists
#   0 - neither exists
# In the non-zero cases the file is logged via log_status_cat().
ctdb_checkstatus ()
{
    if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
        log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
        return 1
    fi

    if [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
        log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
        return 2
    fi

    return 0
}
1151
# Record or clear a status for this script under $ctdb_status_dir.
#
# Arguments:
#   $1 - "unhealthy" or "banned" to record that status; any other
#        value clears both
#   $2 - file whose contents are copied into the status file (only
#        used when recording)
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"

    if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
        mkdir -p "$d"
        cat "$2" >"$d/$1"
    else
        for i in "banned" "unhealthy" ; do
            rm -f "$d/$i"
        done
    fi
}
1167
1168 ##################################################################
1169 # Reconfigure a service on demand
1170
# Set $_d to the status directory for $service_name (creating it) and
# $_ctdb_service_reconfigure_flag to the "reconfigure" flag file in it.
_ctdb_service_reconfigure_common ()
{
    _d="${ctdb_status_dir}/${service_name}"
    mkdir -p "$_d"
    _ctdb_service_reconfigure_flag="${_d}/reconfigure"
}

# Succeed if a reconfigure has been flagged for $service_name.
ctdb_service_needs_reconfigure ()
{
    _ctdb_service_reconfigure_common
    test -e "$_ctdb_service_reconfigure_flag"
}

# Flag $service_name as needing a reconfigure (creates/empties the flag).
ctdb_service_set_reconfigure ()
{
    _ctdb_service_reconfigure_common
    : >"$_ctdb_service_reconfigure_flag"
}

# Clear the reconfigure flag for $service_name.
ctdb_service_unset_reconfigure ()
{
    _ctdb_service_reconfigure_common
    rm -f "${_ctdb_service_reconfigure_flag}"
}
1195
# Reconfigure $service_name: clear the reconfigure flag, then run
# service_reconfigure().  If that fails its status is returned and the
# failure counter is left alone; otherwise the counter is reset.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"${service_name}\"..."
    ctdb_service_unset_reconfigure

    if service_reconfigure ; then
        ctdb_counter_init
    else
        # $? here is still the status of service_reconfigure
        return $?
    fi
}
1203
# Default service_reconfigure(): a successful no-op.  This is the hook
# that ctdb_service_reconfigure() runs.
service_reconfigure ()
{
    return 0
}
1209
# Try to claim the per-service "reconfigure_lock" file for this event.
#
# The file holds an event name on its first line followed by PIDs, one
# per line.  The claim fails (status 1) if any listed PID other than
# our own is still alive.  Otherwise the file is rewritten with
# $event_name and our PID and the function returns 0.
ctdb_reconfigure_try_lock ()
{
    _ctdb_service_reconfigure_common
    _lock="${_d}/reconfigure_lock"
    mkdir -p "${_lock%/*}" # dirname
    touch "$_lock"

    (
        # Serialise access: flock is applied to stdin, which is the
        # lock file itself (see redirection below).
        flock 0

        # Supporting a list of PIDs is overkill but will work if we
        # need to extend this to allow certain events to run multiple
        # times in parallel (e.g. takeip) and write multiple PIDs to
        # the file.
        read _locker_event
        if [ -n "$_locker_event" ] ; then
            while read _pid ; do
                # Skip blank lines and our own PID
                [ -n "$_pid" ] || continue
                [ "$_pid" != $$ ] || continue
                if kill -0 "$_pid" 2>/dev/null ; then
                    exit 1
                fi
            done
        fi

        printf "%s\n%s\n" "$event_name" $$ >"$_lock"
        exit 0
    ) <"$_lock"
}
1236
# Re-emit the previous "monitor" result for this script, as reported
# by "ctdb scriptstatus -Y", and exit with the corresponding code.
# This function does not return.
ctdb_replay_monitor_status ()
{
    echo "Replaying previous status for this script due to reconfigure..."

    # Output looks like this:
    # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
    # The leading colon (':') is missing in some versions, so it is
    # optional in the match.
    _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")

    # Splitting on ':' is the cheapest way of getting at fields in the
    # middle: code is the 3rd word, status the 4th.
    set -- $(IFS=":" ; echo $_out)
    _code="$3"
    _status="$4"

    # The error output field can include colons so we'll try to
    # preserve them.  The weak checking at the beginning tries to make
    # this work for both broken (no leading ':') and fixed output.
    _out="${_out%:}"
    _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"

    # OK and ERROR need nothing special.  The other two statuses can
    # not be replayed exactly since we can't exit with the correct
    # negative number.
    if [ "$_status" = "TIMEDOUT" ] ; then
        # Recast this as an error
        _code=1
        _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
    elif [ "$_status" = "DISABLED" ] ; then
        # Recast this as an OK
        _code=0
        _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
    fi

    if [ -n "$_err_out" ] ; then
        echo "$_err_out"
    fi
    exit $_code
}
1274
# Run or defer a reconfigure of $service_name depending on the event
# being processed and on whether the reconfigure lock can be claimed.
#
# May exit the script: always for the "reconfigure" event, and via
# ctdb_replay_monitor_status() for "monitor" when a reconfigure was
# run or somebody else holds the lock.
ctdb_service_check_reconfigure ()
{
    assert_service_name

    # We only care about some events in this function.  For others we
    # return now.
    case "$event_name" in
        monitor|ipreallocated|reconfigure) : ;;
        *) return 0 ;;
    esac

    if ! ctdb_reconfigure_try_lock ; then
        # Somebody else is running an event we don't want to collide
        # with.  We proceed with caution.
        case "$event_name" in
            reconfigure)
                # Tell whoever called us to retry.
                exit 2
                ;;
            ipreallocated)
                # Defer any scheduled reconfigure and just run the
                # rest of the ipreallocated event, as per the
                # eventscript.  There's an assumption here that the
                # event doesn't depend on any scheduled reconfigure.
                # This is true in the current code.
                return 0
                ;;
            monitor)
                # There is most likely a reconfigure in progress so
                # the service is possibly unstable.  As above, we
                # defer any scheduled reconfigure.  We also replay
                # the previous monitor status since that's the best
                # information we have.
                ctdb_replay_monitor_status
                ;;
        esac
        return
    fi

    # We hold the lock: no events covered by this function are
    # running, so it is safe to reconfigure.
    case "$event_name" in
        reconfigure)
            (ctdb_service_reconfigure)
            exit $?
            ;;
        ipreallocated)
            if ctdb_service_needs_reconfigure ; then
                ctdb_service_reconfigure
            fi
            ;;
        monitor)
            if ctdb_service_needs_reconfigure ; then
                ctdb_service_reconfigure
                # Given that the reconfigure might not have resulted
                # in the service being stable yet, we replay the
                # previous status since that's the best information
                # we have.
                ctdb_replay_monitor_status
            fi
            ;;
    esac
}
1337
1338 ##################################################################
1339 # Does CTDB manage this service? - and associated auto-start/stop
1340
# Backward compatibility helper: append service $2 to
# $CTDB_MANAGED_SERVICES if $1 is "yes" and $2 is the current
# $service_name.  Always returns 0.
#
# Arguments:
#   $1 - value of a legacy CTDB_MANAGES_* variable
#   $2 - service name that variable corresponds to
ctdb_compat_managed_service ()
{
    [ "$1" = "yes" ] || return 0
    [ "$2" = "$service_name" ] || return 0

    CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
}
1347
# Succeed if $service_name is listed in $CTDB_MANAGED_SERVICES, either
# directly or after the legacy CTDB_MANAGES_* variables have been
# folded into it.
is_ctdb_managed_service ()
{
    assert_service_name

    # $t is the list padded with a space at each end so that whole
    # words can be matched as "<space>name<space>".
    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
        *" "${service_name}" "*) return 0 ;;
    esac

    # Not listed: update $CTDB_MANAGED_SERVICES for backward
    # compatibility and try again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "apache2"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"

    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
        *" "${service_name}" "*) return 0 ;;
        *) return 1 ;;
    esac
}
1378
# Start or stop $service_name in response to the current event.
#
# - "service-start"/"service-stop" pseudo-events start/stop the
#   service and exit with the resulting status.  They are refused
#   (via die) if the service is managed or if
#   $CTDB_SERVICE_AUTOSTARTSTOP is "yes".
# - For "monitor" events, and only when $CTDB_SERVICE_AUTOSTARTSTOP is
#   "yes", the service is started/stopped in the background if its
#   managed state differs from the recorded history, and the script
#   exits.
# - Otherwise returns 0 without doing anything.
ctdb_start_stop_service ()
{
    assert_service_name

    # Allow service-start/service-stop pseudo-events to start/stop
    # services when we're not auto-starting/stopping and we're not
    # monitoring.
    case "$event_name" in
        service-start)
            is_ctdb_managed_service && \
                die 'service-start event not permitted when service is managed'
            [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] && \
                die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
            ctdb_service_start
            exit $?
            ;;
        service-stop)
            is_ctdb_managed_service && \
                die 'service-stop event not permitted when service is managed'
            [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] && \
                die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
            ctdb_service_stop
            exit $?
            ;;
    esac

    # Do nothing unless configured to, and then only when monitoring.
    if [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] ; then
        return 0
    fi
    if [ "$event_name" != "monitor" ] ; then
        return 0
    fi

    if is_ctdb_managed_service ; then
        # Managed now: start it unless it was already managed before
        is_ctdb_previously_managed_service && return 0
        echo "Starting service \"$service_name\" - now managed"
        background_with_logging ctdb_service_start
        exit $?
    fi

    # Not managed now: stop it if it used to be managed
    is_ctdb_previously_managed_service || return 0
    echo "Stopping service \"$service_name\" - no longer managed"
    background_with_logging ctdb_service_stop
    exit $?
}
1428
# Start $service_name via service_start().  On failure the status of
# service_start() is returned; on success the failure counter is reset
# and ctdb_check_tcp_init() is run.
ctdb_service_start ()
{
    # The service is marked managed if we've ever tried to start it,
    # whether or not the start succeeds.
    ctdb_service_managed

    if service_start ; then
        ctdb_counter_init
        ctdb_check_tcp_init
    else
        # $? here is still the status of service_start
        return $?
    fi
}

# Stop $service_name via service_stop(), after removing the record of
# it having been managed.  Returns the status of service_stop().
ctdb_service_stop ()
{
    ctdb_service_unmanaged
    service_stop
}
1445
# Default service_start() and service_stop() functions.  Each simply
# runs the "service" command for $service_name.

# An eventscript may override these.  The convention for overrides is:
# when called without arguments they may apply their own logic to
# decide whether the service is managed and whether to act at all;
# when called with the service name as an argument they must attempt
# to start or stop the service.
# NOTE(review): the convention is motivated by the auto-start/stop
# code passing the service name, but ctdb_service_start() and
# ctdb_service_stop() in this file call these without arguments -
# confirm which is current.
service_start ()
{
    service "${service_name}" start
}

service_stop ()
{
    service "${service_name}" stop
}
1465
1466 ##################################################################
1467
# Handle the events that every eventscript treats the same way.
#
# Arguments:
#   $1 - event name
#   remaining arguments are passed to ctdb_setstatus() for "setstatus"
#
# For "status" and "setstatus" the script exits with the status of
# ctdb_checkstatus() or ctdb_setstatus() respectively.  Any other
# event is left for the caller to handle.
ctdb_standard_event_handler ()
{
    if [ "$1" = "status" ] ; then
        ctdb_checkstatus
        exit
    elif [ "$1" = "setstatus" ] ; then
        shift
        ctdb_setstatus "$@"
        exit
    fi
}
1482
# iptables doesn't like being re-entered, so flock-wrap it.
#
# All arguments are passed through to /sbin/iptables, which is run
# while holding a lock on $CTDB_VARDIR/iptables-ctdb.flock.  flock
# waits up to 30 seconds for the lock.
iptables()
{
        # The lock path is quoted so that a $CTDB_VARDIR containing
        # whitespace is not split into several arguments.
        flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
}
1488
# AIX (and perhaps others?) doesn't have mktemp, so provide a minimal
# replacement when none is available.  "command -v" is used for the
# check because which(1) is not specified by POSIX.
if ! command -v mktemp >/dev/null 2>&1 ; then
    # Create a private temporary file, or a directory if the first
    # argument is "-d", under ${TMPDIR:-/tmp} and print its name.
    # Other arguments (e.g. templates) are ignored.
    # Returns non-zero and prints nothing if creation fails.
    mktemp ()
    {
        _dir=false
        if [ "$1" = "-d" ] ; then
            _dir=true
            shift
        fi
        _d="${TMPDIR:-/tmp}"
        # First 10 hex digits of the md5sum of some random data
        _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \
            md5sum | \
            sed -e 's@\(..........\).*@\1@')
        _t="${_d}/tmp.${_hex10}"
        (
            # The sub-shell keeps the umask and noclobber changes
            # from affecting the caller.
            umask 077
            if $_dir ; then
                mkdir "$_t"
            else
                # noclobber: fail rather than truncate an existing file
                set -C
                : >"$_t"
            fi
        ) || return 1
        echo "$_t"
    }
fi
1514
1515 ########################################################
1516 # tickle handling
1517 ########################################################
1518
# Synchronise the tickles registered with ctdb for a port with the TCP
# connections currently established to this node's public IPs on that
# port: missing tickles are added and stale ones are removed.
#
# Arguments:
#   $1 - TCP port number
update_tickles ()
{
        _port="$1"

        tickledir="$CTDB_VARDIR/state/tickles"
        mkdir -p "$tickledir"

        # Who am I?
        # The "PNN:" prefix is stripped from the output of "ctdb pnn"
        _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}

        # What public IPs do I hold?
        # That is, the 2nd field of each line of "ctdb -Y ip" whose 3rd
        # colon-separated field is our PNN
        _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')

        # IPs as a regexp choice
        # The dots are escaped with a doubled backslash because the
        # pattern is handed to awk via -v below - presumably awk
        # consumes one level of escaping there; verify before changing
        _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

        # Record connections to our public IPs in a temporary file
        # Each line is "<foreign addr:port> <local addr:port>", sorted
        # as required by comm below
        _my_connections="${tickledir}/${_port}.connections"
        rm -f "$_my_connections"
        netstat -tn |
        awk -v destpat="^${_ipschoice}:${_port}\$" \
          '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
        sort >"$_my_connections"

        # Record our current tickles in a temporary file
        # These are written in the same 2-column format as the
        # connections, skipping the first line of gettickles output
        _my_tickles="${tickledir}/${_port}.tickles"
        rm -f "$_my_tickles"
        for _i in $_ips ; do
                ctdb -Y gettickles $_i $_port |
                awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
        done |
        sort >"$_my_tickles"

        # Add tickles for connections that we haven't already got tickles for
        # (comm -23: lines found only in the connections file)
        comm -23 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb addtickle $_src $_dst
        done

        # Remove tickles for connections that are no longer there
        # (comm -13: lines found only in the tickles file)
        comm -13 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb deltickle $_src $_dst
        done

        rm -f "$_my_connections" "$_my_tickles"
}
1566
1567 ########################################################
1568 # load a site local config file
1569 ########################################################
1570
# Source the site local configuration, in this order:
#   1. the file named by $CTDB_RC_LOCAL, if set and executable
#   2. $CTDB_BASE/rc.local, if executable
#   3. every executable file in $CTDB_BASE/rc.local.d
# All paths are quoted so that directories containing whitespace work,
# and the obsolescent "-a" test operator is avoided.
if [ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] ; then
        . "$CTDB_RC_LOCAL"
fi

if [ -x "$CTDB_BASE/rc.local" ] ; then
        . "$CTDB_BASE/rc.local"
fi

if [ -d "$CTDB_BASE/rc.local.d" ] ; then
        # If the directory is empty the unexpanded pattern fails the
        # -x test, so nothing is sourced.
        for i in "$CTDB_BASE"/rc.local.d/* ; do
                [ -x "$i" ] && . "$i"
        done
fi
1584
1585 script_name="${0##*/}"       # basename
1586 service_fail_limit=1
1587 event_name="$1"