ctdb/config/functions

   1 # Hey Emacs, this is a -*- shell-script -*- !!!
   2
   3 # utility functions for ctdb event scripts
   4
   5 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
   6
   7 [ -z "$CTDB_VARDIR" ] && {
   8     if [ -d "/var/lib/ctdb" ] ; then
   9         export CTDB_VARDIR="/var/lib/ctdb"
  10     else
  11         export CTDB_VARDIR="/var/ctdb"
  12     fi
  13 }
  14 [ -z "$CTDB_ETCDIR" ] && {
  15     export CTDB_ETCDIR="/etc"
  16 }
  17
  18 #######################################
  19 # pull in a system config file, if any
  20 _loadconfig() {
  21
  22     if [ -z "$1" ] ; then
  23         foo="${service_config:-${service_name}}"
  24         if [ -n "$foo" ] ; then
  25             loadconfig "$foo"
  26         fi
  27     elif [ "$1" != "ctdb" ] ; then
  28         loadconfig "ctdb"
  29     fi
  30
  31     if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
  32         . $CTDB_ETCDIR/sysconfig/$1
  33     elif [ -f $CTDB_ETCDIR/default/$1 ]; then
  34         . $CTDB_ETCDIR/default/$1
  35     elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
  36         . $CTDB_BASE/sysconfig/$1
  37     fi
  38 }
  39
  40 loadconfig () {
  41     _loadconfig "$@"
  42 }
  43
  44 ##############################################################
  45
  46 # CTDB_SCRIPT_DEBUGLEVEL can be overwritten by setting it in a
  47 # configuration file.
  48 debug ()
  49 {
  50     if [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] ; then
  51         # If there are arguments then echo them.  Otherwise expect to
  52         # use stdin, which allows us to pass lots of debug using a
  53         # here document.
  54         if [ -n "$1" ] ; then
  55             echo "DEBUG: $*"
  56         elif ! tty -s ; then
  57             sed -e 's@^@DEBUG: @'
  58         fi
  59     fi
  60 }
  61
  62 die ()
  63 {
  64     _msg="$1"
  65     _rc="${2:-1}"
  66
  67     echo "$_msg"
  68     exit $_rc
  69 }
  70
  71 # Log given message or stdin to either syslog or a CTDB log file
  72 # $1 is the tag passed to logger if syslog is in use.
  73 script_log ()
  74 {
  75     _tag="$1" ; shift
  76
  77     _using_syslog=false
  78     if [ "$CTDB_SYSLOG" = "yes" -o -z "$CTDB_LOGFILE" ] ; then
  79         _using_syslog=true
  80     fi
  81     case "$CTDB_OPTIONS" in
  82         *--syslog*) _using_syslog=true ;;
  83     esac
  84
  85     if $_using_syslog ; then
  86         logger -t "ctdbd: ${_tag}" $*
  87     else
  88         {
  89             if [ -n "$*" ] ; then
  90                 echo "$*"
  91             else
  92                 cat
  93             fi
  94         } >>"${CTDB_LOGFILE:-/var/log/log.ctdb}"
  95     fi
  96 }
  97
  98 # When things are run in the background in an eventscript then logging
  99 # output might get lost.  This is the "solution".  :-)
 100 background_with_logging ()
 101 {
 102     (
 103         "$@" 2>&1 </dev/null |
 104         script_log "${script_name}&"
 105     )&
 106
 107     return 0
 108 }
 109
 110 ##############################################################
 111 # check number of args for different events
 112 ctdb_check_args ()
 113 {
 114     case "$1" in
 115         takeip|releaseip)
 116             if [ $# != 4 ]; then
 117                 echo "ERROR: must supply interface, IP and maskbits"
 118                 exit 1
 119             fi
 120             ;;
 121         updateip)
 122             if [ $# != 5 ]; then
 123                 echo "ERROR: must supply old interface, new interface, IP and maskbits"
 124                 exit 1
 125             fi
 126             ;;
 127     esac
 128 }
 129
 130 ##############################################################
 131 # determine on what type of system (init style) we are running
 132 detect_init_style() {
 133     # only do detection if not already set:
 134     test "x$CTDB_INIT_STYLE" != "x" && return
 135
 136     if [ -x /sbin/startproc ]; then
 137         CTDB_INIT_STYLE="suse"
 138     elif [ -x /sbin/start-stop-daemon ]; then
 139         CTDB_INIT_STYLE="debian"
 140     else
 141         CTDB_INIT_STYLE="redhat"
 142     fi
 143 }
 144
 145 ######################################################
 146 # simulate /sbin/service on platforms that don't have it
 147 # _service() makes it easier to hook the service() function for
 148 # testing.
 149 _service ()
 150 {
 151   _service_name="$1"
 152   _op="$2"
 153
 154   # do nothing, when no service was specified
 155   [ -z "$_service_name" ] && return
 156
 157   if [ -x /sbin/service ]; then
 158       $_nice /sbin/service "$_service_name" "$_op"
 159   elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
 160       $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
 161   elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
 162       $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
 163   fi
 164 }
 165
 166 service()
 167 {
 168     _nice=""
 169     _service "$@"
 170 }
 171
 172 ######################################################
 173 # simulate /sbin/service (niced) on platforms that don't have it
 174 nice_service()
 175 {
 176     _nice="nice"
 177     _service "$@"
 178 }
 179
 180 ######################################################
 181 # wrapper around /proc/ settings to allow them to be hooked
 182 # for testing
 183 # 1st arg is relative path under /proc/, 2nd arg is value to set
 184 set_proc ()
 185 {
 186     echo "$2" >"/proc/$1"
 187 }
 188
 189 ######################################################
 190 # wrapper around getting file contents from /proc/ to allow
 191 # this to be hooked for testing
 192 # 1st arg is relative path under /proc/
 193 get_proc ()
 194 {
 195     cat "/proc/$1"
 196 }
 197
 198 ######################################################
 199 # Check that an RPC service is healthy -
 200 # this includes allowing a certain number of failures
 201 # before marking the NFS service unhealthy.
 202 #
 203 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
 204 #
 205 # each triple is a set of 3 arguments: an operator, a
 206 # fail count limit and an action string.
 207 #
 208 # For example:
 209 #
 210 #       nfs_check_rpc_service "lockd" \
 211 #           -ge 15 "verbose restart unhealthy" \
 212 #           -eq 10 "restart:bs"
 213 #
 214 # says that if lockd is down for 15 iterations then do
 215 # a verbose restart of lockd and mark the node unhealthy.
 216 # Before this, after 10 iterations of failure, the
 217 # service is restarted silently in the background.
 218 # Order is important: the number of failures need to be
 219 # specified in reverse order because processing stops
 220 # after the first condition that is true.
 221 ######################################################
 222 nfs_check_rpc_service ()
 223 {
 224     _prog_name="$1" ; shift
 225
 226     _v=""
 227     case "$1" in
 228         -*) : ;;
 229         *) _v="$1" ; shift ;;
 230     esac
 231
 232     _version=${_v:-1}
 233     _rpc_prog="$_prog_name"
 234     _restart=""
 235     _opts=""
 236     case "$_prog_name" in
 237         knfsd)
 238             _rpc_prog=nfs
 239             _version=${_v:-3}
 240             _restart="echo 'Trying to restart NFS service'"
 241             _restart="${_restart}; startstop_nfs restart"
 242             ;;
 243         ganesha)
 244             _rpc_prog=nfs
 245             _version=${_v:-3}
 246             _restart="echo 'Trying to restart Ganesha NFS service'"
 247             _restart="${_restart}; startstop_ganesha restart"
 248             ;;
 249         mountd)
 250             _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
 251             ;;
 252         rquotad)
 253             _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
 254             ;;
 255         lockd)
 256             _rpc_prog=nlockmgr
 257             _version=${_v:-4}
 258             _restart="echo 'Trying to restart lock manager service'"
 259             _restart="${_restart}; startstop_nfslock restart"
 260             ;;
 261         statd)
 262             _rpc_prog=status
 263             _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
 264             _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
 265             _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
 266             ;;
 267         *)
 268             echo "Internal error: unknown RPC program \"$_prog_name\"."
 269             exit 1
 270     esac
 271
 272     _service_name="nfs_${_prog_name}"
 273
 274     if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
 275         ctdb_counter_init "$_service_name"
 276         return 0
 277     fi
 278
 279     ctdb_counter_incr "$_service_name"
 280
 281     while [ -n "$3" ] ; do
 282         ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
 283             for _action in $3 ; do
 284                 case "$_action" in
 285                     verbose)
 286                         echo "$ctdb_check_rpc_out"
 287                         ;;
 288                     restart|restart:*)
 289                         # No explicit command specified, construct rpc command.
 290                         if [ -z "$_restart" ] ; then
 291                             _p="rpc.${_prog_name}"
 292                             _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
 293                             _restart="${_restart}; killall -q -9 $_p"
 294                             _restart="${_restart}; $_p $_opts"
 295                         fi
 296
 297                         # Process restart flags...
 298                         _flags="${_action#restart:}"
 299                         # There may not have been a colon...
 300                         [ "$_flags" != "$_action" ] || _flags=""
 301                         # q=quiet - everything to /dev/null
 302                         if [ "${_flags#*q}" != "$_flags" ] ; then
 303                             _restart="{ ${_restart} ; } >/dev/null 2>&1"
 304                         fi
 305                         # s=stealthy - last command to /dev/null
 306                         if [ "${_flags#*s}" != "$_flags" ] ; then
 307                             _restart="${_restart} >/dev/null 2>&1"
 308                         fi
 309                         # b=background - the whole thing, easy and reliable
 310                         if [ "${_flags#*b}" != "$_flags" ] ; then
 311                             _restart="{ ${_restart} ; } &"
 312                         fi
 313
 314                         # Do it!
 315                         eval "${_restart}"
 316                         ;;
 317                     unhealthy)
 318                         exit 1
 319                         ;;
 320                     *)
 321                         echo "Internal error: unknown action \"$_action\"."
 322                         exit 1
 323                 esac
 324             done
 325
 326             # Only process the first action group.
 327             break
 328         }
 329         shift 3
 330     done
 331 }
 332
 333 ######################################################
 334 # check that a rpc server is registered with portmap
 335 # and responding to requests
 336 # usage: ctdb_check_rpc SERVICE_NAME VERSION
 337 ######################################################
 338 ctdb_check_rpc ()
 339 {
 340     progname="$1"
 341     version="$2"
 342
 343     if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
 344         ctdb_check_rpc_out="ERROR: $progname failed RPC check:
 345 $ctdb_check_rpc_out"
 346         echo "$ctdb_check_rpc_out"
 347         return 1
 348     fi
 349 }
 350
 351 ######################################################
 352 # Ensure $service_name is set
 353 assert_service_name ()
 354 {
 355     [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
 356 }
 357
 358 ######################################################
 359 # check a set of directories is available
 360 # return 1 on a missing directory
 361 # directories are read from stdin
 362 ######################################################
 363 ctdb_check_directories_probe()
 364 {
 365     while IFS="" read d ; do
 366         case "$d" in
 367             *%*)
 368                 continue
 369                 ;;
 370             *)
 371                 [ -d "${d}/." ] || return 1
 372         esac
 373     done
 374 }
 375
 376 ######################################################
 377 # check a set of directories is available
 378 # directories are read from stdin
 379 ######################################################
 380 ctdb_check_directories()
 381 {
 382     ctdb_check_directories_probe || {
 383         echo "ERROR: $service_name directory \"$d\" not available"
 384         exit 1
 385     }
 386 }
 387
 388 ######################################################
 389 # check a set of tcp ports
 390 # usage: ctdb_check_tcp_ports <ports...>
 391 ######################################################
 392
 393 # This flag file is created when a service is initially started.  It
 394 # is deleted the first time TCP port checks for that service succeed.
 395 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
 396 # message if a port check fails.
 397 _ctdb_check_tcp_common ()
 398 {
 399     assert_service_name
 400     _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
 401 }
 402
 403 ctdb_check_tcp_init ()
 404 {
 405     _ctdb_check_tcp_common
 406     mkdir -p "${_ctdb_service_started_file%/*}" # dirname
 407     touch "$_ctdb_service_started_file"
 408 }
 409
 410 ctdb_check_tcp_ports()
 411 {
 412     if [ -z "$1" ] ; then
 413         echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
 414         exit 1
 415     fi
 416
 417     # Set default value for CTDB_TCP_PORT_CHECKS if unset.
 418     # If any of these defaults are unsupported then this variable can
 419     # be overridden in /etc/sysconfig/ctdb or via a file in
 420     # /etc/ctdb/rc.local.d/.
 421     : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
 422
 423     for _c in $CTDB_TCP_PORT_CHECKERS ; do
 424         ctdb_check_tcp_ports_$_c "$@"
 425         case "$?" in
 426             0)
 427                 _ctdb_check_tcp_common
 428                 rm -f "$_ctdb_service_started_file"
 429                 return 0
 430                 ;;
 431             1)
 432                 _ctdb_check_tcp_common
 433                 if [ ! -f "$_ctdb_service_started_file" ] ; then
 434                     echo "ERROR: $service_name tcp port $_p is not responding"
 435                     debug <<EOF
 436 $ctdb_check_tcp_ports_debug
 437 EOF
 438                 else
 439                     echo "INFO: $service_name tcp port $_p is not responding"
 440                 fi
 441
 442                 return 1
 443                 ;;
 444             127)
 445                 debug <<EOF
 446 ctdb_check_ports - checker $_c not implemented
 447 output from checker was:
 448 $ctdb_check_tcp_ports_debug
 449 EOF
 450                 ;;
 451             *)
 452
 453         esac
 454     done
 455
 456     echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
 457
 458     return 127
 459 }
 460
 461 ctdb_check_tcp_ports_netstat ()
 462 {
 463     _cmd='netstat -l -t -n'
 464     _ns=$($_cmd 2>&1)
 465     if [ $? -eq 127 ] ; then
 466         # netstat probably not installed - unlikely?
 467         ctdb_check_tcp_ports_debug="$_ns"
 468         return 127
 469     fi
 470
 471     for _p ; do  # process each function argument (port)
 472         for _a in '0\.0\.0\.0' '::' ; do
 473             _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
 474             if echo "$_ns" | grep -E -q "$_pat" ; then
 475                 # We matched the port, so process next port
 476                 continue 2
 477             fi
 478         done
 479
 480         # We didn't match the port, so flag an error.
 481         ctdb_check_tcp_ports_debug="$_cmd shows this output:
 482 $_ns"
 483         return 1
 484     done
 485
 486     return 0
 487 }
 488
 489 ctdb_check_tcp_ports_nmap ()
 490 {
 491     # nmap wants a comma-separated list of ports
 492     _ports=""
 493     for _p ; do
 494         _ports="${_ports}${_ports:+,}${_p}"
 495     done
 496
 497     _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
 498
 499     _nmap_out=$($_cmd 2>&1)
 500     if [ $? -eq 127 ] ; then
 501         # nmap probably not installed
 502         ctdb_check_tcp_ports_debug="$_nmap_out"
 503         return 127
 504     fi
 505
 506     # get the port-related output
 507     _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
 508
 509     for _p ; do
 510         # looking for something like this:
 511         #  445/open/tcp//microsoft-ds///
 512         # possibly followed by a comma
 513         _t="$_p/open/tcp//"
 514         case "$_port_info" in
 515             # The info we're after must be either at the beginning of
 516             # the string or it must follow a space.
 517             $_t*|*\ $_t*) : ;;
 518             *)
 519                 # Nope, flag an error...
 520                 ctdb_check_tcp_ports_debug="$_cmd shows this output:
 521 $_nmap_out"
 522                 return 1
 523         esac
 524     done
 525
 526     return 0
 527 }
 528
 529 # Use the new "ctdb checktcpport" command to check the port.
 530 # This is very cheap.
 531 ctdb_check_tcp_ports_ctdb ()
 532 {
 533     for _p ; do  # process each function argument (port)
 534         _cmd="ctdb checktcpport $_p"
 535         _out=$($_cmd 2>&1)
 536         _ret=$?
 537         case "$_ret" in
 538             0)
 539                 ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
 540                 return 1
 541                 ;;
 542             98)
 543                 # Couldn't bind, something already listening, next port...
 544                 continue
 545                 ;;
 546             *)
 547                 ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
 548 $_out"
 549                 # assume not implemented
 550                 return 127
 551         esac
 552     done
 553
 554     return 0
 555 }
 556
 557 ######################################################
 558 # check a unix socket
 559 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
 560 ######################################################
 561 ctdb_check_unix_socket() {
 562     socket_path="$1"
 563     [ -z "$socket_path" ] && return
 564
 565     if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
 566         echo "ERROR: $service_name socket $socket_path not found"
 567         return 1
 568     fi
 569 }
 570
 571 ######################################################
 572 # check a command returns zero status
 573 # usage: ctdb_check_command <command>
 574 ######################################################
 575 ctdb_check_command ()
 576 {
 577     _out=$("$@" 2>&1) || {
 578         echo "ERROR: $* returned error"
 579         echo "$_out" | debug
 580         exit 1
 581     }
 582 }
 583
 584 ################################################
 585 # kill off any TCP connections with the given IP
 586 ################################################
 587 kill_tcp_connections() {
 588     _IP="$1"
 589     _failed=0
 590
 591     _killcount=0
 592     connfile="$CTDB_VARDIR/state/connections.$_IP"
 593     mkdir -p "${connfile%/*}" # dirname
 594     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
 595     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 596
 597     while read dest src; do
 598         echo "Killing TCP connection $src $dest"
 599         ctdb killtcp $src $dest >/dev/null 2>&1 || _failed=1
 600         _destport="${dest##*:}"
 601         case $_destport in
 602           # we only do one-way killtcp for CIFS
 603           139|445) : ;;
 604           # for all others we do 2-way
 605           *)
 606                 ctdb killtcp $dest $src >/dev/null 2>&1 || _failed=1
 607                 ;;
 608         esac
 609         _killcount=`expr $_killcount + 1`
 610      done < $connfile
 611     rm -f $connfile
 612
 613     [ $_failed = 0 ] || {
 614         echo "Failed to send killtcp control"
 615         return;
 616     }
 617     [ $_killcount -gt 0 ] || {
 618         return;
 619     }
 620     _count=0
 621     while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
 622         sleep 1
 623         _count=`expr $_count + 1`
 624         [ $_count -gt 3 ] && {
 625             echo "Timed out killing tcp connections for IP $_IP"
 626             return;
 627         }
 628     done
 629     echo "killed $_killcount TCP connections to released IP $_IP"
 630 }
 631
 632 ##################################################################
 633 # kill off the local end for any TCP connections with the given IP
 634 ##################################################################
 635 kill_tcp_connections_local_only() {
 636     _IP="$1"
 637     _failed=0
 638
 639     _killcount=0
 640     connfile="$CTDB_VARDIR/state/connections.$_IP"
 641     mkdir -p "${connfile%/*}" # dirname
 642     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
 643     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 644
 645     while read dest src; do
 646         echo "Killing TCP connection $src $dest"
 647         ctdb killtcp $src $dest >/dev/null 2>&1 || _failed=1
 648         _killcount=`expr $_killcount + 1`
 649      done < $connfile
 650     rm -f $connfile
 651
 652     [ $_failed = 0 ] || {
 653         echo "Failed to send killtcp control"
 654         return;
 655     }
 656     [ $_killcount -gt 0 ] || {
 657         return;
 658     }
 659     _count=0
 660     while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
 661         sleep 1
 662         _count=`expr $_count + 1`
 663         [ $_count -gt 3 ] && {
 664             echo "Timed out killing tcp connections for IP $_IP"
 665             return;
 666         }
 667     done
 668     echo "killed $_killcount TCP connections to released IP $_IP"
 669 }
 670
 671 ##################################################################
 672 # tickle any TCP connections with the given IP
 673 ##################################################################
 674 tickle_tcp_connections() {
 675     _IP="$1"
 676     _failed=0
 677
 678     _killcount=0
 679     connfile="$CTDB_VARDIR/state/connections.$_IP"
 680     mkdir -p "${connfile%/*}" # dirname
 681     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
 682     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 683
 684     while read dest src; do
 685         echo "Tickle TCP connection $src $dest"
 686         ctdb tickle $src $dest >/dev/null 2>&1 || _failed=1
 687         echo "Tickle TCP connection $dest $src"
 688         ctdb tickle $dest $src >/dev/null 2>&1 || _failed=1
 689      done < $connfile
 690     rm -f $connfile
 691
 692     [ $_failed = 0 ] || {
 693         echo "Failed to send tickle control"
 694         return;
 695     }
 696 }
 697
 698 ########################################################
 699 # start/stop the Ganesha nfs service
 700 ########################################################
 701 startstop_ganesha()
 702 {
 703     _service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
 704     case "$1" in
 705         start)
 706             service "$_service_name" start
 707             ;;
 708         stop)
 709             service "$_service_name" stop
 710             ;;
 711         restart)
 712             service "$_service_name" restart
 713             ;;
 714     esac
 715 }
 716
 717 ########################################################
 718 # start/stop the nfs service on different platforms
 719 ########################################################
 720 startstop_nfs() {
 721         PLATFORM="unknown"
 722         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 723                 PLATFORM="sles"
 724         }
 725         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 726                 PLATFORM="rhel"
 727         }
 728
 729         case $PLATFORM in
 730         sles)
 731                 case $1 in
 732                 start)
 733                         service nfsserver start
 734                         ;;
 735                 stop)
 736                         service nfsserver stop > /dev/null 2>&1
 737                         ;;
 738                 restart)
 739                         set_proc "fs/nfsd/threads" 0
 740                         service nfsserver stop > /dev/null 2>&1
 741                         pkill -9 nfsd
 742                         service nfsserver start
 743                         ;;
 744                 esac
 745                 ;;
 746         rhel)
 747                 case $1 in
 748                 start)
 749                         service nfslock start
 750                         service nfs start
 751                         ;;
 752                 stop)
 753                         service nfs stop
 754                         service nfslock stop
 755                         ;;
 756                 restart)
 757                         set_proc "fs/nfsd/threads" 0
 758                         service nfs stop > /dev/null 2>&1
 759                         service nfslock stop > /dev/null 2>&1
 760                         pkill -9 nfsd
 761                         service nfslock start
 762                         service nfs start
 763                         ;;
 764                 esac
 765                 ;;
 766         *)
 767                 echo "Unknown platform. NFS is not supported with ctdb"
 768                 exit 1
 769                 ;;
 770         esac
 771 }
 772
 773 ########################################################
 774 # start/stop the nfs lockmanager service on different platforms
 775 ########################################################
 776 startstop_nfslock() {
 777         PLATFORM="unknown"
 778         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 779                 PLATFORM="sles"
 780         }
 781         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 782                 PLATFORM="rhel"
 783         }
 784
 785         case $PLATFORM in
 786         sles)
 787                 # for sles there is no service for lockmanager
 788                 # so we instead just shutdown/restart nfs
 789                 case $1 in
 790                 start)
 791                         service nfsserver start
 792                         ;;
 793                 stop)
 794                         service nfsserver stop > /dev/null 2>&1
 795                         ;;
 796                 restart)
 797                         service nfsserver stop
 798                         service nfsserver start
 799                         ;;
 800                 esac
 801                 ;;
 802         rhel)
 803                 case $1 in
 804                 start)
 805                         service nfslock start
 806                         ;;
 807                 stop)
 808                         service nfslock stop > /dev/null 2>&1
 809                         ;;
 810                 restart)
 811                         service nfslock stop
 812                         service nfslock start
 813                         ;;
 814                 esac
 815                 ;;
 816         *)
 817                 echo "Unknown platform. NFS locking is not supported with ctdb"
 818                 exit 1
 819                 ;;
 820         esac
 821 }
 822
 823 add_ip_to_iface()
 824 {
 825     _iface=$1
 826     _ip=$2
 827     _maskbits=$3
 828
 829     _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
 830     mkdir -p "${_lockfile%/*}" # dirname
 831     [ -f "$_lockfile" ] || touch "$_lockfile"
 832
 833     (
 834         # Note: use of return/exit/die() below only gets us out of the
 835         # sub-shell, which is actually what we want.  That is, the
 836         # function should just return non-zero.
 837
 838         flock --timeout 30 0 || \
 839             die "add_ip_to_iface: unable to get lock for ${_iface}"
 840
 841         # Ensure interface is up
 842         ip link set "$_iface" up || \
 843             die "Failed to bringup interface $_iface"
 844
 845         ip addr add "$_ip/$_maskbits" brd + dev "$_iface" || \
 846             die "Failed to add $_ip/$_maskbits on dev $_iface"
 847     ) <"$_lockfile"
 848
 849     # Do nothing here - return above only gets us out of the subshell
 850     # and doing anything here will affect the return code.
 851 }
 852
 853 delete_ip_from_iface()
 854 {
 855     _iface=$1
 856     _ip=$2
 857     _maskbits=$3
 858
 859     _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
 860     mkdir -p "${_lockfile%/*}" # dirname
 861     [ -f "$_lockfile" ] || touch "$_lockfile"
 862
 863     (
 864         # Note: use of return/exit/die() below only gets us out of the
 865         # sub-shell, which is actually what we want.  That is, the
 866         # function should just return non-zero.
 867
 868         flock --timeout 30 0 || \
 869             die "delete_ip_from_iface: unable to get lock for ${_iface}"
 870
 871         _im="$_ip/$_maskbits"  # shorthand for readability
 872
 873         # "ip addr del" will delete all secondary IPs if this is the
 874         # primary.  To work around this _very_ annoying behaviour we
 875         # have to keep a record of the secondaries and re-add them
 876         # afterwards.  Yuck!
 877
 878         _secondaries=""
 879         if ip addr list dev "$_iface" primary | grep -Fq "inet $_im " ; then
 880             _secondaries=$(ip addr list dev "$_iface" secondary | \
 881                 awk '$1 == "inet" { print $2 }')
 882         fi
 883
 884         local _rc=0
 885         ip addr del "$_im" dev "$_iface" || {
 886             echo "Failed to del $_ip on dev $_iface"
 887             _rc=1
 888         }
 889
 890         if [ -n "$_secondaries" ] ; then
 891             for _i in $_secondaries; do
 892                 if ip addr list dev "$_iface" | grep -Fq "inet $_i" ; then
 893                     echo "Kept secondary $_i on dev $_iface"
 894                 else
 895                     echo "Re-adding secondary address $_i to dev $_iface"
 896                     ip addr add $_i brd + dev $_iface || {
 897                         echo "Failed to re-add address $_i to dev $_iface"
 898                         _rc=1
 899                     }
 900                 fi
 901             done
 902         fi
 903
 904         return $_rc
 905     ) <"$_lockfile"
 906
 907     # Do nothing here - return above only gets us out of the subshell
 908     # and doing anything here will affect the return code.
 909 }
 910
 911 # If the given IP is hosted then print 2 items: maskbits and iface
 912 ip_maskbits_iface ()
 913 {
 914     _addr="$1"
 915
 916     ip addr show to "${_addr}/32" 2>/dev/null | \
 917         awk '$1 == "inet" { print gensub(".*/", "", 1, $2), $NF }'
 918 }
 919
 920 drop_ip ()
 921 {
 922     _addr="${1%/*}"  # Remove optional maskbits
 923     _log_tag="$2"
 924
 925     set -- $(ip_maskbits_iface $_addr)
 926     if [ -n "$1" ] ; then
 927         _maskbits="$1"
 928         _iface="$2"
 929         if [ -n "$_log_tag" ] ; then
 930             script_log "$_log_tag" \
 931                 "Removing public address $_addr/$_maskbits from device $_iface"
 932         fi
 933         ip addr del $_addr/$_maskbits dev $_iface >/dev/null 2>&1
 934     fi
 935 }
 936
 937 drop_all_public_ips ()
 938 {
 939     _log_tag="$1"
 940
 941     while read _ip _x ; do
 942         drop_ip "$_ip" "$_log_tag"
 943     done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
 944 }
 945
 946 ########################################################
 947 # some simple logic for counting events - per eventscript
 948 # usage: ctdb_counter_init
 949 #        ctdb_counter_incr
 950 #        ctdb_check_counter_limit <limit>
 951 # ctdb_check_counter_limit fails when count >= <limit>
 952 ########################################################
# Common setup for the counter functions.
# $1 is an optional service name; it defaults to $service_name and
#    then to $script_name.
# Sets $_service_name and $_counter_file for use by the caller and
# creates the directory that will contain the counter file.
_ctdb_counter_common () {
    _service_name="${1:-${service_name:-${script_name}}}"
    _counter_file="$ctdb_fail_dir/$_service_name"
    mkdir -p "${_counter_file%/*}" # dirname
}
 958 ctdb_counter_init () {
 959     _ctdb_counter_common "$1"
 960
 961     >"$_counter_file"
 962 }
 963 ctdb_counter_incr () {
 964     _ctdb_counter_common "$1"
 965
 966     # unary counting!
 967     echo -n 1 >> "$_counter_file"
 968 }
 969 ctdb_check_counter_limit () {
 970     _ctdb_counter_common
 971
 972     _limit="${1:-${service_fail_limit}}"
 973     _quiet="$2"
 974
 975     # unary counting!
 976     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
 977     if [ $_size -ge $_limit ] ; then
 978         echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
 979         exit 1
 980     elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
 981         echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
 982     fi
 983 }
 984 ctdb_check_counter () {
 985     _msg="${1:-error}"  # "error"  - anything else is silent on fail
 986     _op="${2:--ge}"  # an integer operator supported by test
 987     _limit="${3:-${service_fail_limit}}"
 988     shift 3
 989     _ctdb_counter_common "$1"
 990
 991     # unary counting!
 992     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
 993     if [ $_size $_op $_limit ] ; then
 994         if [ "$_msg" = "error" ] ; then
 995             echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
 996             exit 1
 997         else
 998             return 1
 999         fi
1000     fi
1001 }
1002
1003 ########################################################
1004
1005 ctdb_status_dir="$CTDB_VARDIR/status"
1006 ctdb_fail_dir="$CTDB_VARDIR/failcount"
1007
# Set $service_state_dir to a per-service directory below
# $CTDB_VARDIR/state and make sure that it exists.
# $1 is an optional service name; it defaults to $service_name.
# Exits with status 1 if the directory can not be created.
ctdb_setup_service_state_dir ()
{
    service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"

    if ! mkdir -p "$service_state_dir" ; then
        echo "Error creating state dir \"$service_state_dir\""
        exit 1
    fi
}
1016
1017 ########################################################
1018 # Managed status history, for auto-start/stop
1019
1020 ctdb_managed_dir="$CTDB_VARDIR/managed_history"
1021
# Set $_ctdb_managed_file to the marker file for $service_name, for
# use by the caller.
_ctdb_managed_common ()
{
    _ctdb_managed_file="$ctdb_managed_dir/$service_name"
}
1026
# Record that $service_name is managed by creating (or touching) its
# marker file.
ctdb_service_managed ()
{
    _ctdb_managed_common
    mkdir -p "$ctdb_managed_dir"
    touch "$_ctdb_managed_file"
}
1033
# Record that $service_name is no longer managed by removing its
# marker file.  It is not an error if the file does not exist.
ctdb_service_unmanaged ()
{
    _ctdb_managed_common
    rm -f "$_ctdb_managed_file"
}
1039
# Succeed if the marker file for $service_name exists, that is, if
# ctdb_service_managed has been called for it since the last call to
# ctdb_service_unmanaged.
is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common
    [ -f "$_ctdb_managed_file" ]
}
1045
1046 ########################################################
1047 # Check and set status
1048
# Print a one-line problem report for this script.
# $1 is the node state to report.
# $2 is a file whose contents describe the problem.
log_status_cat ()
{
    # The file name is quoted so that a path containing whitespace or
    # glob characters is still read as a single file.
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}
1053
# Report any status that has been recorded for this script.
# Returns 1 (after logging) if a readable "unhealthy" file exists,
# 2 (after logging) if a readable "banned" file exists, otherwise 0.
# "unhealthy" takes precedence over "banned".
ctdb_checkstatus ()
{
    if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
        log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
        return 1
    fi

    if [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
        log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
        return 2
    fi

    return 0
}
1066
# Record or clear the status for this script.
# $1 is the status.  For "unhealthy" or "banned" the contents of the
#    file named by $2 are stored as that status.  Any other value
#    clears both recorded statuses.
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"

    if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
        mkdir -p "$d"
        cat "$2" >"$d/$1"
    else
        for i in "banned" "unhealthy" ; do
            rm -f "$d/$i"
        done
    fi
}
1082
1083 ##################################################################
1084 # Reconfigure a service on demand
1085
# Common setup for the reconfigure functions.
# Sets $_d to the status directory for $service_name, creates that
# directory and sets $_ctdb_service_reconfigure_flag to the name of
# the flag file within it.  Both variables are used by callers.
_ctdb_service_reconfigure_common ()
{
    _d="$ctdb_status_dir/${service_name}"
    mkdir -p "$_d"
    _ctdb_service_reconfigure_flag="$_d/reconfigure"
}
1092
# Succeed if the reconfigure flag file for $service_name exists.
ctdb_service_needs_reconfigure ()
{
    _ctdb_service_reconfigure_common
    [ -e "$_ctdb_service_reconfigure_flag" ]
}
1098
# Schedule a reconfigure of $service_name by creating (or truncating)
# its reconfigure flag file.
ctdb_service_set_reconfigure ()
{
    _ctdb_service_reconfigure_common

    # Only the existence of the flag file matters, so it is left empty
    : >"$_ctdb_service_reconfigure_flag"
}
1104
# Cancel any scheduled reconfigure of $service_name by removing its
# reconfigure flag file.  It is not an error if the file does not
# exist.
ctdb_service_unset_reconfigure ()
{
    _ctdb_service_reconfigure_common
    rm -f "$_ctdb_service_reconfigure_flag"
}
1110
# Reconfigure $service_name now.
# The reconfigure flag is cleared before service_reconfigure is
# called.  If service_reconfigure fails then its status is returned
# and the failure counter is left alone; otherwise the failure
# counter is reset.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"${service_name}\"..."
    ctdb_service_unset_reconfigure

    if service_reconfigure ; then
        ctdb_counter_init
    else
        # $? still holds the status of service_reconfigure here
        return $?
    fi
}
1118
1119 # Default service_reconfigure() function does nothing.
# Called by ctdb_service_reconfigure.  This default implementation
# is a successful no-op.
service_reconfigure ()
{
    :
}
1124
# Try to claim the per-service reconfigure lock file for this
# process.
# The lock file holds an event name on its first line followed by one
# PID per line.  Returns 1 if any PID listed in the file, other than
# $$, belongs to a process that can be signalled.  Otherwise the file
# is rewritten with $event_name and $$ and 0 is returned.
ctdb_reconfigure_try_lock ()
{
    _ctdb_service_reconfigure_common
    _lock="${_d}/reconfigure_lock"
    # $_d has already been created by the common function above
    mkdir -p "${_lock%/*}" # dirname
    touch "$_lock"

    # The subshell's stdin is the lock file, so "flock 0" locks that
    # file and the reads below consume its contents.  The exit
    # statements only leave the subshell; its status becomes the
    # status of this function.
    (
        flock 0
        # This is overkill but will work if we need to extend this to
        # allow certain events to run multiple times in parallel
        # (e.g. takeip) and write multiple PIDs to the file.
        read _locker_event
        if [ -n "$_locker_event" ] ; then
            while read _pid ; do
                # kill -0 only tests whether the process can be
                # signalled; nothing is sent to it
                if [ -n "$_pid" -a "$_pid" != $$ ] && \
                    kill -0 "$_pid" 2>/dev/null ; then
                    exit 1
                fi
            done
        fi

        # Nobody else holds it: record this event and PID as the owner
        printf "%s\n%s\n" "$event_name" $$ >"$_lock"
        exit 0
    ) <"$_lock"
}
1151
# Print the output recorded for this script's entry in
# "ctdb scriptstatus -Y" and exit with the recorded return code
# (adjusted for TIMEDOUT and DISABLED).  This function exits the
# script rather than returning.
ctdb_replay_monitor_status ()
{
    echo "Replaying previous status for this script due to reconfigure..."
    # Leading colon (':') is missing in some versions...
    _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")
    # Output looks like this:
    # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
    # This is the cheapest way of getting fields in the middle.
    # The subshell splits on ':' and echo joins the fields with
    # spaces, so the result is re-split on whitespace and any empty
    # leading field disappears.  For the sample above $3 is the
    # return code and $4 is the status.
    set -- $(IFS=":" ; echo $_out)
    _code="$3"
    _status="$4"
    # The error output field can include colons so we'll try to
    # preserve them.  The weak checking at the beginning tries to make
    # this work for both broken (no leading ':') and fixed output.
    _out="${_out%:}"
    _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"
    case "$_status" in
        OK) : ;;  # Do nothing special.
        TIMEDOUT)
            # Recast this as an error, since we can't exit with the
            # correct negative number.
            _code=1
            _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
            ;;
        DISABLED)
            # Recast this as an OK, since we can't exit with the
            # correct negative number.
            _code=0
            _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
            ;;
        *) : ;;  # Must be ERROR, do nothing special.
    esac
    echo "$_err_out"
    # NOTE(review): if grep matched nothing then $_code is empty and
    # this exits with the status of the echo above - confirm that is
    # the intended fallback.
    exit $_code
}
1187
# Run or defer a reconfigure of $service_name, depending on the
# current event and on whether the reconfigure lock can be taken.
# Only the monitor, ipreallocated and reconfigure events are handled;
# for any other event this returns 0 immediately.  Several branches
# exit the script instead of returning.
ctdb_service_check_reconfigure ()
{
    assert_service_name

    # We only care about some events in this function.  For others we
    # return now.
    case "$event_name" in
        monitor|ipreallocated|reconfigure) : ;;
        *) return 0 ;;
    esac

    if ctdb_reconfigure_try_lock ; then
        # No events covered by this function are running, so proceed
        # without restraint.
        case "$event_name" in
            reconfigure)
                # Run in a subshell and exit with its status
                (ctdb_service_reconfigure)
                exit $?
                ;;
            ipreallocated)
                if ctdb_service_needs_reconfigure ; then
                    ctdb_service_reconfigure
                fi
                ;;
            monitor)
                if ctdb_service_needs_reconfigure ; then
                    ctdb_service_reconfigure
                    # Given that the reconfigure might not have
                    # resulted in the service being stable yet, we
                    # replay the previous status since that's the best
                    # information we have.  This exits the script.
                    ctdb_replay_monitor_status
                fi
                ;;
        esac
    else
        # Somebody else is running an event we don't want to collide
        # with.  We proceed with caution.
        case "$event_name" in
            reconfigure)
                # Tell whoever called us to retry.
                exit 2
                ;;
            ipreallocated)
                # Defer any scheduled reconfigure and just run the
                # rest of the ipreallocated event, as per the
                # eventscript.  There's an assumption here that the
                # event doesn't depend on any scheduled reconfigure.
                # This is true in the current code.
                return 0
                ;;
            monitor)
                # There is most likely a reconfigure in progress so
                # the service is possibly unstable.  As above, we
                # defer any scheduled reconfigure.  We also replay
                # the previous monitor status since that's the best
                # information we have.  This exits the script.
                ctdb_replay_monitor_status
                ;;
        esac
    fi
}
1250
1251 ##################################################################
1252 # Does CTDB manage this service? - and associated auto-start/stop
1253
# Backward compatibility helper for old-style CTDB_MANAGES_* options.
# $1 is the value of the old-style option.
# $2 is the service name that the option refers to.
# If $1 is "yes" and $2 is the current $service_name then $2 is
# appended to $CTDB_MANAGED_SERVICES.
ctdb_compat_managed_service ()
{
    [ "$1" = "yes" ] || return 0
    [ "$2" = "$service_name" ] || return 0

    CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
}
1260
# Succeed if $service_name is listed in $CTDB_MANAGED_SERVICES.
# If it is not listed then the old-style CTDB_MANAGES_* options are
# folded into $CTDB_MANAGED_SERVICES and the check is repeated.
is_ctdb_managed_service ()
{
    assert_service_name

    # $t has a space added at each end so that a whole-word match can
    # be done by looking for "<space>$service_name<space>".
    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
        *" "${service_name}" "*) return 0 ;;
    esac

    # No match, so update $CTDB_MANAGED_SERVICES for backward
    # compatibility and try again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "apache2"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"

    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
        *" "${service_name}" "*) return 0 ;;
        *) return 1 ;;
    esac
}
1292
# Handle the service-start/service-stop pseudo-events and, when
# $CTDB_SERVICE_AUTOSTARTSTOP is "yes", automatically start or stop
# $service_name during the monitor event when its managed state has
# changed.  Whenever a start or stop is attempted the script exits
# with the resulting status; otherwise this returns 0.
ctdb_start_stop_service ()
{
    assert_service_name

    # Allow service-start/service-stop pseudo-events to start/stop
    # services when we're not auto-starting/stopping and we're not
    # monitoring.
    if [ "$event_name" = "service-start" ] ; then
        if is_ctdb_managed_service ; then
            die 'service-start event not permitted when service is managed'
        fi
        if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
            die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
        fi
        ctdb_service_start
        exit $?
    elif [ "$event_name" = "service-stop" ] ; then
        if is_ctdb_managed_service ; then
            die 'service-stop event not permitted when service is managed'
        fi
        if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
            die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
        fi
        ctdb_service_stop
        exit $?
    fi

    # Auto-start/stop only happens when configured, and only in the
    # monitor event.
    if [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] ; then
        return 0
    fi
    if [ "$event_name" != "monitor" ] ; then
        return 0
    fi

    if is_ctdb_managed_service ; then
        # Managed now and managed before: nothing has changed
        if is_ctdb_previously_managed_service ; then
            return 0
        fi

        echo "Starting service \"$service_name\" - now managed"
        background_with_logging ctdb_service_start
        exit $?
    fi

    # Not managed now and not managed before: nothing has changed
    is_ctdb_previously_managed_service || return 0

    echo "Stopping service \"$service_name\" - no longer managed"
    background_with_logging ctdb_service_stop
    exit $?
}
1342
# Start $service_name via service_start.
# If service_start fails then its status is returned.  Otherwise the
# failure counter is reset and ctdb_check_tcp_init is called.
ctdb_service_start ()
{
    # The service is marked managed if we've ever tried to start it,
    # whether or not the start succeeds.
    ctdb_service_managed

    if service_start ; then
        ctdb_counter_init
        ctdb_check_tcp_init
    else
        # $? still holds the status of service_start here
        return $?
    fi
}
1353
# Mark $service_name as no longer managed and then stop it via
# service_stop.  The status of service_stop is returned.
ctdb_service_stop ()
{
    ctdb_service_unmanaged
    service_stop
}
1359
1360 # Default service_start() and service_stop() functions.
1361
1362 # These may be overridden in an eventscript.  When overriding, the
1363 # following convention must be followed.  If these functions are
1364 # called with no arguments then they may use internal logic to
1365 # determine whether the service is managed and, therefore, whether
1366 # they should take any action.  However, if the service name is
1367 # specified as an argument then an attempt must be made to start or
1368 # stop the service.  This is because the auto-start/stop code calls
1369 # them with the service name as an argument.
# Default implementation: run the "start" action for $service_name
# using the service command.
service_start ()
{
    service "$service_name" start
}
1374
# Default implementation: run the "stop" action for $service_name
# using the service command.
service_stop ()
{
    service "$service_name" stop
}
1379
1380 ##################################################################
1381
# Handle the events that are common to all eventscripts.
# $1 is the event name:
#   status    - exit with the status of ctdb_checkstatus
#   setstatus - pass the remaining arguments to ctdb_setstatus and
#               exit with its status
# For any other event this returns 0 without doing anything.
ctdb_standard_event_handler ()
{
    if [ "$1" = "status" ] ; then
        ctdb_checkstatus
        exit
    elif [ "$1" = "setstatus" ] ; then
        shift
        ctdb_setstatus "$@"
        exit
    fi
}
1396
1397 # iptables doesn't like being re-entered, so flock-wrap it.
# Wrapper that runs /sbin/iptables, with the given arguments, under a
# lock file in $CTDB_VARDIR.  flock waits up to 30 seconds for the
# lock.
iptables()
{
        # The lock file path is quoted so that a $CTDB_VARDIR
        # containing whitespace is still passed to flock as a single
        # argument.
        flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
}
1402
1403 ########################################################
1404 # tickle handling
1405 ########################################################
1406
# Bring the tickles registered with ctdb into line with the
# established TCP connections to this node's public IPs.
# $1 is the local TCP port of interest.
# Connections without a tickle get one added; tickles without a
# connection are deleted.  Two scratch files below
# $CTDB_VARDIR/state/tickles are used and removed at the end.
update_tickles ()
{
        _port="$1"

        tickledir="$CTDB_VARDIR/state/tickles"
        mkdir -p "$tickledir"

        # Who am I?
        _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}

        # What public IPs do I hold?
        # (Field 2 of each line whose field 3 equals our PNN.)
        _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')

        # IPs as a regexp choice
        # Spaces become '|' and dots are escaped.
        # NOTE(review): the backslashes are doubled, presumably to
        # survive awk's -v processing below - confirm.
        _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

        # Record connections to our public IPs in a temporary file
        # Each line is "<foreign address> <local address>", sorted so
        # that comm can be used below.
        _my_connections="${tickledir}/${_port}.connections"
        rm -f "$_my_connections"
        netstat -tn |
        awk -v destpat="^${_ipschoice}:${_port}\$" \
          '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
        sort >"$_my_connections"

        # Record our current tickles in a temporary file
        # The first line of gettickles output is skipped and the rest
        # is reformatted to match the connections file.
        _my_tickles="${tickledir}/${_port}.tickles"
        rm -f "$_my_tickles"
        for _i in $_ips ; do
                ctdb -Y gettickles $_i $_port |
                awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
        done |
        sort >"$_my_tickles"

        # Add tickles for connections that we haven't already got tickles for
        # (comm -23: lines found only in the connections file)
        comm -23 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb addtickle $_src $_dst
        done

        # Remove tickles for connections that are no longer there
        # (comm -13: lines found only in the tickles file)
        comm -13 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb deltickle $_src $_dst
        done

        rm -f "$_my_connections" "$_my_tickles"
}
1454
1455 ########################################################
1456 # load a site local config file
1457 ########################################################
1458
# Site-local customisations are sourced from, in order:
# $CTDB_RC_LOCAL (if set), $CTDB_BASE/rc.local and every file in
# $CTDB_BASE/rc.local.d.  Only executable files are sourced.
if [ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] ; then
        . "$CTDB_RC_LOCAL"
fi

if [ -x $CTDB_BASE/rc.local ] ; then
        . $CTDB_BASE/rc.local
fi

if [ -d $CTDB_BASE/rc.local.d ] ; then
        for i in $CTDB_BASE/rc.local.d/* ; do
                [ -x "$i" ] && . "$i"
        done
fi

# These are set after the site-local files have been sourced, so
# values assigned to them in those files are overwritten here.
script_name="${0##*/}"       # basename
service_fail_limit=1
event_name="$1"