config/events.d/01.reclock

   #!/bin/sh
   # Event script: check that the recovery lock ("reclock") file is
   # accessible from this node.
   #
   # Usage: 01.reclock <event> [args...]
   #   startup  - reset the failure counter
   #   monitor  - count one more interval, probe the reclock file in the
   #              background, and report UNHEALTHY (exit 1) or shut ctdbd
   #              down once the counter reaches the limits below
   # Any other event is a no-op and exits 0.
   #
   # Requires CTDB_BASE to point at the directory holding the "functions"
   # library.  loadconfig and the ctdb_counter_* helpers are not defined in
   # this script; presumably they come from that library.

   . "$CTDB_BASE/functions"
   loadconfig ctdb

   cmd="$1"
   shift

   PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH

   # Name of the counter tracking how many consecutive monitor intervals
   # have passed without a successful stat of the reclock file.  Once the
   # counter reaches 3 the node is reported unhealthy; once it reaches 20
   # ctdbd is shut down.
   RECLOCKCOUNT="fail-count"

   case "$cmd" in
       startup)
           ctdb_counter_init "$RECLOCKCOUNT"
           ;;

       monitor)
           # Assume failure for this interval up front; the background
           # stat below resets the counter if the file is reachable.
           ctdb_counter_incr "$RECLOCKCOUNT"

           # NOTE(review): ctdb_counter_limit appears to succeed once the
           # counter has reached the given limit - confirm in functions.
           if ctdb_counter_limit "$RECLOCKCOUNT" 20; then
               echo "Reclock file can not be accessed. Shutting down."
               sleep 1
               ctdb shutdown
           fi

           RECLOCKFILE=$(ctdb -Y getreclock)
           if [ -z "$RECLOCKFILE" ]; then
               # we are not using a reclock file
               ctdb_counter_init "$RECLOCKCOUNT"
               exit 0
           fi

           # Stat the reclock file in a background subshell so that this
           # script does not block if the cluster filesystem is
           # unavailable.  The path is quoted so that a reclock file whose
           # name contains whitespace or glob characters is still found.
           (
               if stat "$RECLOCKFILE"; then
                   # we could stat the file, reset the counter
                   ctdb_counter_init "$RECLOCKCOUNT"
               fi
           ) >/dev/null 2>/dev/null &

           # This check does not wait for the background stat above, so
           # it may run before a successful stat has reset the counter.
           if ctdb_counter_limit "$RECLOCKCOUNT" 3; then
               echo "Reclock file can not be accessed. Mark node UNHEALTHY."
               exit 1
           fi
           ;;
   esac

   exit 0