config/statd-callout

   1 #!/bin/sh
   2
   3 # this script needs to be installed so that statd points to it with the -H
   4 # command line argument. The easiest way to do that is to put something like this in
   5 # /etc/sysconfig/nfs:
   6 #   STATD_HOSTNAME="myhostname -H /etc/ctdb/statd-callout"
   7
   8 [ -z "$CTDB_BASE" ] && {
   9     export CTDB_BASE="/etc/ctdb"
  10 }
  11
  12 [ -z "$CTDB_VARDIR" ] && {
  13     export CTDB_VARDIR="/var/ctdb"
  14 }
  15
  16 . $CTDB_BASE/functions
  17 loadconfig ctdb
  18 loadconfig nfs
  19
  20 [ -z $NFS_HOSTNAME ] && {
  21         echo NFS_HOSTNAME is not configured. statd-callout failed.
  22         exit 0
  23 }
  24
  25 case "$1" in
  26   add-client)
  27         # the callout does not tell us to which ip the client connected
  28         # so we must add it to all the ips that we serve
  29         PNN=`ctdb xpnn | sed -e "s/.*://"`
  30         ctdb ip -Y | while read LINE; do
  31                 NODE=`echo $LINE | cut -f3 -d:`
  32                 [ "$NODE" = "$PNN" ] || {
  33                         # not us
  34                         continue
  35                 }
  36                 IP=`echo $LINE | cut -f2 -d:`
  37                 /bin/mkdir -p $CTDB_VARDIR/state/statd/ip/$IP
  38                 touch $CTDB_VARDIR/state/statd/ip/$IP/$2
  39         done
  40         ;;
  41   del-client)
  42         # the callout does not tell us to which ip the client disconnected
  43         # so we must remove it from all the ips that we serve
  44         PNN=`ctdb xpnn | sed -e "s/.*://"`
  45         ctdb ip -Y | while read LINE; do
  46                 NODE=`echo $LINE | cut -f3 -d:`
  47                 [ "$NODE" = "$PNN" ] || {
  48                         # not us
  49                         continue
  50                 }
  51                 IP=`echo $LINE | cut -f2 -d:`
  52                 mkdir -p $CTDB_VARDIR/state/statd/ip/$IP
  53                 /bin/rm -f $CTDB_VARDIR/state/statd/ip/$IP/$2
  54         done
  55         ;;
  56   updatelocal)
  57         # For all IPs we serve, collect info and push to the config database
  58         PNN=`ctdb xpnn | sed -e "s/.*://"`
  59         ctdb ip -Y | tail -n +2 | while read LINE; do
  60                 NODE=`echo $LINE | cut -f3 -d:`
  61                 [ "$NODE" = "$PNN" ] || {
  62                         continue
  63                 }
  64                 IP=`echo $LINE | cut -f2 -d:`
  65
  66                 mkdir -p $CTDB_VARDIR/state/statd/ip/$IP
  67
  68                 rm -f $CTDB_VARDIR/state/statd/ip/$IP.tar
  69                 tar cfP $CTDB_VARDIR/state/statd/ip/$IP.tar $CTDB_VARDIR/state/statd/ip/$IP
  70
  71                 rm -f $CTDB_VARDIR/state/statd/ip/$IP.rec
  72                 ctdb pfetch ctdb.tdb statd-state:$IP $CTDB_VARDIR/state/statd/ip/$IP.rec 2>/dev/null
  73                 [ "$?" = "0" ] || {
  74                         # something went wrong,  try storing this data
  75                         echo No record. Store STATD state data for $IP
  76                         ctdb pstore ctdb.tdb statd-state:$IP $CTDB_VARDIR/state/statd/ip/$IP.tar 2>/dev/null
  77                         continue
  78                 }
  79
  80                 cmp $CTDB_VARDIR/state/statd/ip/$IP.tar $CTDB_VARDIR/state/statd/ip/$IP.rec >/dev/null 2>/dev/null
  81                 [ "$?" = "0" ] || {
  82                         # something went wrong,  try storing this data
  83                         echo Updated record. Store STATD state data for $IP
  84                         ctdb pstore ctdb.tdb statd-state:$IP $CTDB_VARDIR/state/statd/ip/$IP.tar 2>/dev/null
  85                         continue
  86                 }
  87         done
  88         ;;
  89
  90   updateremote)
  91         # For all IPs we dont serve, pull the state from the database
  92         PNN=`ctdb xpnn | sed -e "s/.*://"`
  93         ctdb ip -Y | tail -n +2 | while read LINE; do
  94                 NODE=`echo $LINE | cut -f3 -d:`
  95                 [ "$NODE" = "$PNN" ] && {
  96                         continue
  97                 }
  98                 IP=`echo $LINE | cut -f2 -d:`
  99
 100                 mkdir -p $CTDB_VARDIR/state/statd/ip/$IP
 101
 102                 rm -f $CTDB_VARDIR/state/statd/ip/$IP.rec
 103                 ctdb pfetch ctdb.tdb statd-state:$IP $CTDB_VARDIR/state/statd/ip/$IP.rec 2>/dev/null
 104                 [ "$?" = "0" ] || {
 105                         continue
 106                 }
 107
 108                 rm -f $CTDB_VARDIR/state/statd/ip/$IP/*
 109                 tar xfP $CTDB_VARDIR/state/statd/ip/$IP.rec
 110         done
 111         ;;
 112
 113   notify)
 114         # we must restart the lockmanager (on all nodes) so that we get
 115         # a clusterwide grace period (so other clients dont take out
 116         # conflicting locks through other nodes before all locks have been
 117         # reclaimed)
 118
 119         # we need these settings to make sure that no tcp connections survive
 120         # across a very fast failover/failback
 121         #echo 10 > /proc/sys/net/ipv4/tcp_fin_timeout
 122         #echo 0 > /proc/sys/net/ipv4/tcp_max_tw_buckets
 123         #echo 0 > /proc/sys/net/ipv4/tcp_max_orphans
 124
 125         # Delete the notification list for statd, we dont want it to
 126         # ping any clients
 127         rm -f /var/lib/nfs/statd/sm/*
 128         rm -f /var/lib/nfs/statd/sm.bak/*
 129
 130         # we must keep a monotonically increasing state variable for the entire
 131         # cluster  so state always increases when ip addresses fail from one
 132         # node to another
 133         # We use epoch and hope the nodes are close enough in clock.
 134         # Even numbers mean service is shut down, odd numbers mean
 135         # service is started.
 136         STATE=`date +"%s"`
 137         STATE=`expr "$STATE" "/" "2"`
 138
 139
 140         # we must also let some time pass between stopping and restarting the
 141         # lockmanager since othervise there is a window where the lockmanager
 142         # will respond "strangely" immediately after restarting it, which
 143         # causes clients to fail to reclaim the locks.
 144         #
 145         startstop_nfslock stop > /dev/null 2>&1
 146         sleep 2
 147
 148         # now start lockmanager again with the new state directory.
 149         startstop_nfslock start > /dev/null 2>&1
 150
 151         # we now need to send out additional statd notifications to ensure
 152         # that clients understand that the lockmanager has restarted.
 153         # we have three cases:
 154         # 1, clients that ignore the ip address the stat notification came from
 155         #    and ONLY care about the 'name' in the notify packet.
 156         #    these clients ONLY work with lock failover IFF that name
 157         #    can be resolved into an ipaddress that matches the one used
 158         #    to mount the share.  (==linux clients)
 159         #    This is handled when starting lockmanager above,  but those
 160         #    packets are sent from the "wrong" ip address, something linux
 161         #    clients are ok with, buth other clients will barf at.
 162         # 2, Some clients only accept statd packets IFF they come from the
 163         #    'correct' ip address.
 164         # 2a,Send out the notification using the 'correct' ip address and also
 165         #    specify the 'correct' hostname in the statd packet.
 166         #    Some clients require both the correct source address and also the
 167         #    correct name. (these clients also ONLY work if the ip addresses
 168         #    used to map the share can be resolved into the name returned in
 169         #    the notify packet.)
 170         # 2b,Other clients require that the source ip address of the notify
 171         #    packet matches the ip address used to take out the lock.
 172         #    I.e. that the correct source address is used.
 173         #    These clients also require that the statd notify packet contains
 174         #    the name as the ip address used when the lock was taken out.
 175         #
 176         # Both 2a and 2b are commonly used in lockmanagers since they maximize
 177         # probability that the client will accept the statd notify packet and
 178         # not just ignore it.
 179         # For all IPs we serve, collect info and push to the config database
 180         PNN=`ctdb xpnn | sed -e "s/.*://"`
 181         ctdb ip -Y | tail -n +2 | while read LINE; do
 182                 NODE=`echo $LINE | cut -f3 -d:`
 183                 [ "$NODE" = "$PNN" ] || {
 184                         continue
 185                 }
 186                 IP=`echo $LINE | cut -f2 -d:`
 187
 188                 ls $CTDB_VARDIR/state/statd/ip/$IP | while read CLIENT; do
 189                         rm $CTDB_VARDIR/state/statd/ip/$IP/$CLIENT
 190                         /usr/bin/smnotify --client=$CLIENT --ip=$IP --server=$ip --stateval=$STATE
 191                         /usr/bin/smnotify --client=$CLIENT --ip=$IP --server=$NFS_HOSTNAME --stateval=$STATE
 192                         STATE=`expr "$STATE" "+" "1"`
 193                         /usr/bin/smnotify --client=$CLIENT --ip=$IP --server=$ip --stateval=$STATE
 194                         /usr/bin/smnotify --client=$CLIENT --ip=$IP --server=$NFS_HOSTNAME --stateval=$STATE
 195                 done
 196         done
 197         ;;
 198 esac