##############################
# ctdb: Starts the clustered tdb daemon
#
-# chkconfig: - 90 36
+# chkconfig: - 90 01
#
# description: Starts and stops the clustered tdb daemon
-# pidfile: /var/run/ctdbd/ctdbd.pid
+# pidfile: /var/run/ctdb/ctdbd.pid
#
### BEGIN INIT INFO
# Source function library.
if [ -f /etc/init.d/functions ] ; then
- . /etc/init.d/functions
+ . /etc/init.d/functions
elif [ -f /etc/rc.d/init.d/functions ] ; then
- . /etc/rc.d/init.d/functions
+ . /etc/rc.d/init.d/functions
fi
[ -f /etc/rc.status ] && {
LC_ALL=en_US.UTF-8
}
+if [ -f /lib/lsb/init-functions ] ; then
+ . /lib/lsb/init-functions
+fi
+
# Avoid using root's TMPDIR
unset TMPDIR
loadconfig ctdb
# check networking is up (for redhat)
-[ "${NETWORKING}" = "no" ] && exit 0
+[ "$NETWORKING" = "no" ] && exit 0
+
+detect_init_style
+export CTDB_INIT_STYLE
+
+ctdbd=${CTDBD:-/usr/sbin/ctdbd}
+pidfile="/var/run/ctdb/ctdbd.pid"
+
+if [ "$CTDB_VALGRIND" = "yes" ]; then
+ init_style="valgrind"
+else
+ init_style="$CTDB_INIT_STYLE"
+fi
+
+# Assemble the ctdbd command line in CTDB_OPTIONS from the CTDB_*
+# configuration variables.  Values are wrapped in single quotes because
+# start() expands CTDB_OPTIONS through "eval".  Also creates the
+# directory that will hold the pid file.
+build_ctdb_options () {
+
+    # maybe_set OPTION VALUE [REQUIRED]
+    # Append OPTION to CTDB_OPTIONS.
+    #  - With two arguments nothing is added when VALUE is empty;
+    #    otherwise OPTION is added with VALUE attached ("=" after a
+    #    long option, a space after a short one).
+    #  - With REQUIRED given, OPTION is added as a bare flag and only
+    #    when VALUE equals REQUIRED.
+    maybe_set () {
+        # If the 2nd arg is null then return - don't set anything.
+        [ -n "$2" ] || return 0
+        # Else if the 3rd arg is set and it doesn't match the 2nd arg
+        # then return.  Separate tests are used because "-o", "-a" and
+        # parentheses inside one "[" are ambiguous for some values.
+        if [ -n "$3" ] && [ "$3" != "$2" ] ; then
+            return 0
+        fi
-[ -z "$CTDB_RECOVERY_LOCK" ] && {
- echo "You must configure the location of the CTDB_RECOVERY_LOCK"
- exit 1
+        val="'$2'"
+        case "$1" in
+        --*) sep="=" ;;
+        -*) sep=" " ;;
+        esac
+        # For these options we're only passing a value-less flag.
+        [ -n "$3" ] && {
+            val=""
+            sep=""
+        }
+
+        CTDB_OPTIONS="${CTDB_OPTIONS}${CTDB_OPTIONS:+ }${1}${sep}${val}"
+    }
+
+    [ -z "$CTDB_RECOVERY_LOCK" ] && {
+        echo "No recovery lock specified. Starting CTDB without split brain prevention"
+    }
+    maybe_set "--reclock" "$CTDB_RECOVERY_LOCK"
+
+    # Quote the substitution so the directory name is never word-split.
+    mkdir -p "$(dirname "$pidfile")"
+    maybe_set "--pidfile" "$pidfile"
+
+    # build up CTDB_OPTIONS variable from optional parameters
+    maybe_set "--logfile" "$CTDB_LOGFILE"
+    maybe_set "--nlist" "$CTDB_NODES"
+    maybe_set "--socket" "$CTDB_SOCKET"
+    maybe_set "--public-addresses" "$CTDB_PUBLIC_ADDRESSES"
+    maybe_set "--public-interface" "$CTDB_PUBLIC_INTERFACE"
+    maybe_set "--dbdir" "$CTDB_DBDIR"
+    maybe_set "--dbdir-persistent" "$CTDB_DBDIR_PERSISTENT"
+    maybe_set "--event-script-dir" "$CTDB_EVENT_SCRIPT_DIR"
+    maybe_set "--transport" "$CTDB_TRANSPORT"
+    maybe_set "-d" "$CTDB_DEBUGLEVEL"
+    maybe_set "--notification-script" "$CTDB_NOTIFY_SCRIPT"
+    maybe_set "--start-as-disabled" "$CTDB_START_AS_DISABLED" "yes"
+    maybe_set "--start-as-stopped " "$CTDB_START_AS_STOPPED" "yes"
+    maybe_set "--no-recmaster" "$CTDB_CAPABILITY_RECMASTER" "no"
+    maybe_set "--no-lmaster" "$CTDB_CAPABILITY_LMASTER" "no"
+    maybe_set "--lvs --single-public-ip" "$CTDB_LVS_PUBLIC_IP"
+    maybe_set "--script-log-level" "$CTDB_SCRIPT_LOG_LEVEL"
+    maybe_set "--log-ringbuf-size" "$CTDB_LOG_RINGBUF_SIZE"
+    maybe_set "--syslog" "$CTDB_SYSLOG" "yes"
+    maybe_set "--max-persistent-check-errors" "$CTDB_MAX_PERSISTENT_CHECK_ERRORS"
}
-CTDB_OPTIONS="$CTDB_OPTIONS --reclock=$CTDB_RECOVERY_LOCK"
-# build up CTDB_OPTIONS variable from optional parameters
-[ -z "$CTDB_LOGFILE" ] || CTDB_OPTIONS="$CTDB_OPTIONS --logfile=$CTDB_LOGFILE"
-[ -z "$CTDB_NODES" ] || CTDB_OPTIONS="$CTDB_OPTIONS --nlist=$CTDB_NODES"
-[ -z "$CTDB_SOCKET" ] || CTDB_OPTIONS="$CTDB_OPTIONS --socket=$CTDB_SOCKET"
-[ -z "$CTDB_PUBLIC_ADDRESSES" ] || CTDB_OPTIONS="$CTDB_OPTIONS --public-addresses=$CTDB_PUBLIC_ADDRESSES"
-[ -z "$CTDB_PUBLIC_INTERFACE" ] || CTDB_OPTIONS="$CTDB_OPTIONS --public-interface=$CTDB_PUBLIC_INTERFACE"
-[ -z "$CTDB_SINGLE_PUBLIC_IP" ] || CTDB_OPTIONS="$CTDB_OPTIONS --single-public-ip=$CTDB_SINGLE_PUBLIC_IP"
-[ -z "$CTDB_DBDIR" ] || CTDB_OPTIONS="$CTDB_OPTIONS --dbdir=$CTDB_DBDIR"
-[ -z "$CTDB_DBDIR_PERSISTENT" ] || CTDB_OPTIONS="$CTDB_OPTIONS --dbdir-persistent=$CTDB_DBDIR_PERSISTENT"
-[ -z "$CTDB_EVENT_SCRIPT_DIR" ] || CTDB_OPTIONS="$CTDB_OPTIONS --event-script-dir $CTDB_EVENT_SCRIPT_DIR"
-[ -z "$CTDB_TRANSPORT" ] || CTDB_OPTIONS="$CTDB_OPTIONS --transport $CTDB_TRANSPORT"
-[ -z "$CTDB_DEBUGLEVEL" ] || CTDB_OPTIONS="$CTDB_OPTIONS -d $CTDB_DEBUGLEVEL"
-[ -z "$CTDB_START_AS_DISABLED" ] || [ "$CTDB_START_AS_DISABLED" != "yes" ] || {
- CTDB_OPTIONS="$CTDB_OPTIONS --start-as-disabled"
+export_debug_variables ()
+{
+ export CTDB_DEBUG_HUNG_SCRIPT CTDB_EXTERNAL_TRACE
}
-[ -z "$CTDB_CAPABILITY_RECMASTER" ] || [ "$CTDB_CAPABILITY_RECMASTER" != "no" ] || {
- CTDB_OPTIONS="$CTDB_OPTIONS --no-recmaster"
+
+# Log given message or stdin to either syslog or a CTDB log file
+do_log ()
+{
+ script_log "ctdb.init" "$@"
}
-[ -z "$CTDB_CAPABILITY_LMASTER" ] || [ "$CTDB_CAPABILITY_LMASTER" != "no" ] || {
- CTDB_OPTIONS="$CTDB_OPTIONS --no-lmaster"
+
+select_tdb_checker ()
+{
+ # Find the best TDB consistency check available.
+ use_tdb_tool_check=false
+ if which tdbtool >/dev/null 2>&1 && \
+ echo "help" | tdbtool | grep -q check ; then
+
+ use_tdb_tool_check=true
+ elif which tdbtool >/dev/null 2>&1 && which tdbdump >/dev/null 2>&1 ; then
+ do_log <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+EOF
+ elif which tdbdump >/dev/null 2>&1 ; then
+ do_log <<EOF
+WARNING: 'tdbtool' is not available.
+ Using 'tdbdump' to check the databases.
+ Consider installing a recent 'tdbtool' for better checks!
+EOF
+ else
+ do_log <<EOF
+WARNING: Cannot check databases since neither
+ 'tdbdump' nor 'tdbtool check' is available.
+ Consider installing tdbtool or at least tdbdump!
+EOF
+ return 1
+ fi
}
-[ -z "$CTDB_LVS_PUBLIC_IP" ] || {
- CTDB_OPTIONS="$CTDB_OPTIONS --lvs --single-public-ip=$CTDB_LVS_PUBLIC_IP"
+
+# Check the integrity of a single TDB file.
+#   $1 - path of the database file
+# Reads $use_tdb_tool_check to choose between "tdbtool check" and
+# tdbdump.  Returns 0 if the database passes the check, non-zero
+# otherwise.
+check_tdb ()
+{
+    _db="$1"
+
+    if ! $use_tdb_tool_check ; then
+        # Fall back to tdbdump and hand its exit status to the caller.
+        tdbdump "$_db" >/dev/null 2>/dev/null
+        return $?
+    fi
+
+    # tdbtool always exits with 0 :-( so the verdict has to be taken
+    # from its output instead.
+    tdbtool "$_db" check 2>/dev/null |
+        grep -q "Database integrity is OK" || return 1
+    return 0
}
-[ -z "$CTDB_SCRIPT_LOG_LEVEL" ] || {
- CTDB_OPTIONS="$CTDB_OPTIONS --script-log-level=$CTDB_SCRIPT_LOG_LEVEL"
+
+check_persistent_databases ()
+{
+ _dir="${CTDB_DBDIR_PERSISTENT:-${CTDB_DBDIR:-/var/ctdb}/persistent}"
+ mkdir -p "$_dir" 2>/dev/null
+
+ [ "${CTDB_MAX_PERSISTENT_CHECK_ERRORS:-0}" = "0" ] || return 0
+
+ for _db in $(ls "$_dir/"*.tdb.*[0-9] 2>/dev/null) ; do
+ check_tdb $_db || {
+ do_log "Persistent database $_db is corrupted! CTDB will not start."
+ return 1
+ }
+ done
}
-detect_init_style
-export CTDB_INIT_STYLE
+check_non_persistent_databases ()
+{
+ _dir="${CTDB_DBDIR:-/var/ctdb}"
+ mkdir -p "$_dir" 2>/dev/null
-if [ "x$CTDB_VALGRIND" = "xyes" ]; then
- init_style="valgrind"
-else
- init_style="$CTDB_INIT_STYLE"
-fi
+ for _db in $(ls "${_dir}/"*.tdb.*[0-9] 2>/dev/null) ; do
+ check_tdb $_db || {
+ _backup="${_db}.$(date +'%Y%m%d.%H%M%S.%N').corrupt"
+ do_log <<EOF
+WARNING: database ${_db} is corrupted.
+ Moving to backup ${_backup} for later analysis.
+EOF
+ mv "$_db" "$_backup"
+
+ # Now remove excess backups
+ ls -td "${_db}."*".corrupt" |
+ tail -n +$((${CTDB_MAX_CORRUPT_DB_BACKUPS:-10} + 1)) |
+ xargs rm -f
+
+ }
+ done
+}
set_retval() {
- return $1
+ return $1
+}
+
+wait_until_ready () {
+ _timeout="${1:-10}" # default is 10 seconds
+
+ _count=0
+ while ! ctdb runstate startup running >/dev/null 2>&1 ; do
+ if [ $_count -ge $_timeout ] ; then
+ return 1
+ fi
+ sleep 1
+ _count=$(($_count + 1))
+ done
}
start() {
- killall -q ctdbd
- echo -n $"Starting ctdbd service: "
+ echo -n $"Starting ctdbd service: "
- # check all persistent databases that they look ok
- PERSISTENT_DB_DIR="/var/ctdb/persistent"
- [ -z "$CTDB_DBDIR" ] || {
- PERSISTENT_DB_DIR="$CTDB_DBDIR/persistent"
- }
- mkdir -p $PERSISTENT_DB_DIR 2>/dev/null
- for PDBASE in `ls $PERSISTENT_DB_DIR/*.tdb.[0-9] 2>/dev/null`; do
- /usr/bin/tdbdump $PDBASE >/dev/null 2>/dev/null || {
- echo "Persistent database $PDBASE is corrupted! CTDB will not start."
- return 1
- }
- done
+ ctdb ping >/dev/null 2>&1 && {
+ echo $"CTDB is already running"
+ return 0
+ }
- case $init_style in
- valgrind)
- valgrind -q --log-file=/var/log/ctdb_valgrind /usr/sbin/ctdbd --nosetsched $CTDB_OPTIONS
- RETVAL=0
- ;;
- suse)
- startproc /usr/sbin/ctdbd $CTDB_OPTIONS
- rc_status -v
- RETVAL=$?
- ;;
- redhat)
- daemon ctdbd $CTDB_OPTIONS
- RETVAL=$?
- echo
- [ $RETVAL -eq 0 ] && touch /var/lock/subsys/ctdb || RETVAL=1
- ;;
- ubuntu)
- start-stop-daemon --start --quiet --background --exec /usr/sbin/ctdbd -- $CTDB_OPTIONS
- RETVAL=$?
- ;;
- esac
+ # About to start new $ctdbd. The ping above has failed and any
+ # new $ctdbd will destroy the Unix domain socket, so any processes
+ # that aren't yet completely useless soon will be... so kill
+ # them.
+ pkill -9 -f "$ctdbd"
- sleep 1
- # set any tunables from the config file
- set | grep ^CTDB_SET_ | cut -d_ -f3- |
- while read v; do
- varname=`echo $v | cut -d= -f1`
- value=`echo $v | cut -d= -f2`
- ctdb setvar $varname $value || RETVAL=1
- done || exit 1
+ build_ctdb_options
- return $RETVAL
-}
+ export_debug_variables
-stop() {
- echo -n $"Shutting down ctdbd service: "
- ctdb ping >& /dev/null || {
- echo -n " Warning: ctdbd not running ! "
- case $init_style in
- suse)
- rc_status -v
- ;;
- redhat)
- echo ""
- ;;
- esac
- return 0
- }
- ctdb shutdown
- RETVAL=$?
- count=0
- if [ "$init_style" = "valgrind" ]; then
- # very crude method
- sleep 2
- pkill -9 -f valgrind
+ # make sure we drop any ips that might still be held if previous
+ # instance of ctdb got killed with -9 or similar
+ drop_all_public_ips "ctdb.init"
+
+ if select_tdb_checker ; then
+ check_persistent_databases || return $?
+ check_non_persistent_databases
+ fi
+
+ if [ "$CTDB_SUPPRESS_COREFILE" = "yes" ]; then
+ ulimit -c 0
+ else
+ ulimit -c unlimited
+ fi
+
+ case $init_style in
+ valgrind)
+ eval valgrind -q --log-file=/var/log/ctdb_valgrind \
+ $ctdbd --valgrinding "$CTDB_OPTIONS"
+ RETVAL=$?
+ echo
+ ;;
+ suse)
+ eval startproc $ctdbd "$CTDB_OPTIONS"
+ RETVAL=$?
+ ;;
+ redhat)
+ eval $ctdbd "$CTDB_OPTIONS"
+ RETVAL=$?
+ [ $RETVAL -eq 0 ] && touch /var/lock/subsys/ctdb || RETVAL=1
+ ;;
+ debian)
+ eval start-stop-daemon --start --quiet --background \
+ --exec $ctdbd -- "$CTDB_OPTIONS"
+ RETVAL=$?
+ ;;
+ esac
+
+ if [ $RETVAL -eq 0 ] ; then
+ if ! wait_until_ready ; then
+ RETVAL=1
+ echo "Timed out waiting for initialisation - killing CTDB"
+ pkill -9 -f $ctdbd >/dev/null 2>&1
fi
- while killall -q -0 ctdbd; do
- sleep 1
- count=`expr $count + 1`
- [ $count -gt 10 ] && {
- echo -n $"killing ctdbd "
- killall -q -9 ctdbd
- pkill -9 -f $CTDB_BASE/events.d/
- }
- done
+ fi
+
+ case $init_style in
+ suse)
+ set_retval $RETVAL
+ rc_status -v
+ ;;
+ redhat)
+ [ $RETVAL -eq 0 ] && success || failure
+ echo
+ ;;
+ esac
+
+ return $RETVAL
+}
+
+stop() {
+ echo -n $"Shutting down ctdbd service: "
+ pkill -0 -f $ctdbd || {
+ echo -n " Warning: ctdbd not running ! "
case $init_style in
suse)
- # re-set the return code to the recorded RETVAL
- # in order to print the correct status message
- set_retval $RETVAL
rc_status -v
;;
redhat)
- echo
- [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/ctdb
echo ""
;;
esac
- return $RETVAL
-}
+ return 0
+ }
+ ctdb shutdown >/dev/null 2>&1
+ RETVAL=$?
+ count=0
+ while pkill -0 -f $ctdbd ; do
+ sleep 1
+ count=$(($count + 1))
+ [ $count -gt 30 ] && {
+ echo -n $"killing ctdbd "
+ pkill -9 -f $ctdbd
+ pkill -9 -f $CTDB_BASE/events.d/
+ }
+ done
+ # make sure all ips are dropped, pkill -9 might leave them hanging around
+ drop_all_public_ips
+
+ rm -f "$pidfile"
+
+ case $init_style in
+ suse)
+ # re-set the return code to the recorded RETVAL in order
+ # to print the correct status message
+ set_retval $RETVAL
+ rc_status -v
+ ;;
+ redhat)
+ [ $RETVAL -eq 0 ] && success || failure
+ [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/ctdb
+ echo ""
+ ;;
+ esac
+
+ return $RETVAL
+}
restart() {
- stop
- start
-}
+ stop
+ start
+}
-status() {
- echo -n $"Checking for ctdbd service: "
- ctdb ping >& /dev/null || {
- RETVAL=$?
- echo -n " ctdbd not running. "
- case $init_style in
- suse)
- set_retval $RETVAL
- rc_status -v
- ;;
- redhat)
- echo ""
- ;;
- esac
- return $RETVAL
- }
- echo ""
- ctdb status
-}
+# Given that CTDB_VALGRIND is a debug option we don't support the pid
+# file. We just do a quick and dirty hack instead. Otherwise we just
+# end up re-implementing each distro's pidfile support...
+check_status_valgrind ()
+{
+    # Signal 0 delivers nothing; pkill's exit status just tells us
+    # whether a command line matching "valgrind.*$ctdbd" exists.
+    pkill -0 -f "valgrind.*${ctdbd}" || {
+        echo "ctdbd is not running"
+        return 1
+    }
+
+    echo "ctdbd is running under valgrind..."
+    return 0
+}
+# Report whether ctdbd is running, using the status helper that matches
+# $init_style.  The exit status is that of the last command run in the
+# selected branch.
+check_status ()
+{
+    # Backward compatibility.  When we arrange to pass --pidfile to
+    # ctdbd we also create the directory that will contain it.  If
+    # that directory is missing then we don't use the pidfile to check
+    # status.
+    # The command substitution is quoted so that the directory name is
+    # always handed to "[" as a single word.
+    if [ -d "$(dirname "$pidfile")" ] ; then
+        _pf_opt="-p $pidfile"
+    else
+        _pf_opt=""
+    fi
+
+    # $_pf_opt is deliberately left unquoted below: it must split into
+    # "-p" and the file name, or vanish entirely when empty.
+    case "$init_style" in
+    valgrind)
+        check_status_valgrind
+        ;;
+    suse)
+        checkproc $_pf_opt "$ctdbd"
+        rc_status -v
+        ;;
+    redhat)
+        status $_pf_opt -l "ctdb" "$ctdbd"
+        ;;
+    debian)
+        status_of_proc $_pf_opt "$ctdbd" "ctdb"
+        ;;
+    esac
+}
+
+
+[ -x "$CTDB_BASE/rc.ctdb" ] && "$CTDB_BASE/rc.ctdb" $1
case "$1" in
- start)
+ start)
start
;;
- stop)
+ stop)
stop
;;
- restart|reload)
+ restart|reload|force-reload)
restart
;;
- status)
- status
+ status)
+ check_status
;;
- condrestart)
- ctdb status > /dev/null && restart || :
+ condrestart|try-restart)
+ if check_status >/dev/null ; then
+ restart
+ fi
;;
- cron)
+ cron)
# used from cron to auto-restart ctdb
- ctdb status > /dev/null || restart
+ check_status >/dev/null || restart
;;
- *)
- echo $"Usage: $0 {start|stop|restart|status|cron|condrestart}"
+ *)
+ echo $"Usage: $0 {start|stop|restart|reload|force-reload|status|cron|condrestart|try-restart}"
exit 1
esac