2 # a script to test the basic setup of a CTDB/Samba install
3 # tridge@samba.org September 2007
4 # martin@meltin.net August 2010
9 Usage: ctdb_diagnostics [OPTION] ...
11 -n <nodes> Comma separated list of nodes to operate on
12 -c Ignore comment lines (starting with '#') in file comparisons
13 -w Ignore whitespace in file comparisons
14 --no-ads Do not use commands that assume an Active Directory Server
20 nodes=$(ctdb listnodes -Y | cut -d: -f2)
27 temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@")
35 -n) nodes=$(echo "$2" | sed -e 's@,@ @g') ; shift 2 ;;
36 -c) diff_opts="${diff_opts} -I ^#.*" ; shift ;;
37 -w) diff_opts="${diff_opts} -w" ; shift ;;
38 --no-ads) no_ads=true ; shift ;;
49 # Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout.
50 case "$EXTRA_SSH_OPTS" in
51 *ConnectTimeout=*) : ;;
53 export EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5"
56 # Filter nodes. Remove any nodes we can't contact from $nodes and add
60 if onnode $_i true >/dev/null 2>&1 ; then
61 _nodes="${_nodes}${_nodes:+ }${_i}"
63 bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}"
68 nodes_comma=$(echo $nodes | sed -e 's@[[:space:]]@,@g')
70 PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"
72 # list of config files that must exist and that we check are the same
74 CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
76 # list of config files that may exist and should be checked that they
77 # are the same on the nodes
78 CONFIG_FILES_MAY="/etc/ctdb/public_addresses /etc/ctdb/static-routes"
83 --------------------------------------------------------------------
84 ctdb_diagnostics starting. This script will gather information about
85 your ctdb cluster. You should send the output of this script along
86 with any ctdb or clustered Samba bug reports.
87 --------------------------------------------------------------------
95 NUM_ERRORS=`expr $NUM_ERRORS + 1`
96 echo " ERROR[$NUM_ERRORS]: $msg" >> $ERRORS
101 echo " ================================"
103 echo " `ls -l $fname 2>&1`"
104 cat "$fname" 2>&1 | sed 's/^/ /'
105 echo " ================================"
109 echo "running $1 on nodes $nodes_comma"
110 onnode $nodes_comma "hostname; date; $1 2>&1 | sed 's/^/ /'" 2>&1
113 show_and_compare_files () {
124 onnode $n [ -r "$f" ] || {
125 msg=$(printf "$fmt" "$f" $n)
130 fstf=/tmp/`basename $f`.node$n
131 onnode $n cat $f > $fstf 2>&1
133 echo " ================================"
134 echo " File (on node $n): $f"
135 echo " `onnode $n ls -l $f 2>&1`"
136 cat "$fstf" | sed 's/^/ /'
137 echo " ================================"
140 echo "Testing for same config file $f on node $n"
141 tmpf=/tmp/`basename $f`.node$n
142 onnode $n cat $f > $tmpf 2>&1
143 diff $diff_opts $fstf $tmpf >/dev/null 2>&1 || {
144 error "File $f is different on node $n"
145 diff -u $diff_opts $fstf $tmpf
155 ERRORS="/tmp/diag_err.$$"
159 Diagnosis started on these nodes:
163 if [ -n "$bad_nodes" ] ; then
166 NOT RUNNING DIAGNOSTICS on these uncontactable nodes:
174 For reference, here is the nodes file on the current node...
177 show_file /etc/ctdb/nodes
180 --------------------------------------------------------------------
181 Comparing critical config files on nodes $nodes_comma
184 show_and_compare_files \
185 "%s is missing on node %d" \
188 show_and_compare_files \
189 "Optional file %s is not present on node %d" \
193 --------------------------------------------------------------------
194 Checking for clock drift
198 t2=`onnode $i date +%s`
200 if [ $d -gt 30 -o $d -lt -30 ]; then
201 error "time on node $i differs by $d seconds"
206 --------------------------------------------------------------------
207 Showing software versions
211 show_all "rpm -qa | egrep 'samba|ctdb|gpfs'"
213 [ -x /usr/bin/dpkg-query ] && {
214 show_all "/usr/bin/dpkg-query --show 'ctdb'"
215 show_all "/usr/bin/dpkg-query --show 'samba'"
216 #show_all "/usr/bin/dpkg-query --show 'gpfs'"
221 --------------------------------------------------------------------
222 Showing ctdb status and recent log entries
224 show_all "ctdb status; ctdb ip"
225 show_all "ctdb statistics"
226 show_all "ctdb uptime"
# Show the last 100 lines of the ctdb log from every node, when the log
# file exists there.  One pass is enough: running the identical command
# a second time only duplicates the output in the report.
228 echo "Showing log.ctdb"
229 show_all "test -f /var/log/log.ctdb && tail -100 /var/log/log.ctdb"
234 show_all "tail -200 /var/log/messages"
235 show_all "tail -200 /etc/ctdb/state/vacuum.log"
236 show_all "ls -lRs /var/ctdb"
237 show_all "ls -lRs /etc/ctdb"
241 --------------------------------------------------------------------
242 Showing system and process status
250 show_all "/sbin/lspci"
252 show_all "cat /proc/partitions"
253 show_all "cat /proc/cpuinfo"
254 show_all "cat /proc/scsi/scsi"
# Interface configuration from every node; a single invocation suffices.
255 show_all "/sbin/ifconfig -a"
257 show_all "/sbin/ip addr list"
258 show_all "/sbin/route -n"
259 show_all "netstat -s"
261 show_all "crontab -l"
263 show_all "iptables -L -n"
264 show_all "iptables -L -n -t nat"
265 show_all "/usr/sbin/rpcinfo -p"
266 show_all "/usr/sbin/showmount -a"
267 show_all "/usr/sbin/showmount -e"
268 show_all "/usr/sbin/nfsstat -v"
269 [ -x /sbin/multipath ] && {
270 show_all "/sbin/multipath -ll"
272 [ -x /sbin/chkconfig ] && {
273 show_all "/sbin/chkconfig --list"
275 [ -x /usr/sbin/getenforce ] && {
276 show_all "/usr/sbin/getenforce"
278 [ -d /proc/net/bonding ] && {
279 for f in /proc/net/bonding/*; do
285 --------------------------------------------------------------------
288 show_all "smbstatus -n -B"
291 echo "Skipping \"net ads testjoin\" as requested"
294 show_all "net ads testjoin"
296 show_all "net conf list"
297 show_all "lsof -n | grep smbd"
298 show_all "lsof -n | grep ctdbd"
299 show_all "netstat -tan"
302 echo "Skipping \"net ads info\" as requested"
305 show_all "net ads info"
308 show_all "smbclient -U% -L 127.0.0.1"
# Look up the domain Administrator account on every node.
# show_all only uses its first argument as the command line to run, so
# the whole "id <account>" command must be passed as ONE word; passing
# the account as a second argument would silently run a bare "id".
# The account is single-quoted so the remote shell treats it literally.
309 WORKGROUP=`testparm -s --parameter-name=WORKGROUP 2> /dev/null`
310 show_all "id '$WORKGROUP/Administrator'"
312 show_all "wbinfo --online-status"
316 echo "Diagnostics finished with $NUM_ERRORS errors"