Merge commit 'martins/master'

(This used to be ctdb commit cc8c851e2e0b46f00b18a6dc61fd2774e97850dd)
author: Ronnie Sahlberg <ronniesahlberg@gmail.com> 2010-08-30 18:22:05 +1000
committer: Ronnie Sahlberg <ronniesahlberg@gmail.com> 2010-08-30 18:22:05 +1000
commit: 2b4d9170c28eccf396debaae699c660f56c37760 (patch)
tree: a3f2c5afab5657c5944669c0697f1200838b817a
parent: 92455c3dff5ff4e92f804f8f17500c7b1d1eeed5 (diff)
parent: 46b9110f88137a9cffdf2c098a39ee25226c62fe (diff)
download: samba-2b4d9170c28eccf396debaae699c660f56c37760.tar.gz
samba-2b4d9170c28eccf396debaae699c660f56c37760.tar.xz
samba-2b4d9170c28eccf396debaae699c660f56c37760.zip
7 files changed, 112 insertions, 106 deletions
diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs
index 15c2b899c1..6acc8897a2 100755
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@@ -6,6 +6,7 @@ start_nfs() {
 	/bin/mkdir -p $CTDB_BASE/state/statd/ip
 	startstop_nfs stop
 	startstop_nfs start
+	echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
 }
 
 . $CTDB_BASE/functions
@@ -47,6 +48,8 @@ case "$1" in
 	    exit 0
 	fi
 
+	update_tickles 2049
+
 	# check that statd responds to rpc requests
 	# if statd is not running we try to restart it
 	rpcinfo -u localhost 100024 1 > /dev/null || {
diff --git a/ctdb/config/events.d/61.nfstickle b/ctdb/config/events.d/61.nfstickle
deleted file mode 100755
index 3bfef4f75c..0000000000
--- a/ctdb/config/events.d/61.nfstickle
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/sh
-# ctdb event script for NFS tickle acks
-
-. $CTDB_BASE/functions
-
-service_name="nfs"
-
-loadconfig
-
-service_start="mkdir -p $CTDB_BASE/state/nfstickle;mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`;echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle"
-service_reconfigure=$service_start
-
-ctdb_start_stop_service
-
-[ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0
-
-case "$1" in 
-     startup)
-	ctdb_service_start
-	;;
-	
-     takeip)
-	iface=$2
-	ip=$3
-	# first send a grat arp, to ensure the client knows the updated
-	# mac address for this IP
-	ctdb gratiousarp $ip $iface
-	# send tickle acks for all the connections the old server had
-	for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do
-		[ -f $f ] && cat $f | while read dest; do
-			# send three, in case of lost packets
-			echo "Sending NFS tickle ack for $ip to $dest"
-			for i in `seq 1 3`; do
-				ctdb tickle $dest $ip:2049
-			done
-		done
-	done
-	;;
-
-     monitor)
-	mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
-	rm -f $mydir/*
-	# record our connections to shared storage
-	netstat -tn |
-	awk -v mydir="$mydir" '
-$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ /:2049$/ {
-  destip = gensub(/:2049$/, "", 1, $4);
-  c[destip] = c[destip] (c[destip] ? "\n" : "" ) $5;
-}
-END {
-  for (ip in c) {
-    print c[ip] > mydir "/" ip
-  }
-}'
-	;;
-
-    *)
-	ctdb_standard_event_handler "$@"
-	;;
-esac
-
-exit 0
diff --git a/ctdb/config/functions b/ctdb/config/functions
index 35b7db2e61..a20cadf0ce 100755
--- a/ctdb/config/functions
+++ b/ctdb/config/functions
@@ -775,6 +775,61 @@ iptables()
 }
 
 ########################################################
+# tickle handling
+########################################################
+
+# Temporary directory for tickles.
+tickledir="$CTDB_BASE/state/tickles"
+mkdir -p "$tickledir"
+
+update_tickles ()
+{
+	_port="$1"
+
+	mkdir -p "$tickledir" # Just in case
+
+	# Who am I?
+	_pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
+
+	# What public IPs do I hold?
+	_ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
+
+	# IPs as a regexp choice
+	_ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
+
+	# Record connections to our public IPs in a temporary file
+	_my_connections="${tickledir}/${_port}.connections"
+	rm -f "$_my_connections"
+	netstat -tn |
+	awk -v destpat="^${_ipschoice}:${_port}\$" \
+	  '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
+	sort >"$_my_connections"
+
+	# Record our current tickles in a temporary file
+	_my_tickles="${tickledir}/${_port}.tickles"
+	rm -f "$_my_tickles"
+	for _i in $_ips ; do
+		ctdb -Y gettickles $_i $_port | 
+		awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
+	done |
+	sort >"$_my_tickles"
+
+	# Add tickles for connections that we haven't already got tickles for
+	comm -23 "$_my_connections" "$_my_tickles" |
+	while read _src _dst ; do
+		ctdb addtickle $_src $_dst
+	done
+
+	# Remove tickles for connections that are no longer there
+	comm -13 "$_my_connections" "$_my_tickles" |
+	while read _src _dst ; do
+		ctdb deltickle $_src $_dst
+	done
+
+	rm -f "$_my_connections" "$_my_tickles" 
+}
+
+########################################################
 # load a site local config file
 ########################################################
 
diff --git a/ctdb/tests/complex/31_nfs_tickle.sh b/ctdb/tests/complex/31_nfs_tickle.sh
index eb6644b63f..030e34fe02 100755
--- a/ctdb/tests/complex/31_nfs_tickle.sh
+++ b/ctdb/tests/complex/31_nfs_tickle.sh
@@ -57,7 +57,7 @@ ctdb_test_exit_hook_add ctdb_test_eventscript_uninstall
 ctdb_test_eventscript_install
 
 # We need this for later, so we know how long to sleep.
-try_command_on_node 0 $CTDB getvar MonitorInterval
+try_command_on_node any $CTDB getvar MonitorInterval
 monitor_interval="${out#*= }"
 #echo "Monitor interval on node $test_node is $monitor_interval seconds."
 
@@ -77,28 +77,40 @@ echo "Source socket is $src_socket"
 
 wait_for_monitor_event $test_node
 
-echo "Trying to determine NFS_TICKLE_SHARED_DIRECTORY..."
-f="/etc/sysconfig/nfs"
-try_command_on_node -v 0 "[ -r $f ] &&  sed -n -e s@^NFS_TICKLE_SHARED_DIRECTORY=@@p $f" || true
+echo "Sleeping until tickles are synchronised across nodes..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+sleep_for "${out#*= }"
 
-nfs_tickle_shared_directory="${out:-/gpfs/.ctdb/nfs-tickles}"
+if try_command_on_node any "test -r /etc/ctdb/events.d/61.nfstickle" ; then
+    echo "Trying to determine NFS_TICKLE_SHARED_DIRECTORY..."
+    f="/etc/sysconfig/nfs"
+    try_command_on_node -v any "[ -r $f ] &&  sed -n -e s@^NFS_TICKLE_SHARED_DIRECTORY=@@p $f" || true
 
-try_command_on_node $test_node hostname
-test_hostname=$out
+    nfs_tickle_shared_directory="${out:-/gpfs/.ctdb/nfs-tickles}"
 
-try_command_on_node -v 0 cat "${nfs_tickle_shared_directory}/$test_hostname/$test_ip"
+    try_command_on_node $test_node hostname
+    test_hostname=$out
+
+    try_command_on_node -v any cat "${nfs_tickle_shared_directory}/$test_hostname/$test_ip"
+else
+    echo "That's OK, we'll use \"ctdb gettickles\", which is newer..."
+    try_command_on_node -v any "ctdb -Y gettickles $test_ip $test_port"
+fi
 
 if [ "${out/${src_socket}/}" != "$out" ] ; then
-    echo "GOOD: NFS connection tracked OK in tickles file."
+    echo "GOOD: NFS connection tracked OK."
 else
-    echo "BAD: Socket not tracked in NFS tickles file:"
+    echo "BAD: Socket not tracked in NFS tickles."
     testfailures=1
 fi
 
 tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
 
-echo "Disabling node $test_node"
-try_command_on_node 1 $CTDB disable -n $test_node
-wait_until_node_has_status $test_node disabled
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles and confuse our sniff.
+echo "Killing ctdbd on ${test_node}..."
+try_command_on_node $test_node killall -9 ctdbd
+
+wait_until_node_has_status $test_node disconnected
 
 tcptickle_sniff_wait_show
diff --git a/ctdb/tests/scripts/ctdb_test_functions.bash b/ctdb/tests/scripts/ctdb_test_functions.bash
index 42053c0486..1433a46ec3 100644
--- a/ctdb/tests/scripts/ctdb_test_functions.bash
+++ b/ctdb/tests/scripts/ctdb_test_functions.bash
@@ -66,7 +66,7 @@ ctdb_test_exit ()
     # now complete.
     set +e
 
-    echo "*** TEST COMPLETE (RC=$status), CLEANING UP..."
+    echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."
 
     eval "$ctdb_test_exit_hook" || true
     unset ctdb_test_exit_hook
@@ -80,7 +80,7 @@ ctdb_test_exit ()
 	# leave the recovery in restart_ctdb so that future tests that
 	# might do a manual restart mid-test will benefit.
 	echo "Forcing a recovery..."
-	onnode 0 ctdb recover
+	onnode 0 $CTDB recover
     fi
 
     exit $status
@@ -336,13 +336,15 @@ _cluster_is_healthy ()
 {
     local out x count line
 
-    out=$(ctdb -Y status 2>&1) || return 1
+    out=$($CTDB -Y status 2>/dev/null) || return 1
 
     {
         read x
 	count=0
         while read line ; do
-	    count=$(($count + 1))
+	    # We need to see valid lines if we're going to be healthy.
+	    [ "${line#:[0-9]}" != "$line" ] && count=$(($count + 1))
+	    # A line indicating a node is unhealthy causes failure.
 	    [ "${line##:*:*:*1:}" != "$line" ] && return 1
         done
 	[ $count -gt 0 ] && return $?
@@ -357,9 +359,9 @@ cluster_is_healthy ()
     else
 	echo "Cluster is UNHEALTHY"
 	if ! ${ctdb_test_restart_scheduled:-false} ; then
-	    echo "DEBUG:"
+	    echo "DEBUG AT $(date '+%F %T'):"
 	    local i
-	    for i in "onnode -q 0 ctdb status" "onnode -q 0 onnode all ctdb scriptstatus" ; do
+	    for i in "onnode -q 0 $CTDB status" "onnode -q 0 onnode all $CTDB scriptstatus" ; do
 		echo "$i"
 		$i || true
 	    done
@@ -407,7 +409,7 @@ node_has_status ()
     if [ -n "$bits" ] ; then
 	local out x line
 
-	out=$(ctdb -Y status 2>&1) || return 1
+	out=$($CTDB -Y status 2>&1) || return 1
 
 	{
             read x
@@ -420,9 +422,9 @@ node_has_status ()
 	    return 1
 	} <<<"$out" # Yay bash!
     elif [ -n "$fpat" ] ; then
-	ctdb statistics -n "$pnn" | egrep -q "$fpat"
+	$CTDB statistics -n "$pnn" | egrep -q "$fpat"
     elif [ -n "$mpat" ] ; then
-	ctdb getmonmode -n "$pnn" | egrep -q "$mpat"
+	$CTDB getmonmode -n "$pnn" | egrep -q "$mpat"
     else
 	echo 'node_has_status: unknown mode, neither $bits nor $fpat is set'
 	return 1
@@ -437,8 +439,8 @@ wait_until_node_has_status ()
 
     echo "Waiting until node $pnn has status \"$status\"..."
 
-    if ! onnode any $CTDB_TEST_WRAPPER wait_until $timeout node_has_status "$pnn" "$status" ; then
-	for i in "onnode -q any ctdb status" "onnode -q any onnode all ctdb scriptstatus" ; do
+    if ! wait_until $timeout onnode any $CTDB_TEST_WRAPPER node_has_status "$pnn" "$status" ; then
+	for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
 	    echo "$i"
 	    $i || true
 	done
@@ -579,9 +581,9 @@ tcpdump_wait ()
 
     echo "Waiting for tcpdump to capture some packets..."
     if ! wait_until 30 tcpdump_check ; then
-	echo "DEBUG:"
+	echo "DEBUG AT $(date '+%F %T'):"
 	local i
-	for i in "onnode -q 0 ctdb status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do
+	for i in "onnode -q 0 $CTDB status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do
 	    echo "$i"
 	    $i || true
 	done
@@ -638,7 +640,7 @@ gratarp_sniff_wait_show ()
 daemons_stop ()
 {
     echo "Attempting to politely shutdown daemons..."
-    onnode 1 ctdb shutdown -n all || true
+    onnode 1 $CTDB shutdown -n all || true
 
     echo "Sleeping for a while..."
     sleep_for 1
@@ -794,16 +796,16 @@ _ctdb_start_post ()
     onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || return 1
 
     echo "Setting RerecoveryTimeout to 1"
-    onnode -pq all "ctdb setvar RerecoveryTimeout 1"
+    onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
 
     # In recent versions of CTDB, forcing a recovery like this blocks
     # until the recovery is complete.  Hopefully this will help the
     # cluster to stabilise before a subsequent test.
     echo "Forcing a recovery..."
-    onnode -q 0 ctdb recover
+    onnode -q 0 $CTDB recover
     sleep_for 1
     echo "Forcing a recovery..."
-    onnode -q 0 ctdb recover
+    onnode -q 0 $CTDB recover
 
     echo "ctdb is ready"
 }
@@ -855,16 +857,16 @@ restart_ctdb ()
     onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || return 1
 
     echo "Setting RerecoveryTimeout to 1"
-    onnode -pq all "ctdb setvar RerecoveryTimeout 1"
+    onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
 
     # In recent versions of CTDB, forcing a recovery like this blocks
     # until the recovery is complete.  Hopefully this will help the
     # cluster to stabilise before a subsequent test.
     echo "Forcing a recovery..."
-    onnode -q 0 ctdb recover
+    onnode -q 0 $CTDB recover
     sleep_for 1
     echo "Forcing a recovery..."
-    onnode -q 0 ctdb recover
+    onnode -q 0 $CTDB recover
 
     echo "ctdb is ready"
 }
@@ -1036,3 +1038,6 @@ wait_for_monitor_event ()
     wait_until 120 ! ctdb_test_eventscript_flag exists $pnn "monitor"
 
 }
+
+# Make sure that $CTDB is set.
+: ${CTDB:=ctdb}
diff --git a/ctdb/tests/simple/16_ctdb_config_add_ip.sh b/ctdb/tests/simple/16_ctdb_config_add_ip.sh
index 6fee386d08..3207661f16 100755
--- a/ctdb/tests/simple/16_ctdb_config_add_ip.sh
+++ b/ctdb/tests/simple/16_ctdb_config_add_ip.sh
@@ -115,6 +115,6 @@ if wait_until 60 ips_are_on_nodeglob $test_node ${add_ip%/*} ; then
     echo "That worked!"
 else
     echo "BAD: IP didn't get added."
-    try_command_on_node $test_node ctdb ip -n all
+    try_command_on_node $test_node $CTDB ip -n all
     exit 1
 fi
diff --git a/ctdb/tests/simple/20_ctdb_getmonmode.sh b/ctdb/tests/simple/20_ctdb_getmonmode.sh
index eab3dad45a..56a38d8525 100755
--- a/ctdb/tests/simple/20_ctdb_getmonmode.sh
+++ b/ctdb/tests/simple/20_ctdb_getmonmode.sh
@@ -18,7 +18,7 @@ Steps:
 
 1. Verify that the status on all of the ctdb nodes is 'OK'.
 2. Use 'ctdb getmodmode -n <node>' to get the current monitoring mode.
-3. Verify that it shows monitoring as 'active'.
+3. Verify that it looks sane.
 4. Verify that the command prints the output in colon-separated format
    when run with the '-Y' option.
 5. Disable monitoring on the node using 'ctdb disablemonitor'.
@@ -47,19 +47,12 @@ try_command_on_node -v 0 $CTDB getmonmode -n $test_node
 
 sanity_check_output \
     1 \
-    '^Monitoring mode:ACTIVE \(0\)$' \
+    '^Monitoring mode:(ACTIVE \(0\)|DISABLED \(1\))$' \
     "$out"
 
-colons=$(printf ':mode:\n:0:')
-
 try_command_on_node -v 0 $CTDB -Y getmonmode -n $test_node
 
-if [ "$out" = "$colons" ] ; then
-    echo "Looks OK"
-else
-    echo "BAD: -Y output isn't what was expected"
-    testfailures=1
-fi
+sanity_check_output 2 '^(:mode:|:0:|:1:)$' "$out"
 
 try_command_on_node -v 0 $CTDB disablemonitor -n $test_node
author	Ronnie Sahlberg <ronniesahlberg@gmail.com>	2010-08-30 18:22:05 +1000
committer	Ronnie Sahlberg <ronniesahlberg@gmail.com>	2010-08-30 18:22:05 +1000
commit	2b4d9170c28eccf396debaae699c660f56c37760 (patch)
tree	a3f2c5afab5657c5944669c0697f1200838b817a
parent	92455c3dff5ff4e92f804f8f17500c7b1d1eeed5 (diff)
parent	46b9110f88137a9cffdf2c098a39ee25226c62fe (diff)
download	samba-2b4d9170c28eccf396debaae699c660f56c37760.tar.gz samba-2b4d9170c28eccf396debaae699c660f56c37760.tar.xz samba-2b4d9170c28eccf396debaae699c660f56c37760.zip