summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Ronnie Sahlberg <ronniesahlberg@gmail.com> 2010-08-30 18:22:05 +1000
committer: Ronnie Sahlberg <ronniesahlberg@gmail.com> 2010-08-30 18:22:05 +1000
commit: 2b4d9170c28eccf396debaae699c660f56c37760 (patch)
tree: a3f2c5afab5657c5944669c0697f1200838b817a
parent: 92455c3dff5ff4e92f804f8f17500c7b1d1eeed5 (diff)
parent: 46b9110f88137a9cffdf2c098a39ee25226c62fe (diff)
download: samba-2b4d9170c28eccf396debaae699c660f56c37760.tar.gz
          samba-2b4d9170c28eccf396debaae699c660f56c37760.tar.xz
          samba-2b4d9170c28eccf396debaae699c660f56c37760.zip
Merge commit 'martins/master'
(This used to be ctdb commit cc8c851e2e0b46f00b18a6dc61fd2774e97850dd)
-rwxr-xr-x ctdb/config/events.d/60.nfs 3
-rwxr-xr-x ctdb/config/events.d/61.nfstickle 62
-rwxr-xr-x ctdb/config/functions 55
-rwxr-xr-x ctdb/tests/complex/31_nfs_tickle.sh 38
-rw-r--r-- ctdb/tests/scripts/ctdb_test_functions.bash 45
-rwxr-xr-x ctdb/tests/simple/16_ctdb_config_add_ip.sh 2
-rwxr-xr-x ctdb/tests/simple/20_ctdb_getmonmode.sh 13
7 files changed, 112 insertions, 106 deletions
diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs
index 15c2b899c1..6acc8897a2 100755
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@@ -6,6 +6,7 @@ start_nfs() {
/bin/mkdir -p $CTDB_BASE/state/statd/ip
startstop_nfs stop
startstop_nfs start
+ echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
}
. $CTDB_BASE/functions
@@ -47,6 +48,8 @@ case "$1" in
exit 0
fi
+ update_tickles 2049
+
# check that statd responds to rpc requests
# if statd is not running we try to restart it
rpcinfo -u localhost 100024 1 > /dev/null || {
diff --git a/ctdb/config/events.d/61.nfstickle b/ctdb/config/events.d/61.nfstickle
deleted file mode 100755
index 3bfef4f75c..0000000000
--- a/ctdb/config/events.d/61.nfstickle
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/sh
-# ctdb event script for NFS tickle acks
-
-. $CTDB_BASE/functions
-
-service_name="nfs"
-
-loadconfig
-
-service_start="mkdir -p $CTDB_BASE/state/nfstickle;mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`;echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle"
-service_reconfigure=$service_start
-
-ctdb_start_stop_service
-
-[ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0
-
-case "$1" in
- startup)
- ctdb_service_start
- ;;
-
- takeip)
- iface=$2
- ip=$3
- # first send a grat arp, to ensure the client knows the updated
- # mac address for this IP
- ctdb gratiousarp $ip $iface
- # send tickle acks for all the connections the old server had
- for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do
- [ -f $f ] && cat $f | while read dest; do
- # send three, in case of lost packets
- echo "Sending NFS tickle ack for $ip to $dest"
- for i in `seq 1 3`; do
- ctdb tickle $dest $ip:2049
- done
- done
- done
- ;;
-
- monitor)
- mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
- rm -f $mydir/*
- # record our connections to shared storage
- netstat -tn |
- awk -v mydir="$mydir" '
-$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ /:2049$/ {
- destip = gensub(/:2049$/, "", 1, $4);
- c[destip] = c[destip] (c[destip] ? "\n" : "" ) $5;
-}
-END {
- for (ip in c) {
- print c[ip] > mydir "/" ip
- }
-}'
- ;;
-
- *)
- ctdb_standard_event_handler "$@"
- ;;
-esac
-
-exit 0
diff --git a/ctdb/config/functions b/ctdb/config/functions
index 35b7db2e61..a20cadf0ce 100755
--- a/ctdb/config/functions
+++ b/ctdb/config/functions
@@ -775,6 +775,61 @@ iptables()
}
########################################################
+# tickle handling
+########################################################
+
+# Temporary directory for tickles.
+tickledir="$CTDB_BASE/state/tickles"
+mkdir -p "$tickledir"
+
+update_tickles () # Sync ctdb's tickles for port $1 with the ESTABLISHED connections to the public IPs this node holds
+{
+ _port="$1" # Local TCP port to match connections and tickles against
+
+ mkdir -p "$tickledir" # Just in case it was removed after this file was sourced
+
+ # Who am I?  Strip the leading "PNN:" from the output of "ctdb pnn".
+ _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
+
+ # What public IPs do I hold?  Field 2 of each "ctdb -Y ip" line whose field 3 is my PNN.
+ _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
+
+ # IPs as a regexp choice: spaces become "|" and each "." is backslash-escaped
+ _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
+
+ # Record connections to our public IPs in a temporary file, sorted, one "$5 $4" netstat pair per line
+ _my_connections="${tickledir}/${_port}.connections"
+ rm -f "$_my_connections"
+ netstat -tn |
+ awk -v destpat="^${_ipschoice}:${_port}\$" \
+ '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
+ sort >"$_my_connections"
+
+ # Record our current tickles in a temporary file, sorted, skipping the first line of each gettickles output
+ _my_tickles="${tickledir}/${_port}.tickles"
+ rm -f "$_my_tickles"
+ for _i in $_ips ; do
+ ctdb -Y gettickles $_i $_port |
+ awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
+ done |
+ sort >"$_my_tickles"
+
+ # Add tickles for connections that we haven't already got tickles for (lines only in the connections file)
+ comm -23 "$_my_connections" "$_my_tickles" |
+ while read _src _dst ; do
+ ctdb addtickle $_src $_dst
+ done
+
+ # Remove tickles for connections that are no longer there (lines only in the tickles file)
+ comm -13 "$_my_connections" "$_my_tickles" |
+ while read _src _dst ; do
+ ctdb deltickle $_src $_dst
+ done
+
+ rm -f "$_my_connections" "$_my_tickles" # Both temporary files are only needed within this call
+}
+
+########################################################
# load a site local config file
########################################################
diff --git a/ctdb/tests/complex/31_nfs_tickle.sh b/ctdb/tests/complex/31_nfs_tickle.sh
index eb6644b63f..030e34fe02 100755
--- a/ctdb/tests/complex/31_nfs_tickle.sh
+++ b/ctdb/tests/complex/31_nfs_tickle.sh
@@ -57,7 +57,7 @@ ctdb_test_exit_hook_add ctdb_test_eventscript_uninstall
ctdb_test_eventscript_install
# We need this for later, so we know how long to sleep.
-try_command_on_node 0 $CTDB getvar MonitorInterval
+try_command_on_node any $CTDB getvar MonitorInterval
monitor_interval="${out#*= }"
#echo "Monitor interval on node $test_node is $monitor_interval seconds."
@@ -77,28 +77,40 @@ echo "Source socket is $src_socket"
wait_for_monitor_event $test_node
-echo "Trying to determine NFS_TICKLE_SHARED_DIRECTORY..."
-f="/etc/sysconfig/nfs"
-try_command_on_node -v 0 "[ -r $f ] && sed -n -e s@^NFS_TICKLE_SHARED_DIRECTORY=@@p $f" || true
+echo "Sleeping until tickles are synchronised across nodes..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+sleep_for "${out#*= }"
-nfs_tickle_shared_directory="${out:-/gpfs/.ctdb/nfs-tickles}"
+if try_command_on_node any "test -r /etc/ctdb/events.d/61.nfstickle" ; then
+ echo "Trying to determine NFS_TICKLE_SHARED_DIRECTORY..."
+ f="/etc/sysconfig/nfs"
+ try_command_on_node -v any "[ -r $f ] && sed -n -e s@^NFS_TICKLE_SHARED_DIRECTORY=@@p $f" || true
-try_command_on_node $test_node hostname
-test_hostname=$out
+ nfs_tickle_shared_directory="${out:-/gpfs/.ctdb/nfs-tickles}"
-try_command_on_node -v 0 cat "${nfs_tickle_shared_directory}/$test_hostname/$test_ip"
+ try_command_on_node $test_node hostname
+ test_hostname=$out
+
+ try_command_on_node -v any cat "${nfs_tickle_shared_directory}/$test_hostname/$test_ip"
+else
+ echo "That's OK, we'll use \"ctdb gettickles\", which is newer..."
+ try_command_on_node -v any "ctdb -Y gettickles $test_ip $test_port"
+fi
if [ "${out/${src_socket}/}" != "$out" ] ; then
- echo "GOOD: NFS connection tracked OK in tickles file."
+ echo "GOOD: NFS connection tracked OK."
else
- echo "BAD: Socket not tracked in NFS tickles file:"
+ echo "BAD: Socket not tracked in NFS tickles."
testfailures=1
fi
tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
-echo "Disabling node $test_node"
-try_command_on_node 1 $CTDB disable -n $test_node
-wait_until_node_has_status $test_node disabled
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles and confuse our sniff.
+echo "Killing ctdbd on ${test_node}..."
+try_command_on_node $test_node killall -9 ctdbd
+
+wait_until_node_has_status $test_node disconnected
tcptickle_sniff_wait_show
diff --git a/ctdb/tests/scripts/ctdb_test_functions.bash b/ctdb/tests/scripts/ctdb_test_functions.bash
index 42053c0486..1433a46ec3 100644
--- a/ctdb/tests/scripts/ctdb_test_functions.bash
+++ b/ctdb/tests/scripts/ctdb_test_functions.bash
@@ -66,7 +66,7 @@ ctdb_test_exit ()
# now complete.
set +e
- echo "*** TEST COMPLETE (RC=$status), CLEANING UP..."
+ echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."
eval "$ctdb_test_exit_hook" || true
unset ctdb_test_exit_hook
@@ -80,7 +80,7 @@ ctdb_test_exit ()
# leave the recovery in restart_ctdb so that future tests that
# might do a manual restart mid-test will benefit.
echo "Forcing a recovery..."
- onnode 0 ctdb recover
+ onnode 0 $CTDB recover
fi
exit $status
@@ -336,13 +336,15 @@ _cluster_is_healthy ()
{
local out x count line
- out=$(ctdb -Y status 2>&1) || return 1
+ out=$($CTDB -Y status 2>/dev/null) || return 1
{
read x
count=0
while read line ; do
- count=$(($count + 1))
+ # We need to see valid lines if we're going to be healthy.
+ [ "${line#:[0-9]}" != "$line" ] && count=$(($count + 1))
+ # A line indicating a node is unhealthy causes failure.
[ "${line##:*:*:*1:}" != "$line" ] && return 1
done
[ $count -gt 0 ] && return $?
@@ -357,9 +359,9 @@ cluster_is_healthy ()
else
echo "Cluster is UNHEALTHY"
if ! ${ctdb_test_restart_scheduled:-false} ; then
- echo "DEBUG:"
+ echo "DEBUG AT $(date '+%F %T'):"
local i
- for i in "onnode -q 0 ctdb status" "onnode -q 0 onnode all ctdb scriptstatus" ; do
+ for i in "onnode -q 0 $CTDB status" "onnode -q 0 onnode all $CTDB scriptstatus" ; do
echo "$i"
$i || true
done
@@ -407,7 +409,7 @@ node_has_status ()
if [ -n "$bits" ] ; then
local out x line
- out=$(ctdb -Y status 2>&1) || return 1
+ out=$($CTDB -Y status 2>&1) || return 1
{
read x
@@ -420,9 +422,9 @@ node_has_status ()
return 1
} <<<"$out" # Yay bash!
elif [ -n "$fpat" ] ; then
- ctdb statistics -n "$pnn" | egrep -q "$fpat"
+ $CTDB statistics -n "$pnn" | egrep -q "$fpat"
elif [ -n "$mpat" ] ; then
- ctdb getmonmode -n "$pnn" | egrep -q "$mpat"
+ $CTDB getmonmode -n "$pnn" | egrep -q "$mpat"
else
echo 'node_has_status: unknown mode, neither $bits nor $fpat is set'
return 1
@@ -437,8 +439,8 @@ wait_until_node_has_status ()
echo "Waiting until node $pnn has status \"$status\"..."
- if ! onnode any $CTDB_TEST_WRAPPER wait_until $timeout node_has_status "$pnn" "$status" ; then
- for i in "onnode -q any ctdb status" "onnode -q any onnode all ctdb scriptstatus" ; do
+ if ! wait_until $timeout onnode any $CTDB_TEST_WRAPPER node_has_status "$pnn" "$status" ; then
+ for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
echo "$i"
$i || true
done
@@ -579,9 +581,9 @@ tcpdump_wait ()
echo "Waiting for tcpdump to capture some packets..."
if ! wait_until 30 tcpdump_check ; then
- echo "DEBUG:"
+ echo "DEBUG AT $(date '+%F %T'):"
local i
- for i in "onnode -q 0 ctdb status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do
+ for i in "onnode -q 0 $CTDB status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do
echo "$i"
$i || true
done
@@ -638,7 +640,7 @@ gratarp_sniff_wait_show ()
daemons_stop ()
{
echo "Attempting to politely shutdown daemons..."
- onnode 1 ctdb shutdown -n all || true
+ onnode 1 $CTDB shutdown -n all || true
echo "Sleeping for a while..."
sleep_for 1
@@ -794,16 +796,16 @@ _ctdb_start_post ()
onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || return 1
echo "Setting RerecoveryTimeout to 1"
- onnode -pq all "ctdb setvar RerecoveryTimeout 1"
+ onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
# In recent versions of CTDB, forcing a recovery like this blocks
# until the recovery is complete. Hopefully this will help the
# cluster to stabilise before a subsequent test.
echo "Forcing a recovery..."
- onnode -q 0 ctdb recover
+ onnode -q 0 $CTDB recover
sleep_for 1
echo "Forcing a recovery..."
- onnode -q 0 ctdb recover
+ onnode -q 0 $CTDB recover
echo "ctdb is ready"
}
@@ -855,16 +857,16 @@ restart_ctdb ()
onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || return 1
echo "Setting RerecoveryTimeout to 1"
- onnode -pq all "ctdb setvar RerecoveryTimeout 1"
+ onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
# In recent versions of CTDB, forcing a recovery like this blocks
# until the recovery is complete. Hopefully this will help the
# cluster to stabilise before a subsequent test.
echo "Forcing a recovery..."
- onnode -q 0 ctdb recover
+ onnode -q 0 $CTDB recover
sleep_for 1
echo "Forcing a recovery..."
- onnode -q 0 ctdb recover
+ onnode -q 0 $CTDB recover
echo "ctdb is ready"
}
@@ -1036,3 +1038,6 @@ wait_for_monitor_event ()
wait_until 120 ! ctdb_test_eventscript_flag exists $pnn "monitor"
}
+
+# Make sure that $CTDB is set.
+: ${CTDB:=ctdb}
diff --git a/ctdb/tests/simple/16_ctdb_config_add_ip.sh b/ctdb/tests/simple/16_ctdb_config_add_ip.sh
index 6fee386d08..3207661f16 100755
--- a/ctdb/tests/simple/16_ctdb_config_add_ip.sh
+++ b/ctdb/tests/simple/16_ctdb_config_add_ip.sh
@@ -115,6 +115,6 @@ if wait_until 60 ips_are_on_nodeglob $test_node ${add_ip%/*} ; then
echo "That worked!"
else
echo "BAD: IP didn't get added."
- try_command_on_node $test_node ctdb ip -n all
+ try_command_on_node $test_node $CTDB ip -n all
exit 1
fi
diff --git a/ctdb/tests/simple/20_ctdb_getmonmode.sh b/ctdb/tests/simple/20_ctdb_getmonmode.sh
index eab3dad45a..56a38d8525 100755
--- a/ctdb/tests/simple/20_ctdb_getmonmode.sh
+++ b/ctdb/tests/simple/20_ctdb_getmonmode.sh
@@ -18,7 +18,7 @@ Steps:
1. Verify that the status on all of the ctdb nodes is 'OK'.
2. Use 'ctdb getmodmode -n <node>' to get the current monitoring mode.
-3. Verify that it shows monitoring as 'active'.
+3. Verify that it looks sane.
4. Verify that the command prints the output in colon-separated format
when run with the '-Y' option.
5. Disable monitoring on the node using 'ctdb disablemonitor'.
@@ -47,19 +47,12 @@ try_command_on_node -v 0 $CTDB getmonmode -n $test_node
sanity_check_output \
1 \
- '^Monitoring mode:ACTIVE \(0\)$' \
+ '^Monitoring mode:(ACTIVE \(0\)|DISABLED \(1\))$' \
"$out"
-colons=$(printf ':mode:\n:0:')
-
try_command_on_node -v 0 $CTDB -Y getmonmode -n $test_node
-if [ "$out" = "$colons" ] ; then
- echo "Looks OK"
-else
- echo "BAD: -Y output isn't what was expected"
- testfailures=1
-fi
+sanity_check_output 2 '^(:mode:|:0:|:1:)$' "$out"
try_command_on_node -v 0 $CTDB disablemonitor -n $test_node