diff options
| author | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2010-08-30 18:22:05 +1000 |
|---|---|---|
| committer | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2010-08-30 18:22:05 +1000 |
| commit | 2b4d9170c28eccf396debaae699c660f56c37760 (patch) | |
| tree | a3f2c5afab5657c5944669c0697f1200838b817a | |
| parent | 92455c3dff5ff4e92f804f8f17500c7b1d1eeed5 (diff) | |
| parent | 46b9110f88137a9cffdf2c098a39ee25226c62fe (diff) | |
| download | samba-2b4d9170c28eccf396debaae699c660f56c37760.tar.gz samba-2b4d9170c28eccf396debaae699c660f56c37760.tar.xz samba-2b4d9170c28eccf396debaae699c660f56c37760.zip | |
Merge commit 'martins/master'
(This used to be ctdb commit cc8c851e2e0b46f00b18a6dc61fd2774e97850dd)
| Mode | File | Lines changed |
|---|---|---|
| -rwxr-xr-x | ctdb/config/events.d/60.nfs | 3 |
| -rwxr-xr-x | ctdb/config/events.d/61.nfstickle | 62 |
| -rwxr-xr-x | ctdb/config/functions | 55 |
| -rwxr-xr-x | ctdb/tests/complex/31_nfs_tickle.sh | 38 |
| -rw-r--r-- | ctdb/tests/scripts/ctdb_test_functions.bash | 45 |
| -rwxr-xr-x | ctdb/tests/simple/16_ctdb_config_add_ip.sh | 2 |
| -rwxr-xr-x | ctdb/tests/simple/20_ctdb_getmonmode.sh | 13 |
7 files changed, 112 insertions, 106 deletions
diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs index 15c2b899c1..6acc8897a2 100755 --- a/ctdb/config/events.d/60.nfs +++ b/ctdb/config/events.d/60.nfs @@ -6,6 +6,7 @@ start_nfs() { /bin/mkdir -p $CTDB_BASE/state/statd/ip startstop_nfs stop startstop_nfs start + echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle } . $CTDB_BASE/functions @@ -47,6 +48,8 @@ case "$1" in exit 0 fi + update_tickles 2049 + # check that statd responds to rpc requests # if statd is not running we try to restart it rpcinfo -u localhost 100024 1 > /dev/null || { diff --git a/ctdb/config/events.d/61.nfstickle b/ctdb/config/events.d/61.nfstickle deleted file mode 100755 index 3bfef4f75c..0000000000 --- a/ctdb/config/events.d/61.nfstickle +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/sh -# ctdb event script for NFS tickle acks - -. $CTDB_BASE/functions - -service_name="nfs" - -loadconfig - -service_start="mkdir -p $CTDB_BASE/state/nfstickle;mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`;echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle" -service_reconfigure=$service_start - -ctdb_start_stop_service - -[ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0 - -case "$1" in - startup) - ctdb_service_start - ;; - - takeip) - iface=$2 - ip=$3 - # first send a grat arp, to ensure the client knows the updated - # mac address for this IP - ctdb gratiousarp $ip $iface - # send tickle acks for all the connections the old server had - for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do - [ -f $f ] && cat $f | while read dest; do - # send three, in case of lost packets - echo "Sending NFS tickle ack for $ip to $dest" - for i in `seq 1 3`; do - ctdb tickle $dest $ip:2049 - done - done - done - ;; - - monitor) - mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname` - rm -f $mydir/* - # record our connections to shared storage - netstat -tn | - awk -v mydir="$mydir" ' -$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ /:2049$/ { - destip = gensub(/:2049$/, "", 1, $4); - c[destip] = c[destip] (c[destip] ? 
"\n" : "" ) $5; -} -END { - for (ip in c) { - print c[ip] > mydir "/" ip - } -}' - ;; - - *) - ctdb_standard_event_handler "$@" - ;; -esac - -exit 0 diff --git a/ctdb/config/functions b/ctdb/config/functions index 35b7db2e61..a20cadf0ce 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -775,6 +775,61 @@ iptables() } ######################################################## +# tickle handling +######################################################## + +# Temporary directory for tickles. +tickledir="$CTDB_BASE/state/tickles" +mkdir -p "$tickledir" + +update_tickles () +{ + _port="$1" + + mkdir -p "$tickledir" # Just in case + + # Who am I? + _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:} + + # What public IPs do I hold? + _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}') + + # IPs as a regexp choice + _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))" + + # Record connections to our public IPs in a temporary file + _my_connections="${tickledir}/${_port}.connections" + rm -f "$_my_connections" + netstat -tn | + awk -v destpat="^${_ipschoice}:${_port}\$" \ + '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' | + sort >"$_my_connections" + + # Record our current tickles in a temporary file + _my_tickles="${tickledir}/${_port}.tickles" + rm -f "$_my_tickles" + for _i in $_ips ; do + ctdb -Y gettickles $_i $_port | + awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }' + done | + sort >"$_my_tickles" + + # Add tickles for connections that we haven't already got tickles for + comm -23 "$_my_connections" "$_my_tickles" | + while read _src _dst ; do + ctdb addtickle $_src $_dst + done + + # Remove tickles for connections that are no longer there + comm -13 "$_my_connections" "$_my_tickles" | + while read _src _dst ; do + ctdb deltickle $_src $_dst + done + + rm -f "$_my_connections" "$_my_tickles" +} + +######################################################## # load a site local config file 
######################################################## diff --git a/ctdb/tests/complex/31_nfs_tickle.sh b/ctdb/tests/complex/31_nfs_tickle.sh index eb6644b63f..030e34fe02 100755 --- a/ctdb/tests/complex/31_nfs_tickle.sh +++ b/ctdb/tests/complex/31_nfs_tickle.sh @@ -57,7 +57,7 @@ ctdb_test_exit_hook_add ctdb_test_eventscript_uninstall ctdb_test_eventscript_install # We need this for later, so we know how long to sleep. -try_command_on_node 0 $CTDB getvar MonitorInterval +try_command_on_node any $CTDB getvar MonitorInterval monitor_interval="${out#*= }" #echo "Monitor interval on node $test_node is $monitor_interval seconds." @@ -77,28 +77,40 @@ echo "Source socket is $src_socket" wait_for_monitor_event $test_node -echo "Trying to determine NFS_TICKLE_SHARED_DIRECTORY..." -f="/etc/sysconfig/nfs" -try_command_on_node -v 0 "[ -r $f ] && sed -n -e s@^NFS_TICKLE_SHARED_DIRECTORY=@@p $f" || true +echo "Sleeping until tickles are synchronised across nodes..." +try_command_on_node $test_node $CTDB getvar TickleUpdateInterval +sleep_for "${out#*= }" -nfs_tickle_shared_directory="${out:-/gpfs/.ctdb/nfs-tickles}" +if try_command_on_node any "test -r /etc/ctdb/events.d/61.nfstickle" ; then + echo "Trying to determine NFS_TICKLE_SHARED_DIRECTORY..." + f="/etc/sysconfig/nfs" + try_command_on_node -v any "[ -r $f ] && sed -n -e s@^NFS_TICKLE_SHARED_DIRECTORY=@@p $f" || true -try_command_on_node $test_node hostname -test_hostname=$out + nfs_tickle_shared_directory="${out:-/gpfs/.ctdb/nfs-tickles}" -try_command_on_node -v 0 cat "${nfs_tickle_shared_directory}/$test_hostname/$test_ip" + try_command_on_node $test_node hostname + test_hostname=$out + + try_command_on_node -v any cat "${nfs_tickle_shared_directory}/$test_hostname/$test_ip" +else + echo "That's OK, we'll use \"ctdb gettickles\", which is newer..." 
+ try_command_on_node -v any "ctdb -Y gettickles $test_ip $test_port" +fi if [ "${out/${src_socket}/}" != "$out" ] ; then - echo "GOOD: NFS connection tracked OK in tickles file." + echo "GOOD: NFS connection tracked OK." else - echo "BAD: Socket not tracked in NFS tickles file:" + echo "BAD: Socket not tracked in NFS tickles." testfailures=1 fi tcptickle_sniff_start $src_socket "${test_ip}:${test_port}" -echo "Disabling node $test_node" -try_command_on_node 1 $CTDB disable -n $test_node -wait_until_node_has_status $test_node disabled +# We need to be nasty to make that the node being failed out doesn't +# get a chance to send any tickles and confuse our sniff. +echo "Killing ctdbd on ${test_node}..." +try_command_on_node $test_node killall -9 ctdbd + +wait_until_node_has_status $test_node disconnected tcptickle_sniff_wait_show diff --git a/ctdb/tests/scripts/ctdb_test_functions.bash b/ctdb/tests/scripts/ctdb_test_functions.bash index 42053c0486..1433a46ec3 100644 --- a/ctdb/tests/scripts/ctdb_test_functions.bash +++ b/ctdb/tests/scripts/ctdb_test_functions.bash @@ -66,7 +66,7 @@ ctdb_test_exit () # now complete. set +e - echo "*** TEST COMPLETE (RC=$status), CLEANING UP..." + echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..." eval "$ctdb_test_exit_hook" || true unset ctdb_test_exit_hook @@ -80,7 +80,7 @@ ctdb_test_exit () # leave the recovery in restart_ctdb so that future tests that # might do a manual restart mid-test will benefit. echo "Forcing a recovery..." - onnode 0 ctdb recover + onnode 0 $CTDB recover fi exit $status @@ -336,13 +336,15 @@ _cluster_is_healthy () { local out x count line - out=$(ctdb -Y status 2>&1) || return 1 + out=$($CTDB -Y status 2>/dev/null) || return 1 { read x count=0 while read line ; do - count=$(($count + 1)) + # We need to see valid lines if we're going to be healthy. + [ "${line#:[0-9]}" != "$line" ] && count=$(($count + 1)) + # A line indicating a node is unhealthy causes failure. 
[ "${line##:*:*:*1:}" != "$line" ] && return 1 done [ $count -gt 0 ] && return $? @@ -357,9 +359,9 @@ cluster_is_healthy () else echo "Cluster is UNHEALTHY" if ! ${ctdb_test_restart_scheduled:-false} ; then - echo "DEBUG:" + echo "DEBUG AT $(date '+%F %T'):" local i - for i in "onnode -q 0 ctdb status" "onnode -q 0 onnode all ctdb scriptstatus" ; do + for i in "onnode -q 0 $CTDB status" "onnode -q 0 onnode all $CTDB scriptstatus" ; do echo "$i" $i || true done @@ -407,7 +409,7 @@ node_has_status () if [ -n "$bits" ] ; then local out x line - out=$(ctdb -Y status 2>&1) || return 1 + out=$($CTDB -Y status 2>&1) || return 1 { read x @@ -420,9 +422,9 @@ node_has_status () return 1 } <<<"$out" # Yay bash! elif [ -n "$fpat" ] ; then - ctdb statistics -n "$pnn" | egrep -q "$fpat" + $CTDB statistics -n "$pnn" | egrep -q "$fpat" elif [ -n "$mpat" ] ; then - ctdb getmonmode -n "$pnn" | egrep -q "$mpat" + $CTDB getmonmode -n "$pnn" | egrep -q "$mpat" else echo 'node_has_status: unknown mode, neither $bits nor $fpat is set' return 1 @@ -437,8 +439,8 @@ wait_until_node_has_status () echo "Waiting until node $pnn has status \"$status\"..." - if ! onnode any $CTDB_TEST_WRAPPER wait_until $timeout node_has_status "$pnn" "$status" ; then - for i in "onnode -q any ctdb status" "onnode -q any onnode all ctdb scriptstatus" ; do + if ! wait_until $timeout onnode any $CTDB_TEST_WRAPPER node_has_status "$pnn" "$status" ; then + for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do echo "$i" $i || true done @@ -579,9 +581,9 @@ tcpdump_wait () echo "Waiting for tcpdump to capture some packets..." if ! 
wait_until 30 tcpdump_check ; then - echo "DEBUG:" + echo "DEBUG AT $(date '+%F %T'):" local i - for i in "onnode -q 0 ctdb status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do + for i in "onnode -q 0 $CTDB status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do echo "$i" $i || true done @@ -638,7 +640,7 @@ gratarp_sniff_wait_show () daemons_stop () { echo "Attempting to politely shutdown daemons..." - onnode 1 ctdb shutdown -n all || true + onnode 1 $CTDB shutdown -n all || true echo "Sleeping for a while..." sleep_for 1 @@ -794,16 +796,16 @@ _ctdb_start_post () onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || return 1 echo "Setting RerecoveryTimeout to 1" - onnode -pq all "ctdb setvar RerecoveryTimeout 1" + onnode -pq all "$CTDB setvar RerecoveryTimeout 1" # In recent versions of CTDB, forcing a recovery like this blocks # until the recovery is complete. Hopefully this will help the # cluster to stabilise before a subsequent test. echo "Forcing a recovery..." - onnode -q 0 ctdb recover + onnode -q 0 $CTDB recover sleep_for 1 echo "Forcing a recovery..." - onnode -q 0 ctdb recover + onnode -q 0 $CTDB recover echo "ctdb is ready" } @@ -855,16 +857,16 @@ restart_ctdb () onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || return 1 echo "Setting RerecoveryTimeout to 1" - onnode -pq all "ctdb setvar RerecoveryTimeout 1" + onnode -pq all "$CTDB setvar RerecoveryTimeout 1" # In recent versions of CTDB, forcing a recovery like this blocks # until the recovery is complete. Hopefully this will help the # cluster to stabilise before a subsequent test. echo "Forcing a recovery..." - onnode -q 0 ctdb recover + onnode -q 0 $CTDB recover sleep_for 1 echo "Forcing a recovery..." - onnode -q 0 ctdb recover + onnode -q 0 $CTDB recover echo "ctdb is ready" } @@ -1036,3 +1038,6 @@ wait_for_monitor_event () wait_until 120 ! ctdb_test_eventscript_flag exists $pnn "monitor" } + +# Make sure that $CTDB is set. 
+: ${CTDB:=ctdb} diff --git a/ctdb/tests/simple/16_ctdb_config_add_ip.sh b/ctdb/tests/simple/16_ctdb_config_add_ip.sh index 6fee386d08..3207661f16 100755 --- a/ctdb/tests/simple/16_ctdb_config_add_ip.sh +++ b/ctdb/tests/simple/16_ctdb_config_add_ip.sh @@ -115,6 +115,6 @@ if wait_until 60 ips_are_on_nodeglob $test_node ${add_ip%/*} ; then echo "That worked!" else echo "BAD: IP didn't get added." - try_command_on_node $test_node ctdb ip -n all + try_command_on_node $test_node $CTDB ip -n all exit 1 fi diff --git a/ctdb/tests/simple/20_ctdb_getmonmode.sh b/ctdb/tests/simple/20_ctdb_getmonmode.sh index eab3dad45a..56a38d8525 100755 --- a/ctdb/tests/simple/20_ctdb_getmonmode.sh +++ b/ctdb/tests/simple/20_ctdb_getmonmode.sh @@ -18,7 +18,7 @@ Steps: 1. Verify that the status on all of the ctdb nodes is 'OK'. 2. Use 'ctdb getmodmode -n <node>' to get the current monitoring mode. -3. Verify that it shows monitoring as 'active'. +3. Verify that it looks sane. 4. Verify that the command prints the output in colon-separated format when run with the '-Y' option. 5. Disable monitoring on the node using 'ctdb disablemonitor'. @@ -47,19 +47,12 @@ try_command_on_node -v 0 $CTDB getmonmode -n $test_node sanity_check_output \ 1 \ - '^Monitoring mode:ACTIVE \(0\)$' \ + '^Monitoring mode:(ACTIVE \(0\)|DISABLED \(1\))$' \ "$out" -colons=$(printf ':mode:\n:0:') - try_command_on_node -v 0 $CTDB -Y getmonmode -n $test_node -if [ "$out" = "$colons" ] ; then - echo "Looks OK" -else - echo "BAD: -Y output isn't what was expected" - testfailures=1 -fi +sanity_check_output 2 '^(:mode:|:0:|:1:)$' "$out" try_command_on_node -v 0 $CTDB disablemonitor -n $test_node |
