author     Martin Schwenke <martin@meltin.net>    2014-02-28 15:54:54 +1100
committer  Amitay Isaacs <amitay@samba.org>       2014-03-28 05:55:13 +0100
commit     234f8eb5712c38872444c5dd7a258903b389b062 (patch)
tree       6afcf1f910002f6ab140cccf60a4ea1211499a72 /ctdb
parent     f99a759ce7cb5492442c018f976619f0ba439a43 (diff)
ctdb-tests: Add a new NFS tickle test
This one ensures that a newly started node gets an up-to-date tickle list.
Tweak some of the integration test functions to accommodate this.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
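For context, the tickle list that this test inspects can also be queried by hand
with the gettickles command. The lines below are only a rough sketch: the IP and
port are placeholders, not values from this commit, and the exact output format
varies between CTDB versions.

    # Run on any cluster node: list the registered TCP connections ("tickles")
    # for a public address and port. 10.1.1.1 and 2049 are placeholder values.
    ctdb gettickles 10.1.1.1 2049
    # Each tracked connection is reported with its client (source) socket and
    # the public-address (destination) socket; the test greps this list for
    # the source socket of its nc connection.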
Diffstat (limited to 'ctdb')
-rwxr-xr-x  ctdb/tests/complex/34_nfs_tickle_restart.sh  |  98
-rw-r--r--  ctdb/tests/scripts/integration.bash          |   8
2 files changed, 102 insertions(+), 4 deletions(-)
diff --git a/ctdb/tests/complex/34_nfs_tickle_restart.sh b/ctdb/tests/complex/34_nfs_tickle_restart.sh
new file mode 100755
index 00000000000..93587e2f316
--- /dev/null
+++ b/ctdb/tests/complex/34_nfs_tickle_restart.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+test_info()
+{
+ cat <<EOF
+Verify that a newly started CTDB node gets updated tickle details
+
+Prerequisites:
+
+* An active CTDB cluster with at least 2 nodes with public addresses.
+
+* Test must be run on a real or virtual cluster rather than against
+ local daemons.
+
+* Cluster nodes must be listening on the NFS TCP port (2049).
+
+Steps:
+
+As with 31_nfs_tickle.sh but restart a node after the tickle is
+registered.
+
+Expected results:
+
+* CTDB should correctly communicate tickles to new CTDB instances as
+ they join the cluster.
+EOF
+}
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init "$@"
+
+ctdb_test_check_real_cluster
+
+cluster_is_healthy
+
+# Reset configuration
+ctdb_restart_when_done
+
+# We need this for later, so we know how long to run nc for.
+try_command_on_node any $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+#echo "Monitor interval on node $test_node is $monitor_interval seconds."
+
+select_test_node_and_ips
+try_command_on_node $test_node "$CTDB listnodes -Y"
+listnodes_output="$out"
+numnodes=$(wc -l <<<"$listnodes_output")
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+nc -d -w 600 $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+wait_for_monitor_event $test_node
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+echo "Select a node to restart ctdbd"
+rn=$(awk -F: -v test_node=$test_node \
+ '$2 != test_node { print $2 ; exit }' <<<"$listnodes_output")
+
+echo "Restarting CTDB on node ${rn}"
+try_command_on_node $rn $CTDB_TEST_WRAPPER restart_ctdb_1
+
+# In some theoretical world this is racy. In practice, the node will
+# take quite a while to become healthy, so this will beat any
+# assignment of IPs to the node.
+echo "Setting NoIPTakeover on node ${rn}"
+try_command_on_node $rn $CTDB setvar NoIPTakeover 1
+
+wait_until_healthy
+
+echo "Getting TickleUpdateInterval..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+update_interval="$out"
+
+echo "Wait until NFS connection is tracked by CTDB on all nodes..."
+if ! wait_until $(($update_interval * 2)) \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket ; then
+ echo "BAD: connection not tracked on all nodes:"
+ echo "$out"
+ exit 1
+fi
+
+# We could go on to test whether the tickle ACK gets sent. However,
+# this is tested in previous tests and the use of NoIPTakeover
+# complicates things on a 2 node cluster.
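
The script above leans on check_tickles and check_tickles_all, which live in the
shared complex-test helpers rather than in this diff. The following is only a
sketch of what such a helper does, under the assumption that it reuses
try_command_on_node and the $out variable seen in the test; it is not the actual
implementation.

    # Sketch only: succeed if one node's tickle list contains the nc connection.
    check_tickles ()
    {
        local node="$1" test_ip="$2" test_port="$3" src_socket="$4"
        # Ask the node for the tickles registered against the test address/port.
        try_command_on_node $node $CTDB gettickles $test_ip $test_port
        # Succeed if the nc connection's source socket appears in the output
        # (the exact output format, including the "SRC:" label, is an assumption).
        grep -Fq "SRC: ${src_socket} " <<<"$out"
    }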
diff --git a/ctdb/tests/scripts/integration.bash b/ctdb/tests/scripts/integration.bash
index 665fc7dd694..1ff02d55688 100644
--- a/ctdb/tests/scripts/integration.bash
+++ b/ctdb/tests/scripts/integration.bash
@@ -278,7 +278,7 @@ wait_until_healthy ()
echo "Waiting for cluster to become healthy..."
- wait_until 120 _cluster_is_healthy
+ wait_until $timeout onnode -q any $CTDB_TEST_WRAPPER _cluster_is_healthy
}
# This function is becoming nicely overloaded. Soon it will collapse! :-)
@@ -438,7 +438,7 @@ _ctdb_hack_options ()
esac
}
-_restart_ctdb ()
+restart_ctdb_1 ()
{
_ctdb_hack_options "$@"
@@ -452,7 +452,7 @@ _restart_ctdb ()
# Restart CTDB on all nodes. Override for local daemons.
_restart_ctdb_all ()
{
- onnode -p all $CTDB_TEST_WRAPPER _restart_ctdb "$@"
+ onnode -p all $CTDB_TEST_WRAPPER restart_ctdb_1 "$@"
}
# Nothing needed for a cluster. Override for local daemons.
@@ -479,7 +479,7 @@ restart_ctdb ()
continue
}
- onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || {
+ wait_until_healthy || {
echo "Cluster didn't become healthy. Restarting..."
continue
}
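
The integration.bash hunks above rename _restart_ctdb to restart_ctdb_1 and make
wait_until_healthy drive the per-node _cluster_is_healthy check through onnode
via the generic wait_until poller. The pattern being assumed is a simple
poll-with-timeout loop, sketched below; the real wait_until in the shared test
scripts may differ in details such as progress output or sleep interval.

    # Sketch of the poll-with-timeout pattern used by the hunks above; not the
    # actual wait_until implementation from the CTDB test scripts.
    wait_until ()
    {
        local timeout="$1" ; shift
        local elapsed=0
        while [ "$elapsed" -lt "$timeout" ] ; do
            # Run the supplied command (e.g. _cluster_is_healthy) until it succeeds.
            "$@" && return 0
            sleep 1
            elapsed=$((elapsed + 1))
        done
        return 1
    }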