5 files changed, 43 insertions, 91 deletions
diff --git a/ctdb/tests/scripts/ctdb_test_functions.bash b/ctdb/tests/scripts/ctdb_test_functions.bash
index 1a7935d9f5..13797dc89f 100644
--- a/ctdb/tests/scripts/ctdb_test_functions.bash
+++ b/ctdb/tests/scripts/ctdb_test_functions.bash
@@ -488,10 +488,11 @@ wait_until_node_has_status ()
     local pnn="$1"
     local status="$2"
     local timeout="${3:-30}"
+    local proxy_pnn="${4:-any}"
 
     echo "Waiting until node $pnn has status \"$status\"..."
 
-    if ! wait_until $timeout onnode any $CTDB_TEST_WRAPPER node_has_status "$pnn" "$status" ; then
+    if ! wait_until $timeout onnode $proxy_pnn $CTDB_TEST_WRAPPER node_has_status "$pnn" "$status" ; then
 	for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
 	    echo "$i"
 	    $i || true
@@ -889,47 +890,54 @@ restart_ctdb ()
 	echo -n " (scheduled)"
     fi
     echo "..."
-    
-    local i=0
-    while : ; do
+
+    local i
+    for i in $(seq 1 5) ; do
 	if [ -n "$CTDB_NODES_SOCKETS" ] ; then
 	    daemons_stop
 	    daemons_start "$@"
 	else
 	    onnode -p all $CTDB_TEST_WRAPPER _restart_ctdb "$@"
-	fi && break
+	fi || {
+	    echo "Restart failed.  Trying again in a few seconds..."
+	    sleep_for 5
+	    continue
+	}
 
-	i=$(($i + 1))
-	[ $i -lt 5 ] || break
+	onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || {
+	    echo "Cluster didn't become healthy.  Restarting..."
+	    continue
+	}
 
-	echo "That didn't seem to work - sleeping for a while..."
-	sleep_for 5
-    done
-	
-    onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || return 1
+	local debug_out=$(onnode -p all ctdb status -Y 2>&1; onnode -p all ctdb scriptstatus 2>&1)
 
-    local debug_out=$(onnode -p all ctdb status -Y 2>&1; onnode -p all ctdb scriptstatus 2>&1)
+	echo "Setting RerecoveryTimeout to 1"
+	onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
 
-    echo "Setting RerecoveryTimeout to 1"
-    onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
+	# In recent versions of CTDB, forcing a recovery like this
+	# blocks until the recovery is complete.  Hopefully this will
+	# help the cluster to stabilise before a subsequent test.
+	echo "Forcing a recovery..."
+	onnode -q 0 $CTDB recover
+	sleep_for 1
+	echo "Forcing a recovery..."
+	onnode -q 0 $CTDB recover
 
-    # In recent versions of CTDB, forcing a recovery like this blocks
-    # until the recovery is complete.  Hopefully this will help the
-    # cluster to stabilise before a subsequent test.
-    echo "Forcing a recovery..."
-    onnode -q 0 $CTDB recover
-    sleep_for 1
-    echo "Forcing a recovery..."
-    onnode -q 0 $CTDB recover
+	# If the cluster is still healthy then we're done; otherwise retry.
+	if ! onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
+	    echo "Cluster become UNHEALTHY again.  Restarting..."
+	    continue
+	fi
 
-    echo "ctdb is ready"
+	echo "ctdb is ready"
+	return 0
+    done
 
-    if ! onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
-	echo "OUCH!  Cluster is UNHEALTHY again..."
-	echo "$debug_out"
-	# Try to make the calling test fail
-	status=1
-    fi
+    echo "Cluster UNHEALTHY...  too many attempts..."
+    echo "$debug_out"
+    # Try to make the calling test fail
+    status=1
+    return 1
 }
 
 ctdb_restart_when_done ()
diff --git a/ctdb/tests/simple/01_ctdb_version.sh b/ctdb/tests/simple/01_ctdb_version.sh
index 02cdf6ae70..50401d0ba3 100755
--- a/ctdb/tests/simple/01_ctdb_version.sh
+++ b/ctdb/tests/simple/01_ctdb_version.sh
@@ -38,6 +38,9 @@ if ! try_command_on_node -v 0 "rpm -q ctdb" ; then
     exit 0
 fi
 rpm_ver="${out#ctdb-}"
+# Some versions of RPM append the architecture to the version.
+arch=$(uname -m)
+rpm_ver="${rpm_ver%.${arch}}"
 
 try_command_on_node -v 0 "$CTDB version"
 ctdb_ver="${out#CTDB version: }"
diff --git a/ctdb/tests/simple/09_ctdb_ping.sh b/ctdb/tests/simple/09_ctdb_ping.sh
index 6610431094..401e48dcf3 100755
--- a/ctdb/tests/simple/09_ctdb_ping.sh
+++ b/ctdb/tests/simple/09_ctdb_ping.sh
@@ -47,7 +47,7 @@ sanity_check_output \
 
 try_command_on_node -v 0 "$CTDB shutdown -n 1"
 
-wait_until_node_has_status 1 disconnected
+wait_until_node_has_status 1 disconnected 30 0
 
 try_command_on_node -v 0 "! $CTDB ping -n 1"
 
diff --git a/ctdb/tests/simple/20_ctdb_getmonmode.sh b/ctdb/tests/simple/20_ctdb_getmonmode.sh
deleted file mode 100755
index 56a38d8525..0000000000
--- a/ctdb/tests/simple/20_ctdb_getmonmode.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-
-test_info()
-{
-    cat <<EOF
-Verify 'ctdb getmonmode' works correctly.
-
-This test doesn't actually verify that enabling and disabling
-monitoring mode actually does that.  It trusts ctdb that the
-monitoring mode is modified as requested.  21_ctdb_disablemonitor.sh
-does some more useful checking.
-
-Prerequisites:
-
-* An active CTDB cluster with at least 2 active nodes.
-
-Steps:
-
-1. Verify that the status on all of the ctdb nodes is 'OK'.
-2. Use 'ctdb getmodmode -n <node>' to get the current monitoring mode.
-3. Verify that it looks sane.
-4. Verify that the command prints the output in colon-separated format
-   when run with the '-Y' option.
-5. Disable monitoring on the node using 'ctdb disablemonitor'.
-6. Verify that it shows monitoring as 'disabled'.
-
-Expected results:
-
-* 'ctdb getmonmode' works as expected.
-EOF
-}
-
-. ctdb_test_functions.bash
-
-ctdb_test_init "$@"
-
-set -e
-
-cluster_is_healthy
-
-# Reset configuration
-ctdb_restart_when_done
-
-test_node=1
-
-try_command_on_node -v 0 $CTDB getmonmode -n $test_node
-
-sanity_check_output \
-    1 \
-    '^Monitoring mode:(ACTIVE \(0\)|DISABLED \(1\))$' \
-    "$out"
-
-try_command_on_node -v 0 $CTDB -Y getmonmode -n $test_node
-
-sanity_check_output 2 '^(:mode:|:0:|:1:)$' "$out"
-
-try_command_on_node -v 0 $CTDB disablemonitor -n $test_node
-
-wait_until_node_has_status $test_node monoff
diff --git a/ctdb/tests/simple/26_ctdb_config_check_error_on_unreachable_ctdb.sh b/ctdb/tests/simple/26_ctdb_config_check_error_on_unreachable_ctdb.sh
index da5a49b67e..cddc21c6b0 100755
--- a/ctdb/tests/simple/26_ctdb_config_check_error_on_unreachable_ctdb.sh
+++ b/ctdb/tests/simple/26_ctdb_config_check_error_on_unreachable_ctdb.sh
@@ -52,7 +52,7 @@ echo "There are $num_nodes nodes."
 echo "Shutting down node ${test_node}..."
 try_command_on_node $test_node $CTDB shutdown
 
-wait_until_node_has_status $test_node disconnected
+wait_until_node_has_status $test_node disconnected 30 0
 
 pat="ctdb_control error: 'ctdb_control to disconnected node'|ctdb_control error: 'node is disconnected'|Node $test_node is DISCONNECTED"