summaryrefslogtreecommitdiffstats
path: root/ctdb/tests/complex
diff options
context:
space:
mode:
authorMartin Schwenke <martin@meltin.net>2014-02-07 17:37:00 +1100
committerAmitay Isaacs <amitay@gmail.com>2014-02-19 12:04:47 +1100
commit2532149f8f9bbe6d3c8f5ac6e5e4bc2ad1681e27 (patch)
tree9d23e4a4a0a3621cd511f5c77b45ed3247a0af86 /ctdb/tests/complex
parent79e2725f339e7c5336b4053348c4266268de6ca3 (diff)
downloadsamba-2532149f8f9bbe6d3c8f5ac6e5e4bc2ad1681e27.tar.gz
samba-2532149f8f9bbe6d3c8f5ac6e5e4bc2ad1681e27.tar.xz
samba-2532149f8f9bbe6d3c8f5ac6e5e4bc2ad1681e27.zip
ctdb-scripts: Enhancements to hung script debugging
* Add stack dumps for "interesting" processes. These are processes that sometimes get stuck, so try to print stack traces for them if they appear in the pstree output.

* Add new configuration variables CTDB_DEBUG_HUNG_SCRIPT_LOGFILE and CTDB_DEBUG_HUNG_SCRIPT_STACKPAT. These are primarily for testing but the latter may be useful for live debugging.

* Load CTDB configuration so that the above configuration variables can be set/changed without restarting ctdbd.

Add a test that tries to ensure that all of this is working.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Diffstat (limited to 'ctdb/tests/complex')
-rwxr-xr-xctdb/tests/complex/90_debug_hung_script.sh91
1 files changed, 91 insertions, 0 deletions
diff --git a/ctdb/tests/complex/90_debug_hung_script.sh b/ctdb/tests/complex/90_debug_hung_script.sh
new file mode 100755
index 0000000000..ef6216cf94
--- /dev/null
+++ b/ctdb/tests/complex/90_debug_hung_script.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+test_info()  # Print a description of this test on stdout
+{
+ cat <<EOF
+Verify CTDB's debugging of timed out eventscripts
+
+Prerequisites:
+
+* An active CTDB cluster with monitoring enabled
+
+Expected results:
+
+* When an eventscript times out the correct debugging is executed.
+EOF
+}
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"  # Load helpers; presumably defines the ctdb_test_* and node functions used below
+
+set -e  # Abort as soon as any command fails
+
+ctdb_test_init "$@"  # Pass this script's own arguments through
+
+ctdb_test_check_real_cluster  # NOTE(review): name suggests this rejects non-real clusters -- confirm
+
+cluster_is_healthy
+
+# No need for restart when done
+
+# This is overkill but it at least provides a valid test node
+select_test_node_and_ips  # presumably sets $test_node, which later commands rely on -- confirm
+
+####################
+
+# Set this if CTDB is installed in a non-standard location on cluster
+# nodes
+[ -n "$CTDB_BASE" ] || CTDB_BASE="/etc/ctdb"  # Fall back to /etc/ctdb when unset or empty
+
+####################
+
+echo "Enable eventscript for testing timeouts..."
+ctdb_test_exit_hook_add "onnode -q $test_node $CTDB disablescript 99.timeout"  # presumably queued to run at test exit, undoing the enable below -- confirm
+try_command_on_node $test_node $CTDB enablescript "99.timeout"
+
+####################
+
+echo "Setting monitor events to time out..."
+rc_local_d="${CTDB_BASE}/rc.local.d"
+try_command_on_node $test_node mkdir -p "$rc_local_d"
+
+rc_local_f="${rc_local_d}/timeout_config.$$"  # $$ is this shell's PID, so the name differs per run
+ctdb_test_exit_hook_add "onnode $test_node rm -f $rc_local_f"
+
+try_command_on_node $test_node mktemp
+debug_output="$out"  # assumes try_command_on_node leaves the command output (the mktemp path) in $out -- confirm
+ctdb_test_exit_hook_add "onnode $test_node rm -f $debug_output"
+
+try_command_on_node -i $test_node tee "$rc_local_f" <<<"\
+CTDB_RUN_TIMEOUT_MONITOR=yes
+CTDB_DEBUG_HUNG_SCRIPT_LOGFILE=\"$debug_output\"
+CTDB_DEBUG_HUNG_SCRIPT_STACKPAT='exportfs\|rpcinfo\|sleep'"
+
+try_command_on_node $test_node chmod +x "$rc_local_f"  # presumably needed for the file to be picked up -- confirm
+
+####################
+
+wait_for_monitor_event $test_node
+
+echo "Checking output of hung script debugging..."  # NOTE(review): the pstree pattern below hard-codes /etc/ctdb instead of using $CTDB_BASE
+try_command_on_node -v $test_node cat "$debug_output"  # the loop below greps this output via $out
+
+while IFS="" read pattern ; do  # no -r: read halves the doubled backslashes in the patterns; empty IFS keeps leading spaces
+ if grep -- "^${pattern}\$" <<<"$out" >/dev/null ; then  # anchored: pattern must match one whole output line
+ echo "GOOD: output contains \"$pattern\""
+ else
+ echo "BAD: output does not contain \"$pattern\""
+ exit 1
+ fi
+done <<'EOF'  # quoted delimiter: the shell passes the patterns to read unexpanded
+===== Start of hung script debug for PID=".*", event="monitor" =====
+===== End of hung script debug for PID=".*", event="monitor" =====
+pstree -p -a .*:
+ *\`-99\\.timeout,.* /etc/ctdb/events.d/99.timeout monitor
+ *\`-sleep,.*
+---- Stack trace of interesting process [0-9]*\\[sleep\\] ----
+[<[0-9a-f]*>] .*sleep+.*
+---- ctdb scriptstatus monitor: ----
+[0-9]* scripts were executed last monitor cycle
+99\\.timeout *Status:TIMEDOUT.*
+ *OUTPUT:sleeping for [0-9]* seconds\\.\\.\\.
+EOF