summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Schwenke <martin@meltin.net> 2014-11-14 16:42:01 +1100
committer Amitay Isaacs <amitay@samba.org> 2014-11-18 04:17:10 +0100
commit 4cd5be87daf531cb8a67f31b91cceeaf2c488127 (patch)
tree 562dd8a1016a804315a5551acc9c25fd88ddb5ad
parent f51672f5149110025088ef6d1fc59fe7208d2aae (diff)
download samba-4cd5be87daf531cb8a67f31b91cceeaf2c488127.tar.gz
samba-4cd5be87daf531cb8a67f31b91cceeaf2c488127.tar.xz
samba-4cd5be87daf531cb8a67f31b91cceeaf2c488127.zip
ctdb-scripts: Try to restart statd after every 10 failures
Also add and update tests for statd stack dumps.

Update the existing 60.ganesha statd test to do more iterations. Duplicate the result as a new test for 60.nfs.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
-rwxr-xr-x ctdb/config/events.d/60.ganesha 1
-rw-r--r-- ctdb/config/nfs-rpc-checks.d/10.statd.check 1
-rwxr-xr-x ctdb/tests/eventscripts/60.ganesha.monitor.141.sh 18
-rwxr-xr-x ctdb/tests/eventscripts/60.nfs.monitor.144.sh 55
4 files changed, 74 insertions, 1 deletions
diff --git a/ctdb/config/events.d/60.ganesha b/ctdb/config/events.d/60.ganesha
index df0912d526..150be1f159 100755
--- a/ctdb/config/events.d/60.ganesha
+++ b/ctdb/config/events.d/60.ganesha
@@ -230,6 +230,7 @@ case "$1" in
p="rpc.statd"
which $p >/dev/null 2>/dev/null && \
nfs_check_rpc_service "statd" \
+ % 10 "verbose restart:b unhealthy" \
-ge 6 "verbose unhealthy" \
-eq 4 "verbose restart" \
-eq 2 "restart:b"
diff --git a/ctdb/config/nfs-rpc-checks.d/10.statd.check b/ctdb/config/nfs-rpc-checks.d/10.statd.check
index d738a3245e..526e238bcc 100644
--- a/ctdb/config/nfs-rpc-checks.d/10.statd.check
+++ b/ctdb/config/nfs-rpc-checks.d/10.statd.check
@@ -1,3 +1,4 @@
+% 10 verbose restart:b unhealthy
-ge 6 verbose unhealthy
-eq 4 verbose restart
-eq 2 restart:b
diff --git a/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh b/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh
index 9cd82f84cc..c9a5ab7658 100755
--- a/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh
+++ b/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh
@@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "statd down, 6 iterations"
+define_test "statd down, 10 iterations"
# statd fails and attempts to restart it fail.
@@ -37,3 +37,19 @@ rpcinfo: RPC: Program not registered
program status version 1 is not available
EOF
simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+
+CTDB_NFS_DUMP_STUCK_THREADS=3
+FAKE_RPC_THREAD_PIDS=1234
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd]
+Stack trace for rpc.statd[1234]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_1234/stack+0x0/0xff
+EOF
+simple_test || exit $?
diff --git a/ctdb/tests/eventscripts/60.nfs.monitor.144.sh b/ctdb/tests/eventscripts/60.nfs.monitor.144.sh
new file mode 100755
index 0000000000..0a3beb7ca2
--- /dev/null
+++ b/ctdb/tests/eventscripts/60.nfs.monitor.144.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "statd down, 10 iterations"
+
+# statd fails and attempts to restart it fail.
+
+setup_nfs
+rpc_services_down "status"
+
+ok_null
+simple_test || exit $?
+
+ok<<EOF
+Trying to restart statd [rpc.statd]
+EOF
+simple_test || exit $?
+
+ok_null
+simple_test || exit $?
+
+ok<<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd]
+EOF
+simple_test || exit $?
+
+ok_null
+simple_test || exit $?
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+EOF
+simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+
+CTDB_NFS_DUMP_STUCK_THREADS=3
+FAKE_RPC_THREAD_PIDS=1234
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd]
+Stack trace for rpc.statd[1234]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_1234/stack+0x0/0xff
+EOF
+simple_test || exit $?