diff options
| author | Martin Schwenke <martin@meltin.net> | 2014-11-14 16:42:01 +1100 |
|---|---|---|
| committer | Amitay Isaacs <amitay@samba.org> | 2014-11-18 04:17:10 +0100 |
| commit | 4cd5be87daf531cb8a67f31b91cceeaf2c488127 (patch) | |
| tree | 562dd8a1016a804315a5551acc9c25fd88ddb5ad | |
| parent | f51672f5149110025088ef6d1fc59fe7208d2aae (diff) | |
| download | samba-4cd5be87daf531cb8a67f31b91cceeaf2c488127.tar.gz samba-4cd5be87daf531cb8a67f31b91cceeaf2c488127.tar.xz samba-4cd5be87daf531cb8a67f31b91cceeaf2c488127.zip | |
ctdb-scripts: Try to restart statd after every 10 failures
Also add and update tests for statd stack dumps. Update the existing
60.ganesha statd test to do more iterations. Duplicate the result as
a new test for 60.nfs.
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
| -rwxr-xr-x | ctdb/config/events.d/60.ganesha | 1 | ||||
| -rw-r--r-- | ctdb/config/nfs-rpc-checks.d/10.statd.check | 1 | ||||
| -rwxr-xr-x | ctdb/tests/eventscripts/60.ganesha.monitor.141.sh | 18 | ||||
| -rwxr-xr-x | ctdb/tests/eventscripts/60.nfs.monitor.144.sh | 55 |
4 files changed, 74 insertions, 1 deletion
diff --git a/ctdb/config/events.d/60.ganesha b/ctdb/config/events.d/60.ganesha index df0912d526..150be1f159 100755 --- a/ctdb/config/events.d/60.ganesha +++ b/ctdb/config/events.d/60.ganesha @@ -230,6 +230,7 @@ case "$1" in p="rpc.statd" which $p >/dev/null 2>/dev/null && \ nfs_check_rpc_service "statd" \ + % 10 "verbose restart:b unhealthy" \ -ge 6 "verbose unhealthy" \ -eq 4 "verbose restart" \ -eq 2 "restart:b" diff --git a/ctdb/config/nfs-rpc-checks.d/10.statd.check b/ctdb/config/nfs-rpc-checks.d/10.statd.check index d738a3245e..526e238bcc 100644 --- a/ctdb/config/nfs-rpc-checks.d/10.statd.check +++ b/ctdb/config/nfs-rpc-checks.d/10.statd.check @@ -1,3 +1,4 @@ +% 10 verbose restart:b unhealthy -ge 6 verbose unhealthy -eq 4 verbose restart -eq 2 restart:b diff --git a/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh b/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh index 9cd82f84cc..c9a5ab7658 100755 --- a/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh +++ b/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh @@ -2,7 +2,7 @@ . "${TEST_SCRIPTS_DIR}/unit.sh" -define_test "statd down, 6 iterations" +define_test "statd down, 10 iterations" # statd fails and attempts to restart it fail. @@ -37,3 +37,19 @@ rpcinfo: RPC: Program not registered program status version 1 is not available EOF simple_test || exit $? +simple_test || exit $? +simple_test || exit $? +simple_test || exit $? + +CTDB_NFS_DUMP_STUCK_THREADS=3 +FAKE_RPC_THREAD_PIDS=1234 + +required_result 1 <<EOF +ERROR: status failed RPC check: +rpcinfo: RPC: Program not registered +program status version 1 is not available +Trying to restart statd [rpc.statd] +Stack trace for rpc.statd[1234]: +[<ffffffff87654321>] fake_stack_trace_for_pid_1234/stack+0x0/0xff +EOF +simple_test || exit $? 
diff --git a/ctdb/tests/eventscripts/60.nfs.monitor.144.sh b/ctdb/tests/eventscripts/60.nfs.monitor.144.sh
new file mode 100755
index 0000000000..0a3beb7ca2
--- /dev/null
+++ b/ctdb/tests/eventscripts/60.nfs.monitor.144.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "statd down, 10 iterations"
+
+# statd fails and attempts to restart it fail.
+
+setup_nfs
+rpc_services_down "status"
+
+ok_null
+simple_test || exit $?
+
+ok<<EOF
+Trying to restart statd [rpc.statd]
+EOF
+simple_test || exit $?
+
+ok_null
+simple_test || exit $?
+
+ok<<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd]
+EOF
+simple_test || exit $?
+
+ok_null
+simple_test || exit $?
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+EOF
+simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+
+CTDB_NFS_DUMP_STUCK_THREADS=3
+FAKE_RPC_THREAD_PIDS=1234
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd]
+Stack trace for rpc.statd[1234]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_1234/stack+0x0/0xff
+EOF
+simple_test || exit $?
