From 4cd5be87daf531cb8a67f31b91cceeaf2c488127 Mon Sep 17 00:00:00 2001
From: Martin Schwenke
Date: Fri, 14 Nov 2014 16:42:01 +1100
Subject: ctdb-scripts: Try to restart statd after every 10 failures

Also add and update tests for statd stack dumps.

Update the existing 60.ganesha statd test to do more iterations.
Duplicate the result as a new test for 60.nfs.

Signed-off-by: Martin Schwenke
Reviewed-by: Amitay Isaacs
---
 ctdb/config/events.d/60.ganesha                   |  1 +
 ctdb/config/nfs-rpc-checks.d/10.statd.check       |  1 +
 ctdb/tests/eventscripts/60.ganesha.monitor.141.sh | 18 +++++++-
 ctdb/tests/eventscripts/60.nfs.monitor.144.sh     | 55 +++++++++++++++++++++++
 4 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100755 ctdb/tests/eventscripts/60.nfs.monitor.144.sh

diff --git a/ctdb/config/events.d/60.ganesha b/ctdb/config/events.d/60.ganesha
index df0912d526..150be1f159 100755
--- a/ctdb/config/events.d/60.ganesha
+++ b/ctdb/config/events.d/60.ganesha
@@ -230,6 +230,7 @@ case "$1" in
 	p="rpc.statd"
 	which $p >/dev/null 2>/dev/null && \
 		nfs_check_rpc_service "statd" \
+			% 10 "verbose restart:b unhealthy" \
 			-ge 6 "verbose unhealthy" \
 			-eq 4 "verbose restart" \
 			-eq 2 "restart:b"
diff --git a/ctdb/config/nfs-rpc-checks.d/10.statd.check b/ctdb/config/nfs-rpc-checks.d/10.statd.check
index d738a3245e..526e238bcc 100644
--- a/ctdb/config/nfs-rpc-checks.d/10.statd.check
+++ b/ctdb/config/nfs-rpc-checks.d/10.statd.check
@@ -1,3 +1,4 @@
+% 10 verbose restart:b unhealthy
 -ge 6 verbose unhealthy
 -eq 4 verbose restart
 -eq 2 restart:b
diff --git a/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh b/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh
index 9cd82f84cc..c9a5ab7658 100755
--- a/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh
+++ b/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh
@@ -2,7 +2,7 @@
 
 . "${TEST_SCRIPTS_DIR}/unit.sh"
 
-define_test "statd down, 6 iterations"
+define_test "statd down, 10 iterations"
 
 # statd fails and attempts to restart it fail.
 
@@ -37,3 +37,19 @@ rpcinfo: RPC: Program not registered program status version 1 is not available EOF simple_test || exit $? +simple_test || exit $? +simple_test || exit $? +simple_test || exit $? + +CTDB_NFS_DUMP_STUCK_THREADS=3 +FAKE_RPC_THREAD_PIDS=1234 + +required_result 1 <] fake_stack_trace_for_pid_1234/stack+0x0/0xff +EOF +simple_test || exit $? diff --git a/ctdb/tests/eventscripts/60.nfs.monitor.144.sh b/ctdb/tests/eventscripts/60.nfs.monitor.144.sh new file mode 100755 index 0000000000..0a3beb7ca2 --- /dev/null +++ b/ctdb/tests/eventscripts/60.nfs.monitor.144.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +. "${TEST_SCRIPTS_DIR}/unit.sh" + +define_test "statd down, 10 iterations" + +# statd fails and attempts to restart it fail. + +setup_nfs +rpc_services_down "status" + +ok_null +simple_test || exit $? + +ok<] fake_stack_trace_for_pid_1234/stack+0x0/0xff +EOF +simple_test || exit $? -- cgit