summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorRonnie Sahlberg <ronniesahlberg@gmail.com>2010-11-17 13:50:56 +1100
committerRonnie Sahlberg <ronniesahlberg@gmail.com>2010-11-17 13:54:57 +1100
commit4fe85e5be5f1a4997da07a66ca70f351de498b1a (patch)
treecd63b6cafcb6912f4a9ae6554dfaabc409d2ced0
parent8fe1ec37543fe88590feebcb87ad9614b25a3251 (diff)
downloadsamba-4fe85e5be5f1a4997da07a66ca70f351de498b1a.tar.gz
samba-4fe85e5be5f1a4997da07a66ca70f351de498b1a.tar.xz
samba-4fe85e5be5f1a4997da07a66ca70f351de498b1a.zip
add a new support function ctdb_check_counter_equal()
Update nfs to try to restart the service after 10 consecutive failures and to flag the node unhealthy after 15.
Add a similar function to mountd.
(This used to be ctdb commit 1569a54bb82fc433895ed68f816cf48399ad9d40)
-rwxr-xr-x ctdb/config/events.d/60.nfs 34
-rwxr-xr-x ctdb/config/functions 13
2 files changed, 43 insertions, 4 deletions
diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs
index 038adbb094..8889cadcc2 100755
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@@ -78,15 +78,20 @@ case "$1" in
(
service_name="nfs_knfsd"
ctdb_counter_incr
- ctdb_check_counter_limit 10 quiet >/dev/null
+
+ ctdb_check_counter_equal 10 || {
+ echo "Trying to restart NFS service"
+ startstop_nfs restart >/dev/null 2>&1 &
+ exit 0
+ }
+
+ ctdb_check_counter_limit 15 quiet >/dev/null
) || {
echo "$ctdb_check_rpc_out"
echo "Trying to restart NFS service"
startstop_nfs restart
exit 1
}
- # we haven't hit the failure limit so restart quietly
- startstop_nfs restart >/dev/null 2>&1 &
fi
}
@@ -107,7 +112,25 @@ case "$1" in
# mount needs special handling since it is sometimes not started
# correctly on RHEL5
- ctdb_check_rpc "MOUNTD" 100005 1 || {
+ if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then
+ (service_name="nfs_mountd"; ctdb_counter_init)
+ else
+ (
+ service_name="nfs_mountd"
+ ctdb_counter_incr
+
+ ctdb_check_counter_equal 5 || {
+ p="rpc.mountd"
+ cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+ echo "Trying to restart MOUNTD [${cmd}]"
+ killall -q -9 $p
+ $cmd &
+ exit 0
+ }
+
+ ctdb_check_counter_limit 10 quiet >/dev/null
+ ) || {
+ echo "$ctdb_check_rpc_out"
p="rpc.mountd"
cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
echo "Trying to restart MOUNTD [${cmd}]"
@@ -115,6 +138,9 @@ case "$1" in
$cmd &
exit 1
}
+ fi
+
+
# rquotad needs special handling since it is sometimes not started
# correctly on RHEL5
# this is not a critical service so we dont flag the node as unhealthy
diff --git a/ctdb/config/functions b/ctdb/config/functions
index 610085b677..4acfc4ffab 100755
--- a/ctdb/config/functions
+++ b/ctdb/config/functions
@@ -571,6 +571,19 @@ ctdb_check_counter_limit () {
echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
fi
}
+ctdb_check_counter_equal () {
+ _ctdb_counter_common
+ # Returns 1 when the failure counter equals $1 and 0 otherwise, so callers can write "ctdb_check_counter_equal N || { ... }".
+ _limit=$1
+ # NOTE(review): _counter_file is presumably set by _ctdb_counter_common above -- confirm.
+ # unary counting! the counter file's size in bytes is the count (0 if stat fails)
+ _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+ if [ $_size -eq $_limit ] ; then
+ return 1
+ fi
+ return 0
+}
+
########################################################
ctdb_spool_dir="/var/spool/ctdb"