summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Schwenke <martin@meltin.net> 2011-08-05 16:39:57 +1000
committer Martin Schwenke <martin@meltin.net> 2011-08-17 10:39:53 +1000
commit 86792724a2a911da9cd3e75622d35084c88eb8ce (patch)
tree dd5158b6681dcedebff8d69d4925ee7868a1190e
parent 5c9fbb55ce1b173646cb789185886fa4a3c56b6f (diff)
download samba-86792724a2a911da9cd3e75622d35084c88eb8ce.tar.gz
samba-86792724a2a911da9cd3e75622d35084c88eb8ce.tar.xz
samba-86792724a2a911da9cd3e75622d35084c88eb8ce.zip
Eventscripts - weaken TCP port check message if CTDB has just been started.
Sometimes smbd and other services can take a while to start, especially when there is a lot of activity after ctdbd has just started. The TCP port check can then pollute the logs with lots of "ERROR" messages and possibly extra debug.

This creates a flag file when a service is started (but not restarted) and this flag is removed the first time that TCP port checks succeed for that service. When a port check fails and the flag file still exists, a less extreme "INFO" message is printed rather than the usual "ERROR" message. This means that until the node actually becomes healthy we see more friendly messages.

The subtext is that we're hearing false positive reports "recreates" of CQ S1024874 (samba stopped responding on port 445) quite often when ctdbd is started. This reduces the chances of people reporting such false recreates...

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit 571865eb6ef847857129d0b1e2ba5fa7254bfe8c)
-rwxr-xr-x ctdb/config/functions 31
1 file changed, 30 insertions, 1 deletion
diff --git a/ctdb/config/functions b/ctdb/config/functions
index fcb67bb7af..e75254a4e3 100755
--- a/ctdb/config/functions
+++ b/ctdb/config/functions
@@ -324,8 +324,27 @@ ctdb_check_directories() {
# check a set of tcp ports
# usage: ctdb_check_tcp_ports <ports...>
######################################################
+
+# This flag file is created when a service is initially started. It
+# is deleted the first time TCP port checks for that service succeed.
+# Until then ctdb_check_tcp_ports() prints a more subtle "error"
+# message if a port check fails.
+_ctdb_check_tcp_common ()
+{
+ _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
+}
+
+ctdb_check_tcp_init ()
+{
+ _ctdb_check_tcp_common
+ mkdir -p "${_ctdb_service_started_file%/*}" # dirname
+ touch "$_ctdb_service_started_file"
+}
+
ctdb_check_tcp_ports()
{
+ _ctdb_check_tcp_common
+
_cmd='netstat -l -t -n'
_ns=$($_cmd)
for _p ; do # process each function argument (port)
@@ -338,13 +357,22 @@ ctdb_check_tcp_ports()
done
# We didn't match the port, so flag an error, print some debug
- cat <<EOF
+ if [ ! -f "$_ctdb_service_started_file" ] ; then
+ cat <<EOF
ERROR: $service_name tcp port $_p is not responding
$_cmd shows this output:
$_ns
EOF
+ else
+ echo "INFO: $service_name tcp port $_p is not responding"
+ fi
+
return 1
done
+
+ rm -f "$_ctdb_service_started_file"
+
+ return 0
}
######################################################
@@ -962,6 +990,7 @@ ctdb_service_start ()
service_start "$@" || return $?
ctdb_counter_init "$@"
+ ctdb_check_tcp_init
}
ctdb_service_stop ()