From 1452b63d27383b64fb83038da0d4da59b6b0dc6a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 16 Aug 2011 13:18:40 +1000 Subject: Eventscripts: remove "return 0" from 50.samba service_stop(). This potentially masks errors and was basically included by accident. Signed-off-by: Martin Schwenke (This used to be ctdb commit e7e4a1b4f31118027fd13a6223192f9957cf2e74) --- ctdb/config/events.d/50.samba | 2 -- 1 file changed, 2 deletions(-) diff --git a/ctdb/config/events.d/50.samba b/ctdb/config/events.d/50.samba index 1c6822714f..bf143bf26e 100755 --- a/ctdb/config/events.d/50.samba +++ b/ctdb/config/events.d/50.samba @@ -115,8 +115,6 @@ service_stop () check_ctdb_manages_winbind ; then service "$CTDB_SERVICE_WINBIND" stop fi - - return 0 } service_reconfigure () -- cgit From 430ca2f60651a3aca264ad6d72d8cdcbac1ac0c4 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 16 Aug 2011 13:28:40 +1000 Subject: Eventscripts - ensure the statd update-trigger file always exists. See the comment in the code for details. Signed-off-by: Martin Schwenke (This used to be ctdb commit 8ee9856996a8ec738e9d3ea7f1561605da526b8c) --- ctdb/config/events.d/60.nfs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs index 19a9ea81a1..87a91df5b5 100755 --- a/ctdb/config/events.d/60.nfs +++ b/ctdb/config/events.d/60.nfs @@ -9,7 +9,6 @@ service_start () startstop_nfs stop startstop_nfs start set_proc "sys/net/ipv4/tcp_tw_recycle" 1 - touch "$service_state_dir/update-trigger" } service_stop () { @@ -32,6 +31,14 @@ loadconfig ctdb_setup_service_state_dir +statd_update_trigger="$service_state_dir/update-trigger" +# We want this file to always exist. The corner case is when +# auto-start/stop is switched off, NFS is added as a managed service +# some time after ctdbd is started and someone else starts the NFS +# service for us. 
In this case this file might not otherwise exist +# when we get to a monitor event. +touch "$statd_update_trigger" + ctdb_start_stop_service is_ctdb_managed_service || exit 0 @@ -105,10 +112,10 @@ case "$1" in # once every 600 seconds, update the statd state database for which # clients need notifications - LAST_UPDATE=`stat --printf="%Y" "$service_state_dir/update-trigger"` + LAST_UPDATE=`stat --printf="%Y" "$statd_update_trigger"` CURRENT_TIME=`date +"%s"` [ $CURRENT_TIME -ge $(($LAST_UPDATE + 600)) ] && { - touch "$service_state_dir/update-trigger" + touch "$statd_update_trigger" $CTDB_BASE/statd-callout updatelocal & $CTDB_BASE/statd-callout updateremote & } -- cgit From 171bef3d6859c671618f4b4846ae876028125c96 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 17 Aug 2011 09:00:46 +1000 Subject: Eventscripts - new function ctdb_set_current_debuglevel() This function ensures that CTDB_CURRENT_DEBUGLEVEL is set. It works like this: 1. If it is already set then do nothing, since it might have been set some other way. The recommended "other way" would be to add a file in rc.local.d/. 2. If it is not set then set it by sourcing /var/ctdb/eventscript_debuglevel. 3. If this file does not exist then create it using output from "ctdb getdebug". If the optional 1st argument is set to "create" then don't source an existing file but create a new one instead - this is useful for creating the file just once in each event run in, say, 00.ctdb. If there's a problem getting the debug level from ctdb then it is silently set to 0 - no use spamming logs if our debug code is broken... 
Signed-off-by: Martin Schwenke (This used to be ctdb commit 93910921c8a25f2b029733cd938069ff7c7bdab7) --- ctdb/config/functions | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ctdb/config/functions b/ctdb/config/functions index b04965281d..26b282aa99 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -37,6 +37,37 @@ loadconfig () { _loadconfig "$@" } +############################################################## +# make sure CTDB_CURRENT_DEBUGLEVEL is set to the desired debug level +# (integer) +# +# If it is already set then do nothing, since it might have been set +# via a file in rc.local.d/. If it is not set then set it by sourcing +# /var/ctdb/eventscript_debuglevel. If this file does not exist then +# create it using output from "ctdb getdebug". If the optional 1st arg +# is "create" then don't source an existing file but create a new one +# instead - this is useful for creating the file just once in each +# event run in 00.ctdb. If there's a problem getting the debug level +# from ctdb then it is silently set to 0 - no use spamming logs if our +# debug code is broken... +ctdb_set_current_debuglevel () +{ + [ -z "$CTDB_CURRENT_DEBUGLEVEL" ] || return 0 + + _f="$CTDB_VARDIR/eventscript_debuglevel" + + if [ "$1" = "create" -o ! -r "$_f" ] ; then + _t=$(ctdb getdebug -Y 2>/dev/null) + # get last field of output + _t="${_t%:}" + _t="${_t##*:}" + # Defaults to 0 + echo "export CTDB_CURRENT_DEBUGLEVEL=\"${_t:-0}\"" >"$_f" + fi + + . 
"$_f" +} + ############################################################## # determine on what type of system (init style) we are running detect_init_style() { -- cgit From 3e1a0528b8d887c498a7925ec0ebf97a53c9b94b Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 17 Aug 2011 09:14:23 +1000 Subject: Eventscripts - conditionally inherit ctdbd debug level in each monitor event Signed-off-by: Martin Schwenke (This used to be ctdb commit a7eebc06f81a7b0a3fba93759bcbdeabc8c2e86e) --- ctdb/config/events.d/00.ctdb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ctdb/config/events.d/00.ctdb b/ctdb/config/events.d/00.ctdb index 31f8fdc79b..321ba9c708 100755 --- a/ctdb/config/events.d/00.ctdb +++ b/ctdb/config/events.d/00.ctdb @@ -63,6 +63,11 @@ case "$1" in update_config_from_tdb & ;; monitor) + # Inherit the debug level from ctdbd on each monitor run. If + # there's a more urgent need then override CTDB_CURRENT_DEBUGLEVEL + # using a file in $CTDB_BASE/rc.local.d/. + ctdb_set_current_debuglevel create + # We should never enter swap, so SwapTotal == SwapFree. [ "$CTDB_CHECK_SWAP_IS_NOT_USED" = "yes" ] && { if [ -n "`grep '^Swap\(Total\|Free\)' /proc/meminfo | uniq -s 10 -u`" ]; then -- cgit From f0f9271301488bbeb4bf4faed2b8c5682166f749 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 17 Aug 2011 09:44:11 +1000 Subject: Eventscripts: add a debug() function and call ctdb_set_current_debuglevel() The debug function passes its arguments to echo if $CTDB_CURRENT_DEBUGLEVEL is >= 4 (i.e. DEBUG). If no args are given then use stdin - this allows the function to be used with here documents. To ensure $CTDB_CURRENT_DEBUGLEVEL is set, ctdb_set_current_debuglevel() is called near the end of the functions file. 
Signed-off-by: Martin Schwenke (This used to be ctdb commit 6143483d9f87322578c00f12081e381f425226ca) --- ctdb/config/functions | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ctdb/config/functions b/ctdb/config/functions index 26b282aa99..c6cddd462d 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -68,6 +68,20 @@ ctdb_set_current_debuglevel () . "$_f" } +debug () +{ + if [ $CTDB_CURRENT_DEBUGLEVEL -ge 4 ] ; then + # If there are arguments then echo them. Otherwise expect to + # use stdin, which allows us to pass lots of debug using a + # here document. + if [ -n "$1" ] ; then + echo "DEBUG: $*" + elif ! tty -s ; then + sed -e 's@^@DEBUG: @' + fi + fi +} + ############################################################## # determine on what type of system (init style) we are running detect_init_style() { @@ -1125,6 +1139,11 @@ update_tickles () done } +# We'll call this here to ensure $CTDB_CURRENT_DEBUGLEVEL is set. +# This gives us a chance to override the debug level using a file in +# $CTDB_BASE/rc.local.d/. +ctdb_set_current_debuglevel + script_name="${0##*/}" # basename service_name="$script_name" # default is just the script name service_fail_limit=1 -- cgit From 5c9fbb55ce1b173646cb789185886fa4a3c56b6f Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 5 Jul 2011 11:32:06 +1000 Subject: Eventscript functions: optimise ctdb_check_tcp_ports() and add debug. ctdb_check_tcp_ports() runs "netstat -a -t -n" in a loop for each port. There are 2 problems with this: * Netstat is run on each loop iteration when it need only be run once. * The -a option is used to list all connections but the function only cares about the listening ports. There may be many thousands of non-listening ports to grep through. This changes ctdb_check_tcp_ports() to run netstat with the -l option instead of the -a option. It also only runs netstat once before the main loop. 
When a port is found to not be listening the output of the netstat command is now dumped to help with debugging. Signed-off-by: Martin Schwenke (This used to be ctdb commit 830355a8b18c53cfcc3ad1e3009bbb1a7a681fa0) --- ctdb/config/functions | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index c6cddd462d..fcb67bb7af 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -324,15 +324,26 @@ ctdb_check_directories() { # check a set of tcp ports # usage: ctdb_check_tcp_ports ###################################################### -ctdb_check_tcp_ports() { - - for p ; do - if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then - if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then - echo "ERROR: $service_name tcp port $p is not responding" - return 1 - fi - fi +ctdb_check_tcp_ports() +{ + _cmd='netstat -l -t -n' + _ns=$($_cmd) + for _p ; do # process each function argument (port) + for _a in '0\.0\.0\.0' '::' ; do + _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN" + if echo "$_ns" | grep -E -q "$_pat" ; then + # We matched the port, so process next port + continue 2 + fi + done + + # We didn't match the port, so flag an error, print some debug + cat < Date: Fri, 5 Aug 2011 16:39:57 +1000 Subject: Eventscripts - weaken TCP port check message if CTDB has just been started. Sometimes smbd and other services can take a while to start, especially when there is a lot of activity after ctdbd has just started. The TCP port check can then pollute the logs with lots of "ERROR" messages and possibly extra debug. This creates a flag file when a service is started (but not restarted) and this flag is removed the first time that TCP port checks succeed for that service. When a port check fails and the flag file still exists, a less extreme "INFO" message is printed rather than the usual "ERROR" message. 
This means that until the node actually becomes healthy we see more friendly messages. The subtext is that we're hearing false positive reports "recreates" of CQ S1024874 (samba stopped responding on port 445) quite often when ctdbd is started. This reduces the chances of people reporting such false recreates... Signed-off-by: Martin Schwenke (This used to be ctdb commit 571865eb6ef847857129d0b1e2ba5fa7254bfe8c) --- ctdb/config/functions | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index fcb67bb7af..e75254a4e3 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -324,8 +324,27 @@ ctdb_check_directories() { # check a set of tcp ports # usage: ctdb_check_tcp_ports ###################################################### + +# This flag file is created when a service is initially started. It +# is deleted the first time TCP port checks for that service succeed. +# Until then ctdb_check_tcp_ports() prints a more subtle "error" +# message if a port check fails. +_ctdb_check_tcp_common () +{ + _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started" +} + +ctdb_check_tcp_init () +{ + _ctdb_check_tcp_common + mkdir -p "${_ctdb_service_started_file%/*}" # dirname + touch "$_ctdb_service_started_file" +} + ctdb_check_tcp_ports() { + _ctdb_check_tcp_common + _cmd='netstat -l -t -n' _ns=$($_cmd) for _p ; do # process each function argument (port) @@ -338,13 +357,22 @@ ctdb_check_tcp_ports() done # We didn't match the port, so flag an error, print some debug - cat < Date: Wed, 17 Aug 2011 10:27:01 +1000 Subject: Eventscripts - ctdb_check_tcp_ports() only prints netstat output if debugging Use the new debug function to conditionally print the netstat output. 
Signed-off-by: Martin Schwenke (This used to be ctdb commit 44c14aeeb11080980fe07c7396d06843a4870747) --- ctdb/config/functions | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index e75254a4e3..617db882a2 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -358,8 +358,8 @@ ctdb_check_tcp_ports() # We didn't match the port, so flag an error, print some debug if [ ! -f "$_ctdb_service_started_file" ] ; then - cat < Date: Wed, 17 Aug 2011 12:12:20 +1000 Subject: Eventscripts - generalise TCP port checking plus new nmap-based checker Split the netstat-specific parts of ctdb_check_tcp_ports() into new function ctdb_check_tcp_ports_netstat(). Implement new ctdb_check_tcp_ports_nmap() function that uses "nmap -PS" to check if the desired ports are listening. ctdb_check_tcp_ports() now uses new configuration variable CTDB_TCP_PORT_CHECKERS to decide which port checkers to try. Default value is currently "nmap netstat". If nmap is not found then this will fall back to netstat - if logging is at debug level this will also fill the logs with messages saying the nmap checker failed. This indicates that either nmap should be installed or the default value of CTDB_TCP_PORT_CHECKERS should be changed (in a configuration file) to avoid trying to use nmap. Signed-off-by: Martin Schwenke (This used to be ctdb commit d9651175b40b9454e7d4e98291955fcf1445085e) --- ctdb/config/functions | 112 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 98 insertions(+), 14 deletions(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index 617db882a2..737c8a7e7c 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -343,10 +343,64 @@ ctdb_check_tcp_init () ctdb_check_tcp_ports() { - _ctdb_check_tcp_common + if [ -z "$1" ] ; then + echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified" + exit 1 + fi + + # Set default value for CTDB_TCP_PORT_CHECKERS if unset. 
+ # If any of these defaults are unsupported then this variable can + # be overridden in /etc/sysconfig/ctdb or via a file in + # /etc/ctdb/rc.local.d/. + : ${CTDB_TCP_PORT_CHECKERS:=nmap netstat} + + for _c in $CTDB_TCP_PORT_CHECKERS ; do + ctdb_check_tcp_ports_$_c "$@" + case "$?" in + 0) + rm -f "$_ctdb_service_started_file" + return 0 + ;; + 1) + _ctdb_check_tcp_common + if [ ! -f "$_ctdb_service_started_file" ] ; then + echo "ERROR: $service_name tcp port $_p is not responding" + debug <&1) + if [ $? -eq 127 ] ; then + # netstat probably not installed - unlikely? + ctdb_check_tcp_ports_debug="$_ns" + return 127 + fi + for _p ; do # process each function argument (port) for _a in '0\.0\.0\.0' '::' ; do _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN" @@ -356,21 +410,51 @@ ctdb_check_tcp_ports() fi done - # We didn't match the port, so flag an error, print some debug - if [ ! -f "$_ctdb_service_started_file" ] ; then - echo "ERROR: $service_name tcp port $_p is not responding" -debug <&1) + if [ $? -eq 127 ] ; then + # nmap probably not installed + ctdb_check_tcp_ports_debug="$_nmap_out" + return 127 + fi + + # get the port-related output + _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p') + + for _p ; do + # looking for something like this: + # 445/open/tcp//microsoft-ds/// + # possibly followed by a comma + _t="$_p/open/tcp//" + case "$_port_info" in + # The info we're after must be either at the beginning of + # the string or it must follow a space. + $_t*|*\ $_t*) : ;; + *) + # Nope, flag an error... + ctdb_check_tcp_ports_debug="$_cmd shows this output: +$_nmap_out" + return 1 + esac + done return 0 } -- cgit From 6e7dbf05431c99e7022f025823bf28fc2b042f22 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 17 Aug 2011 14:02:45 +1000 Subject: Eventscripts - new default TCP port checker using "ctdb checktcpport" New function ctdb_check_tcp_ports_ctdb(). This should be fast... 
and is now the default checker. If it fails in an unexpected way we fall back to the nmap and netstat checkers. Signed-off-by: Martin Schwenke (This used to be ctdb commit a1e16a707ce204817531a61455000361f972080a) --- ctdb/config/functions | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index 737c8a7e7c..f65690359b 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -352,7 +352,7 @@ ctdb_check_tcp_ports() # If any of these defaults are unsupported then this variable can # be overridden in /etc/sysconfig/ctdb or via a file in # /etc/ctdb/rc.local.d/. - : ${CTDB_TCP_PORT_CHECKERS:=nmap netstat} + : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat} for _c in $CTDB_TCP_PORT_CHECKERS ; do ctdb_check_tcp_ports_$_c "$@" @@ -459,6 +459,33 @@ $_nmap_out" return 0 } +# Use the new "ctdb checktcpport" command to check the port. +# This is very cheap. +ctdb_check_tcp_ports_ctdb () +{ + for _p ; do # process each function argument (port) + _cmd="ctdb checktcpport $_p" + _out=$($_cmd 2>&1) + case "$?" in + 0) + ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port" + return 1 + ;; + 98) + # Couldn't bind, something already listening, next port... + continue + ;; + *) + ctdb_check_tcp_ports_debug="$_cmd (exited with $?) with output: +$_out" + # assume not implemented + return 127 + esac + done + + return 0 +} + ###################################################### # check a unix socket # usage: ctdb_check_unix_socket SERVICE_NAME -- cgit