diff options
author | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2011-08-11 14:01:02 +1000 |
---|---|---|
committer | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2011-08-11 14:01:02 +1000 |
commit | b77a78d809ed16e9314cef95310aa3e75d4fc60b (patch) | |
tree | 8836b545a84c3961c4bea68fff0962651843ef35 | |
parent | 53b956fee78e5448726df1398940535c61d51e44 (diff) | |
parent | 088620b0268063353e706446baa94c17f0a294d8 (diff) | |
download | samba-b77a78d809ed16e9314cef95310aa3e75d4fc60b.tar.gz samba-b77a78d809ed16e9314cef95310aa3e75d4fc60b.tar.xz samba-b77a78d809ed16e9314cef95310aa3e75d4fc60b.zip |
Merge remote branch 'martins/eventscript_infrastructure'
(This used to be ctdb commit 20864822372b6d574c545287002a429b273c4bcc)
-rwxr-xr-x[-rw-r--r--] | ctdb/config/events.d/31.clamd | 7 | ||||
-rwxr-xr-x | ctdb/config/events.d/40.vsftpd | 28 | ||||
-rwxr-xr-x | ctdb/config/events.d/41.httpd | 32 | ||||
-rwxr-xr-x | ctdb/config/events.d/50.samba | 127 | ||||
-rwxr-xr-x | ctdb/config/events.d/60.nfs | 55 | ||||
-rwxr-xr-x | ctdb/config/functions | 178 |
6 files changed, 265 insertions, 162 deletions
diff --git a/ctdb/config/events.d/31.clamd b/ctdb/config/events.d/31.clamd index 73454d7c53..53739e24f3 100644..100755 --- a/ctdb/config/events.d/31.clamd +++ b/ctdb/config/events.d/31.clamd @@ -16,8 +16,11 @@ case $CTDB_INIT_STYLE in ;; esac -service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start" -service_stop="service $service_name stop" +service_start () +{ + service $service_name stop > /dev/null 2>&1 + service $service_name start +} loadconfig diff --git a/ctdb/config/events.d/40.vsftpd b/ctdb/config/events.d/40.vsftpd index 72190db2a6..e6d58c8ec5 100755 --- a/ctdb/config/events.d/40.vsftpd +++ b/ctdb/config/events.d/40.vsftpd @@ -5,9 +5,16 @@ service_name="vsftpd" # make sure the service is stopped first -service_start="service $service_name stop > /dev/null 2>&1 ; service $service_name start" -service_stop="service $service_name stop > /dev/null 2>&1" -service_reconfigure="service $service_name restart" +service_start () +{ + service $service_name stop > /dev/null 2>&1 + service $service_name start +} +service_stop () +{ + service $service_name stop +} + service_fail_limit=2 service_tcp_ports=21 @@ -17,6 +24,8 @@ ctdb_start_stop_service is_ctdb_managed_service || exit 0 +ctdb_service_check_reconfigure + case "$1" in startup) ctdb_service_start @@ -30,20 +39,7 @@ case "$1" in ctdb_service_set_reconfigure ;; - recovered) - # if we have taken or released any ips we must - # restart vsftpd to ensure that all tcp connections are reset - if ctdb_service_needs_reconfigure ; then - ctdb_service_reconfigure - fi - ;; - monitor) - if ctdb_service_needs_reconfigure ; then - ctdb_service_reconfigure - exit 0 - fi - if [ -n "$service_tcp_ports" ] ; then if ctdb_check_tcp_ports $service_tcp_ports ; then ctdb_counter_init diff --git a/ctdb/config/events.d/41.httpd b/ctdb/config/events.d/41.httpd index e94bde3fad..6ae5d61b30 100755 --- a/ctdb/config/events.d/41.httpd +++ b/ctdb/config/events.d/41.httpd @@ -27,9 +27,16 @@ cleanup_httpd_semaphore_leak() { ########## -service_start="cleanup_httpd_semaphore_leak; service $service_name start" -service_stop="service $service_name stop; killall -q -9 $service_name || true" -service_reconfigure="service $service_name restart" +service_start () +{ + cleanup_httpd_semaphore_leak + service $service_name start +} +service_stop () +{ + service $service_name stop + killall -q -9 $service_name || true +} loadconfig @@ -37,6 +44,8 @@ ctdb_start_stop_service is_ctdb_managed_service || exit 0 +ctdb_service_check_reconfigure + case "$1" in startup) ctdb_service_start @@ -47,26 +56,21 @@ case "$1" in ;; monitor) - if ctdb_service_needs_reconfigure ; then - ctdb_service_reconfigure - exit 0 - fi - if ctdb_check_tcp_ports 80 >/dev/null 2>/dev/null ; then ctdb_counter_init else ctdb_counter_incr - ctdb_check_counter_equal 5 || { + ctdb_check_counter warn -eq 2 || { echo "HTTPD is not running. Trying to restart HTTPD." - ctdb_service_stop - ctdb_service_start + service_stop + service_start exit 0 } - ctdb_check_counter_limit 10 quiet|| { + ctdb_check_counter warn -ge 5 || { echo "HTTPD is not running. Trying to restart HTTPD." - ctdb_service_stop - ctdb_service_start + service_stop + service_start exit 1 } fi diff --git a/ctdb/config/events.d/50.samba b/ctdb/config/events.d/50.samba index 3e90742724..1c6822714f 100755 --- a/ctdb/config/events.d/50.samba +++ b/ctdb/config/events.d/50.samba @@ -25,19 +25,23 @@ case $CTDB_INIT_STYLE in esac service_name="samba" -service_start="start_samba" -service_stop="stop_samba" loadconfig -start_samba() { - # create the state directory for samba - mkdir -p $CTDB_VARDIR/state/samba +ctdb_setup_service_state_dir + +service_start () +{ + # If set then we force-start the relevant service. + _service_name="$1" # make sure samba is not already started - [ "$CTDB_MANAGES_SAMBA" = "yes" ] && { + if [ "$_service_name" = "samba" ] || \ + is_ctdb_managed_service "samba" ; then service "$CTDB_SERVICE_SMB" stop > /dev/null 2>&1 - service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1 + if [ -n "$CTDB_SERVICE_NMB" ] ; then + service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1 + fi killall -0 -q smbd && { sleep 1 # make absolutely sure samba is dead @@ -49,11 +53,11 @@ start_samba() { # make absolutely sure samba is dead killall -q -9 nmbd } - } + fi # make sure winbind is not already started - check_ctdb_manages_winbind - [ "$CTDB_MANAGES_WINBIND" = "yes" ] && { + if [ "$_service_name" = "winbind" ] || \ + check_ctdb_manages_winbind ; then service "$CTDB_SERVICE_WINBIND" stop > /dev/null 2>&1 killall -0 -q winbindd && { sleep 1 @@ -61,65 +65,78 @@ start_samba() { killall -q -9 winbindd } - } + fi # start the winbind service - [ "$CTDB_MANAGES_WINBIND" = "yes" ] && { + if [ "$_service_name" = "winbind" ] || \ + check_ctdb_manages_winbind ; then service "$CTDB_SERVICE_WINBIND" start || { - echo failed to start winbind - exit 1 + echo failed to start winbind + exit 1 } - - } + fi # start Samba service. Start it reniced, as under very heavy load # the number of smbd processes will mean that it leaves few cycles for # anything else - [ "$CTDB_MANAGES_SAMBA" = "yes" ] && { + if [ "$_service_name" = "samba" ] || \ + is_ctdb_managed_service "samba" ; then net serverid wipe - nice_service "$CTDB_SERVICE_NMB" start || { + if [ -n "$CTDB_SERVICE_NMB" ] ; then + nice_service "$CTDB_SERVICE_NMB" start || { echo failed to start nmbd exit 1 - } - + } + fi nice_service "$CTDB_SERVICE_SMB" start || { echo failed to start samba exit 1 } - } - return 0 + fi } -stop_samba() { +service_stop () +{ + # If set then we force-stop the relevant service. + _service_name="$1" + # shutdown Samba when ctdb goes down - [ "$CTDB_MANAGES_SAMBA" = "yes" ] && { + if [ "$_service_name" = "samba" ] || \ + is_ctdb_managed_service "samba" ; then service "$CTDB_SERVICE_SMB" stop - service "$CTDB_SERVICE_NMB" stop - } + if [ -n "$CTDB_SERVICE_NMB" ] ; then + service "$CTDB_SERVICE_NMB" stop + fi + fi # stop the winbind service - check_ctdb_manages_winbind - [ "$CTDB_MANAGES_WINBIND" = "yes" ] && { + if [ "$_service_name" = "winbind" ] || \ + check_ctdb_manages_winbind ; then service "$CTDB_SERVICE_WINBIND" stop - } + fi return 0 } +service_reconfigure () +{ + # Samba automatically reloads config - no restart needed. + : +} + # set default samba cleanup period - in minutes [ -z "$SAMBA_CLEANUP_PERIOD" ] && { SAMBA_CLEANUP_PERIOD=10 } # we keep a cached copy of smb.conf here -smbconf_cache="$CTDB_VARDIR/state/samba/smb.conf.cache" +smbconf_cache="$service_state_dir/smb.conf.cache" ############################################# # update the smb.conf cache in the foreground testparm_foreground_update() { - mkdir -p "$CTDB_VARDIR/state/samba" || exit 1 testparm -s 2> /dev/null | egrep -v 'registry.shares.=|include.=' > "$smbconf_cache" } @@ -183,19 +200,27 @@ testparm_cat() { testparm -s "$smbconf_cache" "$@" 2>/dev/null } -# function to see if ctdb manages winbind +# function to see if ctdb manages winbind - this overrides with extra +# logic if $CTDB_MANAGES_WINBIND is not set or null. check_ctdb_manages_winbind() { - [ -z "$CTDB_MANAGES_WINBIND" ] && { - secmode=`testparm_cat --parameter-name=security` - case $secmode in - ADS|DOMAIN) - CTDB_MANAGES_WINBIND="yes"; - ;; - *) - CTDB_MANAGES_WINBIND="no"; - ;; - esac - } + if is_ctdb_managed_service "winbind" ; then + return 0 + elif [ -n "$CTDB_MANAGES_WINBIND" ] ; then + # If this variable is set we want to respect it. We return + # false here because we know it is not set to "yes" - if it + # were then the 1st "if" above would have succeeded. + return 1 + else + _secmode=`testparm_cat --parameter-name=security` + case "$_secmode" in + ADS|DOMAIN) + return 0 + ;; + *) + return 1 + ;; + esac + fi } list_samba_shares () @@ -217,10 +242,8 @@ periodic_cleanup() { ########################### -[ "$1" = "monitor" ] && { ctdb_start_stop_service ctdb_start_stop_service "winbind" -} is_ctdb_managed_service || is_ctdb_managed_service "winbind" || exit 0 @@ -238,16 +261,17 @@ case "$1" in monitor) # Create a dummy file to track when we need to do periodic cleanup # of samba databases - [ -f $CTDB_VARDIR/state/samba/periodic_cleanup ] || { - touch $CTDB_VARDIR/state/samba/periodic_cleanup + periodic_cleanup_file="$service_state_dir/periodic_cleanup" + [ -f "$periodic_cleanup_file" ] || { + touch "$periodic_cleanup_file" } - [ `find $CTDB_VARDIR/state/samba/periodic_cleanup -mmin +$SAMBA_CLEANUP_PERIOD | wc -l` -eq 1 ] && { + [ `find "$periodic_cleanup_file" -mmin +$SAMBA_CLEANUP_PERIOD | wc -l` -eq 1 ] && { # Cleanup the databases periodic_cleanup - touch $CTDB_VARDIR/state/samba/periodic_cleanup + touch "$periodic_cleanup_file" } - [ "$CTDB_MANAGES_SAMBA" = "yes" ] && { + is_ctdb_managed_service "samba" && { [ "$CTDB_SAMBA_SKIP_SHARE_CHECK" = "yes" ] || { testparm_background_update @@ -275,8 +299,7 @@ case "$1" in } # check winbind is OK - check_ctdb_manages_winbind - [ "$CTDB_MANAGES_WINBIND" = "yes" ] && { + check_ctdb_manages_winbind && { ctdb_check_command "winbind" "wbinfo -p" } ;; diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs index c0207eeaed..e77804836b 100755 --- a/ctdb/config/events.d/60.nfs +++ b/ctdb/config/events.d/60.nfs @@ -1,37 +1,49 @@ #!/bin/sh # script to manage nfs in a clustered environment -start_nfs() { - mkdir -p $CTDB_VARDIR/state/nfs - mkdir -p $CTDB_VARDIR/state/statd/ip - startstop_nfs stop - startstop_nfs start - set_proc "sys/net/ipv4/tcp_tw_recycle" 1 -} - . $CTDB_BASE/functions service_name="nfs" -service_start="start_nfs" -service_stop="startstop_nfs stop" -service_reconfigure="startstop_nfs restart" +service_start () +{ + startstop_nfs stop + startstop_nfs start + set_proc "sys/net/ipv4/tcp_tw_recycle" 1 + touch "$service_state_dir/update-trigger" +} +service_stop () +{ + startstop_nfs stop +} +service_reconfigure () +{ + startstop_nfs restart + + # if the ips have been reallocated, we must restart the lockmanager + # across all nodes and ping all statd listeners + [ -x $CTDB_BASE/statd-callout ] && { + $CTDB_BASE/statd-callout notify & + } >/dev/null 2>&1 +} loadconfig [ "$NFS_SERVER_MODE" != "GANESHA" ] || exit 0 +ctdb_setup_service_state_dir + ctdb_start_stop_service is_ctdb_managed_service || exit 0 +ctdb_service_check_reconfigure + case "$1" in init) # read statd from persistent database ;; startup) ctdb_service_start - mkdir -p $CTDB_VARDIR/state/statd - touch $CTDB_VARDIR/state/statd/update-trigger ;; shutdown) @@ -54,11 +66,6 @@ case "$1" in ctdb_check_directories } || exit $? - if ctdb_service_needs_reconfigure ; then - ctdb_service_reconfigure - exit 0 - fi - update_tickles 2049 # check that statd responds to rpc requests @@ -181,23 +188,15 @@ case "$1" in # once every 600 seconds, update the statd state database for which # clients need notifications - LAST_UPDATE=`stat --printf="%Y" $CTDB_VARDIR/state/statd/update-trigger 2>/dev/null` + LAST_UPDATE=`stat --printf="%Y" "$service_state_dir/update-trigger"` CURRENT_TIME=`date +"%s"` [ $CURRENT_TIME -ge $(($LAST_UPDATE + 600)) ] && { - mkdir -p $CTDB_VARDIR/state/statd - touch $CTDB_VARDIR/state/statd/update-trigger + touch "$service_state_dir/update-trigger" $CTDB_BASE/statd-callout updatelocal & $CTDB_BASE/statd-callout updateremote & } ;; - ipreallocated) - # if the ips have been reallocated, we must restart the lockmanager - # across all nodes and ping all statd listeners - [ -x $CTDB_BASE/statd-callout ] && { - $CTDB_BASE/statd-callout notify & - } >/dev/null 2>&1 - ;; *) ctdb_standard_event_handler "$@" ;; diff --git a/ctdb/config/functions b/ctdb/config/functions index 32f11f0690..2668531ca8 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -355,8 +355,8 @@ startstop_nfs() { service nfs start ;; stop) - service nfs stop > /dev/null 2>&1 - service nfslock stop > /dev/null 2>&1 + service nfs stop + service nfslock stop ;; restart) set_proc "fs/nfsd/threads" 0 @@ -425,18 +425,6 @@ startstop_nfslock() { esac } -# better use delete_ip_from_iface() together with add_ip_to_iface -# remove_ip should be removed in future -remove_ip() { - local _ip_maskbits=$1 - local _iface=$2 - local _ip=`echo "$_ip_maskbits" | cut -d '/' -f1` - local _maskbits=`echo "$_ip_maskbits" | cut -d '/' -f2` - - delete_ip_from_iface "$_iface" "$_ip" "$_maskbits" - return $? -} - add_ip_to_iface() { local _iface=$1 @@ -515,16 +503,17 @@ setup_iface_ip_readd_script() # ctdb_check_counter_limit succeeds when count >= <limit> ######################################################## _ctdb_counter_common () { - _counter_file="$ctdb_fail_dir/$service_name" + _service_name="${1:-${service_name}}" + _counter_file="$ctdb_fail_dir/$_service_name" mkdir -p "${_counter_file%/*}" # dirname } ctdb_counter_init () { - _ctdb_counter_common + _ctdb_counter_common "$1" >"$_counter_file" } ctdb_counter_incr () { - _ctdb_counter_common + _ctdb_counter_common "$1" # unary counting! echo -n 1 >> "$_counter_file" @@ -556,6 +545,24 @@ ctdb_check_counter_equal () { fi return 0 } +ctdb_check_counter () { + _msg="${1:-error}" # "error" - anything else is silent on fail + _op="${2:--ge}" # an integer operator supported by test + _limit="${3:-${service_fail_limit}}" + shift 3 + _ctdb_counter_common "$1" + + # unary counting! + _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0) + if [ $_size $_op $_limit ] ; then + if [ "$_msg" = "error" ] ; then + echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy" + exit 1 + else + return 1 + fi + fi +} ######################################################## @@ -638,39 +645,80 @@ ctdb_setstatus () esac } +################################################################## +# Reconfigure a service on demand + +_ctdb_service_reconfigure_common () +{ + _d="$ctdb_status_dir/${1:-${service_name}}" + mkdir -p "$_d" + _ctdb_service_reconfigure_flag="$_d/reconfigure" +} + ctdb_service_needs_reconfigure () { - [ -e "$ctdb_status_dir/$service_name/reconfigure" ] + _ctdb_service_reconfigure_common "$@" + [ -e "$_ctdb_service_reconfigure_flag" ] } ctdb_service_set_reconfigure () { - d="$ctdb_status_dir/$service_name" - mkdir -p "$d" - >"$d/reconfigure" + _ctdb_service_reconfigure_common "$@" + >"$_ctdb_service_reconfigure_flag" } ctdb_service_unset_reconfigure () { - rm -f "$ctdb_status_dir/$service_name/reconfigure" + _ctdb_service_reconfigure_common "$@" + rm -f "$_ctdb_service_reconfigure_flag" } ctdb_service_reconfigure () { echo "Reconfiguring service \"$service_name\"..." - if [ -n "$service_reconfigure" ] ; then - eval $service_reconfigure - else - service "$service_name" restart + ctdb_service_unset_reconfigure "$@" + service_reconfigure "$@" || return $? + ctdb_counter_init "$@" +} + +# Default service_reconfigure() function. +service_reconfigure () +{ + service "${1:-$service_name}" restart +} + +ctdb_service_check_reconfigure () +{ + # Only do this for certain events. + case "$event_name" in + monitor|ipreallocated) : ;; + *) return 0 + esac + + if ctdb_service_needs_reconfigure "$@" ; then + ctdb_service_reconfigure "$@" + + # Fall through to non-monitor events. + [ "$event_name" = "monitor" ] || return 0 + + # We don't want to proceed with the rest of the monitor event + # here, so we exit. However, if we exit 0 then, if the + # service was previously broken, we might return a false + # positive. So we simply retrieve the status of this script + # from the previous monitor loop and exit with that status. + ctdb scriptstatus | \ + grep -q -E "^${script_name}[[:space:]]+Status:OK[[:space:]]" + exit $? fi - ctdb_service_unset_reconfigure - ctdb_counter_init } +################################################################## +# Does CTDB manage this service? - and associated auto-start/stop + ctdb_compat_managed_service () { - if [ "$1" = "yes" ] ; then - t="$t $2 " + if [ "$1" = "yes" -a "$2" = "$_service_name" ] ; then + CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2" fi } @@ -678,8 +726,17 @@ is_ctdb_managed_service () { _service_name="${1:-${service_name}}" + # $t is used just for readability and to allow better accurate + # matching via leading/trailing spaces t=" $CTDB_MANAGED_SERVICES " + # Return 0 if "<space>$_service_name<space>" appears in $t + if [ "${t#* ${_service_name} }" != "${t}" ] ; then + return 0 + fi + + # If above didn't match then update $CTDB_MANAGED_SERVICES for + # backward compatibility and try again. ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd" ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba" ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp" @@ -690,7 +747,9 @@ is_ctdb_managed_service () ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs" ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs-ganesha-gpfs" - # Returns 0 if "<space>$_service_name<space>" appears in $t + t=" $CTDB_MANAGED_SERVICES " + + # Return 0 if "<space>$_service_name<space>" appears in $t [ "${t#* ${_service_name} }" != "${t}" ] } @@ -702,40 +761,59 @@ ctdb_start_stop_service () if is_ctdb_managed_service "$_service_name" ; then if ! is_ctdb_previously_managed_service "$_service_name" ; then - echo "Starting service $_service_name" - ctdb_service_start || exit $? - ctdb_service_managed "$_service_name" - exit 0 + echo "Starting service \"$_service_name\" - now managed" + ctdb_service_start "$_service_name" + exit $? fi else if is_ctdb_previously_managed_service "$_service_name" ; then - echo "Stopping service $_service_name" - ctdb_service_stop || exit $? - ctdb_service_unmanaged "$_service_name" - exit 0 + echo "Stopping service \"$_service_name\" - no longer managed" + ctdb_service_stop "$_service_name" + exit $? fi fi } ctdb_service_start () { - if [ -n "$service_start" ] ; then - eval $service_start || return $? - else - service "$service_name" start || return $? - fi - ctdb_counter_init + # The service is marked managed if we've ever tried to start it. + ctdb_service_managed "$@" + + # Here we only want $1. If no argument is passed then + # service_start needs to know. + service_start "$@" || return $? + + ctdb_counter_init "$@" } ctdb_service_stop () { - if [ -n "$service_stop" ] ; then - eval $service_stop - else - service "$service_name" stop - fi + ctdb_service_unmanaged "$@" + service_stop "$@" +} + +# Default service_start() and service_stop() functions. + +# These may be overridden in an eventscript. When overriding, the +# following convention must be followed. If these functions are +# called with no arguments then they may use internal logic to +# determine whether the service is managed and, therefore, whether +# they should take any action. However, if the service name is +# specified as an argument then an attempt must be made to start or +# stop the service. This is because the auto-start/stop code calls +# them with the service name as an argument. +service_start () +{ + service "${1:-${service_name}}" start +} + +service_stop () +{ + service "${1:-${service_name}}" stop } +################################################################## + ctdb_standard_event_handler () { case "$1" in |