diff options
Diffstat (limited to 'ctdb/config/functions')
| -rwxr-xr-x | ctdb/config/functions | 134 |
1 files changed, 128 insertions, 6 deletions
diff --git a/ctdb/config/functions b/ctdb/config/functions index 2668531ca8..b04965281d 100755 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -106,17 +106,139 @@ get_proc () } ###################################################### +# Check that an RPC service is healthy - +# this includes allowing a certain number of failures +# before marking the NFS service unhealthy. +# +# usage: nfs_check_rpc_service SERVICE_NAME [ triple ...] +# +# each triple is a set of 3 arguments: an operator, a +# fail count limit and an action string. +# +# For example: +# +# nfs_check_rpc_service "lockd" \ +# -ge 15 "verbose restart unhealthy" \ +# -eq 10 "restart:bs" +# +# says that if lockd is down for 15 iterations then do +# a verbose restart of lockd and mark the node unhealthy. +# Before this, after 10 iterations of failure, the +# service is restarted silently in the background. +# Order is important: the number of failures need to be +# specified in reverse order because processing stops +# after the first condition that is true. +###################################################### +nfs_check_rpc_service () +{ + _prog_name="$1" ; shift + + _version=1 + _rpc_prog="$_prog_name" + _restart="" + _opts="" + case "$_prog_name" in + knfsd) + _rpc_prog=nfs + _version=3 + _restart="echo 'Trying to restart NFS service'" + _restart="${_restart}; startstop_nfs restart" + ;; + mountd) + _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}" + ;; + rquotad) + _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}" + ;; + lockd) + _rpc_prog=nlockmgr + _version=4 + _restart="echo 'Trying to restart lock manager service'" + _restart="${_restart}; startstop_nfslock restart" + ;; + statd) + _rpc_prog=status + _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}" + _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}" + _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}" + ;; + *) + echo "Internal error: unknown RPC program \"$_prog_name\"." + exit 1 + esac + + _service_name="nfs_${_prog_name}" + + if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then + ctdb_counter_init "$_service_name" + return 0 + fi + + ctdb_counter_incr "$_service_name" + + while [ -n "$3" ] ; do + ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || { + for _action in $3 ; do + case "$_action" in + verbose) + echo "$ctdb_check_rpc_out" + ;; + restart|restart:*) + # No explicit command specified, construct rpc command. + if [ -z "$_restart" ] ; then + _p="rpc.${_prog_name}" + _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'" + _restart="${_restart}; killall -q -9 $_p" + _restart="${_restart}; $_p $_opts" + fi + + # Process restart flags... + _flags="${_action#restart:}" + # There may not have been a colon... + [ "$_flags" != "$_action" ] || _flags="" + # q=quiet - everything to /dev/null + if [ "${_flags#*q}" != "$_flags" ] ; then + _restart="{ ${_restart} ; } >/dev/null 2>&1" + fi + # s=stealthy - last command to /dev/null + if [ "${_flags#*s}" != "$_flags" ] ; then + _restart="${_restart} >/dev/null 2>&1" + fi + # b=background - the whole thing, easy and reliable + if [ "${_flags#*b}" != "$_flags" ] ; then + _restart="{ ${_restart} ; } &" + fi + + # Do it! + eval "${_restart}" + ;; + unhealthy) + exit 1 + ;; + *) + echo "Internal error: unknown action \"$_action\"." + exit 1 + esac + done + + # Only process the first action group. + break + } + shift 3 + done +} + +###################################################### # check that a rpc server is registered with portmap # and responding to requests -# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION +# usage: ctdb_check_rpc SERVICE_NAME VERSION ###################################################### -ctdb_check_rpc() { +ctdb_check_rpc () +{ progname="$1" - prognum="$2" - version="$3" + version="$2" - ctdb_check_rpc_out=$(rpcinfo -u localhost $prognum $version 2>&1) - if [ $? -ne 0 ] ; then + if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then ctdb_check_rpc_out="ERROR: $progname failed RPC check: $ctdb_check_rpc_out" echo "$ctdb_check_rpc_out" |
