ctdb/config/events.d/60.nfs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

#!/bin/sh
# CTDB event script that manages the NFS service in a clustered
# environment.
#
# CTDB_BASE must name the directory that holds the shared "functions"
# helper library sourced below.

# Quoted so that a CTDB_BASE containing whitespace still resolves to a
# single path.
. "$CTDB_BASE/functions"

# Name of the service handled by this script; presumably read by the
# ctdb_* helpers sourced above (confirm against the functions file).
service_name="nfs"
# Start hook: bring NFS up from a stopped state and then adjust one
# kernel tunable.  The return status is that of the final set_proc call.
service_start() {
    # Stop first so that the subsequent start always begins from a
    # known state.
    startstop_nfs stop
    startstop_nfs start

    # Write 1 to the tcp_tw_recycle tunable through the set_proc helper.
    set_proc "sys/net/ipv4/tcp_tw_recycle" 1
}
# Stop hook: ask the startstop_nfs helper to stop NFS and hand back
# whatever status it reports.
service_stop() {
    startstop_nfs stop
}
# Reconfigure hook: restart NFS and, when the statd-callout helper is
# installed and executable, launch its "notify" action in the
# background with all output discarded.
#
# Returns 0 whether or not the callout exists: the callout is optional,
# so its absence must not be reported as a reconfigure failure.  The
# status of the NFS restart itself is not propagated.
service_reconfigure ()
{
    startstop_nfs restart

    # if the ips have been reallocated, we must restart the lockmanager
    # across all nodes and ping all statd listeners
    #
    # The path is quoted so a CTDB_BASE containing whitespace works.
    if [ -x "$CTDB_BASE/statd-callout" ] ; then
	"$CTDB_BASE/statd-callout" notify >/dev/null 2>&1 &
    fi
}

loadconfig

# Leave immediately, reporting success, when NFS_SERVER_MODE is GANESHA.
[ "$NFS_SERVER_MODE" != "GANESHA" ] || exit 0

ctdb_setup_service_state_dir

statd_update_trigger="$service_state_dir/update-trigger"

# Create the statd update trigger file if it does not exist yet.
#
# An existing file is deliberately left alone: the monitor event uses
# this file's modification time to decide when 600 seconds have passed
# since the last statd update, so refreshing the timestamp on every
# event would stop that periodic update from ever running.
ensure_statd_update_trigger ()
{
    [ -f "$statd_update_trigger" ] || touch "$statd_update_trigger"
}

# We want this file to always exist.  The corner case is when
# auto-start/stop is switched off, NFS is added as a managed service
# some time after ctdbd is started and someone else starts the NFS
# service for us.  In this case this file might not otherwise exist
# when we get to a monitor event.
ensure_statd_update_trigger

# Hand control to the framework's start/stop helper first (defined
# outside this file).
ctdb_start_stop_service

# Nothing further to do, and nothing to report, unless
# is_ctdb_managed_service succeeds.
if ! is_ctdb_managed_service ; then
    exit 0
fi

# Run the framework's reconfigure check before dispatching the event.
ctdb_service_check_reconfigure

# Check that directories for shares actually exist.
#
# Skipped (returning 0) when CTDB_NFS_SKIP_SHARE_CHECK is "yes".
# Otherwise the export paths are piped to ctdb_check_directories and
# its status is returned.
monitor_check_share_directories ()
{
    [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] || return 0

    # Keep only lines starting with "/" and strip the trailing
    # whitespace-separated "name(options)" field from each of them.
    exportfs -v | grep '^/' |
    sed -r -e 's@[[:space:]]+[^[:space:]()]+\([^[:space:]()]+\)$@@' |
    ctdb_check_directories
}

# Run the RPC checks for statd, knfsd, lockd, mountd and rquotad.
#
# The threshold/action arguments are interpreted by
# nfs_check_rpc_service, which is defined outside this file.
monitor_check_rpc_services ()
{
    # check that statd responds to rpc requests
    # if statd is not running we try to restart it
    # we only do this IF we have a rpc.statd command.
    # For platforms where rpc.statd does not exist, we skip
    # the check completely
    p="rpc.statd"
    if command -v "$p" >/dev/null 2>&1 ; then
	nfs_check_rpc_service "statd" \
	    -ge 6 "verbose unhealthy" \
	    -eq 4 "verbose restart" \
	    -eq 2 "restart:bs"
    fi

    # check that NFS responds to rpc requests
    if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
	nfs_check_rpc_service "knfsd" \
	    -ge 6 "verbose unhealthy" \
	    -eq 4 "verbose restart" \
	    -eq 2 "restart:bs"
    fi

    # check that lockd responds to rpc requests
    nfs_check_rpc_service "lockd" \
	-ge 15 "verbose restart unhealthy" \
	-eq 10 "restart:bs"

    # mountd is sometimes not started correctly on RHEL5
    nfs_check_rpc_service "mountd" \
	-ge 10 "verbose restart:b unhealthy" \
	-eq 5 "restart:b"

    # rquotad is sometimes not started correctly on RHEL5
    # not a critical service so we dont flag the node as unhealthy
    nfs_check_rpc_service "rquotad" \
	-gt 0 "verbose restart:b"
}

# Once every 600 seconds, update the statd state database for which
# clients need notifications.
#
# The modification time of $statd_update_trigger records the last
# update.  When it is at least 600 seconds old (or cannot be read, in
# which case it counts as 0) the trigger is touched and the
# "updatelocal" and "updateremote" actions of statd-callout are started
# in the background.
monitor_update_statd_state ()
{
    _last_update=$(stat --printf="%Y" "$statd_update_trigger")
    _now=$(date +"%s")

    if [ "$_now" -ge $((${_last_update:-0} + 600)) ] ; then
	touch "$statd_update_trigger"
	# Paths are quoted so a CTDB_BASE containing whitespace works.
	"$CTDB_BASE/statd-callout" updatelocal &
	"$CTDB_BASE/statd-callout" updateremote &
    fi
}

# Dispatch on the event name passed as the first argument.
case "$1" in
    init)
	# Nothing is done at init; the original note here reads
	# "read statd from persistent database".
	;;

    startup)
	ctdb_service_start
	;;

    shutdown)
	ctdb_service_stop
	;;

    takeip|releaseip)
	# An address moved to or from this node: flag the service for
	# reconfiguration.
	ctdb_service_set_reconfigure
	;;

    monitor)
	# A failed share check ends the event with that failure status.
	monitor_check_share_directories || exit $?

	update_tickles 2049

	monitor_check_rpc_services

	monitor_update_statd_state
	;;

    *)
	ctdb_standard_event_handler "$@"
	;;
esac

# Reached when the selected event handler did not exit on its own:
# report success regardless of the status of the last command run.
exit 0