summary | refs | log | tree | commit | diff | stats
path: root/ctdb
diff options
context:
space:
mode:
author: Martin Schwenke <martin@meltin.net> 2015-02-13 20:55:43 +1100
committer: Amitay Isaacs <amitay@samba.org> 2015-03-04 10:42:27 +0100
commit: 500c6e194babe06b6aead7a053a9442c94db6e38 (patch)
tree: 58c0fe5333048032879d8d5f3ff8e88a2841fb09 /ctdb
parent: 032441d9a2974584cde455e4dbd5cc33fe6a23c2 (diff)
download: samba-500c6e194babe06b6aead7a053a9442c94db6e38.tar.gz
samba-500c6e194babe06b6aead7a053a9442c94db6e38.tar.xz
samba-500c6e194babe06b6aead7a053a9442c94db6e38.zip
ctdb-scripts: Change statd-callout to be more scalable
Updating ctdb.tdb on each add-client, del-client and each delete during notify was too ambitious. Persistent transactions do not perform well enough to do this.

Revert to having add-client and del-client create touch files. Each monitor event calls "statd-callout update" to convert touch files into ctdb.tdb records.

Update testcases to do the "update" and add an extra test.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Pair-programmed-with: Amitay Isaacs <amitay@gmail.com>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Diffstat (limited to 'ctdb')
-rwxr-xr-x ctdb/config/events.d/60.ganesha | 1
-rwxr-xr-x ctdb/config/events.d/60.nfs | 1
-rwxr-xr-x ctdb/config/functions | 10
-rwxr-xr-x ctdb/config/statd-callout | 140
-rwxr-xr-x ctdb/tests/eventscripts/statd-callout.001.sh | 1
-rwxr-xr-x ctdb/tests/eventscripts/statd-callout.002.sh | 3
-rwxr-xr-x ctdb/tests/eventscripts/statd-callout.003.sh | 5
-rwxr-xr-x ctdb/tests/eventscripts/statd-callout.004.sh | 1
-rwxr-xr-x ctdb/tests/eventscripts/statd-callout.005.sh | 2
-rwxr-xr-x ctdb/tests/eventscripts/statd-callout.006.sh | 2
-rwxr-xr-x ctdb/tests/eventscripts/statd-callout.007.sh | 16
11 files changed, 123 insertions, 59 deletions
diff --git a/ctdb/config/events.d/60.ganesha b/ctdb/config/events.d/60.ganesha
index 150be1f159..fc170983dc 100755
--- a/ctdb/config/events.d/60.ganesha
+++ b/ctdb/config/events.d/60.ganesha
@@ -221,6 +221,7 @@ case "$1" in
} || exit $?
update_tickles 2049
+ nfs_update_lock_info
# check that statd responds to rpc requests
# if statd is not running we try to restart it
diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs
index c4e798e2ca..d570cd784b 100755
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@@ -91,6 +91,7 @@ case "$1" in
} || exit $?
update_tickles 2049
+ nfs_update_lock_info
nfs_check_rpc_services
diff --git a/ctdb/config/functions b/ctdb/config/functions
index 8c5e472f1a..da726222cb 100755
--- a/ctdb/config/functions
+++ b/ctdb/config/functions
@@ -690,6 +690,16 @@ get_tcp_connections_for_ip ()
{print $4" "$5}'
}
+##################################################################
+# use statd-callout to update NFS lock info
+##################################################################
+nfs_update_lock_info ()
+{
+ if [ -x "$CTDB_BASE/statd-callout" ] ; then
+ "$CTDB_BASE/statd-callout" update
+ fi
+}
+
########################################################
# start/stop the Ganesha nfs service
########################################################
diff --git a/ctdb/config/statd-callout b/ctdb/config/statd-callout
index b92942557f..4a331aca2c 100755
--- a/ctdb/config/statd-callout
+++ b/ctdb/config/statd-callout
@@ -30,7 +30,19 @@ loadconfig nfs
nl="
"
+ctdb_setup_service_state_dir "statd-callout"
+
+cd "$service_state_dir" || \
+ die "Failed to change directory to \"${service_state_dir}\""
+
case "$1" in
+ # Keep a single file to keep track of the last "add-client" or
+ # "del-client". These get pushed to ctdb.tdb during "update",
+ # which will generally be run once each "monitor" cycle. In this
+ # way we avoid scalability problems with flood of persistent
+ # transactions after a "notify" when all the clients re-take their
+ # locks.
+
add-client)
# statd does not tell us to which IP the client connected so
# we must add it to all the IPs that we serve
@@ -38,42 +50,47 @@ case "$1" in
pnn=$(ctdb xpnn | sed -e 's/.*://')
date=$(date '+%s')
ctdb ip -X |
- tail -n +2 | {
- # This all needs to be in the end of the pipe so it
- # doesn't get lost
- items=""
- while IFS="|" read x sip node x ; do
- [ "$node" = "$pnn" ] || continue # not us
- key="statd-state@${sip}@${cip}"
- item="\"${key}\" \"${date}\""
- items="${items}${items:+${nl}}${item}"
- done
- if ! echo "$items" | ctdb ptrans "ctdb.tdb" ; then
- die "Failed to add clients"
- fi
- }
+ tail -n +2 |
+ while IFS="|" read x sip node x ; do
+ [ "$node" = "$pnn" ] || continue # not us
+ key="statd-state@${sip}@${cip}"
+ echo "\"${key}\" \"${date}\"" >"$key"
+ done
;;
- del-client)
+
+ del-client)
# statd does not tell us from which IP the client disconnected
# so we must add it to all the IPs that we serve
cip="$2"
pnn=$(ctdb xpnn | sed -e 's/.*://')
ctdb ip -X |
- tail -n +2 | {
- # This all needs to be in the end of the pipe so it
- # doesn't get lost
- items=""
- while IFS="|" read x sip node x ; do
- [ "$node" = "$pnn" ] || continue # not us
- key="statd-state@${sip}@${cip}"
- item="\"${key}\" \"\""
- items="${items}${items:+${nl}}${item}"
- done
- if ! echo "$items" | ctdb ptrans "ctdb.tdb" ; then
- die "Failed to delete clients"
- fi
- }
+ tail -n +2 |
+ while IFS="|" read x sip node x ; do
+ [ "$node" = "$pnn" ] || continue # not us
+ key="statd-state@${sip}@${cip}"
+ echo "\"${key}\" \"\"" >"$key"
+ done
;;
+
+ update)
+ files=$(echo statd-state@*)
+ if [ "$files" = "statd-state@*" ] ; then
+ # No files!
+ exit 0
+ fi
+ # Filter out lines for any IP addresses that are not currently
+ # hosted public IP addresses.
+ pnn=$(ctdb xpnn | sed -e 's/.*://')
+ ctdb_ips=$(ctdb ip | tail -n +2)
+ sed_expr=$(echo "$ctdb_ips" |
+ awk -v pnn=$pnn 'pnn == $2 { \
+ ip = $1; gsub(/\./, "\\.", ip); \
+ printf "/statd-state@%s@/p\n", ip }')
+ if cat $files | sed -n "$sed_expr" | ctdb ptrans "ctdb.tdb" ; then
+ rm $files
+ fi
+ ;;
+
notify)
# we must restart the lockmanager (on all nodes) so that we get
# a clusterwide grace period (so other clients dont take out
@@ -144,7 +161,8 @@ case "$1" in
# Construct a sed expression to take catdb output and produce pairs of:
# server-IP client-IP
# but only for the server-IPs that are hosted on this node.
- sed_expr=$(ctdb ip | tail -n +2 |
+ ctdb_all_ips=$(ctdb ip -n all | tail -n +2)
+ sed_expr=$(echo "$ctdb_all_ips" |
awk -v pnn=$pnn 'pnn == $2 { \
ip = $1; gsub(/\./, "\\.", ip); \
printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", ip }')
@@ -152,34 +170,42 @@ case "$1" in
statd_state=$(ctdb catdb ctdb.tdb | sed -n "$sed_expr" | sort)
[ -n "$statd_state" ] || exit 0
- # The following is dangerous if this script times out before
- # all of the smnotify commands are run. Revert to individual
- # pdelete commands for now and consider optimising smnotify to
- # read all the data from stdin and then run it in the
- # background.
- #
- # Delete all the items from the TDB
- #if ! echo "$statd_state" | \
- # awk '{ printf "\"statd-state@%s@%s\" \"\"\n", $1, $2 }') | \
- # ctdb ptrans ctdb.tdb ; then
+ prev=""
+ echo "$statd_state" | {
+ # This all needs to be in the same command group at the
+ # end of the pipe so it doesn't get lost when the loop
+ # completes.
+ items=""
+ while read sip cip ; do
+ # Collect item to delete from the DB
+ key="statd-state@${sip}@${cip}"
+ item="\"${key}\" \"\""
+ items="${items}${items:+${nl}}${item}"
- # die "Yikes!"
- #fi
+ # NOTE: Consider optimising smnotify to read all the
+ # data from stdin and then run it in the background.
+
+ # Reset stateval for each serverip
+ [ "$sip" = "$prev" ] || stateval="$state_even"
+ # Send notifies for server shutdown
+ smnotify --client=$cip --ip=$sip --server=$sip --stateval=$stateval
+ smnotify --client=$cip --ip=$sip --server=$NFS_HOSTNAME --stateval=$stateval
+ # Send notifies for server startup
+ stateval=$(($stateval + 1))
+ smnotify --client=$cip --ip=$sip --server=$sip --stateval=$stateval
+ smnotify --client=$cip --ip=$sip --server=$NFS_HOSTNAME --stateval=$stateval
+ done
- prev=""
- echo "$statd_state" |
- while read sip cip ; do
- # Delete the entry from the DB
- ctdb pdelete ctdb.tdb "statd-state@${sip}@${cip}"
- # Reset stateval for each serverip
- [ "$sip" = "$prev" ] || stateval="$state_even"
- # Send notifies for server shutdown
- smnotify --client=$cip --ip=$sip --server=$sip --stateval=$stateval
- smnotify --client=$cip --ip=$sip --server=$NFS_HOSTNAME --stateval=$stateval
- # Send notifies for server startup
- stateval=$(($stateval + 1))
- smnotify --client=$cip --ip=$sip --server=$sip --stateval=$stateval
- smnotify --client=$cip --ip=$sip --server=$NFS_HOSTNAME --stateval=$stateval
- done
+ echo "$items" | ctdb ptrans "ctdb.tdb"
+ }
+
+ # Remove any stale touch files (i.e. for IPs not currently
+ # hosted on this node and created since the last "update").
+ # There's nothing else we can do with them at this stage.
+ echo "$ctdb_all_ips" |
+ awk -v pnn=$pnn 'pnn != $2 { print $1 }' |
+ while read sip ; do
+ rm -f "statd-state@${sip}@"*
+ done
;;
esac
diff --git a/ctdb/tests/eventscripts/statd-callout.001.sh b/ctdb/tests/eventscripts/statd-callout.001.sh
index 5f7b7e281b..29b9fbc8bf 100755
--- a/ctdb/tests/eventscripts/statd-callout.001.sh
+++ b/ctdb/tests/eventscripts/statd-callout.001.sh
@@ -10,5 +10,6 @@ FAKE_DATE_OUTPUT="1234565789"
ok_null
simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
check_ctdb_tdb_statd_state "192.168.123.45"
diff --git a/ctdb/tests/eventscripts/statd-callout.002.sh b/ctdb/tests/eventscripts/statd-callout.002.sh
index f8778f7e98..009da1b0d9 100755
--- a/ctdb/tests/eventscripts/statd-callout.002.sh
+++ b/ctdb/tests/eventscripts/statd-callout.002.sh
@@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "2 x add-client"
+define_test "2 x add-client, update"
setup_ctdb
@@ -11,5 +11,6 @@ FAKE_DATE_OUTPUT="1234565789"
ok_null
simple_test_event "add-client" "192.168.123.45"
simple_test_event "add-client" "192.168.123.46"
+simple_test_event "update"
check_ctdb_tdb_statd_state "192.168.123.45" "192.168.123.46"
diff --git a/ctdb/tests/eventscripts/statd-callout.003.sh b/ctdb/tests/eventscripts/statd-callout.003.sh
index 1319ee4099..ed28de683e 100755
--- a/ctdb/tests/eventscripts/statd-callout.003.sh
+++ b/ctdb/tests/eventscripts/statd-callout.003.sh
@@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "add-client, del-client"
+define_test "add-client, update, del-client, update"
setup_ctdb
@@ -10,6 +10,9 @@ FAKE_DATE_OUTPUT="1234565789"
ok_null
simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
+
simple_test_event "del-client" "192.168.123.45"
+simple_test_event "update"
check_ctdb_tdb_statd_state
diff --git a/ctdb/tests/eventscripts/statd-callout.004.sh b/ctdb/tests/eventscripts/statd-callout.004.sh
index 5702b85290..011ced9578 100755
--- a/ctdb/tests/eventscripts/statd-callout.004.sh
+++ b/ctdb/tests/eventscripts/statd-callout.004.sh
@@ -10,6 +10,7 @@ FAKE_DATE_OUTPUT="1234565789"
ok_null
simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
check_ctdb_tdb_statd_state "192.168.123.45"
diff --git a/ctdb/tests/eventscripts/statd-callout.005.sh b/ctdb/tests/eventscripts/statd-callout.005.sh
index 65e291d088..ceb4445fb7 100755
--- a/ctdb/tests/eventscripts/statd-callout.005.sh
+++ b/ctdb/tests/eventscripts/statd-callout.005.sh
@@ -10,11 +10,13 @@ FAKE_DATE_OUTPUT="1234565789"
ok_null
simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
FAKE_CTDB_PNN=1
ok_null
simple_test_event "add-client" "192.168.123.46"
+simple_test_event "update"
FAKE_CTDB_PNN=0
diff --git a/ctdb/tests/eventscripts/statd-callout.006.sh b/ctdb/tests/eventscripts/statd-callout.006.sh
index df8af8876b..0db86fee0f 100755
--- a/ctdb/tests/eventscripts/statd-callout.006.sh
+++ b/ctdb/tests/eventscripts/statd-callout.006.sh
@@ -10,11 +10,13 @@ FAKE_DATE_OUTPUT="1234565789"
ok_null
simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
FAKE_CTDB_PNN=1
ok_null
simple_test_event "add-client" "192.168.123.46"
+simple_test_event "update"
FAKE_CTDB_PNN=0
diff --git a/ctdb/tests/eventscripts/statd-callout.007.sh b/ctdb/tests/eventscripts/statd-callout.007.sh
new file mode 100755
index 0000000000..32339cd3b7
--- /dev/null
+++ b/ctdb/tests/eventscripts/statd-callout.007.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "add-client, del-client, update"
+
+setup_ctdb
+
+FAKE_DATE_OUTPUT="1234565789"
+
+ok_null
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "del-client" "192.168.123.45"
+simple_test_event "update"
+
+check_ctdb_tdb_statd_state