summary | refs | log | tree | commit | diff | stats
path: root/ctdb
diff options
context:
space:
mode:
author: root <root@rcn1.VSOFS1.COM>, 2009-05-01 01:18:27 +1000
committer: Ronnie Sahlberg <ronniesahlberg@gmail.com>, 2009-05-01 01:17:59 +1000
commit: 6793f077a8df4d83588c420d82beae4d364c6ed1 (patch)
tree: 163c94ce1ce019fe7daa52ca7abce7d9daef58e5 /ctdb
parent: 2e3542b5e5f1344f1531a482e6e3ca0569de4718 (diff)
download: samba-6793f077a8df4d83588c420d82beae4d364c6ed1.tar.gz
download: samba-6793f077a8df4d83588c420d82beae4d364c6ed1.tar.xz
download: samba-6793f077a8df4d83588c420d82beae4d364c6ed1.zip
Add a new tunable, VerifyRecoveryLock, which can be set to 0 to disable the check that the recovery daemon actually holds the recovery lock when performing a recovery.
(This used to be ctdb commit 329df9e47e6ca8ab5143985a999e68f37c6d88a5)
Diffstat (limited to 'ctdb')
-rw-r--r--  ctdb/include/ctdb_private.h   1
-rw-r--r--  ctdb/server/ctdb_recover.c   15
-rw-r--r--  ctdb/server/ctdb_tunables.c   1
3 files changed, 13 insertions, 4 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 2231d33987..eac27f7ed5 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -106,6 +106,7 @@ struct ctdb_tunable {
uint32_t recd_ping_failcount;
uint32_t log_latency_ms;
uint32_t recovery_drop_all_ips;
+ uint32_t verify_recovery_lock;
};
/*
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c
index f9112a3ff4..374b3248e1 100644
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -509,7 +509,7 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even
caused by the cluster filesystem being very slow to
arbitrate locks immediately after a node failure.
*/
- DEBUG(DEBUG_NOTICE,(__location__ " set_recmode timeout - allowing recmode set\n"));
+ DEBUG(DEBUG_ERR,(__location__ " set_recmode child process hung/timedout CFS slow to grant locks? (allowing recmode set anyway)\n"));
state->ctdb->recovery_mode = state->recmode;
ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
talloc_free(state);
@@ -632,11 +632,17 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
state = talloc(ctdb, struct ctdb_set_recmode_state);
CTDB_NO_MEMORY(ctdb, state);
+
+ if (ctdb->tunable.verify_recovery_lock == 0) {
+ /* dont need to verify the reclock file */
+ ctdb->recovery_mode = recmode;
+ return 0;
+ }
+
/* For the rest of what needs to be done, we need to do this in
a child process since
1, the call to ctdb_recovery_lock() can block if the cluster
filesystem is in the process of recovery.
- 2, running of the script may take a while.
*/
ret = pipe(state->fd);
if (ret != 0) {
@@ -657,7 +663,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
char cc = 0;
close(state->fd[0]);
- /* we should not be able to get the lock on the nodes list,
+ /* we should not be able to get the lock on the reclock file,
as it should be held by the recovery master
*/
if (ctdb_recovery_lock(ctdb, false)) {
@@ -669,6 +675,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
/* make sure we die when our parent dies */
while (kill(parent, 0) == 0 || errno != ESRCH) {
sleep(5);
+ write(state->fd[1], &cc, 1);
}
_exit(0);
}
@@ -676,7 +683,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
talloc_set_destructor(state, set_recmode_destructor);
- state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(3, 0),
+ state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(15, 0),
ctdb_set_recmode_timeout, state);
state->fde = event_add_fd(ctdb->ev, state, state->fd[0],
diff --git a/ctdb/server/ctdb_tunables.c b/ctdb/server/ctdb_tunables.c
index 7fa5e808c5..bab9aef25a 100644
--- a/ctdb/server/ctdb_tunables.c
+++ b/ctdb/server/ctdb_tunables.c
@@ -54,6 +54,7 @@ static const struct {
{ "RecdFailCount", 3, offsetof(struct ctdb_tunable, recd_ping_failcount) },
{ "LogLatencyMs", 0, offsetof(struct ctdb_tunable, log_latency_ms) },
{ "RecoveryDropAllIPs", 60, offsetof(struct ctdb_tunable, recovery_drop_all_ips) },
+ { "VerifyRecoveryLock", 1, offsetof(struct ctdb_tunable, verify_recovery_lock) },
};
/*