diff options
author | Martin Schwenke <martin@meltin.net> | 2014-12-09 14:50:38 +1100 |
---|---|---|
committer | Amitay Isaacs <amitay@samba.org> | 2015-02-13 07:19:07 +0100 |
commit | 1d6ed91f5518d462ba368bca03be923428710157 (patch) | |
tree | 0244700cc813547b67d96cb5473657eb6cec44b3 /ctdb | |
parent | be19a17faf6da97365c425c5b423e9b74f9c9e0c (diff) | |
download | samba-1d6ed91f5518d462ba368bca03be923428710157.tar.gz samba-1d6ed91f5518d462ba368bca03be923428710157.tar.xz samba-1d6ed91f5518d462ba368bca03be923428710157.zip |
ctdb-recoverd: Simplify ctdb_recovery_lock()
Have it just silently take or fail to take the lock, except on an
unexpected failure (where it should log an error).
This means that when it is called we need to keep the old behaviour
and explicitly release the lock. In do_recovery() the lock is
released and a message is printed before attempting to take the lock.
In the daemon sanity check the lock must be released in the error path
if it is actually taken.
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Diffstat (limited to 'ctdb')
-rw-r--r-- | ctdb/include/ctdb_private.h | 2 | ||||
-rw-r--r-- | ctdb/server/ctdb_recover.c | 46 | ||||
-rw-r--r-- | ctdb/server/ctdb_recoverd.c | 6 |
3 files changed, 22 insertions, 32 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 19fab572a3..7005fd802f 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -1260,7 +1260,7 @@ void set_nonblocking(int fd); void set_close_on_exec(int fd); bool ctdb_recovery_have_lock(struct ctdb_context *ctdb); -bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep); +bool ctdb_recovery_lock(struct ctdb_context *ctdb); void ctdb_recovery_unlock(struct ctdb_context *ctdb); int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file); diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index a4d84fd1a7..2c300ee090 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -675,8 +675,9 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, /* we should not be able to get the lock on the reclock file, as it should be held by the recovery master */ - if (ctdb_recovery_lock(ctdb, false)) { + if (ctdb_recovery_lock(ctdb)) { DEBUG(DEBUG_CRIT,("ERROR: recovery lock file %s not locked when recovering!\n", ctdb->recovery_lock_file)); + ctdb_recovery_unlock(ctdb); cc = 1; } @@ -730,22 +731,16 @@ bool ctdb_recovery_have_lock(struct ctdb_context *ctdb) try and get the recovery lock in shared storage - should only work on the recovery master recovery daemon. Anywhere else is a bug */ -bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep) +bool ctdb_recovery_lock(struct ctdb_context *ctdb) { struct flock lock; - if (keep) { - DEBUG(DEBUG_ERR, ("Take the recovery lock\n")); - } - if (ctdb->recovery_lock_fd != -1) { - close(ctdb->recovery_lock_fd); - ctdb->recovery_lock_fd = -1; - } - - ctdb->recovery_lock_fd = open(ctdb->recovery_lock_file, O_RDWR|O_CREAT, 0600); + ctdb->recovery_lock_fd = open(ctdb->recovery_lock_file, + O_RDWR|O_CREAT, 0600); if (ctdb->recovery_lock_fd == -1) { - DEBUG(DEBUG_ERR,("ctdb_recovery_lock: Unable to open %s - (%s)\n", - ctdb->recovery_lock_file, strerror(errno))); + DEBUG(DEBUG_ERR, + ("ctdb_recovery_lock: Unable to open %s - (%s)\n", + ctdb->recovery_lock_file, strerror(errno))); return false; } @@ -761,26 +756,19 @@ bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep) int saved_errno = errno; close(ctdb->recovery_lock_fd); ctdb->recovery_lock_fd = -1; - if (keep) { - DEBUG(DEBUG_CRIT,("ctdb_recovery_lock: Failed to get " - "recovery lock on '%s' - (%s)\n", - ctdb->recovery_lock_file, - strerror(saved_errno))); + /* Fail silently on these errors, since they indicate + * lock contention, but log an error for any other + * failure. */ + if (saved_errno != EACCES && + saved_errno != EAGAIN) { + DEBUG(DEBUG_ERR,("ctdb_recovery_lock: Failed to get " + "recovery lock on '%s' - (%s)\n", + ctdb->recovery_lock_file, + strerror(saved_errno))); } return false; } - if (!keep) { - close(ctdb->recovery_lock_fd); - ctdb->recovery_lock_fd = -1; - } - - if (keep) { - DEBUG(DEBUG_NOTICE, ("Recovery lock taken successfully\n")); - } - - DEBUG(DEBUG_NOTICE,("ctdb_recovery_lock: Got recovery lock on '%s'\n", ctdb->recovery_lock_file)); - return true; } diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index d1bcd5998c..2045413ca0 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1809,9 +1809,11 @@ static int do_recovery(struct ctdb_recoverd *rec, } if (ctdb->recovery_lock_file != NULL) { - DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n")); + DEBUG(DEBUG_ERR, ("Taking out recovery lock from recovery daemon (%s)\n", ctdb->recovery_lock_file)); start_time = timeval_current(); - if (!ctdb_recovery_lock(ctdb, true)) { + ctdb_recovery_unlock(ctdb); + DEBUG(DEBUG_NOTICE, ("Attempting to take recovery lock\n")); + if (!ctdb_recovery_lock(ctdb)) { if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) { /* If ctdb is trying first recovery, it's * possible that current node does not know yet |