summaryrefslogtreecommitdiffstats
path: root/ctdb
diff options
context:
space:
mode:
author Amitay Isaacs <amitay@gmail.com> 2015-03-04 15:36:05 +1100
committer Michael Adam <obnox@samba.org> 2015-03-05 12:06:44 +0100
commit 3f97be6d0fc166ccc3c97b7f71a01a4f9adb5ddd (patch)
tree a6464a75a0ffc76d1e0d3febfe41ff4f1a53a491 /ctdb
parent 956d1dbfd91615032de337b0d84b40c16657b8c1 (diff)
download samba-3f97be6d0fc166ccc3c97b7f71a01a4f9adb5ddd.tar.gz
samba-3f97be6d0fc166ccc3c97b7f71a01a4f9adb5ddd.tar.xz
samba-3f97be6d0fc166ccc3c97b7f71a01a4f9adb5ddd.zip
ctdb-locking: Back-off from logging every 10 seconds
If ctdb_lock_helper cannot get a lock within 10 seconds, the ctdb daemon logs a message and invokes an external debug script. This is repeated every 10 seconds. In case of contention or on a loaded system, there can be multiple ctdb_lock_helper processes waiting to get a lock on record(s). For each lock request that takes longer than 10 seconds, the ctdb daemon will flood the log every 10 seconds. Instead of logging aggressively every 10 seconds, relax logging to every 100 seconds once the elapsed time has reached 100 seconds, and to every 1000 seconds once it has reached 1000 seconds. Signed-off-by: Amitay Isaacs <amitay@gmail.com> Reviewed-by: Michael Adam <obnox@samba.org> Autobuild-User(master): Michael Adam <obnox@samba.org> Autobuild-Date(master): Thu Mar 5 12:06:44 CET 2015 on sn-devel-104
Diffstat (limited to 'ctdb')
-rw-r--r-- ctdb/server/ctdb_lock.c 20
1 files changed, 16 insertions, 4 deletions
diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c
index 7959d40fbf..c5a2b98bfe 100644
--- a/ctdb/server/ctdb_lock.c
+++ b/ctdb/server/ctdb_lock.c
@@ -486,6 +486,8 @@ static void ctdb_lock_timeout_handler(struct tevent_context *ev,
struct lock_context *lock_ctx;
struct ctdb_context *ctdb;
pid_t pid;
+ double elapsed_time;
+ int new_timer;
lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
ctdb = lock_ctx->ctdb;
@@ -495,16 +497,17 @@ static void ctdb_lock_timeout_handler(struct tevent_context *ev,
lock_ctx->ttimer = NULL;
return;
}
+
+ elapsed_time = timeval_elapsed(&lock_ctx->start_time);
if (lock_ctx->ctdb_db) {
DEBUG(DEBUG_WARNING,
("Unable to get %s lock on database %s for %.0lf seconds\n",
(lock_ctx->type == LOCK_RECORD ? "RECORD" : "DB"),
- lock_ctx->ctdb_db->db_name,
- timeval_elapsed(&lock_ctx->start_time)));
+ lock_ctx->ctdb_db->db_name, elapsed_time));
} else {
DEBUG(DEBUG_WARNING,
("Unable to get ALLDB locks for %.0lf seconds\n",
- timeval_elapsed(&lock_ctx->start_time)));
+ elapsed_time));
}
/* Fire a child process to find the blocking process. */
@@ -529,11 +532,20 @@ static void ctdb_lock_timeout_handler(struct tevent_context *ev,
" Unable to setup lock debugging - no memory?\n"));
}
+ /* Back-off logging if lock is not obtained for a long time */
+ if (elapsed_time < 100.0) {
+ new_timer = 10;
+ } else if (elapsed_time < 1000.0) {
+ new_timer = 100;
+ } else {
+ new_timer = 1000;
+ }
+
/* reset the timeout timer */
// talloc_free(lock_ctx->ttimer);
lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
lock_ctx,
- timeval_current_ofs(10, 0),
+ timeval_current_ofs(new_timer, 0),
ctdb_lock_timeout_handler,
(void *)lock_ctx);
}