summaryrefslogtreecommitdiffstats
path: root/ctdb
diff options
context:
space:
mode:
authorAndrew Tridgell <tridge@samba.org>2007-04-22 14:26:45 +0200
committerAndrew Tridgell <tridge@samba.org>2007-04-22 14:26:45 +0200
commit107d91e391e48a610146153e8c31304c5e3ceb83 (patch)
tree490a14535f228bb63f0cedea8f413d55c8290f77 /ctdb
parent2a08818e24906618ee8c237cdfe931382384fe02 (diff)
downloadsamba-107d91e391e48a610146153e8c31304c5e3ceb83.tar.gz
samba-107d91e391e48a610146153e8c31304c5e3ceb83.tar.xz
samba-107d91e391e48a610146153e8c31304c5e3ceb83.zip
- when handling a record migration in the lmaster, bypass the usual
  dmaster request stage, and instead directly send a dmaster reply.
  This avoids a race condition where a new call comes in for the same
  record while processing the dmaster request
- don't keep any redirect records during a ctdb call. This prevents a
  memory leak in case of a redirect storm

(This used to be ctdb commit 59889ca0fd606c7d2156839383a09dfc5a2e4853)
Diffstat (limited to 'ctdb')
-rw-r--r--ctdb/common/ctdb.c6
-rw-r--r--ctdb/common/ctdb_call.c116
-rw-r--r--ctdb/include/ctdb_private.h2
-rw-r--r--ctdb/tools/ctdb_dump.c8
4 files changed, 86 insertions, 46 deletions
diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c
index 6bd2fda529..62788d0530 100644
--- a/ctdb/common/ctdb.c
+++ b/ctdb/common/ctdb.c
@@ -376,7 +376,13 @@ static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header
DEBUG(0,("Error copying deferred packet to self\n"));
return;
}
+#if 0
+ /* use this to put packets directly into our recv function */
+ ctdb_recv_pkt(q->ctdb, (uint8_t *)q->hdr, q->hdr->length);
+ talloc_free(q);
+#else
event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
+#endif
}
/*
diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c
index f4505052f8..0145dc6985 100644
--- a/ctdb/common/ctdb_call.c
+++ b/ctdb/common/ctdb_call.c
@@ -188,6 +188,61 @@ static void ctdb_call_send_redirect(struct ctdb_context *ctdb,
talloc_free(r);
}
+
+/*
+ send a dmaster reply (CTDB_REPLY_DMASTER), handing a record over to
+ new_dmaster
+
+ the record is first stored in the local ltdb with header->dmaster set to
+ new_dmaster; a reply packet carrying the record data is then queued for
+ that node, tagged with the given reqid
+
+ returns without sending anything if this node is not the lmaster for the
+ key (a debug message is logged), or if the ltdb store fails (reported
+ through ctdb_fatal)
+
+ caller must have the chainlock before calling this routine. Caller must be
+ the lmaster
+*/
+static void ctdb_send_dmaster_reply(struct ctdb_db_context *ctdb_db,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA key, TDB_DATA data,
+ uint32_t new_dmaster,
+ uint32_t reqid)
+{
+ struct ctdb_context *ctdb = ctdb_db->ctdb;
+ struct ctdb_reply_dmaster *r;
+ int ret, len;
+ TALLOC_CTX *tmp_ctx;
+
+ if (ctdb->vnn != ctdb_lmaster(ctdb, &key)) {
+ DEBUG(0,(__location__ " Caller is not lmaster!\n"));
+ return;
+ }
+
+ header->dmaster = new_dmaster;
+ ret = ctdb_ltdb_store(ctdb_db, key, header, data);
+ if (ret != 0) {
+ ctdb_fatal(ctdb, "ctdb_req_dmaster unable to update dmaster");
+ return;
+ }
+
+ /* put the packet on a temporary context, allowing us to safely free
+ it below even if ctdb_reply_dmaster() has freed it already */
+ tmp_ctx = talloc_new(ctdb);
+
+ /* send the CTDB_REPLY_DMASTER: the packet is the fixed part of
+ struct ctdb_reply_dmaster followed by data.dsize bytes of record
+ data */
+ len = offsetof(struct ctdb_reply_dmaster, data) + data.dsize;
+ r = ctdb->methods->allocate_pkt(tmp_ctx, len);
+ CTDB_NO_MEMORY_FATAL(ctdb, r);
+
+ talloc_set_name_const(r, "reply_dmaster packet");
+ r->hdr.length = len;
+ r->hdr.ctdb_magic = CTDB_MAGIC;
+ r->hdr.ctdb_version = CTDB_VERSION;
+ r->hdr.operation = CTDB_REPLY_DMASTER;
+ r->hdr.destnode = new_dmaster;
+ r->hdr.srcnode = ctdb->vnn;
+ r->hdr.reqid = reqid;
+ r->datalen = data.dsize;
+ memcpy(&r->data[0], data.dptr, data.dsize);
+
+ ctdb_queue_packet(ctdb, &r->hdr);
+
+ talloc_free(tmp_ctx);
+}
+
/*
send a dmaster request (give another node the dmaster for a record)
@@ -203,6 +258,13 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db,
struct ctdb_req_dmaster *r;
struct ctdb_context *ctdb = ctdb_db->ctdb;
int len;
+ uint32_t lmaster = ctdb_lmaster(ctdb, key);
+
+ if (lmaster == ctdb->vnn) {
+ ctdb_send_dmaster_reply(ctdb_db, header, *key, *data,
+ c->hdr.srcnode, c->hdr.reqid);
+ return;
+ }
len = offsetof(struct ctdb_req_dmaster, data) + key->dsize + data->dsize;
r = ctdb->methods->allocate_pkt(ctdb, len);
@@ -212,7 +274,7 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db,
r->hdr.ctdb_magic = CTDB_MAGIC;
r->hdr.ctdb_version = CTDB_VERSION;
r->hdr.operation = CTDB_REQ_DMASTER;
- r->hdr.destnode = ctdb_lmaster(ctdb, key);
+ r->hdr.destnode = lmaster;
r->hdr.srcnode = ctdb->vnn;
r->hdr.reqid = c->hdr.reqid;
r->db_id = c->db_id;
@@ -222,9 +284,7 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db,
memcpy(&r->data[0], key->dptr, key->dsize);
memcpy(&r->data[key->dsize], data->dptr, data->dsize);
- /* XXX - probably not necessary when lmaster==dmaster
- update the ltdb to record the new dmaster */
- header->dmaster = r->hdr.destnode;
+ header->dmaster = c->hdr.srcnode;
ctdb_ltdb_store(ctdb_db, *key, header, *data);
ctdb_queue_packet(ctdb, &r->hdr);
@@ -242,12 +302,10 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db,
void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
struct ctdb_req_dmaster *c = (struct ctdb_req_dmaster *)hdr;
- struct ctdb_reply_dmaster *r;
TDB_DATA key, data, data2;
struct ctdb_ltdb_header header;
struct ctdb_db_context *ctdb_db;
- int ret, len;
- TALLOC_CTX *tmp_ctx;
+ int ret;
key.dptr = c->data;
key.dsize = c->keylen;
@@ -275,43 +333,15 @@ void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr
}
/* its a protocol error if the sending node is not the current dmaster */
- if (header.dmaster != hdr->srcnode &&
- hdr->srcnode != ctdb_lmaster(ctdb_db->ctdb, &key)) {
- ctdb_fatal(ctdb, "dmaster request from non-master");
- return;
- }
-
- header.dmaster = c->dmaster;
- ret = ctdb_ltdb_store(ctdb_db, key, &header, data);
- ctdb_ltdb_unlock(ctdb_db, key);
- if (ret != 0) {
- ctdb_fatal(ctdb, "ctdb_req_dmaster unable to update dmaster");
+ if (header.dmaster != hdr->srcnode) {
+ DEBUG(0,("vnn=%u dmaster request non-master %u dmaster=%u\n",
+ ctdb->vnn, hdr->srcnode, header.dmaster));
+ ctdb_fatal(ctdb, "ctdb_req_dmaster from non-master");
return;
}
- /* put the packet on a temporary context, allowing us to safely free
- it below even if ctdb_reply_dmaster() has freed it already */
- tmp_ctx = talloc_new(ctdb);
-
- /* send the CTDB_REPLY_DMASTER */
- len = offsetof(struct ctdb_reply_dmaster, data) + data.dsize;
- r = ctdb->methods->allocate_pkt(tmp_ctx, len);
- CTDB_NO_MEMORY_FATAL(ctdb, r);
-
- talloc_set_name_const(r, "reply_dmaster packet");
- r->hdr.length = len;
- r->hdr.ctdb_magic = CTDB_MAGIC;
- r->hdr.ctdb_version = CTDB_VERSION;
- r->hdr.operation = CTDB_REPLY_DMASTER;
- r->hdr.destnode = c->dmaster;
- r->hdr.srcnode = ctdb->vnn;
- r->hdr.reqid = hdr->reqid;
- r->datalen = data.dsize;
- memcpy(&r->data[0], data.dptr, data.dsize);
-
- ctdb_queue_packet(ctdb, &r->hdr);
-
- talloc_free(tmp_ctx);
+ ctdb_send_dmaster_reply(ctdb_db, &header, key, data, c->dmaster, hdr->reqid);
+ ctdb_ltdb_unlock(ctdb_db, key);
}
@@ -532,8 +562,6 @@ void ctdb_reply_redirect(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
state = idr_find_type(ctdb->idr, hdr->reqid, struct ctdb_call_state);
if (state == NULL) return;
- talloc_steal(state, c);
-
/* don't allow for too many redirects */
if (state->redirect_count++ == CTDB_MAX_REDIRECT) {
c->dmaster = ctdb_lmaster(ctdb, &state->call.key);
@@ -670,8 +698,10 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd
ctdb_queue_packet(ctdb, &state->c->hdr);
+#if CTDB_REQ_TIMEOUT
event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_REQ_TIMEOUT, 0),
ctdb_call_timeout, state);
+#endif
return state;
}
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index a8f7b48f49..96ca6d04c9 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -196,7 +196,7 @@ struct ctdb_db_context {
}} while (0)
/* arbitrary maximum timeout for ctdb operations */
-#define CTDB_REQ_TIMEOUT 10
+#define CTDB_REQ_TIMEOUT 0
/* max number of redirects before we ask the lmaster */
#define CTDB_MAX_REDIRECT 2
diff --git a/ctdb/tools/ctdb_dump.c b/ctdb/tools/ctdb_dump.c
index 419151c84d..aab1051ea3 100644
--- a/ctdb/tools/ctdb_dump.c
+++ b/ctdb/tools/ctdb_dump.c
@@ -38,6 +38,7 @@ static void usage(void)
static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
+ int *num_nodes = (int *)p;
struct id {
dev_t dev;
ino_t inode;
@@ -51,7 +52,10 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, voi
} else {
keystr = hex_encode(NULL, key.dptr, key.dsize);
}
- printf(" rec %s dmaster=%u\n", keystr, h->dmaster);
+ printf(" rec %s lmaster=%u dmaster=%u\n",
+ keystr,
+ ctdb_hash(&key) % (*num_nodes),
+ h->dmaster);
talloc_free(keystr);
return 0;
}
@@ -103,7 +107,7 @@ int main(int argc, const char *argv[])
}
printf("db %s\n", extra_argv[i]);
- tdb_traverse(db->tdb, traverse_fn, NULL);
+ tdb_traverse(db->tdb, traverse_fn, &extra_argc);
talloc_free(db);
}