diff options
author | Andrew Tridgell <tridge@samba.org> | 2007-04-22 14:26:45 +0200 |
---|---|---|
committer | Andrew Tridgell <tridge@samba.org> | 2007-04-22 14:26:45 +0200 |
commit | 107d91e391e48a610146153e8c31304c5e3ceb83 (patch) | |
tree | 490a14535f228bb63f0cedea8f413d55c8290f77 /ctdb | |
parent | 2a08818e24906618ee8c237cdfe931382384fe02 (diff) | |
download | samba-107d91e391e48a610146153e8c31304c5e3ceb83.tar.gz samba-107d91e391e48a610146153e8c31304c5e3ceb83.tar.xz samba-107d91e391e48a610146153e8c31304c5e3ceb83.zip |
- When handling a record migration on the lmaster, bypass the usual
dmaster request stage and instead send a dmaster reply directly.
This avoids a race condition where a new call comes in for the same
record while the dmaster request is being processed.
- Don't keep any redirect records during a ctdb call. This prevents a
memory leak in the event of a redirect storm.
(This used to be ctdb commit 59889ca0fd606c7d2156839383a09dfc5a2e4853)
Diffstat (limited to 'ctdb')
-rw-r--r-- | ctdb/common/ctdb.c | 6 | ||||
-rw-r--r-- | ctdb/common/ctdb_call.c | 116 | ||||
-rw-r--r-- | ctdb/include/ctdb_private.h | 2 | ||||
-rw-r--r-- | ctdb/tools/ctdb_dump.c | 8 |
4 files changed, 86 insertions, 46 deletions
diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c index 6bd2fda529..62788d0530 100644 --- a/ctdb/common/ctdb.c +++ b/ctdb/common/ctdb.c @@ -376,7 +376,13 @@ static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header DEBUG(0,("Error copying deferred packet to self\n")); return; } +#if 0 + /* use this to put packets directly into our recv function */ + ctdb_recv_pkt(q->ctdb, (uint8_t *)q->hdr, q->hdr->length); + talloc_free(q); +#else event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q); +#endif } /* diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c index f4505052f8..0145dc6985 100644 --- a/ctdb/common/ctdb_call.c +++ b/ctdb/common/ctdb_call.c @@ -188,6 +188,61 @@ static void ctdb_call_send_redirect(struct ctdb_context *ctdb, talloc_free(r); } + +/* + send a dmaster reply + + caller must have the chainlock before calling this routine. Caller must be + the lmaster +*/ +static void ctdb_send_dmaster_reply(struct ctdb_db_context *ctdb_db, + struct ctdb_ltdb_header *header, + TDB_DATA key, TDB_DATA data, + uint32_t new_dmaster, + uint32_t reqid) +{ + struct ctdb_context *ctdb = ctdb_db->ctdb; + struct ctdb_reply_dmaster *r; + int ret, len; + TALLOC_CTX *tmp_ctx; + + if (ctdb->vnn != ctdb_lmaster(ctdb, &key)) { + DEBUG(0,(__location__ " Caller is not lmaster!\n")); + return; + } + + header->dmaster = new_dmaster; + ret = ctdb_ltdb_store(ctdb_db, key, header, data); + if (ret != 0) { + ctdb_fatal(ctdb, "ctdb_req_dmaster unable to update dmaster"); + return; + } + + /* put the packet on a temporary context, allowing us to safely free + it below even if ctdb_reply_dmaster() has freed it already */ + tmp_ctx = talloc_new(ctdb); + + /* send the CTDB_REPLY_DMASTER */ + len = offsetof(struct ctdb_reply_dmaster, data) + data.dsize; + r = ctdb->methods->allocate_pkt(tmp_ctx, len); + CTDB_NO_MEMORY_FATAL(ctdb, r); + + talloc_set_name_const(r, "reply_dmaster packet"); + r->hdr.length = len; + r->hdr.ctdb_magic = CTDB_MAGIC; + 
r->hdr.ctdb_version = CTDB_VERSION; + r->hdr.operation = CTDB_REPLY_DMASTER; + r->hdr.destnode = new_dmaster; + r->hdr.srcnode = ctdb->vnn; + r->hdr.reqid = reqid; + r->datalen = data.dsize; + memcpy(&r->data[0], data.dptr, data.dsize); + + ctdb_queue_packet(ctdb, &r->hdr); + + talloc_free(tmp_ctx); +} + /* send a dmaster request (give another node the dmaster for a record) @@ -203,6 +258,13 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db, struct ctdb_req_dmaster *r; struct ctdb_context *ctdb = ctdb_db->ctdb; int len; + uint32_t lmaster = ctdb_lmaster(ctdb, key); + + if (lmaster == ctdb->vnn) { + ctdb_send_dmaster_reply(ctdb_db, header, *key, *data, + c->hdr.srcnode, c->hdr.reqid); + return; + } len = offsetof(struct ctdb_req_dmaster, data) + key->dsize + data->dsize; r = ctdb->methods->allocate_pkt(ctdb, len); @@ -212,7 +274,7 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db, r->hdr.ctdb_magic = CTDB_MAGIC; r->hdr.ctdb_version = CTDB_VERSION; r->hdr.operation = CTDB_REQ_DMASTER; - r->hdr.destnode = ctdb_lmaster(ctdb, key); + r->hdr.destnode = lmaster; r->hdr.srcnode = ctdb->vnn; r->hdr.reqid = c->hdr.reqid; r->db_id = c->db_id; @@ -222,9 +284,7 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db, memcpy(&r->data[0], key->dptr, key->dsize); memcpy(&r->data[key->dsize], data->dptr, data->dsize); - /* XXX - probably not necessary when lmaster==dmaster - update the ltdb to record the new dmaster */ - header->dmaster = r->hdr.destnode; + header->dmaster = c->hdr.srcnode; ctdb_ltdb_store(ctdb_db, *key, header, *data); ctdb_queue_packet(ctdb, &r->hdr); @@ -242,12 +302,10 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db, void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) { struct ctdb_req_dmaster *c = (struct ctdb_req_dmaster *)hdr; - struct ctdb_reply_dmaster *r; TDB_DATA key, data, data2; struct ctdb_ltdb_header header; struct ctdb_db_context *ctdb_db; - int 
ret, len; - TALLOC_CTX *tmp_ctx; + int ret; key.dptr = c->data; key.dsize = c->keylen; @@ -275,43 +333,15 @@ void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr } /* its a protocol error if the sending node is not the current dmaster */ - if (header.dmaster != hdr->srcnode && - hdr->srcnode != ctdb_lmaster(ctdb_db->ctdb, &key)) { - ctdb_fatal(ctdb, "dmaster request from non-master"); - return; - } - - header.dmaster = c->dmaster; - ret = ctdb_ltdb_store(ctdb_db, key, &header, data); - ctdb_ltdb_unlock(ctdb_db, key); - if (ret != 0) { - ctdb_fatal(ctdb, "ctdb_req_dmaster unable to update dmaster"); + if (header.dmaster != hdr->srcnode) { + DEBUG(0,("vnn=%u dmaster request non-master %u dmaster=%u\n", + ctdb->vnn, hdr->srcnode, header.dmaster)); + ctdb_fatal(ctdb, "ctdb_req_dmaster from non-master"); return; } - /* put the packet on a temporary context, allowing us to safely free - it below even if ctdb_reply_dmaster() has freed it already */ - tmp_ctx = talloc_new(ctdb); - - /* send the CTDB_REPLY_DMASTER */ - len = offsetof(struct ctdb_reply_dmaster, data) + data.dsize; - r = ctdb->methods->allocate_pkt(tmp_ctx, len); - CTDB_NO_MEMORY_FATAL(ctdb, r); - - talloc_set_name_const(r, "reply_dmaster packet"); - r->hdr.length = len; - r->hdr.ctdb_magic = CTDB_MAGIC; - r->hdr.ctdb_version = CTDB_VERSION; - r->hdr.operation = CTDB_REPLY_DMASTER; - r->hdr.destnode = c->dmaster; - r->hdr.srcnode = ctdb->vnn; - r->hdr.reqid = hdr->reqid; - r->datalen = data.dsize; - memcpy(&r->data[0], data.dptr, data.dsize); - - ctdb_queue_packet(ctdb, &r->hdr); - - talloc_free(tmp_ctx); + ctdb_send_dmaster_reply(ctdb_db, &header, key, data, c->dmaster, hdr->reqid); + ctdb_ltdb_unlock(ctdb_db, key); } @@ -532,8 +562,6 @@ void ctdb_reply_redirect(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) state = idr_find_type(ctdb->idr, hdr->reqid, struct ctdb_call_state); if (state == NULL) return; - talloc_steal(state, c); - /* don't allow for too many redirects */ 
if (state->redirect_count++ == CTDB_MAX_REDIRECT) { c->dmaster = ctdb_lmaster(ctdb, &state->call.key); @@ -670,8 +698,10 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd ctdb_queue_packet(ctdb, &state->c->hdr); +#if CTDB_REQ_TIMEOUT event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_REQ_TIMEOUT, 0), ctdb_call_timeout, state); +#endif return state; } diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index a8f7b48f49..96ca6d04c9 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -196,7 +196,7 @@ struct ctdb_db_context { }} while (0) /* arbitrary maximum timeout for ctdb operations */ -#define CTDB_REQ_TIMEOUT 10 +#define CTDB_REQ_TIMEOUT 0 /* max number of redirects before we ask the lmaster */ #define CTDB_MAX_REDIRECT 2 diff --git a/ctdb/tools/ctdb_dump.c b/ctdb/tools/ctdb_dump.c index 419151c84d..aab1051ea3 100644 --- a/ctdb/tools/ctdb_dump.c +++ b/ctdb/tools/ctdb_dump.c @@ -38,6 +38,7 @@ static void usage(void) static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p) { + int *num_nodes = (int *)p; struct id { dev_t dev; ino_t inode; @@ -51,7 +52,10 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, voi } else { keystr = hex_encode(NULL, key.dptr, key.dsize); } - printf(" rec %s dmaster=%u\n", keystr, h->dmaster); + printf(" rec %s lmaster=%u dmaster=%u\n", + keystr, + ctdb_hash(&key) % (*num_nodes), + h->dmaster); talloc_free(keystr); return 0; } @@ -103,7 +107,7 @@ int main(int argc, const char *argv[]) } printf("db %s\n", extra_argv[i]); - tdb_traverse(db->tdb, traverse_fn, NULL); + tdb_traverse(db->tdb, traverse_fn, &extra_argc); talloc_free(db); } |