From 70fa7394faaebebe777d168adc9fc89459d476e8 Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Thu, 3 Feb 2011 12:26:45 +0100 Subject: vacuum: correctly send TRY_DELETE_RECORDS ctrl to all active nodes Originally, the control was sent to all nodes in the vnn_map, but there was something still missing here: When a node cannot become lmaster (via CTDB_CAPABILITY_LMASTER=no), it will not be part of the vnn_map. So such a node would be active but never receive the TRY_DELETE_RECORDS control from a vacuuming run. This is fixed in this change by correctly building the list of active nodes first in the same way that the recovery process does it. (This used to be ctdb commit 49247df4a47a8a107fa7dd7b187e69e243e6bdbe) --- ctdb/server/ctdb_vacuum.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index d6a16f52d6..2c643f72cc 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -290,6 +290,9 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v struct delete_records_list *recs; TDB_DATA indata, outdata; int32_t res; + struct ctdb_node_map *nodemap; + uint32_t *active_nodes; + int num_active_nodes; recs = talloc_zero(vdata, struct delete_records_list); if (recs == NULL) { @@ -315,24 +318,37 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v indata.dptr = (void *)recs->records; /* - * now tell all the other nodes to delete all these records + * now tell all the active nodes to delete all these records * (if possible) */ - for (i = 0; i < ctdb->vnn_map->size; i++) { + + ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), + CTDB_CURRENT_NODE, + recs, /* talloc context */ + &nodemap); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " unable to get node map\n")); + return -1; + } + + active_nodes = list_of_active_nodes(ctdb, nodemap, + nodemap, /* talloc context */ + false /* include self 
*/); + /* yuck! ;-) */ + num_active_nodes = talloc_get_size(active_nodes)/sizeof(*active_nodes); + + for (i = 0; i < num_active_nodes; i++) { struct ctdb_marshall_buffer *records; struct ctdb_rec_data *rec; - if (ctdb->vnn_map->map[i] == ctdb->pnn) { - /* we dont delete the records on the local node just yet */ - continue; - } - - ret = ctdb_control(ctdb, ctdb->vnn_map->map[i], 0, + ret = ctdb_control(ctdb, active_nodes[i], 0, CTDB_CONTROL_TRY_DELETE_RECORDS, 0, indata, recs, &outdata, &res, NULL, NULL); if (ret != 0 || res != 0) { - DEBUG(DEBUG_ERR,("Failed to delete records on node %u\n", ctdb->vnn_map->map[i])); + DEBUG(DEBUG_ERR, ("Failed to delete records on " + "node %u: ret[%d] res[%d]\n", + active_nodes[i], ret, res)); return -1; } @@ -369,6 +385,9 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v } } + /* free nodemap and active_nodes */ + talloc_free(nodemap); + /* * The only records remaining in the tree would be those * records where all other nodes could successfully -- cgit