diff options
-rw-r--r-- | ctdb/client/ctdb_client.c | 191 | ||||
-rw-r--r-- | ctdb/config/ctdb.sysconfig | 4 | ||||
-rw-r--r-- | ctdb/include/ctdb.h | 11 | ||||
-rw-r--r-- | ctdb/include/ctdb_private.h | 24 | ||||
-rw-r--r-- | ctdb/server/ctdb_control.c | 5 | ||||
-rw-r--r-- | ctdb/server/ctdb_recover.c | 160 | ||||
-rw-r--r-- | ctdb/server/ctdb_recoverd.c | 222 | ||||
-rw-r--r-- | ctdb/server/ctdb_takeover.c | 57 | ||||
-rwxr-xr-x | ctdb/tests/events.d/00.test | 6 | ||||
-rw-r--r-- | ctdb/tools/ctdb_vacuum.c | 101 |
10 files changed, 519 insertions, 262 deletions
diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 677e02da19..fdd2b99f80 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -2486,3 +2486,194 @@ int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct time return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime); } +/* + send a control to execute the "recovered" event script on a node + */ +int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode) +{ + int ret; + int32_t status; + + ret = ctdb_control(ctdb, destnode, 0, + CTDB_CONTROL_END_RECOVERY, 0, tdb_null, + NULL, NULL, &status, &timeout, NULL); + if (ret != 0 || status != 0) { + DEBUG(0,(__location__ " ctdb_control for end_recovery failed\n")); + return -1; + } + + return 0; +} + +/* + callback for the async helpers used when sending the same control + to multiple nodes in parallell. +*/ +static void async_callback(struct ctdb_client_control_state *state) +{ + struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data); + int ret; + int32_t res; + + /* one more node has responded with recmode data */ + data->count--; + + /* if we failed to push the db, then return an error and let + the main loop try again. 
+ */ + if (state->state != CTDB_CONTROL_DONE) { + if ( !data->dont_log_errors) { + DEBUG(0,("Async operation failed with state %d\n", state->state)); + } + data->fail_count++; + return; + } + + state->async.fn = NULL; + + ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL); + if ((ret != 0) || (res != 0)) { + if ( !data->dont_log_errors) { + DEBUG(0,("Async operation failed with ret=%d res=%d\n", ret, (int)res)); + } + data->fail_count++; + } +} + + +void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state) +{ + /* set up the callback functions */ + state->async.fn = async_callback; + state->async.private_data = data; + + /* one more control to wait for to complete */ + data->count++; +} + + +/* wait for up to the maximum number of seconds allowed + or until all nodes we expect a response from has replied +*/ +int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data) +{ + while (data->count > 0) { + event_loop_once(ctdb->ev); + } + if (data->fail_count != 0) { + if (!data->dont_log_errors) { + DEBUG(0,("Async wait failed - fail_count=%u\n", + data->fail_count)); + } + return -1; + } + return 0; +} + + +/* + perform a simple control on the listed nodes + The control cannot return data + */ +int ctdb_client_async_control(struct ctdb_context *ctdb, + enum ctdb_controls opcode, + uint32_t *nodes, + struct timeval timeout, + bool dont_log_errors, + TDB_DATA data) +{ + struct client_async_data *async_data; + struct ctdb_client_control_state *state; + int j, num_nodes; + + async_data = talloc_zero(ctdb, struct client_async_data); + CTDB_NO_MEMORY_FATAL(ctdb, async_data); + async_data->dont_log_errors = dont_log_errors; + + num_nodes = talloc_get_size(nodes) / sizeof(uint32_t); + + /* loop over all nodes and send an async control to each of them */ + for (j=0; j<num_nodes; j++) { + uint32_t pnn = nodes[j]; + + state = ctdb_control_send(ctdb, pnn, 0, opcode, + 0, data, async_data, 
&timeout, NULL); + if (state == NULL) { + DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode)); + talloc_free(async_data); + return -1; + } + + ctdb_client_async_add(async_data, state); + } + + if (ctdb_client_async_wait(ctdb, async_data) != 0) { + talloc_free(async_data); + return -1; + } + + talloc_free(async_data); + return 0; +} + +uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb, + struct ctdb_vnn_map *vnn_map, + TALLOC_CTX *mem_ctx, + bool include_self) +{ + int i, j, num_nodes; + uint32_t *nodes; + + for (i=num_nodes=0;i<vnn_map->size;i++) { + if (vnn_map->map[i] == ctdb->pnn && !include_self) { + continue; + } + num_nodes++; + } + + nodes = talloc_array(mem_ctx, uint32_t, num_nodes); + CTDB_NO_MEMORY_FATAL(ctdb, nodes); + + for (i=j=0;i<vnn_map->size;i++) { + if (vnn_map->map[i] == ctdb->pnn && !include_self) { + continue; + } + nodes[j++] = vnn_map->map[i]; + } + + return nodes; +} + +uint32_t *list_of_active_nodes(struct ctdb_context *ctdb, + struct ctdb_node_map *node_map, + TALLOC_CTX *mem_ctx, + bool include_self) +{ + int i, j, num_nodes; + uint32_t *nodes; + + for (i=num_nodes=0;i<node_map->num;i++) { + if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) { + continue; + } + if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) { + continue; + } + num_nodes++; + } + + nodes = talloc_array(mem_ctx, uint32_t, num_nodes); + CTDB_NO_MEMORY_FATAL(ctdb, nodes); + + for (i=j=0;i<node_map->num;i++) { + if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) { + continue; + } + if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) { + continue; + } + nodes[j++] = node_map->nodes[i].pnn; + } + + return nodes; +} diff --git a/ctdb/config/ctdb.sysconfig b/ctdb/config/ctdb.sysconfig index f236cda6e7..9306884b64 100644 --- a/ctdb/config/ctdb.sysconfig +++ b/ctdb/config/ctdb.sysconfig @@ -42,10 +42,6 @@ # default is to not manage Samba # CTDB_MANAGES_SAMBA=yes -# should ctdb manage starting/stopping the http service for you? 
-# default is to not manage http -# CTDB_MANAGES_HTTPD=yes - # should ctdb manage starting/stopping Winbind service? # if left comented out then it will be autodetected based on smb.conf # CTDB_MANAGES_WINBIND=yes diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index eee6983417..b779b94dcd 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -499,4 +499,15 @@ struct ctdb_client_control_state *ctdb_ctrl_uptime_send(struct ctdb_context *ctd int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime); +int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode); + +uint32_t *list_of_active_nodes(struct ctdb_context *ctdb, + struct ctdb_node_map *node_map, + TALLOC_CTX *mem_ctx, + bool include_self); +uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb, + struct ctdb_vnn_map *vnn_map, + TALLOC_CTX *mem_ctx, + bool include_self); + #endif diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 57501fc68a..ab875924fa 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -488,6 +488,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_WIPE_DATABASE = 67, CTDB_CONTROL_DELETE_RECORD = 68, CTDB_CONTROL_UPTIME = 69, + CTDB_CONTROL_START_RECOVERY = 70, + CTDB_CONTROL_END_RECOVERY = 71, }; /* @@ -1082,6 +1084,12 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA indata, bool *async_reply); +int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + bool *async_reply); +int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + bool *async_reply); struct ctdb_public_ip { uint32_t pnn; @@ -1221,4 +1229,20 @@ void ctdb_unblock_signal(int signum); int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb); int ctdb_set_child_logging(struct ctdb_context *ctdb); + 
+struct client_async_data { + bool dont_log_errors; + uint32_t count; + uint32_t fail_count; +}; +void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state); +int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data); +int ctdb_client_async_control(struct ctdb_context *ctdb, + enum ctdb_controls opcode, + uint32_t *nodes, + struct timeval timeout, + bool dont_log_errors, + TDB_DATA data); + + #endif diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 01a77fe887..884ed69177 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -355,6 +355,11 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_UPTIME: return ctdb_control_uptime(ctdb, outdata); + case CTDB_CONTROL_START_RECOVERY: + return ctdb_control_start_recovery(ctdb, c, async_reply); + + case CTDB_CONTROL_END_RECOVERY: + return ctdb_control_end_recovery(ctdb, c, async_reply); default: DEBUG(0,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 65ad471564..b239554a02 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -397,27 +397,6 @@ struct ctdb_set_recmode_state { }; /* - called when the 'recovered' event script has finished - */ -static void ctdb_recovered_callback(struct ctdb_context *ctdb, int status, void *p) -{ - struct ctdb_set_recmode_state *state = talloc_get_type(p, struct ctdb_set_recmode_state); - - ctdb_enable_monitoring(state->ctdb); - - if (status == 0) { - ctdb->recovery_mode = state->recmode; - } else { - DEBUG(0,(__location__ " recovered event script failed (status %d)\n", status)); - } - - ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); - talloc_free(state); - - gettimeofday(&ctdb->last_recovery_time, NULL); -} - -/* called if our set_recmode child times out. this would happen if ctdb_recovery_lock() would block. 
*/ @@ -473,23 +452,11 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde, return; } + state->ctdb->recovery_mode = state->recmode; - ctdb_disable_monitoring(state->ctdb); - - /* call the events script to tell all subsystems that we have recovered */ - ret = ctdb_event_script_callback(state->ctdb, - timeval_current_ofs(state->ctdb->tunable.script_timeout, 0), - state, - ctdb_recovered_callback, - state, "recovered"); - - if (ret != 0) { - ctdb_enable_monitoring(state->ctdb); - - ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "failed to run eventscript from set_recmode"); - talloc_free(state); - return; - } + ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL); + talloc_free(state); + return; } /* @@ -742,3 +709,122 @@ int32_t ctdb_control_delete_record(struct ctdb_context *ctdb, TDB_DATA indata) free(data.dptr); return 0; } + + +struct recovery_callback_state { + struct ctdb_req_control *c; +}; + + +/* + called when the 'recovered' event script has finished + */ +static void ctdb_end_recovery_callback(struct ctdb_context *ctdb, int status, void *p) +{ + struct recovery_callback_state *state = talloc_get_type(p, struct recovery_callback_state); + + ctdb_enable_monitoring(ctdb); + + if (status != 0) { + DEBUG(0,(__location__ " recovered event script failed (status %d)\n", status)); + } + + ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); + talloc_free(state); + + gettimeofday(&ctdb->last_recovery_time, NULL); +} + +/* + recovery has finished + */ +int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + bool *async_reply) +{ + int ret; + struct recovery_callback_state *state; + + DEBUG(0,("Recovery has finished\n")); + + state = talloc(ctdb, struct recovery_callback_state); + CTDB_NO_MEMORY(ctdb, state); + + state->c = talloc_steal(state, c); + + ctdb_disable_monitoring(ctdb); + + ret = ctdb_event_script_callback(ctdb, + 
timeval_current_ofs(ctdb->tunable.script_timeout, 0), + state, + ctdb_end_recovery_callback, + state, "recovered"); + + if (ret != 0) { + ctdb_enable_monitoring(ctdb); + + DEBUG(0,(__location__ " Failed to end recovery\n")); + talloc_free(state); + return -1; + } + + /* tell the control that we will be reply asynchronously */ + *async_reply = true; + return 0; +} + +/* + called when the 'startrecovery' event script has finished + */ +static void ctdb_start_recovery_callback(struct ctdb_context *ctdb, int status, void *p) +{ + struct recovery_callback_state *state = talloc_get_type(p, struct recovery_callback_state); + + ctdb_enable_monitoring(ctdb); + + if (status != 0) { + DEBUG(0,(__location__ " startrecovery event script failed (status %d)\n", status)); + } + + ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); + talloc_free(state); +} + +/* + start a recuvery + */ +int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + bool *async_reply) +{ + int ret; + struct recovery_callback_state *state; + + DEBUG(0,("Recovery has started\n")); + + state = talloc(ctdb, struct recovery_callback_state); + CTDB_NO_MEMORY(ctdb, state); + + state->c = talloc_steal(state, c); + + ctdb_disable_monitoring(ctdb); + + ret = ctdb_event_script_callback(ctdb, + timeval_current_ofs(ctdb->tunable.script_timeout, 0), + state, + ctdb_start_recovery_callback, + state, "startrecovery"); + + if (ret != 0) { + ctdb_enable_monitoring(ctdb); + + DEBUG(0,(__location__ " Failed to start recovery\n")); + talloc_free(state); + return -1; + } + + /* tell the control that we will be reply asynchronously */ + *async_reply = true; + return 0; +} + diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index c13136e848..8595706cc0 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -58,66 +58,6 @@ struct ctdb_recoverd { #define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0) -struct 
async_data { - uint32_t count; - uint32_t fail_count; -}; - -static void async_callback(struct ctdb_client_control_state *state) -{ - struct async_data *data = talloc_get_type(state->async.private_data, struct async_data); - int ret; - int32_t res; - - /* one more node has responded with recmode data */ - data->count--; - - /* if we failed to push the db, then return an error and let - the main loop try again. - */ - if (state->state != CTDB_CONTROL_DONE) { - DEBUG(0,("Async operation failed with state %d\n", state->state)); - data->fail_count++; - return; - } - - state->async.fn = NULL; - - ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL); - if ((ret != 0) || (res != 0)) { - DEBUG(0,("Async operation failed with ret=%d res=%d\n", ret, (int)res)); - data->fail_count++; - } -} - - -static void async_add(struct async_data *data, struct ctdb_client_control_state *state) -{ - /* set up the callback functions */ - state->async.fn = async_callback; - state->async.private_data = data; - - /* one more control to wait for to complete */ - data->count++; -} - - -/* wait for up to the maximum number of seconds allowed - or until all nodes we expect a response from has replied -*/ -static int async_wait(struct ctdb_context *ctdb, struct async_data *data) -{ - while (data->count > 0) { - event_loop_once(ctdb->ev); - } - if (data->fail_count != 0) { - DEBUG(0,("Async wait failed - fail_count=%u\n", data->fail_count)); - return -1; - } - return 0; -} - - /* unban a node */ @@ -255,50 +195,49 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_ enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED}; -/* - perform a simple control on all active nodes. 
The control cannot return data +/* + run the "recovered" eventscript on all nodes */ -static int async_control_on_active_nodes(struct ctdb_context *ctdb, enum ctdb_controls opcode, - struct ctdb_node_map *nodemap, TDB_DATA data, bool include_self) +static int run_recovered_eventscript(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) { - struct async_data *async_data; - struct ctdb_client_control_state *state; - int j; - struct timeval timeout = CONTROL_TIMEOUT(); - - async_data = talloc_zero(ctdb, struct async_data); - CTDB_NO_MEMORY_FATAL(ctdb, async_data); + TALLOC_CTX *tmp_ctx; - /* loop over all active nodes and send an async control to each of them */ - for (j=0; j<nodemap->num; j++) { - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; - } - if (nodemap->nodes[j].pnn == ctdb->pnn && !include_self) { - continue; - } - state = ctdb_control_send(ctdb, nodemap->nodes[j].pnn, 0, opcode, - 0, data, async_data, &timeout, NULL); - if (state == NULL) { - DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode)); - talloc_free(async_data); - return -1; - } - - async_add(async_data, state); - } + tmp_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, tmp_ctx); - if (async_wait(ctdb, async_data) != 0) { - DEBUG(0,(__location__ " Failed async control %u\n", (unsigned)opcode)); - talloc_free(async_data); + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_END_RECOVERY, + list_of_active_nodes(ctdb, nodemap, tmp_ctx, true), + CONTROL_TIMEOUT(), false, tdb_null) != 0) { + DEBUG(0, (__location__ " Unable to run the 'recovered' event. 
Recovery failed.\n")); + talloc_free(tmp_ctx); return -1; } - talloc_free(async_data); + talloc_free(tmp_ctx); return 0; } +/* + run the "startrecovery" eventscript on all nodes + */ +static int run_startrecovery_eventscript(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) +{ + TALLOC_CTX *tmp_ctx; + tmp_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, tmp_ctx); + + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_START_RECOVERY, + list_of_active_nodes(ctdb, nodemap, tmp_ctx, true), + CONTROL_TIMEOUT(), false, tdb_null) != 0) { + DEBUG(0, (__location__ " Unable to run the 'startrecovery' event. Recovery failed.\n")); + talloc_free(tmp_ctx); + return -1; + } + + talloc_free(tmp_ctx); + return 0; +} /* change recovery mode on all nodes @@ -306,12 +245,21 @@ static int async_control_on_active_nodes(struct ctdb_context *ctdb, enum ctdb_co static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode) { TDB_DATA data; + uint32_t *nodes; + TALLOC_CTX *tmp_ctx; + + tmp_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, tmp_ctx); + + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); /* freeze all nodes */ if (rec_mode == CTDB_RECOVERY_ACTIVE) { - if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_FREEZE, - nodemap, tdb_null, true) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE, + nodes, CONTROL_TIMEOUT(), + false, tdb_null) != 0) { DEBUG(0, (__location__ " Unable to freeze nodes. Recovery failed.\n")); + talloc_free(tmp_ctx); return -1; } } @@ -320,20 +268,25 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no data.dsize = sizeof(uint32_t); data.dptr = (unsigned char *)&rec_mode; - if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_SET_RECMODE, - nodemap, data, true) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMODE, + nodes, CONTROL_TIMEOUT(), + false, data) != 0) { DEBUG(0, (__location__ " Unable to set recovery mode. 
Recovery failed.\n")); + talloc_free(tmp_ctx); return -1; } if (rec_mode == CTDB_RECOVERY_NORMAL) { - if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_THAW, - nodemap, tdb_null, true) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW, + nodes, CONTROL_TIMEOUT(), + false, tdb_null) != 0) { DEBUG(0, (__location__ " Unable to thaw nodes. Recovery failed.\n")); + talloc_free(tmp_ctx); return -1; } } + talloc_free(tmp_ctx); return 0; } @@ -343,16 +296,23 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t pnn) { TDB_DATA data; + TALLOC_CTX *tmp_ctx; + + tmp_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, tmp_ctx); data.dsize = sizeof(uint32_t); data.dptr = (unsigned char *)&pnn; - if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_SET_RECMASTER, - nodemap, data, true) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMASTER, + list_of_active_nodes(ctdb, nodemap, tmp_ctx, true), + CONTROL_TIMEOUT(), false, data) != 0) { DEBUG(0, (__location__ " Unable to set recmaster. 
Recovery failed.\n")); + talloc_free(tmp_ctx); return -1; } + talloc_free(tmp_ctx); return 0; } @@ -1141,6 +1101,10 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid, struct recdb_data params; struct ctdb_control_pulldb_reply *recdata; TDB_DATA outdata; + TALLOC_CTX *tmp_ctx; + + tmp_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, tmp_ctx); recdata = talloc_zero(recdb, struct ctdb_control_pulldb_reply); CTDB_NO_MEMORY(ctdb, recdata); @@ -1155,12 +1119,14 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid, if (tdb_traverse_read(recdb->tdb, traverse_recdb, &params) == -1) { DEBUG(0,(__location__ " Failed to traverse recdb database\n")); talloc_free(params.recdata); + talloc_free(tmp_ctx); return -1; } if (params.failed) { DEBUG(0,(__location__ " Failed to traverse recdb database\n")); talloc_free(params.recdata); + talloc_free(tmp_ctx); return -1; } @@ -1169,9 +1135,12 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid, outdata.dptr = (void *)recdata; outdata.dsize = params.len; - if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_PUSH_DB, nodemap, outdata, true) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_PUSH_DB, + list_of_active_nodes(ctdb, nodemap, tmp_ctx, true), + CONTROL_TIMEOUT(), false, outdata) != 0) { DEBUG(0,(__location__ " Failed to push recdb records to nodes for db 0x%x\n", dbid)); talloc_free(recdata); + talloc_free(tmp_ctx); return -1; } @@ -1179,6 +1148,7 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid, dbid, recdata->count)); talloc_free(recdata); + talloc_free(tmp_ctx); return 0; } @@ -1221,9 +1191,11 @@ static int recover_database(struct ctdb_recoverd *rec, data.dptr = (void *)&w; data.dsize = sizeof(w); - if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_WIPE_DATABASE, - nodemap, data, true) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_WIPE_DATABASE, + list_of_active_nodes(ctdb, nodemap, recdb, true), 
CONTROL_TIMEOUT(), false, data) != 0) { DEBUG(0, (__location__ " Unable to wipe database. Recovery failed.\n")); + talloc_free(recdb); return -1; } @@ -1304,6 +1276,7 @@ static int do_recovery(struct ctdb_recoverd *rec, DEBUG(0, (__location__ " Recovery - created remote databases\n")); + /* set recovery mode to active on all nodes */ ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE); if (ret!=0) { @@ -1311,6 +1284,13 @@ static int do_recovery(struct ctdb_recoverd *rec, return -1; } + /* execute the "startrecovery" event script on all nodes */ + ret = run_startrecovery_eventscript(ctdb, nodemap); + if (ret!=0) { + DEBUG(0, (__location__ " Unable to run the 'startrecovery' event on cluster\n")); + return -1; + } + /* pick a new generation number */ generation = new_generation(); @@ -1334,8 +1314,9 @@ static int do_recovery(struct ctdb_recoverd *rec, data.dptr = (void *)&generation; data.dsize = sizeof(uint32_t); - if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_TRANSACTION_START, - nodemap, data, true) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_START, + list_of_active_nodes(ctdb, nodemap, mem_ctx, true), + CONTROL_TIMEOUT(), false, data) != 0) { DEBUG(0, (__location__ " Unable to start transactions. Recovery failed.\n")); return -1; } @@ -1352,8 +1333,9 @@ static int do_recovery(struct ctdb_recoverd *rec, DEBUG(0, (__location__ " Recovery - starting database commits\n")); /* commit all the changes */ - if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT, - nodemap, data, true) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT, + list_of_active_nodes(ctdb, nodemap, mem_ctx, true), + CONTROL_TIMEOUT(), false, data) != 0) { DEBUG(0, (__location__ " Unable to commit recovery changes. 
Recovery failed.\n")); return -1; } @@ -1417,6 +1399,13 @@ static int do_recovery(struct ctdb_recoverd *rec, DEBUG(1, (__location__ " Recovery - done takeover\n")); } + /* execute the "recovered" event script on all nodes */ + ret = run_recovered_eventscript(ctdb, nodemap); + if (ret!=0) { + DEBUG(0, (__location__ " Unable to run the 'recovered' event on cluster\n")); + return -1; + } + /* disable recovery mode */ ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL); if (ret!=0) { @@ -2445,12 +2434,29 @@ again: /* we might need to change who has what IP assigned */ if (rec->need_takeover_run) { rec->need_takeover_run = false; + + /* execute the "startrecovery" event script on all nodes */ + ret = run_startrecovery_eventscript(ctdb, nodemap); + if (ret!=0) { + DEBUG(0, (__location__ " Unable to run the 'startrecovery' event on cluster\n")); + do_recovery(rec, mem_ctx, pnn, num_active, nodemap, + vnnmap, ctdb->pnn); + } + ret = ctdb_takeover_run(ctdb, nodemap); if (ret != 0) { DEBUG(0, (__location__ " Unable to setup public takeover addresses - starting recovery\n")); do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, ctdb->pnn); } + + /* execute the "recovered" event script on all nodes */ + ret = run_recovered_eventscript(ctdb, nodemap); + if (ret!=0) { + DEBUG(0, (__location__ " Unable to run the 'recovered' event on cluster\n")); + do_recovery(rec, mem_ctx, pnn, num_active, nodemap, + vnnmap, ctdb->pnn); + } } goto again; diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index b63b88f4c2..cea3f95e34 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -641,11 +641,14 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx) int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) { int i, num_healthy, retries; - int ret; struct ctdb_public_ip ip; uint32_t mask; struct ctdb_public_ip_list *all_ips, *tmp_ip; int maxnode, maxnum=0, minnode, minnum=0, num; + TDB_DATA 
data; + struct timeval timeout; + struct client_async_data *async_data; + struct ctdb_client_control_state *state; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); @@ -813,6 +816,9 @@ try_again: /* now tell all nodes to delete any alias that they should not have. This will be a NOOP on nodes that don't currently hold the given alias */ + async_data = talloc_zero(tmp_ctx, struct client_async_data); + CTDB_NO_MEMORY_FATAL(ctdb, async_data); + for (i=0;i<nodemap->num;i++) { /* don't talk to unconnected nodes, but do talk to banned nodes */ if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) { @@ -830,21 +836,33 @@ try_again: ip.sin.sin_family = AF_INET; ip.sin.sin_addr = tmp_ip->sin.sin_addr; - ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(), - nodemap->nodes[i].pnn, - &ip); - if (ret != 0) { - DEBUG(0,("Failed to tell vnn %u to release IP %s\n", - nodemap->nodes[i].pnn, - inet_ntoa(tmp_ip->sin.sin_addr))); + timeout = TAKEOVER_TIMEOUT(); + data.dsize = sizeof(ip); + data.dptr = (uint8_t *)&ip; + state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn, + 0, CTDB_CONTROL_RELEASE_IP, 0, + data, async_data, + &timeout, NULL); + if (state == NULL) { + DEBUG(0,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn)); talloc_free(tmp_ctx); return -1; } + + ctdb_client_async_add(async_data, state); } } + if (ctdb_client_async_wait(ctdb, async_data) != 0) { + DEBUG(0,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n")); + talloc_free(tmp_ctx); + return -1; + } + talloc_free(async_data); /* tell all nodes to get their own IPs */ + async_data = talloc_zero(tmp_ctx, struct client_async_data); + CTDB_NO_MEMORY_FATAL(ctdb, async_data); for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) { if (tmp_ip->pnn == -1) { /* this IP won't be taken over */ @@ -854,16 +872,25 @@ try_again: ip.sin.sin_family = AF_INET; ip.sin.sin_addr = tmp_ip->sin.sin_addr; - ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(), - tmp_ip->pnn, - &ip); - 
if (ret != 0) { - DEBUG(0,("Failed asking vnn %u to take over IP %s\n", - tmp_ip->pnn, - inet_ntoa(tmp_ip->sin.sin_addr))); + timeout = TAKEOVER_TIMEOUT(); + data.dsize = sizeof(ip); + data.dptr = (uint8_t *)&ip; + state = ctdb_control_send(ctdb, tmp_ip->pnn, + 0, CTDB_CONTROL_TAKEOVER_IP, 0, + data, async_data, + &timeout, NULL); + if (state == NULL) { + DEBUG(0,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn)); talloc_free(tmp_ctx); return -1; } + + ctdb_client_async_add(async_data, state); + } + if (ctdb_client_async_wait(ctdb, async_data) != 0) { + DEBUG(0,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n")); + talloc_free(tmp_ctx); + return -1; } talloc_free(tmp_ctx); diff --git a/ctdb/tests/events.d/00.test b/ctdb/tests/events.d/00.test index 026cf6cba7..c5933be36a 100755 --- a/ctdb/tests/events.d/00.test +++ b/ctdb/tests/events.d/00.test @@ -10,6 +10,12 @@ case $cmd in echo "monitor event stderr" >&2 exit 0 ;; + + startrecovery) + echo "ctdb startrecovery event" + exit 0; + ;; + startup) echo "ctdb startup event" exit 0; diff --git a/ctdb/tools/ctdb_vacuum.c b/ctdb/tools/ctdb_vacuum.c index 2bc9908c7e..f412c04303 100644 --- a/ctdb/tools/ctdb_vacuum.c +++ b/ctdb/tools/ctdb_vacuum.c @@ -28,103 +28,6 @@ /* should be tunable */ #define TIMELIMIT() timeval_current_ofs(10, 0) -struct async_data { - uint32_t count; - uint32_t fail_count; -}; - -static void async_callback(struct ctdb_client_control_state *state) -{ - struct async_data *data = talloc_get_type(state->async.private_data, struct async_data); - int ret; - int32_t res; - - /* one more node has responded with recmode data */ - data->count--; - - /* if we failed to push the db, then return an error and let - the main loop try again. 
- */ - if (state->state != CTDB_CONTROL_DONE) { - data->fail_count++; - return; - } - - state->async.fn = NULL; - - ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL); - if ((ret != 0) || (res != 0)) { - data->fail_count++; - } -} - -static void async_add(struct async_data *data, struct ctdb_client_control_state *state) -{ - /* set up the callback functions */ - state->async.fn = async_callback; - state->async.private_data = data; - - /* one more control to wait for to complete */ - data->count++; -} - - -/* wait for up to the maximum number of seconds allowed - or until all nodes we expect a response from has replied -*/ -static int async_wait(struct ctdb_context *ctdb, struct async_data *data) -{ - while (data->count > 0) { - event_loop_once(ctdb->ev); - } - if (data->fail_count != 0) { - return -1; - } - return 0; -} - -/* - perform a simple control on nodes in the vnn map except ourselves. - The control cannot return data - */ -static int async_control_on_vnnmap(struct ctdb_context *ctdb, enum ctdb_controls opcode, - TDB_DATA data) -{ - struct async_data *async_data; - struct ctdb_client_control_state *state; - int j; - struct timeval timeout = TIMELIMIT(); - - async_data = talloc_zero(ctdb, struct async_data); - CTDB_NO_MEMORY_FATAL(ctdb, async_data); - - /* loop over all active nodes and send an async control to each of them */ - for (j=0; j<ctdb->vnn_map->size; j++) { - uint32_t pnn = ctdb->vnn_map->map[j]; - if (pnn == ctdb->pnn) { - continue; - } - state = ctdb_control_send(ctdb, pnn, 0, opcode, - 0, data, async_data, &timeout, NULL); - if (state == NULL) { - DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode)); - talloc_free(async_data); - return -1; - } - - async_add(async_data, state); - } - - if (async_wait(ctdb, async_data) != 0) { - talloc_free(async_data); - return -1; - } - - talloc_free(async_data); - return 0; -} - - /* vacuum one record */ @@ -172,7 +75,9 @@ static int ctdb_vacuum_one(struct 
ctdb_context *ctdb, TDB_DATA key, data.dptr = (void *)rec; data.dsize = rec->length; - if (async_control_on_vnnmap(ctdb, CTDB_CONTROL_DELETE_RECORD, data) != 0) { + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_DELETE_RECORD, + list_of_vnnmap_nodes(ctdb, ctdb->vnn_map, rec, false), + TIMELIMIT(), true, data) != 0) { /* one or more nodes failed to delete a record - no problem! */ talloc_free(rec); return 0; |