summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Andrew Tridgell <tridge@samba.org> 2007-05-19 00:56:49 +1000
committer Andrew Tridgell <tridge@samba.org> 2007-05-19 00:56:49 +1000
commit 28f2fc669b5697eb2e8fb01c8ab2514ecb9f1199 (patch)
tree 54bd4a2ed0b6924bd83caf5048e7e8f21debb23e
parent 049e1504ee7b62f6abd61dddc59558963780d641 (diff)
a better way to resend calls after recovery
(This used to be ctdb commit 444f52e134fc22aaf254d05c86d8b357ded876f4)
-rw-r--r-- ctdb/common/ctdb_call.c 41
-rw-r--r-- ctdb/common/ctdb_freeze.c 1
-rw-r--r-- ctdb/include/ctdb_private.h 9
3 files changed, 22 insertions, 29 deletions
diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c
index cd7244ff15..c19d88f660 100644
--- a/ctdb/common/ctdb_call.c
+++ b/ctdb/common/ctdb_call.c
@@ -607,37 +607,20 @@ void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
*/
static int ctdb_call_destructor(struct ctdb_call_state *state)
{
+ DLIST_REMOVE(state->ctdb_db->ctdb->pending_calls, state);
ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
return 0;
}
/*
- called when a ctdb_call times out
+ called when a ctdb_call needs to be resent after a reconfigure event
*/
-static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
+static void ctdb_call_resend(struct ctdb_call_state *state)
{
- struct ctdb_call_state *state = talloc_get_type(private_data, struct ctdb_call_state);
struct ctdb_context *ctdb = state->ctdb_db->ctdb;
- ctdb->status.timeouts.call++;
-
- event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0),
- ctdb_call_timeout, state);
-
- if (++state->resend_count < 10 &&
- (ctdb->vnn_map->generation == state->generation ||
- ctdb->recovery_mode != CTDB_RECOVERY_NORMAL)) {
- /* the call is just being slow, or we are curently
- recovering, give it more time */
- return;
- }
-
- /* the generation count changed or we're timing out too much -
- the call must be re-issued */
state->generation = ctdb->vnn_map->generation;
- state->resend_count = 0;
/* use a new reqid, in case the old reply does eventually come in */
ctdb_reqid_remove(ctdb, state->reqid);
@@ -651,7 +634,19 @@ static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te,
state->c->hdr.destnode = ctdb->vnn;
ctdb_queue_packet(ctdb, &state->c->hdr);
- DEBUG(0,("requeued ctdb_call after timeout\n"));
+ DEBUG(0,("resent ctdb_call\n"));
+}
+
+/*
+ resend all pending calls on recovery
+ */
+void ctdb_call_resend_all(struct ctdb_context *ctdb)
+{
+ struct ctdb_call_state *state, *next;
+ for (state=ctdb->pending_calls;state;state=next) {
+ next = state->next;
+ ctdb_call_resend(state);
+ }
}
/*
@@ -743,10 +738,10 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd
state->state = CTDB_CALL_WAIT;
state->generation = ctdb->vnn_map->generation;
+ DLIST_ADD(ctdb->pending_calls, state);
+
ctdb_queue_packet(ctdb, &state->c->hdr);
- event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0),
- ctdb_call_timeout, state);
return state;
}
diff --git a/ctdb/common/ctdb_freeze.c b/ctdb/common/ctdb_freeze.c
index 96a128332e..5868ed099c 100644
--- a/ctdb/common/ctdb_freeze.c
+++ b/ctdb/common/ctdb_freeze.c
@@ -223,5 +223,6 @@ int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
{
talloc_free(ctdb->freeze_handle);
ctdb->freeze_handle = NULL;
+ ctdb_call_resend_all(ctdb);
return 0;
}
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 57901ed6a9..0149714c85 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -265,6 +265,7 @@ struct ctdb_context {
uint32_t num_clients;
uint32_t seqnum_frequency;
uint32_t recovery_master;
+ struct ctdb_call_state *pending_calls;
};
struct ctdb_db_context {
@@ -300,11 +301,6 @@ struct ctdb_db_context {
ctdb_fatal(ctdb, "Out of memory in " __location__ ); \
}} while (0)
-/* timeout for ctdb call operations. When this timeout expires we
- check if the generation count has changed, and if it has then
- re-issue the call */
-#define CTDB_CALL_TIMEOUT 2
-
/* maximum timeout for ctdb control calls */
#define CTDB_CONTROL_TIMEOUT 60
@@ -390,6 +386,7 @@ enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR};
state of a in-progress ctdb call
*/
struct ctdb_call_state {
+ struct ctdb_call_state *next, *prev;
enum call_state state;
uint32_t reqid;
struct ctdb_req_call *c;
@@ -397,7 +394,6 @@ struct ctdb_call_state {
const char *errmsg;
struct ctdb_call call;
uint32_t generation;
- uint32_t resend_count;
struct {
void (*fn)(struct ctdb_call_state *);
void *private_data;
@@ -828,5 +824,6 @@ int ctdb_start_monitoring(struct ctdb_context *ctdb);
void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
+void ctdb_call_resend_all(struct ctdb_context *ctdb);
#endif