diff options
Diffstat (limited to 'ctdb/server/ctdb_call.c')
-rw-r--r-- | ctdb/server/ctdb_call.c | 483 |
1 files changed, 476 insertions, 7 deletions
diff --git a/ctdb/server/ctdb_call.c b/ctdb/server/ctdb_call.c index 0ea76bf44f..9e8642221c 100644 --- a/ctdb/server/ctdb_call.c +++ b/ctdb/server/ctdb_call.c @@ -43,7 +43,6 @@ return ctdb_db; } - /* a varient of input packet that can be used in lock requeue */ @@ -339,7 +338,7 @@ static void ctdb_become_dmaster(struct ctdb_db_context *ctdb_db, return; } - ctdb_call_local(ctdb_db, state->call, &header, state, &data); + ctdb_call_local(ctdb_db, state->call, &header, state, &data, true); ret = ctdb_ltdb_unlock(ctdb_db, state->call->key); if (ret != 0) { @@ -489,6 +488,8 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) call->key.dsize = c->keylen; call->call_data.dptr = c->data + c->keylen; call->call_data.dsize = c->calldatalen; + call->reply_data.dptr = NULL; + call->reply_data.dsize = 0; /* determine if we are the dmaster for this key. This also fetches the record data (if any), thus avoiding a 2nd fetch of the data @@ -505,9 +506,40 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) return; } - /* if we are not the dmaster, then send a redirect to the - requesting node */ - if (header.dmaster != ctdb->pnn) { + /* Dont do READONLY if we dont have a tracking database */ + if ((c->flags & CTDB_WANT_READONLY) && !ctdb_db->readonly) { + c->flags &= ~CTDB_WANT_READONLY; + } + + if (header.flags & CTDB_REC_RO_REVOKE_COMPLETE) { + header.flags &= ~(CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE); + if (ctdb_ltdb_store(ctdb_db, call->key, &header, data) != 0) { + ctdb_fatal(ctdb, "Failed to write header with cleared REVOKE flag"); + } + /* and clear out the tracking data */ + if (tdb_delete(ctdb_db->rottdb, call->key) != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to clear out trackingdb record\n")); + } + } + + /* if we are revoking, we must defer all other calls until the revoke + * had completed. + */ + if (header.flags & CTDB_REC_RO_REVOKING_READONLY) { + talloc_free(data.dptr); + ret = ctdb_ltdb_unlock(ctdb_db, call->key); + + if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, call->key, hdr, ctdb_call_input_pkt, ctdb) != 0) { + ctdb_fatal(ctdb, "Failed to add deferred call for revoke child"); + } + talloc_free(call); + return; + } + + /* if we are not the dmaster and are not hosting any delegations, + then send a redirect to the requesting node */ + if ((header.dmaster != ctdb->pnn) + && (!(header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) ) { talloc_free(data.dptr); ctdb_call_send_redirect(ctdb, call->key, c, &header); @@ -518,6 +550,80 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) return; } + if ( (!(c->flags & CTDB_WANT_READONLY)) + && (header.flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY)) ) { + header.flags |= CTDB_REC_RO_REVOKING_READONLY; + if (ctdb_ltdb_store(ctdb_db, call->key, &header, data) != 0) { + ctdb_fatal(ctdb, "Failed to store record with HAVE_DELEGATIONS set"); + } + ret = ctdb_ltdb_unlock(ctdb_db, call->key); + + if (ctdb_start_revoke_ro_record(ctdb, ctdb_db, call->key, &header, data) != 0) { + ctdb_fatal(ctdb, "Failed to start record revoke"); + } + talloc_free(data.dptr); + + if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, call->key, hdr, ctdb_call_input_pkt, ctdb) != 0) { + ctdb_fatal(ctdb, "Failed to add deferred call for revoke child"); + } + talloc_free(call); + + return; + } + + /* If this is the first request for delegation. bump rsn and set + * the delegations flag + */ + if ((c->flags & CTDB_WANT_READONLY) + && (c->callid == CTDB_FETCH_WITH_HEADER_FUNC) + && (!(header.flags & CTDB_REC_RO_HAVE_DELEGATIONS))) { + header.rsn += 3; + header.flags |= CTDB_REC_RO_HAVE_DELEGATIONS; + if (ctdb_ltdb_store(ctdb_db, call->key, &header, data) != 0) { + ctdb_fatal(ctdb, "Failed to store record with HAVE_DELEGATIONS set"); + } + } + if ((c->flags & CTDB_WANT_READONLY) + && (call->call_id == CTDB_FETCH_WITH_HEADER_FUNC)) { + TDB_DATA tdata; + + tdata = tdb_fetch(ctdb_db->rottdb, call->key); + if (ctdb_trackingdb_add_pnn(ctdb, &tdata, c->hdr.srcnode) != 0) { + ctdb_fatal(ctdb, "Failed to add node to trackingdb"); + } + if (tdb_store(ctdb_db->rottdb, call->key, tdata, TDB_REPLACE) != 0) { + ctdb_fatal(ctdb, "Failed to store trackingdb data"); + } + free(tdata.dptr); + + ret = ctdb_ltdb_unlock(ctdb_db, call->key); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret)); + } + + len = offsetof(struct ctdb_reply_call, data) + data.dsize + sizeof(struct ctdb_ltdb_header); + r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REPLY_CALL, len, + struct ctdb_reply_call); + CTDB_NO_MEMORY_FATAL(ctdb, r); + r->hdr.destnode = c->hdr.srcnode; + r->hdr.reqid = c->hdr.reqid; + r->status = 0; + r->datalen = data.dsize + sizeof(struct ctdb_ltdb_header); + header.rsn -= 2; + header.flags |= CTDB_REC_RO_HAVE_READONLY; + header.flags &= ~CTDB_REC_RO_HAVE_DELEGATIONS; + memcpy(&r->data[0], &header, sizeof(struct ctdb_ltdb_header)); + + if (data.dsize) { + memcpy(&r->data[sizeof(struct ctdb_ltdb_header)], data.dptr, data.dsize); + } + + ctdb_queue_packet(ctdb, &r->hdr); + + talloc_free(r); + return; + } + CTDB_UPDATE_STAT(ctdb, max_hop_count, c->hopcount); /* Try if possible to migrate the record off to the caller node. @@ -543,7 +649,11 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) } } - ctdb_call_local(ctdb_db, call, &header, hdr, &data); + ret = ctdb_call_local(ctdb_db, call, &header, hdr, &data, true); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " ctdb_call_local failed\n")); + call->status = -1; + } ret = ctdb_ltdb_unlock(ctdb_db, call->key); if (ret != 0) { @@ -766,7 +876,7 @@ struct ctdb_call_state *ctdb_call_local_send(struct ctdb_db_context *ctdb_db, *(state->call) = *call; state->ctdb_db = ctdb_db; - ret = ctdb_call_local(ctdb_db, state->call, header, state, data); + ret = ctdb_call_local(ctdb_db, state->call, header, state, data, true); event_add_timed(ctdb->ev, state, timeval_zero(), call_local_trigger, state); @@ -890,3 +1000,362 @@ void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode) talloc_free(r); } + + + +struct revokechild_deferred_call { + struct ctdb_context *ctdb; + struct ctdb_req_header *hdr; + deferred_requeue_fn fn; + void *ctx; +}; + +struct revokechild_handle { + struct revokechild_handle *next, *prev; + struct ctdb_context *ctdb; + struct ctdb_db_context *ctdb_db; + struct fd_event *fde; + int status; + int fd[2]; + pid_t child; + TDB_DATA key; +}; + +struct revokechild_requeue_handle { + struct ctdb_context *ctdb; + struct ctdb_req_header *hdr; + deferred_requeue_fn fn; + void *ctx; +}; + +static void deferred_call_requeue(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct revokechild_requeue_handle *requeue_handle = talloc_get_type(private_data, struct revokechild_requeue_handle); + + requeue_handle->fn(requeue_handle->ctx, requeue_handle->hdr); + talloc_free(requeue_handle); +} + +static int deferred_call_destructor(struct revokechild_deferred_call *deferred_call) +{ + struct ctdb_context *ctdb = deferred_call->ctdb; + struct revokechild_requeue_handle *requeue_handle = talloc(ctdb, struct revokechild_requeue_handle); + struct ctdb_req_call *c = (struct ctdb_req_call *)deferred_call->hdr; + + requeue_handle->ctdb = ctdb; + requeue_handle->hdr = deferred_call->hdr; + requeue_handle->fn = deferred_call->fn; + requeue_handle->ctx = deferred_call->ctx; + talloc_steal(requeue_handle, requeue_handle->hdr); + + /* when revoking, any READONLY requests have 1 second grace to let read/write finish first */ + event_add_timed(ctdb->ev, requeue_handle, timeval_current_ofs(c->flags & CTDB_WANT_READONLY ? 1 : 0, 0), deferred_call_requeue, requeue_handle); + + return 0; +} + + +static int revokechild_destructor(struct revokechild_handle *rc) +{ + if (rc->fde != NULL) { + talloc_free(rc->fde); + } + + if (rc->fd[0] != -1) { + close(rc->fd[0]); + } + if (rc->fd[1] != -1) { + close(rc->fd[1]); + } + kill(rc->child, SIGKILL); + + DLIST_REMOVE(rc->ctdb_db->revokechild_active, rc); + return 0; +} + +static void revokechild_handler(struct event_context *ev, struct fd_event *fde, + uint16_t flags, void *private_data) +{ + struct revokechild_handle *rc = talloc_get_type(private_data, + struct revokechild_handle); + int ret; + char c; + + ret = read(rc->fd[0], &c, 1); + if (ret != 1) { + DEBUG(DEBUG_ERR,("Failed to read status from revokechild. errno:%d\n", errno)); + rc->status = -1; + talloc_free(rc); + return; + } + if (c != 0) { + DEBUG(DEBUG_ERR,("revokechild returned failure. status:%d\n", c)); + rc->status = -1; + talloc_free(rc); + return; + } + + talloc_free(rc); +} + +struct ctdb_revoke_state { + struct ctdb_db_context *ctdb_db; + TDB_DATA key; + struct ctdb_ltdb_header *header; + TDB_DATA data; + int count; + int status; + int finished; +}; + +static void update_record_cb(struct ctdb_client_control_state *state) +{ + struct ctdb_revoke_state *revoke_state; + int ret; + int32_t res; + + if (state == NULL) { + return; + } + revoke_state = state->async.private_data; + + state->async.fn = NULL; + ret = ctdb_control_recv(state->ctdb, state, state, NULL, &res, NULL); + if ((ret != 0) || (res != 0)) { + DEBUG(DEBUG_ERR,("Recv for revoke update record failed ret:%d res:%d\n", ret, res)); + revoke_state->status = -1; + } + + revoke_state->count--; + if (revoke_state->count <= 0) { + revoke_state->finished = 1; + } +} + +static void revoke_send_cb(struct ctdb_context *ctdb, uint32_t pnn, void *private_data) +{ + struct ctdb_revoke_state *revoke_state = private_data; + struct ctdb_client_control_state *state; + + state = ctdb_ctrl_updaterecord_send(ctdb, revoke_state, timeval_current_ofs(5,0), pnn, revoke_state->ctdb_db, revoke_state->key, revoke_state->header, revoke_state->data); + if (state == NULL) { + DEBUG(DEBUG_ERR,("Failure to send update record to revoke readonly delegation\n")); + revoke_state->status = -1; + return; + } + state->async.fn = update_record_cb; + state->async.private_data = revoke_state; + + revoke_state->count++; + +} + +static void ctdb_revoke_timeout_handler(struct event_context *ev, struct timed_event *te, + struct timeval yt, void *private_data) +{ + struct ctdb_revoke_state *state = private_data; + + DEBUG(DEBUG_ERR,("Timed out waiting for revoke to finish\n")); + state->finished = 1; + state->status = -1; +} + +static int ctdb_revoke_all_delegations(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA tdata, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data) +{ + struct ctdb_revoke_state *state = talloc_zero(ctdb, struct ctdb_revoke_state); + int status; + + state->ctdb_db = ctdb_db; + state->key = key; + state->header = header; + state->data = data; + + ctdb_trackingdb_traverse(ctdb, tdata, revoke_send_cb, state); + + event_add_timed(ctdb->ev, state, timeval_current_ofs(5, 0), ctdb_revoke_timeout_handler, state); + + while (state->finished == 0) { + event_loop_once(ctdb->ev); + } + + status = state->status; + + if (status == 0) { + struct ctdb_ltdb_header new_header; + TDB_DATA new_data; + + if (ctdb_ltdb_lock(ctdb_db, key) != 0) { + DEBUG(DEBUG_ERR,("Failed to chainlock the database in revokechild\n")); + talloc_free(state); + return -1; + } + if (ctdb_ltdb_fetch(ctdb_db, key, &new_header, state, &new_data) != 0) { + ctdb_ltdb_unlock(ctdb_db, key); + DEBUG(DEBUG_ERR,("Failed for fetch tdb record in revokechild\n")); + talloc_free(state); + return -1; + } + header->rsn++; + if (new_header.rsn > header->rsn) { + ctdb_ltdb_unlock(ctdb_db, key); + DEBUG(DEBUG_ERR,("RSN too high in tdb record in revokechild\n")); + talloc_free(state); + return -1; + } + if ( (new_header.flags & (CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_HAVE_DELEGATIONS)) != (CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_HAVE_DELEGATIONS) ) { + ctdb_ltdb_unlock(ctdb_db, key); + DEBUG(DEBUG_ERR,("Flags are wrong in tdb record in revokechild\n")); + talloc_free(state); + return -1; + } + new_header.rsn++; + new_header.flags |= CTDB_REC_RO_REVOKE_COMPLETE; + if (ctdb_ltdb_store(ctdb_db, key, &new_header, new_data) != 0) { + ctdb_ltdb_unlock(ctdb_db, key); + DEBUG(DEBUG_ERR,("Failed to write new record in revokechild\n")); + talloc_free(state); + return -1; + } + ctdb_ltdb_unlock(ctdb_db, key); + } + + talloc_free(state); + return status; +} + + +int ctdb_start_revoke_ro_record(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data) +{ + TDB_DATA tdata; + struct revokechild_handle *rc; + pid_t parent = getpid(); + int ret; + + header->flags &= ~(CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY); + header->rsn -= 1; + + if ((rc = talloc_zero(ctdb_db, struct revokechild_handle)) == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate revokechild_handle\n")); + return -1; + } + + tdata = tdb_fetch(ctdb_db->rottdb, key); + if (tdata.dsize > 0) { + uint8_t *tmp; + + tmp = tdata.dptr; + tdata.dptr = talloc_memdup(rc, tdata.dptr, tdata.dsize); + free(tmp); + } + + rc->status = 0; + rc->ctdb = ctdb; + rc->ctdb_db = ctdb_db; + rc->fd[0] = -1; + rc->fd[1] = -1; + + talloc_set_destructor(rc, revokechild_destructor); + + rc->key.dsize = key.dsize; + rc->key.dptr = talloc_memdup(rc, key.dptr, key.dsize); + if (rc->key.dptr == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate key for revokechild_handle\n")); + talloc_free(rc); + return -1; + } + + ret = pipe(rc->fd); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Failed to allocate key for revokechild_handle\n")); + talloc_free(rc); + return -1; + } + + + rc->child = ctdb_fork(ctdb); + if (rc->child == (pid_t)-1) { + DEBUG(DEBUG_ERR,("Failed to fork child for revokechild\n")); + talloc_free(rc); + return -1; + } + + if (rc->child == 0) { + char c = 0; + close(rc->fd[0]); + debug_extra = talloc_asprintf(NULL, "revokechild-%s:", ctdb_db->db_name); + + if (switch_from_server_to_client(ctdb, "revokechild-%s", ctdb_db->db_name) != 0) { + DEBUG(DEBUG_ERR,("Failed to switch from server to client for revokechild process\n")); + c = 1; + goto child_finished; + } + + c = ctdb_revoke_all_delegations(ctdb, ctdb_db, tdata, key, header, data); + +child_finished: + write(rc->fd[1], &c, 1); + /* make sure we die when our parent dies */ + while (kill(parent, 0) == 0 || errno != ESRCH) { + sleep(5); + } + _exit(0); + } + + close(rc->fd[1]); + rc->fd[1] = -1; + set_close_on_exec(rc->fd[0]); + + /* This is an active revokechild child process */ + DLIST_ADD_END(ctdb_db->revokechild_active, rc, NULL); + + rc->fde = event_add_fd(ctdb->ev, rc, rc->fd[0], + EVENT_FD_READ, revokechild_handler, + (void *)rc); + if (rc->fde == NULL) { + DEBUG(DEBUG_ERR,("Failed to set up fd event for revokechild process\n")); + talloc_free(rc); + } + tevent_fd_set_auto_close(rc->fde); + + return 0; +} + +int ctdb_add_revoke_deferred_call(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_req_header *hdr, deferred_requeue_fn fn, void *call_context) +{ + struct revokechild_handle *rc; + struct revokechild_deferred_call *deferred_call; + + for (rc = ctdb_db->revokechild_active; rc; rc = rc->next) { + if (rc->key.dsize == 0) { + continue; + } + if (rc->key.dsize != key.dsize) { + continue; + } + if (!memcmp(rc->key.dptr, key.dptr, key.dsize)) { + break; + } + } + + if (rc == NULL) { + DEBUG(DEBUG_ERR,("Failed to add deferred call to revoke list. revoke structure not found\n")); + return -1; + } + + deferred_call = talloc(rc, struct revokechild_deferred_call); + if (deferred_call == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate deferred call structure for revoking record\n")); + return -1; + } + + deferred_call->ctdb = ctdb; + deferred_call->hdr = hdr; + deferred_call->fn = fn; + deferred_call->ctx = call_context; + + talloc_set_destructor(deferred_call, deferred_call_destructor); + talloc_steal(deferred_call, hdr); + + return 0; +} |