diff options
| -rw-r--r-- | ctdb/server/ctdb_recoverd.c | 25 |
1 files changed, 20 insertions, 5 deletions
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 1a53bb8334..3617efd4e2 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2017,8 +2017,15 @@ ctdb_recoverd_write_pnn_connect_count(struct ctdb_recoverd *rec) const char count = rec->num_connected; struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context); + if (rec->rec_file_fd == -1) { + DEBUG(DEBUG_CRIT,(__location__ " Unable to write pnn count. pnnfile is not open.\n")); + return; + } + if (pwrite(rec->rec_file_fd, &count, 1, ctdb->pnn) == -1) { DEBUG(DEBUG_CRIT, (__location__ " Failed to write pnn count\n")); + close(rec->rec_file_fd); + rec->rec_file_fd = -1; } } @@ -2038,8 +2045,8 @@ ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec) DEBUG(DEBUG_INFO, ("Setting PNN lock for pnn:%d\n", ctdb->pnn)); if (rec->rec_file_fd != -1) { - DEBUG(DEBUG_CRIT, (__location__ " rec_lock_fd is already open. Aborting\n")); - exit(10); + close(rec->rec_file_fd); + rec->rec_file_fd = -1; } pnnfile = talloc_asprintf(rec, "%s.pnn", ctdb->recovery_lock_file); @@ -2049,7 +2056,8 @@ ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec) if (rec->rec_file_fd == -1) { DEBUG(DEBUG_CRIT,(__location__ " Unable to open %s - (%s)\n", pnnfile, strerror(errno))); - exit(10); + talloc_free(pnnfile); + return; } set_close_on_exec(rec->rec_file_fd); @@ -2063,12 +2071,12 @@ ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec) close(rec->rec_file_fd); rec->rec_file_fd = -1; DEBUG(DEBUG_CRIT,(__location__ " Failed to get pnn lock on '%s'\n", pnnfile)); - exit(10); + talloc_free(pnnfile); + return; } DEBUG(DEBUG_NOTICE,(__location__ " Got pnn lock on '%s'\n", pnnfile)); - talloc_free(pnnfile); /* we start out with 0 connected nodes */ @@ -2086,6 +2094,9 @@ static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event * struct ctdb_context *ctdb = rec->ctdb; struct ctdb_node_map *nodemap = rec->nodemap; + /* close and reopen the pnn lock file */ + ctdb_recoverd_get_pnn_lock(rec); + ctdb_recoverd_write_pnn_connect_count(rec); event_add_timed(rec->ctdb->ev, rec->ctdb, @@ -2108,6 +2119,10 @@ static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event * return; } if (ctdb->recovery_lock_fd == -1) { + DEBUG(DEBUG_ERR, (__location__ " Lost reclock pnn file. Yielding recmaster role\n")); + close(ctdb->recovery_lock_fd); + ctdb->recovery_lock_fd = -1; + force_election(rec, ctdb->pnn, rec->nodemap); return; } for (i=0; i<nodemap->num; i++) { |
