summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andrew Tridgell <tridge@samba.org> 2007-06-02 10:03:28 +1000
committer Andrew Tridgell <tridge@samba.org> 2007-06-02 10:03:28 +1000
commit 4f72a202d940e414a3e8359fd3929ebd178b836a (patch)
tree 02fdc8c3d96b2cdd2249d2d064914dda57fd9925
parent 69edd8e617f8042f643f789cd7528618c9c9c8ac (diff)
download samba-4f72a202d940e414a3e8359fd3929ebd178b836a.tar.gz
samba-4f72a202d940e414a3e8359fd3929ebd178b836a.tar.xz
samba-4f72a202d940e414a3e8359fd3929ebd178b836a.zip
- moved cmdline options that are only relevant to ctdbd into ctdbd.c
- fixed a valgrind error on failing to send a control
- don't mark node dead when already disconnected
- moved node list lock code into common code

(This used to be ctdb commit bcc0432d0fea7ef223f82ccee81cf35c18144b1b)
-rw-r--r-- ctdb/common/cmdline.c 53
-rw-r--r-- ctdb/common/ctdb_daemon.c 9
-rw-r--r-- ctdb/common/ctdb_monitor.c 13
-rw-r--r-- ctdb/common/ctdb_recover.c 36
-rw-r--r-- ctdb/common/ctdb_recoverd.c 34
-rw-r--r-- ctdb/direct/ctdbd.c 51
-rw-r--r-- ctdb/include/ctdb_private.h 4
7 files changed, 112 insertions, 88 deletions
diff --git a/ctdb/common/cmdline.c b/ctdb/common/cmdline.c
index 8cea85d49b..3de4387407 100644
--- a/ctdb/common/cmdline.c
+++ b/ctdb/common/cmdline.c
@@ -29,21 +29,11 @@
*/
static struct {
- const char *nlist;
- const char *transport;
- const char *myaddress;
const char *socketname;
- int self_connect;
- const char *db_dir;
int torture;
const char *events;
} ctdb_cmdline = {
- .nlist = ETCDIR "/ctdb/nodes",
- .transport = "tcp",
- .myaddress = NULL,
.socketname = CTDB_PATH,
- .self_connect = 0,
- .db_dir = VARDIR "/ctdb",
.torture = 0,
};
@@ -64,13 +54,8 @@ static void ctdb_cmdline_callback(poptContext con,
struct poptOption popt_ctdb_cmdline[] = {
{ NULL, 0, POPT_ARG_CALLBACK, (void *)ctdb_cmdline_callback },
- { "nlist", 0, POPT_ARG_STRING, &ctdb_cmdline.nlist, 0, "node list file", "filename" },
- { "listen", 0, POPT_ARG_STRING, &ctdb_cmdline.myaddress, 0, "address to listen on", "address" },
{ "socket", 0, POPT_ARG_STRING, &ctdb_cmdline.socketname, 0, "local socket name", "filename" },
- { "transport", 0, POPT_ARG_STRING, &ctdb_cmdline.transport, 0, "protocol transport", NULL },
- { "self-connect", 0, POPT_ARG_NONE, &ctdb_cmdline.self_connect, 0, "enable self connect", "boolean" },
{ "debug", 'd', POPT_ARG_INT, &LogLevel, 0, "debug level"},
- { "dbdir", 0, POPT_ARG_STRING, &ctdb_cmdline.db_dir, 0, "directory for the tdb files", NULL },
{ "torture", 0, POPT_ARG_NONE, &ctdb_cmdline.torture, 0, "enable nastiness in library", NULL },
{ "events", 0, POPT_ARG_STRING, NULL, OPT_EVENTSYSTEM, "event system", NULL },
{ NULL }
@@ -85,11 +70,6 @@ struct ctdb_context *ctdb_cmdline_init(struct event_context *ev)
struct ctdb_context *ctdb;
int ret;
- if (ctdb_cmdline.nlist == NULL) {
- printf("You must provide a node list with --nlist\n");
- exit(1);
- }
-
/* initialise ctdb */
ctdb = ctdb_init(ev);
if (ctdb == NULL) {
@@ -97,28 +77,10 @@ struct ctdb_context *ctdb_cmdline_init(struct event_context *ev)
exit(1);
}
- if (ctdb_cmdline.self_connect) {
- ctdb_set_flags(ctdb, CTDB_FLAG_SELF_CONNECT);
- }
if (ctdb_cmdline.torture) {
ctdb_set_flags(ctdb, CTDB_FLAG_TORTURE);
}
- ret = ctdb_set_transport(ctdb, ctdb_cmdline.transport);
- if (ret == -1) {
- printf("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb));
- exit(1);
- }
-
- /* tell ctdb what address to listen on */
- if (ctdb_cmdline.myaddress) {
- ret = ctdb_set_address(ctdb, ctdb_cmdline.myaddress);
- if (ret == -1) {
- printf("ctdb_set_address failed - %s\n", ctdb_errstr(ctdb));
- exit(1);
- }
- }
-
/* tell ctdb the socket address */
ret = ctdb_set_socketname(ctdb, ctdb_cmdline.socketname);
if (ret == -1) {
@@ -126,21 +88,6 @@ struct ctdb_context *ctdb_cmdline_init(struct event_context *ev)
exit(1);
}
- /* tell ctdb what nodes are available */
- ret = ctdb_set_nlist(ctdb, ctdb_cmdline.nlist);
- if (ret == -1) {
- printf("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb));
- exit(1);
- }
-
- if (ctdb_cmdline.db_dir) {
- ret = ctdb_set_tdb_dir(ctdb, ctdb_cmdline.db_dir);
- if (ret == -1) {
- printf("ctdb_set_tdb_dir failed - %s\n", ctdb_errstr(ctdb));
- exit(1);
- }
- }
-
return ctdb;
}
diff --git a/ctdb/common/ctdb_daemon.c b/ctdb/common/ctdb_daemon.c
index ecd095f115..7a98768177 100644
--- a/ctdb/common/ctdb_daemon.c
+++ b/ctdb/common/ctdb_daemon.c
@@ -951,6 +951,7 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
TDB_DATA data;
int res;
struct daemon_control_state *state;
+ TALLOC_CTX *tmp_ctx = talloc_new(client);
if (c->hdr.destnode == CTDB_CURRENT_NODE) {
c->hdr.destnode = client->ctdb->vnn;
@@ -970,6 +971,10 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
}
talloc_set_destructor(state, daemon_control_destructor);
+
+ if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
+ talloc_steal(tmp_ctx, state);
+ }
data.dptr = &c->data[0];
data.dsize = c->datalen;
@@ -983,9 +988,7 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
c->hdr.destnode));
}
- if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
- talloc_free(state);
- }
+ talloc_free(tmp_ctx);
}
/*
diff --git a/ctdb/common/ctdb_monitor.c b/ctdb/common/ctdb_monitor.c
index dd875ba45c..750a1f5bd6 100644
--- a/ctdb/common/ctdb_monitor.c
+++ b/ctdb/common/ctdb_monitor.c
@@ -48,12 +48,15 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
continue;
}
- /* it might have come alive again */
- if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) {
- ctdb_node_connected(node);
+ if (!(node->flags & NODE_FLAGS_CONNECTED)) {
+ /* it might have come alive again */
+ if (node->rx_cnt != 0) {
+ ctdb_node_connected(node);
+ }
continue;
}
+
if (node->rx_cnt == 0) {
node->dead_count++;
} else {
@@ -63,6 +66,7 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
node->rx_cnt = 0;
if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
+ DEBUG(0,("dead count reached for node %u\n", node->vnn));
ctdb_node_dead(node);
ctdb_send_keepalive(ctdb, node->vnn);
/* maybe tell the transport layer to kill the
@@ -71,7 +75,8 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
continue;
}
- if (node->tx_cnt == 0 && (node->flags & NODE_FLAGS_CONNECTED)) {
+ if (node->tx_cnt == 0) {
+ DEBUG(5,("sending keepalive to %u\n", node->vnn));
ctdb_send_keepalive(ctdb, node->vnn);
}
diff --git a/ctdb/common/ctdb_recover.c b/ctdb/common/ctdb_recover.c
index 824b0adf89..6ba3316f24 100644
--- a/ctdb/common/ctdb_recover.c
+++ b/ctdb/common/ctdb_recover.c
@@ -655,3 +655,39 @@ int32_t ctdb_control_delete_low_rsn(struct ctdb_context *ctdb, TDB_DATA indata,
return 0;
}
+
+/*
+ try to take a write lock on the first byte of the node list file (ctdb->node_list_file), returning true if the lock was obtained and false otherwise. On success the fd stays open in ctdb->node_list_fd only when keep is true.
+ This should only succeed on the recovery master's recovery daemon - anywhere else is a bug
+ */
+bool ctdb_lock_node_list(struct ctdb_context *ctdb, bool keep)
+{
+ struct flock lock;
+
+ if (ctdb->node_list_fd != -1) { /* close whatever fd an earlier call left behind */
+ close(ctdb->node_list_fd);
+ }
+ ctdb->node_list_fd = open(ctdb->node_list_file, O_RDWR);
+ if (ctdb->node_list_fd == -1) {
+ DEBUG(0,("Unable to open %s - (%s)\n",
+ ctdb->node_list_file, strerror(errno)));
+ return false;
+ }
+
+ lock.l_type = F_WRLCK; /* write lock */
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 1; /* with l_start 0 from SEEK_SET: only the first byte of the file */
+ lock.l_pid = 0;
+
+ if (fcntl(ctdb->node_list_fd, F_SETLK, &lock) != 0) { /* F_SETLK fails at once instead of waiting for the lock */
+ return false; /* NOTE(review): the fd is left open in ctdb->node_list_fd on this path and nothing is logged */
+ }
+
+ if (!keep) { /* caller does not want to hold on to the fd */
+ close(ctdb->node_list_fd); /* NOTE(review): per POSIX, closing the fd should also release the lock - confirm this is intended */
+ ctdb->node_list_fd = -1;
+ }
+
+ return true;
+}
diff --git a/ctdb/common/ctdb_recoverd.c b/ctdb/common/ctdb_recoverd.c
index cacffa90d5..f4a58bf122 100644
--- a/ctdb/common/ctdb_recoverd.c
+++ b/ctdb/common/ctdb_recoverd.c
@@ -375,7 +375,9 @@ static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_nod
return 0;
}
-
+/*
+ we are the recmaster, and recovery is needed - start a recovery run
+ */
static int do_recovery(struct ctdb_context *ctdb,
TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
@@ -383,7 +385,11 @@ static int do_recovery(struct ctdb_context *ctdb,
int i, j, ret;
uint32_t generation;
struct ctdb_dbid_map *dbmap;
- struct flock lock;
+
+ if (!ctdb_lock_node_list(ctdb, true)) {
+ DEBUG(0,("Unable to lock node list - aborting recovery\n"));
+ return -1;
+ }
/* set recovery mode to active on all nodes */
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
@@ -392,30 +398,6 @@ static int do_recovery(struct ctdb_context *ctdb,
return -1;
}
- /* get the recmaster lock */
- if (ctdb->node_list_fd != -1) {
- close(ctdb->node_list_fd);
- }
-
- ctdb->node_list_fd = open(ctdb->node_list_file, O_RDWR);
- if (ctdb->node_list_fd == -1) {
- DEBUG(0,("Unable to open %s - aborting recovery (%s)\n",
- ctdb->node_list_file, strerror(errno)));
- return -1;
- }
-
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 1;
- lock.l_pid = 0;
-
- if (fcntl(ctdb->node_list_fd, F_SETLK, &lock) != 0) {
- DEBUG(0,("Unable to lock %s - aborting recovery (%s)\n",
- ctdb->node_list_file, strerror(errno)));
- return -1;
- }
-
DEBUG(0, (__location__ " Recovery initiated\n"));
/* pick a new generation number */
diff --git a/ctdb/direct/ctdbd.c b/ctdb/direct/ctdbd.c
index 367ed8b6ba..8102eef751 100644
--- a/ctdb/direct/ctdbd.c
+++ b/ctdb/direct/ctdbd.c
@@ -39,13 +39,23 @@ static void block_signal(int signum)
}
static struct {
+ const char *nlist;
+ const char *transport;
+ const char *myaddress;
const char *public_address_list;
const char *public_interface;
const char *event_script;
const char *logfile;
+ const char *recovery_lock_file;
+ const char *db_dir;
+ int self_connect;
} options = {
+ .nlist = ETCDIR "/ctdb/nodes",
+ .transport = "tcp",
.event_script = ETCDIR "/ctdb/events",
- .logfile = VARDIR "/log/log.ctdb"
+ .logfile = VARDIR "/log/log.ctdb",
+ .db_dir = VARDIR "/ctdb",
+ .self_connect = 0,
};
@@ -66,6 +76,11 @@ int main(int argc, const char *argv[])
{ "public-interface", 0, POPT_ARG_STRING, &options.public_interface, 0, "public interface", "interface"},
{ "event-script", 0, POPT_ARG_STRING, &options.event_script, 0, "event script", "filename" },
{ "logfile", 0, POPT_ARG_STRING, &options.logfile, 0, "log file location", "filename" },
+ { "nlist", 0, POPT_ARG_STRING, &options.nlist, 0, "node list file", "filename" },
+ { "listen", 0, POPT_ARG_STRING, &options.myaddress, 0, "address to listen on", "address" },
+ { "transport", 0, POPT_ARG_STRING, &options.transport, 0, "protocol transport", NULL },
+ { "self-connect", 0, POPT_ARG_NONE, &options.self_connect, 0, "enable self connect", "boolean" },
+ { "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL },
POPT_TABLEEND
};
int opt, ret;
@@ -98,6 +113,40 @@ int main(int argc, const char *argv[])
ctdb = ctdb_cmdline_init(ev);
+ if (options.self_connect) {
+ ctdb_set_flags(ctdb, CTDB_FLAG_SELF_CONNECT);
+ }
+
+ ret = ctdb_set_transport(ctdb, options.transport);
+ if (ret == -1) {
+ printf("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb));
+ exit(1);
+ }
+
+ /* tell ctdb what address to listen on */
+ if (options.myaddress) {
+ ret = ctdb_set_address(ctdb, options.myaddress);
+ if (ret == -1) {
+ printf("ctdb_set_address failed - %s\n", ctdb_errstr(ctdb));
+ exit(1);
+ }
+ }
+
+ /* tell ctdb what nodes are available */
+ ret = ctdb_set_nlist(ctdb, options.nlist);
+ if (ret == -1) {
+ printf("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb));
+ exit(1);
+ }
+
+ if (options.db_dir) {
+ ret = ctdb_set_tdb_dir(ctdb, options.db_dir);
+ if (ret == -1) {
+ printf("ctdb_set_tdb_dir failed - %s\n", ctdb_errstr(ctdb));
+ exit(1);
+ }
+ }
+
ret = ctdb_set_logfile(ctdb, options.logfile);
if (ret == -1) {
printf("ctdb_set_logfile to %s failed - %s\n", options.logfile, ctdb_errstr(ctdb));
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index d366c5071d..870cbd7a71 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -357,7 +357,7 @@ struct ctdb_db_context {
#define CTDB_MONITORING_TIMEOUT 2
/* number of monitoring timeouts before a node is considered dead */
-#define CTDB_MONITORING_DEAD_COUNT 2
+#define CTDB_MONITORING_DEAD_COUNT 3
/* number of consecutive calls from the same node before we give them
@@ -970,5 +970,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb);
void set_nonblocking(int fd);
void set_close_on_exec(int fd);
+bool ctdb_lock_node_list(struct ctdb_context *ctdb, bool keep);
+
#endif