Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007
diff --git a/ctdb/doc/ctdbd.1.xml b/ctdb/doc/ctdbd.1.xml
index 48dc0e2b9c..d5e3824b21 100644
--- a/ctdb/doc/ctdbd.1.xml
+++ b/ctdb/doc/ctdbd.1.xml
@@ -812,12 +812,14 @@ CTDB_CAPABILITY_RECMASTER=no
A second way is to use the built in NAT-GW feature in CTDB.
With NAT-GW only one extra address is required for the entire cluster
- instead of one address per node.
+ instead of one address per node. This extra address is dedicated
+ to traffic that originates from the cluster and is destined for the
+ external network.
In NAT-GW one of the nodes in the cluster is designated the NAT Gateway
through which all traffic that is originated in the cluster will be
- routed through if the public addresses are not available.
+ routed through if the public addresses are not available.
@@ -846,22 +848,18 @@ CTDB_CAPABILITY_RECMASTER=no
# to use NAT MASQUERADING for all traffic from the internal private network
# to the external network. This node is the NAT-GW node.
#
-# All other nodes are set up with policy routing so that all traffic with
-# a source address of the private network and a destination outside of
-# the private network are instead routed through the NAT-GW node.
+# All other nodes are set up with a default route with a metric of 10 to point
+# to the nat-gw node.
#
# The effect of this is that only when a node does not have a public address
-# or a route to the external network will the node use the private address
-# as the source address and only then will it use the policy routing
-# through the NAT-GW.
-# As long as a node has a public address and can route to the external network
-# the node will always pick the public address as the source address and NAT-GW
-# routing will not be used.
-#NATGW_PUBLIC_IP=10.0.0.227/24
-#NATGW_PUBLIC_IFACE=eth0
-#NATGW_DEFAULT_GATEWAY=10.0.0.1
-#NATGW_PRIVATE_IFACE=eth1
-#NATGW_PRIVATE_NETWORK=10.1.1.0/24
+# and thus no proper routes to the external world it will instead
+# route all packets through the nat-gw node.
+#
+# NATGW_PUBLIC_IP=10.0.0.227/24
+# NATGW_PUBLIC_IFACE=eth0
+# NATGW_DEFAULT_GATEWAY=10.0.0.1
+# NATGW_PRIVATE_IFACE=eth1
+# NATGW_PRIVATE_NETWORK=10.1.1.0/24
@@ -928,11 +926,8 @@ CTDB_CAPABILITY_RECMASTER=no
internal private network and which is destined to the external network(s).
- All other nodes are configured with policy routing so that all outgoing
- packets that have a source ip address belonging to the private network
- (which means they are not routable from the public network) are instead
- sent on to the designated NAT-GW host instead of using the normal
- routing table.
+ All other nodes are configured with a default route of metric 10 pointing
+ to the designated NAT GW node.
This is implemented in the 11.natgw eventscript. Please see the
--
cgit
From 4d2195c503c3e47107138bcd02b0222d89312a85 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg
Date: Thu, 19 Mar 2009 10:43:57 +1100
Subject: The wbinfo --sequence command has been deprecated in favor of the
new --online-status command
(This used to be ctdb commit b6e34503ac094a274a569a69e3d93d92ad911f4d)
---
ctdb/tools/ctdb_diagnostics | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ctdb/tools/ctdb_diagnostics b/ctdb/tools/ctdb_diagnostics
index 8cf1f9ab6e..a9531d646c 100755
--- a/ctdb/tools/ctdb_diagnostics
+++ b/ctdb/tools/ctdb_diagnostics
@@ -236,7 +236,7 @@ show_all "smbclient -U% -L 127.0.0.1"
WORKGROUP=`testparm -s --parameter-name=WORKGROUP 2> /dev/null`
show_all id "$WORKGROUP/Administrator"
show_all "wbinfo -p"
-show_all "wbinfo --sequence"
+show_all "wbinfo --online-status"
show_all "smbd -b"
date
--
cgit
From dc05c1b80cad4597c8a900d5cce41c5e7731937e Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 23 Mar 2009 12:37:30 +1100
Subject: create a helper function that converts a ctdb instance in daemon mode
to become a ctdb client instance.
use this from the recovery daemon child process to switch to client mode
and connect back to the main daemon
(This used to be ctdb commit 16f31786a031255ab5b3099a0a3c745de973347a)
---
ctdb/client/ctdb_client.c | 36 ++++++++++++++++++++++++++++++++++++
ctdb/include/ctdb.h | 2 ++
ctdb/server/ctdb_recoverd.c | 30 +++++-------------------------
3 files changed, 43 insertions(+), 25 deletions(-)
diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c
index 16fc03b48a..1f68c242a1 100644
--- a/ctdb/client/ctdb_client.c
+++ b/ctdb/client/ctdb_client.c
@@ -3472,3 +3472,39 @@ int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
return 0;
}
+
+/* when forking the main daemon and the child process needs to connect back
+ * to the daemon as a client process, this function can be used to change
+ * the ctdb context from daemon into client mode
+ */
+int switch_from_server_to_client(struct ctdb_context *ctdb)
+{
+ int ret;
+
+ /* shutdown the transport */
+ if (ctdb->methods) {
+ ctdb->methods->shutdown(ctdb);
+ }
+
+ /* get a new event context */
+ talloc_free(ctdb->ev);
+ ctdb->ev = event_context_init(ctdb);
+
+ close(ctdb->daemon.sd);
+ ctdb->daemon.sd = -1;
+
+ /* the client does not need to be realtime */
+ if (ctdb->do_setsched) {
+ ctdb_restore_scheduler(ctdb);
+ }
+
+ /* initialise ctdb */
+ ret = ctdb_socket_connect(ctdb);
+ if (ret != 0) {
+ DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
+ return -1;
+ }
+
+ return 0;
+}
+
diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h
index 2ec477f608..f0f510abd3 100644
--- a/ctdb/include/ctdb.h
+++ b/ctdb/include/ctdb.h
@@ -610,4 +610,6 @@ int ctdb_transaction_commit(struct ctdb_transaction_handle *h);
int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb);
+int switch_from_server_to_client(struct ctdb_context *ctdb);
+
#endif
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c
index 540749d12f..28be460c98 100644
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -2906,7 +2906,6 @@ static void recd_sig_child_handler(struct event_context *ev,
*/
int ctdb_start_recoverd(struct ctdb_context *ctdb)
{
- int ret;
int fd[2];
struct signal_event *se;
@@ -2931,35 +2930,16 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
close(fd[1]);
- /* shutdown the transport */
- if (ctdb->methods) {
- ctdb->methods->shutdown(ctdb);
- }
-
- /* get a new event context */
- talloc_free(ctdb->ev);
- ctdb->ev = event_context_init(ctdb);
-
- event_add_fd(ctdb->ev, ctdb, fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
- ctdb_recoverd_parent, &fd[0]);
-
- close(ctdb->daemon.sd);
- ctdb->daemon.sd = -1;
-
srandom(getpid() ^ time(NULL));
- /* the recovery daemon does not need to be realtime */
- if (ctdb->do_setsched) {
- ctdb_restore_scheduler(ctdb);
- }
-
- /* initialise ctdb */
- ret = ctdb_socket_connect(ctdb);
- if (ret != 0) {
- DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb\n"));
+ if (switch_from_server_to_client(ctdb) != 0) {
+ DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch recovery daemon into client mode. shutting down.\n"));
exit(1);
}
+ event_add_fd(ctdb->ev, ctdb, fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
+ ctdb_recoverd_parent, &fd[0]);
+
/* set up a handler to pick up sigchld */
se = event_add_signal(ctdb->ev, ctdb,
SIGCHLD, 0,
--
cgit
From 629d5ee1fa9b84150c25c151b3bf0690cb787a90 Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 23 Mar 2009 19:07:45 +1100
Subject: add a new command "ctdb scriptstatus" this command shows which
eventscripts were executed during the last monitoring cycle and the status
from each eventscript.
If an eventscript timedout or returned an error we also
show the output from the eventscript.
Example :
[root@rcn1 ctdb-git]# ./bin/ctdb scriptstatus
6 scripts were executed last monitoring cycle
00.ctdb Status:OK Duration:0.021 Mon Mar 23 19:04:32 2009
10.interface Status:OK Duration:0.048 Mon Mar 23 19:04:32 2009
20.multipathd Status:OK Duration:0.011 Mon Mar 23 19:04:33 2009
40.vsftpd Status:OK Duration:0.011 Mon Mar 23 19:04:33 2009
41.httpd Status:OK Duration:0.011 Mon Mar 23 19:04:33 2009
50.samba Status:ERROR Duration:0.057 Mon Mar 23 19:04:33 2009
OUTPUT:ERROR: Samba tcp port 445 is not responding
Add a new helper function "switch_from_server_to_client()" which both
the recovery daemon can use as well as in the child process we start for running the actual eventscripts.
Create several new controls, both for the eventscript child process to inform the master daemon of the current status of the scripts as well as for the ctdb tool to extract this information from the running daemon.
(This used to be ctdb commit c98f90ad61c9b1e679116fbed948ddca4111968d)
---
ctdb/client/ctdb_client.c | 107 ++++++++++++++++
ctdb/include/ctdb.h | 24 ++++
ctdb/include/ctdb_private.h | 22 ++++
ctdb/server/ctdb_control.c | 19 +++
ctdb/server/ctdb_logging.c | 4 +
ctdb/server/eventscript.c | 305 +++++++++++++++++++++++++++++++++++++++++++-
ctdb/tools/ctdb.c | 39 ++++++
7 files changed, 517 insertions(+), 3 deletions(-)
diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c
index 1f68c242a1..3bdb4b2617 100644
--- a/ctdb/client/ctdb_client.c
+++ b/ctdb/client/ctdb_client.c
@@ -3508,3 +3508,110 @@ int switch_from_server_to_client(struct ctdb_context *ctdb)
return 0;
}
+/*
+ tell the main daemon we are starting a new monitor event script
+ */
+int ctdb_ctrl_event_script_init(struct ctdb_context *ctdb)
+{
+ int ret;
+ int32_t res;
+
+ ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_INIT, 0, tdb_null,
+ ctdb, NULL, &res, NULL, NULL);
+ if (ret != 0 || res != 0) {
+ DEBUG(DEBUG_ERR,("Failed to send event_script_init\n"));
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+  tell the main daemon that the current monitor event has completed; returns 0 on success, -1 if the control fails
+ */
+int ctdb_ctrl_event_script_finished(struct ctdb_context *ctdb)
+{
+	int ret;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_FINISHED, 0, tdb_null,
+			   ctdb, NULL, &res, NULL, NULL);
+	if (ret != 0 || res != 0) {
+		DEBUG(DEBUG_ERR,("Failed to send event_script_finished\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ tell the main daemon we are starting to run an eventscript
+ */
+int ctdb_ctrl_event_script_start(struct ctdb_context *ctdb, const char *name)
+{
+ int ret;
+ int32_t res;
+ TDB_DATA data;
+
+ data.dptr = discard_const(name);
+ data.dsize = strlen(name)+1;
+
+ ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_START, 0, data,
+ ctdb, NULL, &res, NULL, NULL);
+ if (ret != 0 || res != 0) {
+ DEBUG(DEBUG_ERR,("Failed to send event_script_start\n"));
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ tell the main daemon the status of the script we ran
+ */
+int ctdb_ctrl_event_script_stop(struct ctdb_context *ctdb, int32_t result)
+{
+ int ret;
+ int32_t res;
+ TDB_DATA data;
+
+ data.dptr = (uint8_t *)&result;
+ data.dsize = sizeof(result);
+
+ ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_STOP, 0, data,
+ ctdb, NULL, &res, NULL, NULL);
+ if (ret != 0 || res != 0) {
+ DEBUG(DEBUG_ERR,("Failed to send event_script_stop\n"));
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/*
+  get the status of the monitor eventscripts from destnode; returns 0 with *script_status allocated on mem_ctx, else -1
+ */
+int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
+			      struct timeval timeout, uint32_t destnode,
+			      TALLOC_CTX *mem_ctx,
+			      struct ctdb_monitoring_wire **script_status)
+{
+	int ret;
+	TDB_DATA outdata;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, tdb_null,
+			   mem_ctx, &outdata, &res, &timeout, NULL);
+	if (ret != 0 || res != 0 || outdata.dsize == 0) {
+		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
+		return -1;
+	}
+
+	*script_status = (struct ctdb_monitoring_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+	talloc_free(outdata.dptr);
+	/* talloc_memdup yields NULL on allocation failure; report that rather than returning success with a NULL status */
+	return (*script_status == NULL) ? -1 : 0;
+}
+
diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h
index f0f510abd3..866ba76e2a 100644
--- a/ctdb/include/ctdb.h
+++ b/ctdb/include/ctdb.h
@@ -612,4 +612,28 @@ int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb);
int switch_from_server_to_client(struct ctdb_context *ctdb);
+#define MONITOR_SCRIPT_OK 0
+#define MONITOR_SCRIPT_TIMEOUT 1
+
+#define MAX_SCRIPT_NAME 31
+#define MAX_SCRIPT_OUTPUT 511
+struct ctdb_monitoring_script_wire {
+ char name[MAX_SCRIPT_NAME+1];
+ struct timeval start;
+ struct timeval finished;
+ int32_t status;
+ int32_t timedout;
+ char output[MAX_SCRIPT_OUTPUT+1];
+};
+
+struct ctdb_monitoring_wire {
+ uint32_t num_scripts;
+ struct ctdb_monitoring_script_wire scripts[1];
+};
+
+int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
+ struct timeval timeout, uint32_t destnode,
+ TALLOC_CTX *mem_ctx, struct ctdb_monitoring_wire **script_status);
+
+
#endif
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index ceac3842bd..988b6d94bb 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -412,6 +412,8 @@ struct ctdb_context {
TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
uint32_t *recd_ping_count;
TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
+ TALLOC_CTX *script_monitoring_ctx; /* a context where we store results while running the monitor event */
+ TALLOC_CTX *last_monitoring_ctx;
};
struct ctdb_db_context {
@@ -550,6 +552,11 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_TAKEOVER_IP = 89,
CTDB_CONTROL_GET_PUBLIC_IPS = 90,
CTDB_CONTROL_GET_NODEMAP = 91,
+ CTDB_CONTROL_EVENT_SCRIPT_INIT = 92,
+ CTDB_CONTROL_EVENT_SCRIPT_START = 93,
+ CTDB_CONTROL_EVENT_SCRIPT_STOP = 94,
+ CTDB_CONTROL_EVENT_SCRIPT_FINISHED = 95,
+ CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS = 96,
};
/*
@@ -1401,4 +1408,19 @@ int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, T
extern int script_log_level;
+int ctdb_ctrl_event_script_init(struct ctdb_context *ctdb);
+int ctdb_ctrl_event_script_start(struct ctdb_context *ctdb, const char *name);
+int ctdb_ctrl_event_script_stop(struct ctdb_context *ctdb, int32_t res);
+int ctdb_ctrl_event_script_finished(struct ctdb_context *ctdb);
+
+int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb);
+int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_event_script_finished(struct ctdb_context *ctdb);
+
+
+int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA *outdata);
+
+int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t len);
+
#endif
diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c
index b8b31c9b51..ac77696a4b 100644
--- a/ctdb/server/ctdb_control.c
+++ b/ctdb/server/ctdb_control.c
@@ -418,6 +418,25 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_recd_ping(ctdb);
+ case CTDB_CONTROL_EVENT_SCRIPT_INIT:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_event_script_init(ctdb);
+
+ case CTDB_CONTROL_EVENT_SCRIPT_START:
+ return ctdb_control_event_script_start(ctdb, indata);
+
+ case CTDB_CONTROL_EVENT_SCRIPT_STOP:
+ CHECK_CONTROL_DATA_SIZE(sizeof(int32_t));
+ return ctdb_control_event_script_stop(ctdb, indata);
+
+ case CTDB_CONTROL_EVENT_SCRIPT_FINISHED:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_event_script_finished(ctdb);
+
+ case CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_get_event_script_status(ctdb, outdata);
+
default:
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;
diff --git a/ctdb/server/ctdb_logging.c b/ctdb/server/ctdb_logging.c
index 06c7eb8b69..45a9f7410e 100644
--- a/ctdb/server/ctdb_logging.c
+++ b/ctdb/server/ctdb_logging.c
@@ -163,6 +163,8 @@ static void ctdb_log_handler(struct event_context *ev, struct fd_event *fde,
}
if (script_log_level <= LogLevel) {
do_debug("%*.*s\n", n2, n2, ctdb->log->buf);
+ /* log it in the eventsystem as well */
+ ctdb_log_event_script_output(ctdb, ctdb->log->buf, n2);
}
memmove(ctdb->log->buf, p+1, sizeof(ctdb->log->buf) - n1);
ctdb->log->buf_used -= n1;
@@ -174,6 +176,8 @@ static void ctdb_log_handler(struct event_context *ev, struct fd_event *fde,
if (script_log_level <= LogLevel) {
do_debug("%*.*s\n",
(int)ctdb->log->buf_used, (int)ctdb->log->buf_used, ctdb->log->buf);
+ /* log it in the eventsystem as well */
+ ctdb_log_event_script_output(ctdb, ctdb->log->buf, ctdb->log->buf_used);
}
ctdb->log->buf_used = 0;
}
diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c
index 6edd1a4dc6..2d0ac40861 100644
--- a/ctdb/server/eventscript.c
+++ b/ctdb/server/eventscript.c
@@ -52,6 +52,235 @@ struct ctdb_event_script_state {
const char *options;
};
+
+struct ctdb_monitor_script_status {
+ struct ctdb_monitor_script_status *next;
+ const char *name;
+ struct timeval start;
+ struct timeval finished;
+ int32_t status;
+ int32_t timedout;
+ char *output;
+};
+
+struct ctdb_monitoring_status {
+ struct timeval start;
+ struct timeval finished;
+ int32_t status;
+ struct ctdb_monitor_script_status *scripts;
+};
+
+
+/* called from ctdb_logging when we have received output on STDERR from
+ * one of the eventscripts
+ */
+int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t len)
+{
+ struct ctdb_monitoring_status *monitoring_status =
+ talloc_get_type(ctdb->script_monitoring_ctx,
+ struct ctdb_monitoring_status);
+ struct ctdb_monitor_script_status *script;
+
+ if (monitoring_status == NULL) {
+ return -1;
+ }
+
+ script = monitoring_status->scripts;
+ if (script == NULL) {
+ return -1;
+ }
+
+ if (script->output == NULL) {
+ script->output = talloc_asprintf(script, "%*.*s", len, len, str);
+ } else {
+ script->output = talloc_asprintf_append(script->output, "%*.*s", len, len, str);
+ }
+
+ return 0;
+}
+
+/* called from the event script child process when we are starting a new
+ * monitor event
+ */
+int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
+{
+ struct ctdb_monitoring_status *monitoring_status;
+
+ DEBUG(DEBUG_INFO, ("event script init called\n"));
+ if (ctdb->script_monitoring_ctx != NULL) {
+ talloc_free(ctdb->script_monitoring_ctx);
+ ctdb->script_monitoring_ctx = NULL;
+ }
+
+ monitoring_status = talloc_zero(ctdb, struct ctdb_monitoring_status);
+ if (monitoring_status == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " ERROR: Failed to talloc script_monitoring context\n"));
+ return -1;
+ }
+
+ ctdb->script_monitoring_ctx = monitoring_status;
+ monitoring_status->start = timeval_current();
+
+ return 0;
+}
+
+
+/* called from the event script child process when we are starting to run
+ * an eventscript
+ */
+int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+ const char *name = (const char *)indata.dptr;
+ struct ctdb_monitoring_status *monitoring_status =
+ talloc_get_type(ctdb->script_monitoring_ctx,
+ struct ctdb_monitoring_status);
+ struct ctdb_monitor_script_status *script;
+
+ DEBUG(DEBUG_INFO, ("event script start called : %s\n", name));
+
+ if (monitoring_status == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when starting to run script %s\n", name));
+ return -1;
+ }
+
+ script = talloc_zero(monitoring_status, struct ctdb_monitor_script_status);
+ if (script == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to talloc ctdb_monitor_script_status for script %s\n", name));
+ return -1;
+ }
+
+ script->next = monitoring_status->scripts;
+ script->name = talloc_strdup(script, name);
+ script->start = timeval_current();
+ monitoring_status->scripts = script;
+
+ return 0;
+}
+
+/* called from the event script child process when we have finished running
+ * an eventscript
+ */
+int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+ int32_t res = *((int32_t *)indata.dptr);
+ struct ctdb_monitoring_status *monitoring_status =
+ talloc_get_type(ctdb->script_monitoring_ctx,
+ struct ctdb_monitoring_status);
+ struct ctdb_monitor_script_status *script;
+
+ DEBUG(DEBUG_INFO, ("event script stop called : %d\n", (int)res));
+
+ if (monitoring_status == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
+ return -1;
+ }
+
+ script = monitoring_status->scripts;
+ if (script == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
+ return -1;
+ }
+
+ script->finished = timeval_current();
+ script->status = res;
+
+ return 0;
+}
+
+/* called from the event script child process when we have completed a
+ * monitor event
+ */
+int32_t ctdb_control_event_script_finished(struct ctdb_context *ctdb)
+{
+ struct ctdb_monitoring_status *monitoring_status =
+ talloc_get_type(ctdb->script_monitoring_ctx,
+ struct ctdb_monitoring_status);
+
+ DEBUG(DEBUG_INFO, ("event script finished called\n"));
+
+ if (monitoring_status == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when monitoring event finished\n"));
+ return -1;
+ }
+
+ monitoring_status->finished = timeval_current();
+ monitoring_status->status = MONITOR_SCRIPT_OK;
+ if (ctdb->last_monitoring_ctx) {
+ talloc_free(ctdb->last_monitoring_ctx);
+ }
+ ctdb->last_monitoring_ctx = ctdb->script_monitoring_ctx;
+ ctdb->script_monitoring_ctx = NULL;
+
+ return 0;
+}
+
+static struct ctdb_monitoring_wire *marshall_monitoring_scripts(TALLOC_CTX *mem_ctx, struct ctdb_monitoring_wire *monitoring_scripts, struct ctdb_monitor_script_status *script)
+{
+ struct ctdb_monitoring_script_wire script_wire;
+ size_t size;
+
+ if (script == NULL) {
+ return monitoring_scripts;
+ }
+ monitoring_scripts = marshall_monitoring_scripts(mem_ctx, monitoring_scripts, script->next);
+ if (monitoring_scripts == NULL) {
+ return NULL;
+ }
+
+ bzero(&script_wire, sizeof(struct ctdb_monitoring_script_wire));
+ strncpy(script_wire.name, script->name, MAX_SCRIPT_NAME);
+ script_wire.start = script->start;
+ script_wire.finished = script->finished;
+ script_wire.status = script->status;
+ script_wire.timedout = script->timedout;
+ if (script->output != NULL) {
+ strncpy(script_wire.output, script->output, MAX_SCRIPT_OUTPUT);
+ }
+
+ size = talloc_get_size(monitoring_scripts);
+ monitoring_scripts = talloc_realloc_size(mem_ctx, monitoring_scripts, size + sizeof(struct ctdb_monitoring_script_wire));
+ if (monitoring_scripts == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to talloc_resize monitoring_scripts blob\n"));
+ return NULL;
+ }
+
+ memcpy(&monitoring_scripts->scripts[monitoring_scripts->num_scripts], &script_wire, sizeof(script_wire));
+ monitoring_scripts->num_scripts++;
+
+ return monitoring_scripts;
+}
+
+int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA *outdata)
+{
+ struct ctdb_monitoring_status *monitoring_status =
+ talloc_get_type(ctdb->last_monitoring_ctx,
+ struct ctdb_monitoring_status);
+ struct ctdb_monitoring_wire *monitoring_scripts;
+
+ if (monitoring_status == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " last_monitor_ctx is NULL when reading status\n"));
+ return -1;
+ }
+
+ monitoring_scripts = talloc_size(outdata, offsetof(struct ctdb_monitoring_wire, scripts));
+ if (monitoring_scripts == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " failed to talloc monitoring_scripts structure\n"));
+ return -1;
+ }
+
+ monitoring_scripts->num_scripts = 0;
+ monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, monitoring_status->scripts);
+ if (monitoring_scripts == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Monitoring scritps is NULL. can not return data to client\n"));
+ return -1;
+ }
+
+ outdata->dsize = talloc_get_size(monitoring_scripts);
+ outdata->dptr = (uint8_t *)monitoring_scripts;
+
+ return 0;
+}
+
/*
run the event script - varargs version
this function is called and run in the context of a forked child
@@ -68,6 +297,27 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
struct dirent *de;
char *script;
int count;
+ int is_monitor = 0;
+
+ /* This is running in the forked child process. At this stage
+ * we want to switch from being a ctdb daemon into being a client
+ * and connect to the local daemon.
+ */
+ if (switch_from_server_to_client(ctdb) != 0) {
+ DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch eventscript child into client mode. shutting down.\n"));
+ exit(1);
+ }
+
+ if (!strcmp(options, "monitor")) {
+ is_monitor = 1;
+ }
+ if (is_monitor == 1) {
+ if (ctdb_ctrl_event_script_init(ctdb) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to init event script monitoring\n"));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+ }
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
/* we guarantee that only some specifically allowed event scripts are run
@@ -80,6 +330,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
if (i == ARRAY_SIZE(allowed_scripts)) {
DEBUG(DEBUG_ERR,("Refusing to run event scripts with option '%s' while in recovery\n",
options));
+ talloc_free(tmp_ctx);
return -1;
}
}
@@ -175,6 +426,14 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
child_state.start = timeval_current();
child_state.script_running = cmdstr;
+ if (is_monitor == 1) {
+ if (ctdb_ctrl_event_script_start(ctdb, script) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to start event script monitoring\n"));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+ }
+
ret = system(cmdstr);
/* if the system() call was successful, translate ret into the
return code from the command
@@ -182,9 +441,25 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
if (ret != -1) {
ret = WEXITSTATUS(ret);
}
+ if (is_monitor == 1) {
+ if (ctdb_ctrl_event_script_stop(ctdb, ret) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to stop event script monitoring\n"));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+ }
+
/* return an error if the script failed */
if (ret != 0) {
DEBUG(DEBUG_ERR,("Event script %s failed with error %d\n", cmdstr, ret));
+ if (is_monitor == 1) {
+ if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+ }
+
talloc_free(tmp_ctx);
return ret;
}
@@ -196,6 +471,14 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
child_state.start = timeval_current();
child_state.script_running = "finished";
+ if (is_monitor == 1) {
+ if (ctdb_ctrl_event_script_finished(ctdb) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to finish event script monitoring\n"));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+ }
+
talloc_free(tmp_ctx);
return 0;
}
@@ -249,6 +532,9 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
void *private_data = state->private_data;
struct ctdb_context *ctdb = state->ctdb;
char *options;
+ struct ctdb_monitoring_status *monitoring_status =
+ talloc_get_type(ctdb->script_monitoring_ctx,
+ struct ctdb_monitoring_status);
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
@@ -282,6 +568,21 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
callback(ctdb, -1, private_data);
}
+	if (monitoring_status != NULL) {
+		struct ctdb_monitor_script_status *script;
+		script = monitoring_status->scripts;
+		if (script != NULL) {
+			script->timedout = 1;
+		}
+		monitoring_status->status = MONITOR_SCRIPT_TIMEOUT;
+		/* drop any older result, then always publish this cycle as the last one (also when no older result exists) */
+		if (ctdb->last_monitoring_ctx) {
+			talloc_free(ctdb->last_monitoring_ctx);
+		}
+		ctdb->last_monitoring_ctx = ctdb->script_monitoring_ctx;
+		ctdb->script_monitoring_ctx = NULL;
+	}
+
talloc_free(options);
}
@@ -337,10 +638,8 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
signed char rt;
close(state->fd[0]);
- if (ctdb->do_setsched) {
- ctdb_restore_scheduler(ctdb);
- }
set_close_on_exec(state->fd[1]);
+
rt = ctdb_event_script_v(ctdb, state->options);
while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
sleep(1);
diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c
index b2014f0eb6..c48d587095 100644
--- a/ctdb/tools/ctdb.c
+++ b/ctdb/tools/ctdb.c
@@ -471,6 +471,44 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
return 0;
}
+
+/*
+  display the status of the monitoring scripts: one line per script, plus its captured output if it timed out or failed
+ */
+static int control_scriptstatus(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+	int i, ret;
+	struct ctdb_monitoring_wire *script_status;
+
+	ret = ctdb_ctrl_getscriptstatus(ctdb, TIMELIMIT(), options.pnn, ctdb, &script_status);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Unable to get script status from node %u\n", options.pnn));
+		return ret;
+	}
+
+	printf("%d scripts were executed last monitoring cycle\n", script_status->num_scripts);
+	for (i=0; i<script_status->num_scripts; i++) {
+		printf("%-20s Status:%s    ",
+			script_status->scripts[i].name,
+			script_status->scripts[i].timedout?"TIMEDOUT":script_status->scripts[i].status==0?"OK":"ERROR");
+		if (script_status->scripts[i].timedout == 0) {
+			printf("Duration:%.3lf ",
+			timeval_delta(&script_status->scripts[i].finished,
+			      &script_status->scripts[i].start));
+		}
+		printf("%s",
+			ctime(&script_status->scripts[i].start.tv_sec));
+		if ((script_status->scripts[i].timedout != 0)
+		||  (script_status->scripts[i].status != 0) ) {
+			printf("   OUTPUT:%s\n",
+				script_status->scripts[i].output);
+		}
+	}
+
+	return 0;
+}
+
+
/*
display the pnn of the recovery master
*/
@@ -2647,6 +2685,7 @@ static const struct {
{ "restoredb", control_restoredb, false, "restore the database from a file.", ""},
{ "recmaster", control_recmaster, false, "show the pnn for the recovery master."},
{ "setflags", control_setflags, false, "set flags for a node in the nodemap.", " "},
+ { "scriptstatus", control_scriptstatus, false, "show the status of the monitoring scripts"},
};
/*
--
cgit