summary | refs | log | tree | commit | diff | stats
path: root/ctdb/utils/pmda/pmda_ctdb.c
diff options
context:
space:
mode:
Diffstat (limited to 'ctdb/utils/pmda/pmda_ctdb.c')
-rw-r--r-- ctdb/utils/pmda/pmda_ctdb.c 596
1 files changed, 596 insertions, 0 deletions
diff --git a/ctdb/utils/pmda/pmda_ctdb.c b/ctdb/utils/pmda/pmda_ctdb.c
new file mode 100644
index 0000000000..927fea5805
--- /dev/null
+++ b/ctdb/utils/pmda/pmda_ctdb.c
@@ -0,0 +1,596 @@
+/*
+ * CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP)
+ *
+ * Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2011 David Disseldorp
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <pcp/pmapi.h>
+#include <pcp/impl.h>
+#include <pcp/pmda.h>
+#include "../../include/includes.h"
+#include "../../lib/tevent/tevent.h"
+#include "../../include/ctdb.h"
+#include "../../include/ctdb_private.h"
+#include "../../include/ctdb_protocol.h"
+#include "domain.h"
+
+/*
+ * CTDB PMDA
+ *
+ * This PMDA connects to the locally running ctdbd daemon and pulls
+ * statistics for export via PCP. The ctdbd Unix domain socket path can be
+ * specified with the CTDB_SOCKET environment variable, otherwise the default
+ * path is used.
+ */
+
+/*
+ * All metrics supported in this PMDA - one table entry for each.
+ *
+ * Each PMDA_PMID(cluster,item) pair identifies one metric. The fetch
+ * callback in this file dispatches on the cluster number, and additionally
+ * on the item number for the node (10), client (11) and timeout (12)
+ * clusters, so these numbers must stay in step with that code.
+ *
+ * Every metric here uses PM_INDOM_NULL (denoting a metric that only ever
+ * has a single value); this PMDA hands no instance domain table to
+ * pmdaInit().
+ */
+static pmdaMetric metrictab[] = {
+ /* num_clients */
+ { NULL, { PMDA_PMID(0,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* frozen */
+ { NULL, { PMDA_PMID(1,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* recovering */
+ { NULL, { PMDA_PMID(3,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* client_packets_sent */
+ { NULL, { PMDA_PMID(4,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client_packets_recv */
+ { NULL, { PMDA_PMID(5,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node_packets_sent */
+ { NULL, { PMDA_PMID(6,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node_packets_recv */
+ { NULL, { PMDA_PMID(7,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* keepalive_packets_sent */
+ { NULL, { PMDA_PMID(8,8), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* keepalive_packets_recv */
+ { NULL, { PMDA_PMID(9,9), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.req_call (cluster 10 holds the per-node counters) */
+ { NULL, { PMDA_PMID(10,10), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.reply_call */
+ { NULL, { PMDA_PMID(10,11), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.req_dmaster */
+ { NULL, { PMDA_PMID(10,12), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.reply_dmaster */
+ { NULL, { PMDA_PMID(10,13), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.reply_error */
+ { NULL, { PMDA_PMID(10,14), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.req_message */
+ { NULL, { PMDA_PMID(10,15), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.req_control */
+ { NULL, { PMDA_PMID(10,16), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.reply_control */
+ { NULL, { PMDA_PMID(10,17), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client.req_call (cluster 11 holds the per-client counters) */
+ { NULL, { PMDA_PMID(11,18), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client.req_message */
+ { NULL, { PMDA_PMID(11,19), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client.req_control */
+ { NULL, { PMDA_PMID(11,20), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* timeouts.call (cluster 12 holds the timeout counters) */
+ { NULL, { PMDA_PMID(12,21), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* timeouts.control */
+ { NULL, { PMDA_PMID(12,22), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* timeouts.traverse */
+ { NULL, { PMDA_PMID(12,23), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* total_calls */
+ { NULL, { PMDA_PMID(13,24), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* pending_calls */
+ { NULL, { PMDA_PMID(14,25), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /*
+  * lockwait_calls
+  * NOTE(review): item 27 is also used by pending_lockwait_calls below and
+  * item 26 is unused; the two PMIDs still differ by cluster - confirm
+  * against the namespace file before renumbering.
+  */
+ { NULL, { PMDA_PMID(15,27), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* pending_lockwait_calls */
+ { NULL, { PMDA_PMID(16,27), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* childwrite_calls */
+ { NULL, { PMDA_PMID(17,28), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* pending_childwrite_calls */
+ { NULL, { PMDA_PMID(18,29), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* memory_used */
+ { NULL, { PMDA_PMID(19,30), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0) }, },
+ /* max_hop_count */
+ { NULL, { PMDA_PMID(20,31), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* max_reclock_ctdbd (reclock.ctdbd.max) */
+ { NULL, { PMDA_PMID(21,32), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* max_reclock_recd (reclock.recd.max) */
+ { NULL, { PMDA_PMID(22,33), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* max_call_latency (call_latency.max) */
+ { NULL, { PMDA_PMID(23,34), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* max_lockwait_latency (lockwait_latency.max) */
+ { NULL, { PMDA_PMID(24,35), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* max_childwrite_latency (childwrite_latency.max) */
+ { NULL, { PMDA_PMID(25,36), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* num_recoveries */
+ { NULL, { PMDA_PMID(26,37), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+};
+
+/* Event context for the ctdbd connection; created in
+ * pmda_ctdb_daemon_connect() and freed on disconnect or connect failure. */
+static struct event_context *ev;
+/* ctdbd client handle; NULL while disconnected, which makes
+ * pmda_ctdb_fetch() attempt a reconnect. */
+static struct ctdb_context *ctdb;
+/* Statistics reply used by the fetch in progress; assigned in
+ * pmda_ctdb_fetch() and read by pmda_ctdb_fetch_cb(). */
+static struct ctdb_statistics *stats;
+
+/*
+ * Read callback installed on the ctdbd queue.
+ *
+ * A zero-length read is reported on stderr and otherwise ignored; any
+ * other packet is handed unchanged to ctdb_client_read_cb().
+ */
+static void
+pmda_ctdb_q_read_cb(uint8_t *data, size_t cnt, void *args)
+{
+	if (cnt != 0) {
+		ctdb_client_read_cb(data, cnt, args);
+		return;
+	}
+
+	/* nothing is torn down here: cleanup happens on request timeout */
+	fprintf(stderr, "ctdbd unreachable\n");
+}
+
+
+/*
+ * Connect to the local ctdbd daemon.
+ *
+ * Creates the event context and ctdb client handle (file-scope 'ev' and
+ * 'ctdb'), connects a Unix domain socket to the path given by the
+ * CTDB_SOCKET environment variable (falling back to CTDB_PATH), installs
+ * the read queue and looks up the local node number.
+ *
+ * Returns 0 on success. On failure returns -1 with every partially
+ * created resource released and both 'ev' and 'ctdb' reset to NULL.
+ */
+static int
+pmda_ctdb_daemon_connect(void)
+{
+	const char *socket_name;
+	int ret;
+	struct sockaddr_un addr;
+
+	ev = event_context_init(NULL);
+	if (ev == NULL) {
+		fprintf(stderr, "Failed to init event ctx\n");
+		return -1;
+	}
+
+	ctdb = ctdb_init(ev);
+	if (ctdb == NULL) {
+		fprintf(stderr, "Failed to init ctdb\n");
+		goto err_ev;
+	}
+
+	socket_name = getenv("CTDB_SOCKET");
+	if (socket_name == NULL) {
+		socket_name = CTDB_PATH;
+	}
+
+	ret = ctdb_set_socketname(ctdb, socket_name);
+	if (ret == -1) {
+		fprintf(stderr, "ctdb_set_socketname failed - %s\n",
+			ctdb_errstr(ctdb));
+		goto err_ctdb;
+	}
+
+	/*
+	 * ctdb_socket_connect() sets a default queue callback handler that
+	 * calls exit() if ctdbd is unavailable on recv, use our own wrapper to
+	 * work around this
+	 */
+
+	/*
+	 * Refuse paths that do not fit in sun_path together with their
+	 * terminating NUL rather than connecting to a truncated name.
+	 */
+	if (strlen(ctdb->daemon.name) >= sizeof(addr.sun_path)) {
+		fprintf(stderr, "ctdb socket path too long - %s\n",
+			ctdb->daemon.name);
+		goto err_ctdb;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	/* copy at most size-1 bytes so the zeroed last byte terminates it */
+	strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path) - 1);
+
+	ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (ctdb->daemon.sd == -1) {
+		fprintf(stderr, "Failed to open client socket\n");
+		goto err_ctdb;
+	}
+
+	set_nonblocking(ctdb->daemon.sd);
+	set_close_on_exec(ctdb->daemon.sd);
+
+	if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		fprintf(stderr, "Failed to connect to ctdb daemon via %s\n",
+			ctdb->daemon.name);
+		goto err_sd;
+	}
+
+	ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
+					      CTDB_DS_ALIGNMENT,
+					      pmda_ctdb_q_read_cb, ctdb,
+					      "to-ctdbd");
+	if (ctdb->daemon.queue == NULL) {
+		fprintf(stderr, "Failed to setup queue\n");
+		goto err_sd;
+	}
+
+	ctdb->pnn = ctdb_ctrl_getpnn(ctdb, timeval_current_ofs(3, 0),
+				     CTDB_CURRENT_NODE);
+	if (ctdb->pnn == (uint32_t)-1) {
+		fprintf(stderr, "Failed to get ctdb pnn\n");
+		goto err_sd;
+	}
+
+	return 0;
+
+err_sd:
+	close(ctdb->daemon.sd);
+err_ctdb:
+	talloc_free(ctdb);
+err_ev:
+	talloc_free(ev);
+	/* leave no dangling handles behind for the next reconnect attempt */
+	ev = NULL;
+	ctdb = NULL;
+	return -1;
+}
+
+/*
+ * Tear down the connection to ctdbd.
+ *
+ * Runs the transport shutdown hook when one is set, closes the daemon
+ * socket if it is open, frees the ctdb handle and the event context, and
+ * resets 'ctdb' to NULL so the next fetch reconnects.
+ */
+static void
+pmda_ctdb_daemon_disconnect(void)
+{
+	int sd;
+
+	if (ctdb->methods != NULL) {
+		ctdb->methods->shutdown(ctdb);
+	}
+
+	/* read the descriptor only after the shutdown hook has run */
+	sd = ctdb->daemon.sd;
+	if (sd != -1) {
+		close(sd);
+	}
+
+	talloc_free(ctdb);
+	talloc_free(ev);
+	ctdb = NULL;
+}
+
+/*
+ * Store the per-node counter selected by 'item' (10..17) from the current
+ * statistics into atom->ul.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an item outside that range.
+ */
+static int
+fill_node(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 10:
+		atom->ul = stats->node.req_call;
+		return 0;
+	case 11:
+		atom->ul = stats->node.reply_call;
+		return 0;
+	case 12:
+		atom->ul = stats->node.req_dmaster;
+		return 0;
+	case 13:
+		atom->ul = stats->node.reply_dmaster;
+		return 0;
+	case 14:
+		atom->ul = stats->node.reply_error;
+		return 0;
+	case 15:
+		atom->ul = stats->node.req_message;
+		return 0;
+	case 16:
+		atom->ul = stats->node.req_control;
+		return 0;
+	case 17:
+		atom->ul = stats->node.reply_control;
+		return 0;
+	default:
+		break;
+	}
+
+	return PM_ERR_PMID;
+}
+
+/*
+ * Store the per-client counter selected by 'item' (18..20) from the
+ * current statistics into atom->ul.
+ *
+ * Returns 0 on success or PM_ERR_PMID for any other item.
+ */
+static int
+fill_client(unsigned int item, pmAtomValue *atom)
+{
+	if (item == 18) {
+		atom->ul = stats->client.req_call;
+	} else if (item == 19) {
+		atom->ul = stats->client.req_message;
+	} else if (item == 20) {
+		atom->ul = stats->client.req_control;
+	} else {
+		return PM_ERR_PMID;
+	}
+
+	return 0;
+}
+
+/*
+ * Store the timeout counter selected by 'item' (21..23) from the current
+ * statistics into atom->ul.
+ *
+ * Returns 0 on success or PM_ERR_PMID for any other item.
+ */
+static int
+fill_timeout(unsigned int item, pmAtomValue *atom)
+{
+	int status = 0;
+
+	switch (item) {
+	case 21:
+		atom->ul = stats->timeouts.call;
+		break;
+	case 22:
+		atom->ul = stats->timeouts.control;
+		break;
+	case 23:
+		atom->ul = stats->timeouts.traverse;
+		break;
+	default:
+		status = PM_ERR_PMID;
+		break;
+	}
+
+	return status;
+}
+
+/*
+ * callback provided to pmdaFetch
+ *
+ * Copies the value of the metric described by 'mdesc' out of the
+ * file-scope 'stats' reply into 'atom'. The metric is selected by the
+ * cluster number of its PMID; clusters 10, 11 and 12 are further split by
+ * item number in fill_node(), fill_client() and fill_timeout().
+ *
+ * The atom member written must match the type declared for the metric in
+ * metrictab: 'ul' for PM_TYPE_U32 metrics, 'd' for PM_TYPE_DOUBLE ones.
+ *
+ * Returns 0 on success, PM_ERR_INST if an instance is requested,
+ * PM_ERR_VALUE if no statistics are available, and PM_ERR_PMID for an
+ * unknown cluster or item.
+ */
+static int
+pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
+{
+	__pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid);
+
+	/* no metric in this PMDA has an instance domain */
+	if (inst != PM_IN_NULL) {
+		return PM_ERR_INST;
+	}
+
+	if (stats == NULL) {
+		fprintf(stderr, "stats not available\n");
+		return PM_ERR_VALUE;
+	}
+
+	switch (id->cluster) {
+	case 0:
+		atom->ul = stats->num_clients;
+		break;
+	case 1:
+		atom->ul = stats->frozen;
+		break;
+	case 3:
+		atom->ul = stats->recovering;
+		break;
+	case 4:
+		atom->ul = stats->client_packets_sent;
+		break;
+	case 5:
+		atom->ul = stats->client_packets_recv;
+		break;
+	case 6:
+		atom->ul = stats->node_packets_sent;
+		break;
+	case 7:
+		atom->ul = stats->node_packets_recv;
+		break;
+	case 8:
+		atom->ul = stats->keepalive_packets_sent;
+		break;
+	case 9:
+		atom->ul = stats->keepalive_packets_recv;
+		break;
+	case 10:
+		return fill_node(id->item, atom);
+	case 11:
+		return fill_client(id->item, atom);
+	case 12:
+		return fill_timeout(id->item, atom);
+	case 13:
+		atom->ul = stats->total_calls;
+		break;
+	case 14:
+		atom->ul = stats->pending_calls;
+		break;
+	case 15:
+		atom->ul = stats->lockwait_calls;
+		break;
+	case 16:
+		atom->ul = stats->pending_lockwait_calls;
+		break;
+	case 17:
+		atom->ul = stats->childwrite_calls;
+		break;
+	case 18:
+		atom->ul = stats->pending_childwrite_calls;
+		break;
+	case 19:
+		atom->ul = stats->memory_used;
+		break;
+	case 20:
+		atom->ul = stats->max_hop_count;
+		break;
+	case 21:
+		atom->d = stats->reclock.ctdbd.max;
+		break;
+	case 22:
+		atom->d = stats->reclock.recd.max;
+		break;
+	case 23:
+		atom->d = stats->call_latency.max;
+		break;
+	case 24:
+		atom->d = stats->lockwait_latency.max;
+		break;
+	case 25:
+		atom->d = stats->childwrite_latency.max;
+		break;
+	case 26:
+		/*
+		 * num_recoveries is declared PM_TYPE_U32 in metrictab, so it
+		 * must be stored through the 'ul' member, not 'd'.
+		 */
+		atom->ul = stats->num_recoveries;
+		break;
+	default:
+		return PM_ERR_PMID;
+	}
+
+	return 0;
+}
+
+/*
+ * This routine is called once for each pmFetch(3) operation, so is a
+ * good place to do once-per-fetch functions, such as value caching or
+ * instance domain evaluation.
+ *
+ * Reconnects to ctdbd if there is no connection, requests a statistics
+ * snapshot with a one second timeout, and lets pmdaFetch() read the
+ * individual values from it via pmda_ctdb_fetch_cb(). The snapshot is
+ * published through the file-scope 'stats' pointer only while pmdaFetch()
+ * runs; it is freed and 'stats' is reset to NULL before returning.
+ *
+ * Returns the result of pmdaFetch(), or PM_ERR_VALUE if ctdbd cannot be
+ * reached, the control fails (the connection is then dropped so the next
+ * fetch reconnects), or the reply has an unexpected size.
+ */
+static int
+pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
+{
+	int ret;
+	TDB_DATA data;
+	int32_t res;
+	struct timeval ctdb_timeout;
+
+	if (ctdb == NULL) {
+		fprintf(stderr, "attempting reconnect to ctdbd\n");
+		ret = pmda_ctdb_daemon_connect();
+		if (ret < 0) {
+			fprintf(stderr, "reconnect failed\n");
+			return PM_ERR_VALUE;
+		}
+	}
+
+	ctdb_timeout = timeval_current_ofs(1, 0);
+	ret = ctdb_control(ctdb, ctdb->pnn, 0,
+			   CTDB_CONTROL_STATISTICS, 0, tdb_null,
+			   ctdb, &data, &res, &ctdb_timeout, NULL);
+
+	if (ret != 0 || res != 0) {
+		fprintf(stderr, "ctdb control for statistics failed, reconnecting\n");
+		pmda_ctdb_daemon_disconnect();
+		return PM_ERR_VALUE;
+	}
+
+	/* validate the reply before exposing it to the fetch callback */
+	if (data.dsize != sizeof(struct ctdb_statistics)) {
+		fprintf(stderr, "incorrect statistics size %zu - not %zu\n",
+			data.dsize, sizeof(struct ctdb_statistics));
+		talloc_free(data.dptr);
+		return PM_ERR_VALUE;
+	}
+
+	stats = (struct ctdb_statistics *)data.dptr;
+
+	ret = pmdaFetch(numpmid, pmidlist, resp, pmda);
+
+	/*
+	 * The snapshot is only valid for this fetch: clear the global after
+	 * freeing it so the callback's NULL check never sees a stale pointer.
+	 */
+	talloc_free(stats);
+	stats = NULL;
+
+	return ret;
+}
+
+/*
+ * Initialise the agent
+ *
+ * Does nothing if 'dp' already carries a non-zero status. Otherwise
+ * installs the fetch entry point and the per-metric fetch callback, then
+ * registers metrictab with pmdaInit() without any instance domains.
+ */
+void
+pmda_ctdb_init(pmdaInterface *dp)
+{
+	if (dp->status == 0) {
+		dp->version.two.fetch = pmda_ctdb_fetch;
+		pmdaSetFetchCallBack(dp, pmda_ctdb_fetch_cb);
+
+		pmdaInit(dp, NULL, 0, metrictab,
+			 (sizeof(metrictab) / sizeof(metrictab[0])));
+	}
+}
+
+/*
+ * Return the path of this PMDA's help file, "<PCP_PMDAS_DIR>/ctdb/help".
+ *
+ * The path is built on the first call and kept in a static buffer, so the
+ * returned pointer stays valid and must not be freed.
+ */
+static char *
+helpfile(void)
+{
+	static char path[MAXPATHLEN];
+
+	if (path[0] != '\0') {
+		return path;
+	}
+
+	snprintf(path, sizeof(path), "%s/ctdb/help",
+		 pmGetConfig("PCP_PMDAS_DIR"));
+	return path;
+}
+
+/*
+ * Print the command line synopsis to stderr and exit with status 1.
+ */
+static void
+usage(void)
+{
+	static const char options[] =
+		"Options:\n"
+		" -d domain use domain (numeric) for metrics domain of PMDA\n"
+		" -l logfile write log into logfile rather than using default log name\n"
+		"\nExactly one of the following options may appear:\n"
+		" -i port expect PMCD to connect on given inet port (number or name)\n"
+		" -p expect PMCD to supply stdin/stdout (pipe)\n"
+		" -u socket expect PMCD to connect on given unix domain socket\n";
+
+	fprintf(stderr, "Usage: %s [options]\n\n", pmProgname);
+	fputs(options, stderr);
+	exit(1);
+}
+
+/*
+ * Set up the agent if running as a daemon.
+ *
+ * Registers the program name, prepares the dispatch table for the CTDB
+ * domain, parses the standard PMDA options (any option left unhandled by
+ * pmdaGetOpt() counts as an error and prints the usage text), then opens
+ * the log, initialises the agent, connects to PMCD and runs the main loop.
+ */
+int
+main(int argc, char **argv)
+{
+	pmdaInterface dispatch;
+	char log_file[] = "pmda_ctdb.log";
+	int nerrors = 0;
+	int opt;
+
+	__pmSetProgname(argv[0]);
+
+	pmdaDaemon(&dispatch, PMDA_INTERFACE_2, pmProgname, CTDB,
+		   log_file, helpfile());
+
+	opt = pmdaGetOpt(argc, argv, "d:i:l:pu:?", &dispatch, &nerrors);
+	if (opt != EOF) {
+		nerrors++;
+	}
+
+	if (nerrors != 0) {
+		usage();
+	}
+
+	pmdaOpenLog(&dispatch);
+	pmda_ctdb_init(&dispatch);
+	pmdaConnect(&dispatch);
+	pmdaMain(&dispatch);
+
+	exit(0);
+}
+