summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Schwenke <martin@meltin.net>2012-09-06 20:22:38 +1000
committerMartin Schwenke <martin@meltin.net>2012-10-18 20:05:42 +1100
commit8d7562f3f8a0e62d9348c4a4ee72ffd8f1e41c4a (patch)
tree5abb1035fb4dd3f0bd8c6d9550786e280ae8438c
parent6372592982fd7302524bb9926f56029c57fe2037 (diff)
downloadsamba-8d7562f3f8a0e62d9348c4a4ee72ffd8f1e41c4a.tar.gz
samba-8d7562f3f8a0e62d9348c4a4ee72ffd8f1e41c4a.tar.xz
samba-8d7562f3f8a0e62d9348c4a4ee72ffd8f1e41c4a.zip
common: Debug ctdb_addr_to_str() using new function ctdb_external_trace()
We've seen this function report "Unknown family, 0" and then CTDB disappeared without a trace. If we can reproduce it then this might help us to debug it. The idea is that you do something like the following in /etc/sysconfig/ctdb: `export CTDB_EXTERNAL_TRACE="/etc/ctdb/config/gcore_trace.sh"`. When we hit this error then we call out to gcore to get a core file so we can do forensics. This might block CTDB for a few seconds. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 7895bc003f087ab2f3181df3c464386f59bfcc39)
-rwxr-xr-xctdb/Makefile.in1
-rw-r--r--ctdb/common/ctdb_util.c25
-rwxr-xr-xctdb/config/gcore_trace.sh3
-rw-r--r--ctdb/include/ctdb_private.h1
-rw-r--r--ctdb/packaging/RPM/ctdb.spec.in1
5 files changed, 31 insertions, 0 deletions
diff --git a/ctdb/Makefile.in b/ctdb/Makefile.in
index 48cb57c5d6c..6c822601735 100755
--- a/ctdb/Makefile.in
+++ b/ctdb/Makefile.in
@@ -359,6 +359,7 @@ install: all $(PMDA_INSTALL)
if [ ! -f $(DESTDIR)$(etcdir)/ctdb/notify.sh ];then ${INSTALLCMD} -m 755 config/notify.sh $(DESTDIR)$(etcdir)/ctdb; fi
${INSTALLCMD} -m 755 config/debug-hung-script.sh $(DESTDIR)$(etcdir)/ctdb
if [ ! -f $(DESTDIR)$(etcdir)/ctdb/ctdb-crash-cleanup.sh ];then ${INSTALLCMD} -m 755 config/ctdb-crash-cleanup.sh $(DESTDIR)$(etcdir)/ctdb; fi
+ if [ ! -f $(DESTDIR)$(etcdir)/ctdb/gcore_trace.sh ];then ${INSTALLCMD} -m 755 config/gcore_trace.sh $(DESTDIR)$(etcdir)/ctdb; fi
install_pmda:
$(INSTALLCMD) -m 755 -d $(PMDA_DEST_DIR)
diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c
index ed322ac8b7f..71dee2b9544 100644
--- a/ctdb/common/ctdb_util.c
+++ b/ctdb/common/ctdb_util.c
@@ -59,6 +59,30 @@ void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
abort();
}
+/* Invoke an external program to do some sort of tracing on the CTDB
+ * process.  This might block for a little while, because system()
+ * does not return until the command has finished.  The external
+ * program is specified by the environment variable
+ * CTDB_EXTERNAL_TRACE.  This program should take one argument: the
+ * pid of the process to trace.  Commonly, the program would be a
+ * wrapper script around gcore.
+ *
+ * If CTDB_EXTERNAL_TRACE is not set this function does nothing.
+ * Tracing is best effort: failure to build or run the command is
+ * logged and otherwise ignored, so the caller carries on regardless.
+ */
+void ctdb_external_trace(void)
+{
+	const char *t = getenv("CTDB_EXTERNAL_TRACE");
+	char *cmd;
+	int ret;
+
+	if (t == NULL) {
+		return;
+	}
+
+	/* Command line is "<program> <pid>", on the NULL talloc context. */
+	cmd = talloc_asprintf(NULL, "%s %lu", t, (unsigned long) getpid());
+	if (cmd == NULL) {
+		/* system(NULL) would only probe for a shell, not run the trace. */
+		DEBUG(DEBUG_ERR, (__location__ " Out of memory, unable to run external trace\n"));
+		return;
+	}
+
+	DEBUG(DEBUG_WARNING,("begin external trace: %s\n", cmd));
+	ret = system(cmd);
+	if (ret == -1) {
+		DEBUG(DEBUG_ERR, ("external trace: unable to run \"%s\"\n", cmd));
+	} else if (ret != 0) {
+		/* ret is a wait status here, not a plain exit code. */
+		DEBUG(DEBUG_ERR, ("external trace: \"%s\" returned status %d\n", cmd, ret));
+	}
+	DEBUG(DEBUG_WARNING,("end external trace: %s\n", cmd));
+	talloc_free(cmd);
+}
+
/*
parse a IP:port pair
*/
@@ -555,6 +579,7 @@ char *ctdb_addr_to_str(ctdb_sock_addr *addr)
break;
default:
DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
+ ctdb_external_trace();
}
return cip;
diff --git a/ctdb/config/gcore_trace.sh b/ctdb/config/gcore_trace.sh
new file mode 100755
index 00000000000..4d3e1d1d584
--- /dev/null
+++ b/ctdb/config/gcore_trace.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+gcore -o "/var/log/core" "$1" 2>&1 | logger -t "ctdb:gcore_trace"
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 94b45c0d850..6f6d898167d 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -666,6 +666,7 @@ struct ctdb_fetch_handle {
/* internal prototypes */
void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
void ctdb_fatal(struct ctdb_context *ctdb, const char *msg);
+void ctdb_external_trace(void);
bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2);
int ctdb_parse_address(struct ctdb_context *ctdb,
TALLOC_CTX *mem_ctx, const char *str,
diff --git a/ctdb/packaging/RPM/ctdb.spec.in b/ctdb/packaging/RPM/ctdb.spec.in
index 6a6398bdc7e..48cd0ab6b5e 100644
--- a/ctdb/packaging/RPM/ctdb.spec.in
+++ b/ctdb/packaging/RPM/ctdb.spec.in
@@ -123,6 +123,7 @@ rm -rf $RPM_BUILD_ROOT
%config(noreplace) %{_sysconfdir}/ctdb/notify.sh
%config(noreplace) %{_sysconfdir}/ctdb/debug-hung-script.sh
%config(noreplace) %{_sysconfdir}/ctdb/ctdb-crash-cleanup.sh
+%config(noreplace) %{_sysconfdir}/ctdb/gcore_trace.sh
%config(noreplace) %{_sysconfdir}/ctdb/functions
%attr(755,root,root) %{initdir}/ctdb