summary refs log tree commit diff stats
path: root/source3/lib
diff options
context:
space:
mode:
author James Peach <jpeach@samba.org> 2006-04-04 00:27:50 +0000
committer Gerald (Jerry) Carter <jerry@samba.org> 2007-10-10 11:15:53 -0500
commit 4fa555980070d78b39711ef21d77628d26055bc2 (patch)
tree 38dd03bac5a57b45404f620ddf4eda2dd6ea06b0 /source3/lib
parent f5e7376bca7ab02ef48110ef6c0fb394851a606c (diff)
download samba-4fa555980070d78b39711ef21d77628d26055bc2.tar.gz
samba-4fa555980070d78b39711ef21d77628d26055bc2.tar.xz
samba-4fa555980070d78b39711ef21d77628d26055bc2.zip
r14898: This change is an attempt to improve the quality of the information that
is produced when a process exits abnormally. First, we coalesce the core dumping code so that we greatly improve our odds of being able to produce a core file, even in the case of a memory fault. I've removed duplicates of dump_core() and split it in two to reduce the amount of work needed to actually do the dump. Second, we refactor the exit_server code path to always log an explanation and a stack trace. My goal is to always produce enough log information for us to be able to explain any server exit, though there is a risk that this could produce too much log information on a flaky network. Finally, smbcontrol has gained a smbd fault injection operation to test the changes above. This is only enabled for developer builds. (This used to be commit 56bc02d64498eb3faf89f0c5452b9299daea8e95)
Diffstat (limited to 'source3/lib')
-rw-r--r-- source3/lib/fault.c 97
-rw-r--r-- source3/lib/util.c 65
2 files changed, 125 insertions, 37 deletions
diff --git a/source3/lib/fault.c b/source3/lib/fault.c
index 3cb66846393..8ae45f24353 100644
--- a/source3/lib/fault.c
+++ b/source3/lib/fault.c
@@ -20,7 +20,12 @@
#include "includes.h"
+#ifdef HAVE_SYS_PRCTL_H
+#include <sys/prctl.h>
+#endif
+
static void (*cont_fn)(void *);
+static pstring corepath;
/*******************************************************************
report a fault
@@ -33,11 +38,11 @@ static void fault_report(int sig)
counter++;
- DEBUG(0,("===============================================================\n"));
+ DEBUGSEP(0);
DEBUG(0,("INTERNAL ERROR: Signal %d in pid %d (%s)",sig,(int)sys_getpid(),SAMBA_VERSION_STRING));
DEBUG(0,("\nPlease read the Trouble-Shooting section of the Samba3-HOWTO\n"));
DEBUG(0,("\nFrom: http://www.samba.org/samba/docs/Samba3-HOWTO.pdf\n"));
- DEBUG(0,("===============================================================\n"));
+ DEBUGSEP(0);
smb_panic("internal error");
@@ -82,3 +87,91 @@ void fault_setup(void (*fn)(void *))
CatchSignal(SIGABRT,SIGNAL_CAST sig_fault);
#endif
}
+
+/*******************************************************************
+ Make all the preparations needed to safely dump a core file.
+
+ Works out the directory core files should be written to
+ (<log directory>/cores/<progname>), creates it with owner-only
+ permissions and records it in the file-scope corepath buffer that
+ dump_core() later changes into. Where the platform supports it, the
+ soft RLIMIT_CORE limit is raised to at least 16MB (capped at the
+ hard limit) and the process "dumpable" flag is set.
+
+ progname must not be NULL; it names the per-program subdirectory.
+********************************************************************/
+
+void dump_core_setup(const char *progname)
+{
+	pstring logbase;
+	char * end;
+
+	if (lp_logfile() && *lp_logfile()) {
+		/* Use the directory part of the configured log file. */
+		snprintf(logbase, sizeof(logbase), "%s", lp_logfile());
+		if ((end = strrchr_m(logbase, '/'))) {
+			*end = '\0';
+		}
+	} else {
+		/* We will end up here if the log file is given on the command
+		 * line by the -l option but the "log file" option is not set
+		 * in smb.conf.
+		 */
+		snprintf(logbase, sizeof(logbase), "%s", dyn_LOGFILEBASE);
+	}
+
+	SMB_ASSERT(progname != NULL);
+
+	/* Directory creation is best effort: the results are not checked
+	 * here because dump_core() verifies that it can chdir() into
+	 * corepath before dumping.
+	 */
+	snprintf(corepath, sizeof(corepath), "%s/cores", logbase);
+	mkdir(corepath,0700);
+
+	snprintf(corepath, sizeof(corepath), "%s/cores/%s",
+		logbase, progname);
+	mkdir(corepath,0700);
+
+	sys_chown(corepath,getuid(),getgid());
+	chmod(corepath,0700);
+
+#ifdef HAVE_GETRLIMIT
+#ifdef RLIMIT_CORE
+	{
+		struct rlimit rlp;
+
+		/* Only adjust the limit if we could read it; otherwise rlp
+		 * would hold indeterminate values.
+		 */
+		if (getrlimit(RLIMIT_CORE, &rlp) == 0) {
+			const rlim_t wanted = 16*1024*1024;
+
+			if (rlp.rlim_cur != RLIM_INFINITY &&
+			    rlp.rlim_cur < wanted) {
+				rlp.rlim_cur = wanted;
+
+				/* setrlimit() rejects a soft limit above the
+				 * hard limit, so settle for the hard limit
+				 * rather than failing to raise it at all.
+				 */
+				if (rlp.rlim_max != RLIM_INFINITY &&
+				    rlp.rlim_cur > rlp.rlim_max) {
+					rlp.rlim_cur = rlp.rlim_max;
+				}
+			}
+
+			setrlimit(RLIMIT_CORE, &rlp);
+			getrlimit(RLIMIT_CORE, &rlp);
+			DEBUG(3,("Maximum core file size limits now %d(soft) %d(hard)\n",
+				(int)rlp.rlim_cur,(int)rlp.rlim_max));
+		}
+	}
+#endif
+#endif
+
+#if defined(HAVE_PRCTL) && defined(PR_SET_DUMPABLE)
+	/* On Linux we lose the ability to dump core when we change our user
+	 * ID. We know how to dump core safely, so let's make sure we have our
+	 * dumpable flag set.
+	 */
+	prctl(PR_SET_DUMPABLE, 1);
+#endif
+
+	/* FIXME: if we have a core-plus-pid facility, configurably set
+	 * this up here.
+	 */
+}
+
+/*******************************************************************
+ Dump core and terminate the process.
+
+ If dump_core_setup() recorded a core directory in corepath, change
+ into it first so the core file lands there; if that chdir() fails we
+ refuse to dump core and exit(1) instead. The debug log is flushed
+ and the default SIGABRT disposition is restored before abort() is
+ called. This function does not return.
+********************************************************************/
+
+ void dump_core(void)
+{
+	if (*corepath != '\0') {
+		/* The chdir might fail if we dump core before we finish
+		 * processing the config file.
+		 */
+		if (chdir(corepath) != 0) {
+			/* Terminate the first message so the two log
+			 * entries do not run together on one line.
+			 */
+			DEBUG(0, ("unable to change to %s\n", corepath));
+			DEBUGADD(0, ("refusing to dump core\n"));
+			exit(1);
+		}
+
+		DEBUG(0,("dumping core in %s\n", corepath));
+	}
+
+	/* Mask out everything except the owner permission bits for files
+	 * created from here on, which includes the core file.
+	 */
+	umask(~(0700));
+	dbgflush();
+
+	/* Ensure we don't have a signal handler for abort. */
+#ifdef SIGABRT
+	CatchSignal(SIGABRT,SIGNAL_CAST SIG_DFL);
+#endif
+
+	abort();
+}
+
diff --git a/source3/lib/util.c b/source3/lib/util.c
index 0b831ea335b..0fbe4a13d3d 100644
--- a/source3/lib/util.c
+++ b/source3/lib/util.c
@@ -1545,19 +1545,10 @@ gid_t nametogid(const char *name)
Something really nasty happened - panic !
********************************************************************/
-#ifdef HAVE_LIBEXC_H
-#include <libexc.h>
-#endif
-
-static void smb_panic2(const char *why, BOOL decrement_pid_count )
+void smb_panic(const char *const why)
{
char *cmd;
int result;
-#ifdef HAVE_BACKTRACE_SYMBOLS
- void *backtrace_stack[BACKTRACE_STACK_SIZE];
- size_t backtrace_size;
- char **backtrace_strings;
-#endif
#ifdef DEVELOPER
{
@@ -1570,9 +1561,12 @@ static void smb_panic2(const char *why, BOOL decrement_pid_count )
}
#endif
+ DEBUG(0,("PANIC (pid %llu): %s\n",
+ (unsigned long long)sys_getpid(), why));
+ log_stack_trace();
+
/* only smbd needs to decrement the smbd counter in connections.tdb */
- if ( decrement_pid_count )
- decrement_smbd_process_count();
+ decrement_smbd_process_count();
cmd = lp_panic_action();
if (cmd && *cmd) {
@@ -1586,9 +1580,27 @@ static void smb_panic2(const char *why, BOOL decrement_pid_count )
DEBUG(0, ("smb_panic(): action returned status %d\n",
WEXITSTATUS(result)));
}
- DEBUG(0,("PANIC: %s\n", why));
+ dump_core();
+}
+
+/*******************************************************************
+ Print a backtrace of the stack to the debug log. This function
+ DELIBERATELY LEAKS MEMORY. The expectation is that you should
+ exit shortly after calling it.
+********************************************************************/
+
+#ifdef HAVE_LIBEXC_H
+#include <libexc.h>
+#endif
+
+void log_stack_trace(void)
+{
#ifdef HAVE_BACKTRACE_SYMBOLS
+ void *backtrace_stack[BACKTRACE_STACK_SIZE];
+ size_t backtrace_size;
+ char **backtrace_strings;
+
/* get the backtrace (stack frames) */
backtrace_size = backtrace(backtrace_stack,BACKTRACE_STACK_SIZE);
backtrace_strings = backtrace_symbols(backtrace_stack, backtrace_size);
@@ -1607,16 +1619,14 @@ static void smb_panic2(const char *why, BOOL decrement_pid_count )
#elif HAVE_LIBEXC
-#define NAMESIZE 32 /* Arbitrary */
-
/* The IRIX libexc library provides an API for unwinding the stack. See
* libexc(3) for details. Apparently trace_back_stack leaks memory, but
* since we are about to abort anyway, it hardly matters.
- *
- * Note that if we paniced due to a SIGSEGV or SIGBUS (or similar) this
- * will fail with a nasty message upon failing to open the /proc entry.
*/
{
+
+#define NAMESIZE 32 /* Arbitrary */
+
__uint64_t addrs[BACKTRACE_STACK_SIZE];
char * names[BACKTRACE_STACK_SIZE];
char namebuf[BACKTRACE_STACK_SIZE * NAMESIZE];
@@ -1646,24 +1656,9 @@ static void smb_panic2(const char *why, BOOL decrement_pid_count )
}
}
#undef NAMESIZE
+#else
+ DEBUG(0, ("unable to produce a stack trace on this platform\n"));
#endif
-
- dbgflush();
-#ifdef SIGABRT
- CatchSignal(SIGABRT,SIGNAL_CAST SIG_DFL);
-#endif
- abort();
-}
-
-/*******************************************************************
- wrapper for smb_panic2()
-********************************************************************/
-
- void smb_panic( const char *why )
-{
- smb_panic2( why, True );
- /* Notreached. */
- abort();
}
/*******************************************************************