From e796e95a4efbfeb04223f61c600c7712da7b500f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 17 Jan 2011 16:39:11 +0100 Subject: replace KerneloopsScanner plugin and dumpoops with abrt-dump-oops, use it in abrtd Patch adds new tool: $ abrt-dump-oops Usage: abrt-dump-oops [-vsrdow] FILE Extract oops from syslog/dmesg file -v, --verbose Be verbose -s Log to syslog -r Parse kernel's message buffer before parsing FILE -d Create ABRT dump for every oops found -o Print found oopses on standard output -w Do not exit, watch the file for new oopses It extends dumpoops. Extensions: * it can watch the syslog using -w option (uses inotify) * it can scan dmesg buffer too In this way, it also becomes a replacement for KerneloopsScanner plugin: oops-detecting logic is taken verbatim from KerneloopsScanner source. abrtd is changed to start it if it sees this directive in abrt.conf: [ LogScanners ] abrt-dump-oops = abrt-dump-oops -drw /var/log/messages Default abrt.conf is changed to have such line. Patch doesn't remove KerneloopsScanner plugin and dumpoops binary yet, I will do it in a separate trivial patch. Signed-off-by: Denys Vlasenko --- abrt.spec | 1 + examples/cut_here.right | 2 +- examples/not_oops1.right | 2 +- examples/not_oops2.right | 2 +- examples/not_oops3.right | 2 +- examples/oops1.right | 2 +- examples/oops2.right | 2 +- examples/oops3.right | 2 +- examples/test.sh | 10 +- src/daemon/Daemon.cpp | 57 +++- src/daemon/Settings.cpp | 25 +- src/daemon/Settings.h | 5 +- src/daemon/abrt.conf | 12 +- src/plugins/Makefile.am | 20 ++ src/plugins/abrt-dump-oops.c | 712 +++++++++++++++++++++++++++++++++++++++++++ 15 files changed, 820 insertions(+), 36 deletions(-) create mode 100644 src/plugins/abrt-dump-oops.c diff --git a/abrt.spec b/abrt.spec index 0bdd873d..469a9e04 100644 --- a/abrt.spec +++ b/abrt.spec @@ -428,6 +428,7 @@ fi %{_mandir}/man7/abrt-KerneloopsScanner.7.gz %{_libdir}/%{name}/KerneloopsReporter.glade %{_mandir}/man7/abrt-KerneloopsReporter.7.gz +%{_bindir}/abrt-dump-oops %{_bindir}/abrt-action-analyze-oops %{_bindir}/abrt-action-kerneloops diff --git a/examples/cut_here.right b/examples/cut_here.right index b5970f03..28548d1a 100644 --- a/examples/cut_here.right +++ b/examples/cut_here.right @@ -1,4 +1,4 @@ -dumpoops: found oopses: 1 +abrt-dump-oops: Found oopses: 1 Version: 2.6.32-19.el6.x86_64 WARNING: at arch/x86/kernel/cpu/mtrr/generic.c:467 diff --git a/examples/not_oops1.right b/examples/not_oops1.right index 5f2c00f0..64c3a19a 100644 --- a/examples/not_oops1.right +++ b/examples/not_oops1.right @@ -1 +1 @@ -dumpoops: found oopses: 0 +abrt-dump-oops: Found oopses: 0 diff --git a/examples/not_oops2.right b/examples/not_oops2.right index 5f2c00f0..64c3a19a 100644 --- a/examples/not_oops2.right +++ b/examples/not_oops2.right @@ -1 +1 @@ -dumpoops: found oopses: 0 +abrt-dump-oops: Found oopses: 0 diff --git a/examples/not_oops3.right b/examples/not_oops3.right index 5f2c00f0..64c3a19a 100644 --- a/examples/not_oops3.right +++ b/examples/not_oops3.right @@ -1 +1 @@ -dumpoops: found oopses: 0 +abrt-dump-oops: Found oopses: 0 diff --git a/examples/oops1.right b/examples/oops1.right index 67005199..2e642e72 100644 --- a/examples/oops1.right +++ b/examples/oops1.right @@ -1,4 +1,4 @@ -dumpoops: found oopses: 1 +abrt-dump-oops: Found oopses: 1 Version: 2.6.27.9-159.fc10.i686 #1 BUG: unable to handle kernel NULL pointer dereference at 00000000 diff --git a/examples/oops2.right b/examples/oops2.right index 1a6f7447..4d2f7ba7 100644 --- a/examples/oops2.right +++ b/examples/oops2.right @@ -1,4 +1,4 @@ -dumpoops: found oopses: 1 +abrt-dump-oops: Found oopses: 1 Version: 2.6.33.1-19.fc13.x86_64 [ INFO: possible recursive locking detected ] diff --git a/examples/oops3.right b/examples/oops3.right index 86c4f431..7f68e690 100644 --- a/examples/oops3.right +++ b/examples/oops3.right @@ -1,4 +1,4 @@ -dumpoops: found oopses: 3 +abrt-dump-oops: Found oopses: 3 Version: 2.6.32-0.51.rc7.git2.fc13.x86_64 general protection fault: 0000 [#1] SMP diff --git a/examples/test.sh b/examples/test.sh index 6c704faa..8f7566fb 100755 --- a/examples/test.sh +++ b/examples/test.sh @@ -1,7 +1,7 @@ for f in *.test; do - b="${f%%.test}" - dumpoops -s "$f" >"$b".out 2>&1 - if diff -u "$b".right "$b".out >"$b".diff 2>&1; then - rm "$b".out "$b".diff - fi + b="${f%%.test}" + abrt-dump-oops -o "$f" >"$b".out 2>&1 + if diff -u "$b".right "$b".out >"$b".diff 2>&1; then + rm "$b".out "$b".diff + fi done diff --git a/src/daemon/Daemon.cpp b/src/daemon/Daemon.cpp index c01ea75a..e864c466 100644 --- a/src/daemon/Daemon.cpp +++ b/src/daemon/Daemon.cpp @@ -90,6 +90,7 @@ static volatile sig_atomic_t s_sig_caught; static int s_signal_pipe[2]; static int s_signal_pipe_write = -1; static int s_upload_watch = -1; +static pid_t log_scanner_pid = -1; static unsigned s_timeout; static bool s_exiting; @@ -432,12 +433,22 @@ static gboolean handle_signal_cb(GIOChannel *gio, GIOCondition condition, gpoint s_exiting = 1; else { - if (socket_client_count) - socket_client_count--; - if (!socket_channel_cb_id) + pid_t pid; + while ((pid = waitpid(-1, NULL, WNOHANG)) > 0) { - log("Accepting connections on '%s'", SOCKET_FILE); - socket_channel_cb_id = add_watch_or_die(socket_channel, G_IO_IN | G_IO_PRI, server_socket_cb); + if (pid == log_scanner_pid) + { + log("log scanner exited"); + log_scanner_pid = -1; + continue; + } + if (socket_client_count) + socket_client_count--; + if (!socket_channel_cb_id) + { + log("Accepting connections on '%s'", SOCKET_FILE); + socket_channel_cb_id = add_watch_or_die(socket_channel, G_IO_IN | G_IO_PRI, server_socket_cb); + } } } return TRUE; @@ -648,6 +659,7 @@ static void start_syslog_logging() xdup2(STDIN_FILENO, STDERR_FILENO); openlog("abrtd", 0, LOG_DAEMON); logmode = LOGMODE_SYSLOG; + putenv((char*)"ABRT_SYSLOG=1"); } static void ensure_writable_dir(const char *dir, mode_t mode, const char *user) @@ -719,9 +731,10 @@ int main(int argc, char** argv) unsigned opts = parse_opts(argc, argv, abrtd_options, abrtd_usage); - if (opts & OPT_s) - start_syslog_logging(); + msg_prefix = "abrtd"; /* for log(), error_msg() and such */ + unsetenv("ABRT_SYSLOG"); + putenv(xasprintf("ABRT_VERBOSE=%u", g_verbose)); /* When dbus daemon starts us, it doesn't set PATH * (I saw it set only DBUS_STARTER_ADDRESS and DBUS_STARTER_BUS_TYPE). * In this case, set something sane: @@ -730,9 +743,8 @@ int main(int argc, char** argv) if (!env_path || !env_path[0]) putenv((char*)"PATH=/usr/sbin:/usr/bin:/sbin:/bin"); - putenv(xasprintf("ABRT_VERBOSE=%u", g_verbose)); - - msg_prefix = "abrtd"; /* for log(), error_msg() and such */ + if (opts & OPT_s) + start_syslog_logging(); xpipe(s_signal_pipe); close_on_exec_on(s_signal_pipe[0]); @@ -879,6 +891,22 @@ int main(int argc, char** argv) /* Only now we want signal pipe to work */ s_signal_pipe_write = s_signal_pipe[1]; + if (g_settings_sLogScanners) + { + const char *scanner_argv[] = { + "/bin/sh", "-c", + g_settings_sLogScanners, + NULL + }; + log_scanner_pid = fork_execv_on_steroids(EXECFLG_INPUT_NUL, + (char**)scanner_argv, + /*pipefds:*/ NULL, + /*unsetenv_vec:*/ NULL, + /*dir:*/ NULL, + /*uid:*/ 0); + VERB1 log("Started log scanner, pid:%d", (int)log_scanner_pid); + } + /* Enter the event loop */ log("Init complete, entering main loop"); run_main_loop(pMainloop); @@ -913,10 +941,17 @@ int main(int argc, char** argv) g_main_loop_unref(pMainloop); settings_free(); + + if (log_scanner_pid > 0) + { + VERB2 log("Sending SIGTERM to %d", log_scanner_pid); + kill(log_scanner_pid, SIGTERM); + } + /* Exiting */ if (s_sig_caught && s_sig_caught != SIGALRM && s_sig_caught != SIGCHLD) { - error_msg_and_die("Got signal %d, exiting", s_sig_caught); + error_msg("Got signal %d, exiting", s_sig_caught); signal(s_sig_caught, SIG_DFL); raise(s_sig_caught); } diff --git a/src/daemon/Settings.cpp b/src/daemon/Settings.cpp index abdd0324..63453933 100644 --- a/src/daemon/Settings.cpp +++ b/src/daemon/Settings.cpp @@ -19,8 +19,9 @@ #include "abrtlib.h" #include "Settings.h" -#define SECTION_COMMON "Common" -#define SECTION_CRON "Cron" +#define SECTION_COMMON "Common" +#define SECTION_LOG_SCANNERS "LogScanners" +#define SECTION_CRON "Cron" /* Conf file has this format: * [ section_name1 ] @@ -47,13 +48,16 @@ static map_string_t s_mapSectionCron; /* one line: "OpenGPGCheck = value" */ bool g_settings_bOpenGPGCheck = false; /* one line: "OpenGPGPublicKeys = value1,value2" */ -GList *g_settings_setOpenGPGPublicKeys = NULL; -GList *g_settings_setBlackListedPkgs = NULL; -GList *g_settings_setBlackListedPaths = NULL; -char *g_settings_sWatchCrashdumpArchiveDir = NULL; +GList * g_settings_setOpenGPGPublicKeys = NULL; +GList * g_settings_setBlackListedPkgs = NULL; +GList * g_settings_setBlackListedPaths = NULL; +char * g_settings_sWatchCrashdumpArchiveDir = NULL; unsigned int g_settings_nMaxCrashReportsSize = 1000; bool g_settings_bProcessUnpackaged = false; +/* [ LogScanners ] */ +char * g_settings_sLogScanners = NULL; + /* [ Cron ] */ /* many lines, one per key: "map_key = aa_first,bb_first(bb_second),cc_first" */ map_cron_t g_settings_mapCron; @@ -277,7 +281,7 @@ static int ReadConfigurationFromFile(FILE *fp) value += line[ii]; continue; } - if (isspace(line[ii]) && !is_quote) + if (isspace(line[ii]) && !is_quote && is_key) { continue; } @@ -304,8 +308,9 @@ static int ReadConfigurationFromFile(FILE *fp) section += line[ii]; continue; } - if (line[ii] == '=' && !is_quote) + if (is_key && line[ii] == '=' && !is_quote) { + while (isspace(line[ii + 1])) ii++; is_key = false; key = value; value = ""; @@ -351,6 +356,10 @@ static int ReadConfigurationFromFile(FILE *fp) s_mapSectionCommon[key] += ","; s_mapSectionCommon[key] += value; } + else if (section == SECTION_LOG_SCANNERS) + { + g_settings_sLogScanners = xstrdup(value.c_str()); + } else if (section == SECTION_CRON) { if (s_mapSectionCron[key] != "") diff --git a/src/daemon/Settings.h b/src/daemon/Settings.h index 71824c74..01cd1adc 100644 --- a/src/daemon/Settings.h +++ b/src/daemon/Settings.h @@ -31,7 +31,10 @@ extern GList *g_settings_setBlackListedPaths; extern unsigned int g_settings_nMaxCrashReportsSize; extern bool g_settings_bOpenGPGCheck; extern bool g_settings_bProcessUnpackaged; -extern char *g_settings_sWatchCrashdumpArchiveDir; +extern char * g_settings_sWatchCrashdumpArchiveDir; + +extern char * g_settings_sLogScanners; + extern map_cron_t g_settings_mapCron; int LoadSettings(); diff --git a/src/daemon/abrt.conf b/src/daemon/abrt.conf index 07ea51a0..fedefea0 100644 --- a/src/daemon/abrt.conf +++ b/src/daemon/abrt.conf @@ -8,7 +8,7 @@ OpenGPGCheck = yes # Blacklisted packages # -BlackList = nspluginwrapper, valgrind, strace +BlackList = nspluginwrapper,valgrind,strace # Process crashes in executables which do not belong to any package? # @@ -16,7 +16,7 @@ ProcessUnpackaged = no # Blacklisted executable paths (shell patterns) # -BlackListedPaths = /usr/share/doc/*, */example* +BlackListedPaths = /usr/share/doc/*,*/example* # Enable this if you want abrtd to auto-unpack crashdump tarballs which appear # in this directory (for example, uploaded via ftp, scp etc). @@ -30,12 +30,16 @@ BlackListedPaths = /usr/share/doc/*, */example* MaxCrashReportsSize = 1000 +# So far we support only one line here +# +[ LogScanners ] +abrt-dump-oops = abrt-dump-oops -drw /var/log/messages + + # Which Action plugins to run repeatedly # [ Cron ] # h:m - at h:m # s - every s seconds -120 = KerneloopsScanner - #02:00 = FileTransfer diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index c960e66e..5d3b5890 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -4,6 +4,7 @@ bin_SCRIPTS = \ abrt-action-install-debuginfo.py bin_PROGRAMS = \ + abrt-dump-oops \ abrt-action-analyze-c \ abrt-action-analyze-python \ abrt-action-analyze-oops \ @@ -90,6 +91,25 @@ libKerneloopsScanner_la_LDFLAGS = \ -avoid-version \ $(GLIB_LIBS) +abrt_dump_oops_SOURCES = \ + abrt-dump-oops.c +abrt_dump_oops_CPPFLAGS = \ + -I$(srcdir)/../include/report -I$(srcdir)/../include \ + -I$(srcdir)/../lib \ + -DBIN_DIR=\"$(bindir)\" \ + -DVAR_RUN=\"$(VAR_RUN)\" \ + -DCONF_DIR=\"$(CONF_DIR)\" \ + -DLOCALSTATEDIR='"$(localstatedir)"' \ + -DDEBUG_DUMPS_DIR=\"$(DEBUG_DUMPS_DIR)\" \ + -DDEBUG_INFO_DIR=\"$(DEBUG_INFO_DIR)\" \ + -DPLUGINS_LIB_DIR=\"$(PLUGINS_LIB_DIR)\" \ + -DPLUGINS_CONF_DIR=\"$(PLUGINS_CONF_DIR)\" \ + $(GLIB_CFLAGS) \ + -D_GNU_SOURCE \ + -Wall -Werror +abrt_dump_oops_LDADD = \ + ../lib/libreport.la + abrt_action_analyze_c_SOURCES = \ abrt-action-analyze-c.c abrt_action_analyze_c_CPPFLAGS = \ diff --git a/src/plugins/abrt-dump-oops.c b/src/plugins/abrt-dump-oops.c new file mode 100644 index 00000000..ad222edd --- /dev/null +++ b/src/plugins/abrt-dump-oops.c @@ -0,0 +1,712 @@ +/* + Copyright (C) 2011 ABRT team + Copyright (C) 2011 RedHat Inc + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Authors: + Anton Arapov + Arjan van de Ven + */ +#include +#include /* __NR_syslog */ +#include +#include /* ioctl(FIONREAD) */ +#include "abrtlib.h" +#include "parse_options.h" + +#define PROGNAME "abrt-dump-oops" + +static void queue_oops(GList **vec, const char *data, const char *version) +{ + char *ver_data = xasprintf("%s\n%s", version, data); + *vec = g_list_append(*vec, ver_data); +} + +/* + * extract_version tries to find the kernel version in given data + */ +static char *extract_version(const char *linepointer) +{ + if (strstr(linepointer, "Pid") + || strstr(linepointer, "comm") + || strstr(linepointer, "CPU") + || strstr(linepointer, "REGS") + || strstr(linepointer, "EFLAGS") + ) { + char* start; + char* end; + + start = strstr((char*)linepointer, "2.6."); + if (start) + { + end = strchr(start, ')'); + if (!end) + end = strchrnul(start, ' '); + return xstrndup(start, end-start); + } + } + + return NULL; +} + +/* + * extract_oops tries to find oops signatures in a log + */ +struct line_info { + char *ptr; + char level; +}; + +static int record_oops(GList **oopses, struct line_info* lines_info, int oopsstart, int oopsend) +{ + int q; + int len; + char *oops; + char *version; + + len = 2; + for (q = oopsstart; q <= oopsend; q++) + len += strlen(lines_info[q].ptr) + 1; + + oops = (char*)xzalloc(len); + + version = NULL; + for (q = oopsstart; q <= oopsend; q++) + { + if (!version) + version = extract_version(lines_info[q].ptr); + + if (lines_info[q].ptr[0]) + { + strcat(oops, lines_info[q].ptr); + strcat(oops, "\n"); + } + } + int rv = 1; + /* too short oopses are invalid */ + if (strlen(oops) > 100) + queue_oops(oopses, oops, version ? version : "undefined"); + else + { + VERB3 log("Dropped oops: too short"); + rv = 0; + } + free(oops); + free(version); + return rv; +} + +#define REALLOC_CHUNK 1000 +static int extract_oopses(GList **oopses, char *buffer, size_t buflen) +{ + char *c; + int linecount = 0; + int lines_info_alloc = 0; + struct line_info *lines_info = NULL; + + /* Split buffer into lines */ + + if (buflen != 0) + buffer[buflen - 1] = '\n'; /* the buffer usually ends with \n, but let's make sure */ + c = buffer; + while (c < buffer + buflen) + { + char linelevel; + char *c9; + char *colon; + + c9 = (char*)memchr(c, '\n', buffer + buflen - c); /* a \n will always be found */ + assert(c9); + *c9 = '\0'; /* turn the \n into a string termination */ + if (c9 == c) + goto next_line; + + /* Is it a syslog file (/var/log/messages or similar)? + * Even though _usually_ it looks like "Nov 19 12:34:38 localhost kernel: xxx", + * some users run syslog in non-C locale: + * "2010-02-22T09:24:08.156534-08:00 gnu-4 gnome-session[2048]: blah blah" + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !!! + * We detect it by checking for N:NN:NN pattern in first 15 chars + * (and this still is not good enough... false positive: "pci 0000:15:00.0: PME# disabled") + */ + colon = strchr(c, ':'); + if (colon && colon > c && colon < c + 15 + && isdigit(colon[-1]) /* N:... */ + && isdigit(colon[1]) /* ...N:NN:... */ + && isdigit(colon[2]) + && colon[3] == ':' + && isdigit(colon[4]) /* ...N:NN:NN... */ + && isdigit(colon[5]) + ) { + /* It's syslog file, not a bare dmesg */ + + /* Skip non-kernel lines */ + char *kernel_str = strstr(c, "kernel: "); + if (kernel_str == NULL) + { + /* if we see our own marker: + * "hostname abrt: Kerneloops: Reported 1 kernel oopses to Abrt" + * we know we submitted everything upto here already */ + if (strstr(c, "abrt:") && strstr(c, "Abrt")) + { + VERB3 log("Found our marker at line %d, restarting line count from 0", linecount); + linecount = 0; + lines_info_alloc = 0; + free(lines_info); + lines_info = NULL; + } + goto next_line; + } + c = kernel_str + sizeof("kernel: ")-1; + } + + linelevel = 0; + /* store and remove kernel log level */ + if (*c == '<' && c[1] && c[2] == '>') + { + linelevel = c[1]; + c += 3; + } + /* remove jiffies time stamp counter if present */ + if (*c == '[') + { + char *c2 = strchr(c, '.'); + char *c3 = strchr(c, ']'); + if (c2 && c3 && (c2 < c3) && (c3-c) < 14 && (c2-c) < 8) + { + c = c3 + 1; + if (*c == ' ') + c++; + } + } + if (linecount >= lines_info_alloc) + { + lines_info_alloc += REALLOC_CHUNK; + lines_info = (struct line_info*)xrealloc(lines_info, + lines_info_alloc * sizeof(lines_info[0])); + } + lines_info[linecount].ptr = c; + lines_info[linecount].level = linelevel; + linecount++; +next_line: + c = c9 + 1; + } + + /* Analyze lines */ + + int i; + char prevlevel = 0; + int oopsstart = -1; + int inbacktrace = 0; + int oopsesfound = 0; + + i = 0; + while (i < linecount) + { + char *curline = lines_info[i].ptr; + + if (curline == NULL) + { + i++; + continue; + } + while (*curline == ' ') + curline++; + + if (oopsstart < 0) + { + /* Find start-of-oops markers */ + /* In some comparisons, we skip 1st letter, to avoid dealing with + * changes in capitalization in kernel. For example, I see that + * current kernel git (at 2011-01-01) has both "kernel BUG at ..." + * and "Kernel BUG at ..." messages, and I don't want to change + * the code below whenever kernel is changed to use "K" (or "k") + * uniformly. + */ + if (strstr(curline, /*g*/ "eneral protection fault:")) + oopsstart = i; + else if (strstr(curline, "BUG:")) + oopsstart = i; + else if (strstr(curline, /*k*/ "ernel BUG at")) + oopsstart = i; + else if (strstr(curline, "do_IRQ: stack overflow:")) + oopsstart = i; + else if (strstr(curline, "RTNL: assertion failed")) + oopsstart = i; + else if (strstr(curline, /*e*/ "eek! page_mapcount(page) went negative!")) + oopsstart = i; + else if (strstr(curline, /*n*/ "ear stack overflow (cur:")) + oopsstart = i; + else if (strstr(curline, /*d*/ "ouble fault:")) + oopsstart = i; + else if (strstr(curline, /*b*/ "adness at")) + oopsstart = i; + else if (strstr(curline, "NETDEV WATCHDOG")) + oopsstart = i; + else if (strstr(curline, "WARNING: at ")) /* WARN_ON() generated message */ + oopsstart = i; + else if (strstr(curline, /*u*/ "nable to handle kernel")) + oopsstart = i; + else if (strstr(curline, /*s*/ "ysctl table check failed")) + oopsstart = i; + else if (strstr(curline, "INFO: possible recursive locking detected")) + oopsstart = i; + // Not needed: "--[ cut here ]--" is always followed + // by "Badness at", "kernel BUG at", or "WARNING: at" string + //else if (strstr(curline, "------------[ cut here ]------------")) + // oopsstart = i; + else if (strstr(curline, "list_del corruption")) + oopsstart = i; + else if (strstr(curline, "list_add corruption")) + oopsstart = i; + + if (i >= 3 && strstr(curline, "Oops:")) + oopsstart = i-3; + + if (oopsstart >= 0) + { + /* debug information */ + VERB3 { + log("Found oops at line %d: '%s'", oopsstart, lines_info[oopsstart].ptr); + if (oopsstart != i) + log("Trigger line is %d: '%s'", i, c); + } + /* try to find the end marker */ + int i2 = i + 1; + while (i2 < linecount && i2 < (i+50)) + { + if (strstr(lines_info[i2].ptr, "---[ end trace")) + { + inbacktrace = 1; + i = i2; + break; + } + i2++; + } + } + } + + /* Are we entering a call trace part? */ + /* a call trace starts with "Call Trace:" or with the " [<.......>] function+0xFF/0xAA" pattern */ + if (oopsstart >= 0 && !inbacktrace) + { + if (strstr(curline, "Call Trace:")) + inbacktrace = 1; + else + if (strnlen(curline, 9) > 8 + && curline[0] == '[' && curline[1] == '<' + && strstr(curline, ">]") + && strstr(curline, "+0x") + && strstr(curline, "/0x") + ) { + inbacktrace = 1; + } + } + + /* Are we at the end of an oops? */ + else if (oopsstart >= 0 && inbacktrace) + { + int oopsend = INT_MAX; + + /* line needs to start with " [" or have "] [" if it is still a call trace */ + /* example: "[] radeon_get_ring_head+0x16/0x41 [radeon]" */ + if (curline[0] != '[' + && !strstr(curline, "] [") + && !strstr(curline, "--- Exception") + && !strstr(curline, "LR =") + && !strstr(curline, "<#DF>") + && !strstr(curline, "") + && !strstr(curline, "") + && !strstr(curline, "<>") + && strncmp(curline, "Code: ", 6) != 0 + && strncmp(curline, "RIP ", 4) != 0 + && strncmp(curline, "RSP ", 4) != 0 + ) { + oopsend = i-1; /* not a call trace line */ + } + /* oops lines are always more than 8 chars long */ + else if (strnlen(curline, 8) < 8) + oopsend = i-1; + /* single oopses are of the same loglevel */ + else if (lines_info[i].level != prevlevel) + oopsend = i-1; + else if (strstr(curline, "Instruction dump:")) + oopsend = i; + /* if a new oops starts, this one has ended */ + else if (strstr(curline, "WARNING: at ") && oopsstart != i) /* WARN_ON() generated message */ + oopsend = i-1; + else if (strstr(curline, "Unable to handle") && oopsstart != i) + oopsend = i-1; + /* kernel end-of-oops marker (not including marker itself) */ + else if (strstr(curline, "---[ end trace")) + oopsend = i-1; + + if (oopsend <= i) + { + VERB3 log("End of oops at line %d (%d): '%s'", oopsend, i, lines_info[oopsend].ptr); + if (record_oops(oopses, lines_info, oopsstart, oopsend)) + oopsesfound++; + oopsstart = -1; + inbacktrace = 0; + } + } + + prevlevel = lines_info[i].level; + i++; + + if (oopsstart >= 0) + { + /* Do we have a suspiciously long oops? Cancel it */ + if (i-oopsstart > 60) + { + inbacktrace = 0; + oopsstart = -1; + VERB3 log("Dropped oops, too long"); + continue; + } + if (!inbacktrace && i-oopsstart > 40) + { + /*inbacktrace = 0; - already is */ + oopsstart = -1; + VERB3 log("Dropped oops, too long"); + continue; + } + } + } /* while (i < linecount) */ + + /* process last oops if we have one */ + if (oopsstart >= 0 && inbacktrace) + { + int oopsend = i-1; + VERB3 log("End of oops at line %d (end of file): '%s'", oopsend, lines_info[oopsend].ptr); + if (record_oops(oopses, lines_info, oopsstart, oopsend)) + oopsesfound++; + } + + free(lines_info); + return oopsesfound; +} + +#define MAX_SCAN_BLOCK (4*1024*1024) +#define READ_AHEAD (10*1024) + +static void scan_dmesg(GList **oops_list) +{ + VERB1 log("Scanning dmesg"); + + /* syslog(3) - read the last len bytes from the log buffer + * (non-destructively), but dont read more than was written + * into the buffer since the last "clear ring buffer" cmd. + * Returns the number of bytes read. + */ + char *buffer = xzalloc(16*1024); + syscall(__NR_syslog, 3, buffer, 16*1024 - 1); /* always NUL terminated */ + extract_oopses(oops_list, buffer, strlen(buffer)); + free(buffer); +} + +static int scan_syslog_file(GList **oops_list, int fd, struct stat *statbuf, int partial_line_len) +{ + /* fstat(fd, &statbuf) was just done by caller */ + + off_t cur_pos = lseek(fd, 0, SEEK_CUR); + if (statbuf->st_size <= cur_pos) + return partial_line_len; /* we are at EOF, nothing to do */ + + VERB3 log("File grew by %llu bytes, from %llu to %llu", + (long long)(statbuf->st_size - cur_pos), + (long long)(cur_pos), + (long long)(statbuf->st_size)); + + /* Do not try to allocate an absurd amount of memory. */ + int sz = MAX_SCAN_BLOCK - READ_AHEAD; + if (sz > statbuf->st_size - cur_pos) + sz = statbuf->st_size - cur_pos; + + /* Rewind to the beginning of the current line */ + if (partial_line_len > 0 && lseek(fd, -partial_line_len, SEEK_CUR) != (off_t)-1) + { + VERB3 log("Went back %u bytes", partial_line_len); + cur_pos -= partial_line_len; + sz += partial_line_len; + } + + /* + * In theory we have a race here, since someone can spew + * to /var/log/messages before we read it in... + * We try to deal with it by reading READ_AHEAD extra. + */ + sz += READ_AHEAD; + char *buffer = xzalloc(sz); + + partial_line_len = 0; + do { + int r = full_read(fd, buffer, sz-1); + if (r <= 0) + break; + VERB3 log("Read %u bytes", r); + + /* For future scans, try to find where last (incomplete) line starts */ + partial_line_len = 0; + char *last_newline = memrchr(buffer, '\n', r) ? : buffer-1; + partial_line_len = buffer+r - (last_newline+1); + if (partial_line_len > 500) /* cap it */ + partial_line_len = 500; + + extract_oopses(oops_list, buffer, r); + cur_pos += r; + } while (cur_pos < statbuf->st_size); + + free(buffer); + + return partial_line_len; +} + +/* returns number of errors */ +static int save_oops_to_dump_dir(GList *oops_list, unsigned oops_cnt) +{ + unsigned countdown = 16; /* do not report hundreds of oopses */ + unsigned idx = oops_cnt; + time_t t = time(NULL); + pid_t my_pid = getpid(); + + VERB1 log("Saving %u oopses as crash dump dirs", idx >= countdown ? countdown-1 : idx); + + char *tainted_str = NULL; + /* once tainted flag is set to 1, only restart can reset the flag to 0 */ + FILE *tainted_fp = fopen("/proc/sys/kernel/tainted", "r"); + if (tainted_fp) + { + tainted_str = xmalloc_fgetline(tainted_fp); + fclose(tainted_fp); + } + else + error_msg("/proc/sys/kernel/tainted does not exist"); + + int errors = 0; + + while (idx != 0 && --countdown != 0) + { + char path[sizeof(DEBUG_DUMPS_DIR"/kerneloops-%lu-%lu-%lu") + 3 * sizeof(long)*3]; + sprintf(path, DEBUG_DUMPS_DIR"/kerneloops-%lu-%lu-%lu", (long)t, (long)my_pid, (long)idx); + + char *first_line = (char*)g_list_nth_data(oops_list, --idx); + char *second_line = (char*)strchr(first_line, '\n'); /* never NULL */ + *second_line++ = '\0'; + + struct dump_dir *dd = dd_create(path, /*uid:*/ 0); + if (dd) + { + dd_save_text(dd, FILENAME_ANALYZER, "Kerneloops"); + dd_save_text(dd, FILENAME_EXECUTABLE, "kernel"); + dd_save_text(dd, FILENAME_KERNEL, first_line); + dd_save_text(dd, FILENAME_CMDLINE, "not_applicable"); + dd_save_text(dd, FILENAME_BACKTRACE, second_line); + /* Optional, makes generated bz more informative */ + strchrnul(second_line, '\n')[0] = '\0'; + dd_save_text(dd, FILENAME_REASON, second_line); + + if (tainted_str && tainted_str[0] != '0') + dd_save_text(dd, FILENAME_TAINTED, tainted_str); + + free(tainted_str); + dd_close(dd); + } + else + errors++; + } + + return errors; +} + +int main(int argc, char **argv) +{ + char *env_verbose = getenv("ABRT_VERBOSE"); + if (env_verbose) + g_verbose = atoi(env_verbose); + + const char *filename = "/var/log/messages"; + + const char *program_usage = _( + PROGNAME" [-vsrdow] FILE\n" + "\n" + "Extract oops from syslog/dmesg file" + ); + enum { + OPT_v = 1 << 0, + OPT_s = 1 << 1, + OPT_r = 1 << 2, + OPT_d = 1 << 3, + OPT_o = 1 << 4, + OPT_w = 1 << 5, + }; + /* Keep enum above and order of options below in sync! */ + struct options program_options[] = { + OPT__VERBOSE(&g_verbose), + OPT_BOOL('s', NULL, NULL, _("Log to syslog")), + OPT_BOOL('r', NULL, NULL, _("Parse kernel's message buffer before parsing FILE")), + OPT_BOOL('d', NULL, NULL, _("Create ABRT dump for every oops found")), + OPT_BOOL('o', NULL, NULL, _("Print found oopses on standard output")), + OPT_BOOL('w', NULL, NULL, _("Do not exit, watch the file for new oopses")), + OPT_END() + }; + + unsigned opts = parse_opts(argc, argv, program_options, program_usage); + argv += optind; + if (!argv[0]) + show_usage_and_die(program_usage, program_options); + filename = argv[0]; + + putenv(xasprintf("ABRT_VERBOSE=%u", g_verbose)); + msg_prefix = PROGNAME; + if ((opts & OPT_s) + || getenv("ABRT_SYSLOG") + ) { + openlog(msg_prefix, 0, LOG_DAEMON); + logmode = LOGMODE_SYSLOG; + } + + int inotify_fd = -1; + if (opts & OPT_w) + { + inotify_fd = inotify_init(); + if (inotify_fd == -1) + perror_msg_and_die("inotify_init failed"); + //close_on_exec_on(inotify_fd); + } + + GList *oops_list = NULL; + if (opts & OPT_r) + /* Scan dmesg (only once even with -w) */ + scan_dmesg(&oops_list); + + int partial_line_len = 0; + struct stat statbuf; + int file_fd = -1; + int wd = -1; + + while (1) /* loops only if -w */ + { + /* If file is already opened, parse oopses from current pos */ + if (file_fd >= 0) + { + memset(&statbuf, 0, sizeof(statbuf)); + fstat(file_fd, &statbuf); + partial_line_len = scan_syslog_file(&oops_list, file_fd, &statbuf, partial_line_len); + + /* Was file deleted or replaced? */ + ino_t fd_ino = statbuf.st_ino; + if (stat(filename, &statbuf) != 0 || statbuf.st_ino != fd_ino) /* yes */ + { + VERB2 log("Can't stat '%s', closing fd", filename); + close(file_fd); + if (wd >= 0) + inotify_rm_watch(inotify_fd, wd); + file_fd = -1; + wd = -1; + partial_line_len = 0; + } + } + + /* If file isn't opened, try to open it and parse oopses */ + if (file_fd < 0) + { + file_fd = open(filename, O_RDONLY); + if (file_fd < 0) + { + if (!(opts & OPT_w)) + perror_msg_and_die("Can't open '%s'", filename); + /* with -w, we ignore open errors */ + } + else + { + VERB2 log("Opened '%s'", filename); + /* For -w case, if we don't have inotify watch yet, open one */ + if ((opts & OPT_w) && wd < 0) + { + wd = inotify_add_watch(inotify_fd, filename, IN_MODIFY | IN_MOVE_SELF | IN_DELETE_SELF); + if (wd < 0) + perror_msg("inotify_add_watch failed on '%s'", filename); + else + VERB2 log("Added inotify watch for '%s'", filename); + } + if (fstat(file_fd, &statbuf) == 0) + { + /* If file is large, skip the beginning. + * IOW: ignore old log messages because they are unlikely + * to have sufficiently recent data to be useful. + */ + if (statbuf.st_size > (MAX_SCAN_BLOCK - READ_AHEAD)) + lseek(file_fd, statbuf.st_size - (MAX_SCAN_BLOCK - READ_AHEAD), SEEK_SET); + /* Note that statbuf is filled by fstat by now, + * scan_syslog_file needs that + */ + partial_line_len = scan_syslog_file(&oops_list, file_fd, &statbuf, partial_line_len); + } + } + } + + /* Print and/or save oopses */ + int oops_cnt = g_list_length(oops_list); + if (!(opts & OPT_w) || oops_cnt != 0) + log("Found oopses: %d", oops_cnt); + if (oops_cnt != 0) + { + if (opts & OPT_o) + { + int i = 0; + while (i < oops_cnt) + printf("\nVersion: %s", (char*)g_list_nth_data(oops_list, i++)); + } + if (opts & OPT_d) + { + log("Creating dump directories"); + int errors = save_oops_to_dump_dir(oops_list, oops_cnt); + if (errors > 0) + log("%d errors while dumping oopses", errors); + } + } + for (GList *li = oops_list; li; li = g_list_next(li)) + free((char*)li->data); + g_list_free(oops_list); + oops_list = NULL; + + /* Done if no -w */ + if (!(opts & OPT_w)) + break; + + /* Even if log file grows all the time, say, a new line every 5 ms, + * we don't want to scan it all the time. Sleep a bit and let it grow + * in bigger increments. + * Sleep longer if file does not exist. + */ + sleep(file_fd >= 0 ? 1 : 59); + + /* Now wait for it to change, be moved or deleted */ + if (wd >= 0) + { + char buf[4096]; + VERB3 log("Waiting for '%s' to change", filename); + /* We block here: */ + int len = read(inotify_fd, buf, sizeof(buf)); + if (len < 0 && errno != EINTR) /* I saw EINTR here on strace attach */ + perror_msg("Error reading inotify fd"); + /* we don't actually check what happened to file - + * the code will handle all possibilities. + */ + VERB3 log("Change in '%s' detected", filename); + } + + } /* while (1) */ + + return 0; +} -- cgit