// Origin: commit 867ea1ee3fd02e15487c308dc9e5ef2bb141f7c2
//   Author:  Denys Vlasenko
//   Date:    Mon, 22 Feb 2010 13:49:22 +0100
//   Subject: Kerneloops: make hashing more likely to produce same hash
//            on different oopses
//   File:    lib/Plugins/Kerneloops.cpp (inserted after the includes of
//            "ABRTException.h" and "CommLayerInner.h")

// Returns true when the text at `line` starts one of the oops lines that
// carry the process name. In some oops classes the process name is
// irrelevant and changes with every oops, so such lines are not hashed.
// Lines we filter out:
//   Pid: 8003, comm: Xorg Not tainted (2.6.27.9-159.fc10.i686 #1)
//   Process Xorg (pid: 8003, ti=f0a0c000 task=f2380000 task.ti=f0a0c000)
static bool oops_line_has_process_name(const char *line)
{
    return strncmp(line, "Pid: ", 5) == 0
        || strncmp(line, "Process ", 8) == 0;
}

// Returns a pointer to the first character at or after `p` which is not
// a hexadecimal digit (this may be the terminating NUL).
static const char *skip_hex_digits(const char *p)
{
    // <ctype.h> classifiers are only defined for EOF and values
    // representable as unsigned char; plain char may be negative.
    while (isxdigit(static_cast<unsigned char>(*p)))
        p++;
    return p;
}

// Computes a hash of a NUL-terminated kernel oops text, normalizing the
// parts which tend to differ between otherwise identical oopses:
//  - lines starting with "Pid: " or "Process " which follow a newline
//    are dropped, together with the newline which precedes them;
//  - every number (decimal, "0x"-prefixed hex, or bare hex such as
//    "f0a0c000") which starts right after a non-alphanumeric character
//    is hashed as a single '0'.
// Returns 0 for an empty string.
static unsigned hash_oops_str(const char *oops_ptr)
{
    unsigned char old_c;
    unsigned char c = 0;
    unsigned hash = 0;

    while (1)
    {
        old_c = c;
        c = *oops_ptr++;
        if (!c)
            break;

        if (c == '\n' && oops_line_has_process_name(oops_ptr))
        {
            // Skip up to (not including) the next newline. The current
            // newline is not hashed; the one ending the skipped line is
            // handled by the next iteration.
            while (*oops_ptr && *oops_ptr != '\n')
                oops_ptr++;
            continue;
        }

        // Only a token which starts after a non-alphanumeric character
        // can be a number: the "7d" in "foo7d" is part of an identifier.
        if (!isalnum(old_c))
        {
            if (c >= '0' && c <= '9')
            {
                // Convert all (possibly hex) numbers to just one '0'
                if (c == '0' && *oops_ptr == 'x') // "0xSOMETHING"
                    oops_ptr++;
                oops_ptr = skip_hex_digits(oops_ptr);
                c = '0';
            }
            else if ((c|0x20) >= 'a' && (c|0x20) <= 'f')
            {
                // This *may be* a hex number without 0x prefix: "f0a0c000"
                // Check that it indeed is, and replace with '0'
                const char *hex_end = skip_hex_digits(oops_ptr);
                // Does it end in a letter which is not a hex digit?
                // (Example: "abcw" is not a hex number, "abc " is)
                if (!isalpha(static_cast<unsigned char>(*hex_end)))
                {
                    // It's "abc " case. Skip the "abc" string
                    oops_ptr = hex_end;
                    c = '0';
                }
                // else: hash the string as-is
            }
        }

        // TODO: Drop call trace tail - in interrupt-driven oopses,
        // everything before interrupt is irrelevant.
        // Example of call trace part of oops:
        // Call Trace:
        // [] ? radeon_cp_resume+0x7d/0xbc [radeon]
        // [] ? drm_ioctl+0x1b0/0x225 [drm]
        // [] ? radeon_cp_resume+0x0/0xbc [radeon]
        // [] ? vfs_ioctl+0x50/0x69
        // [] ? do_vfs_ioctl+0x23b/0x247
        // [] ? audit_syscall_entry+0xf9/0x123
        // [] ? sys_ioctl+0x40/0x5c
        // [] ? syscall_call+0x7/0xb

        /* An algorithm proposed by Donald E. Knuth in The Art Of Computer
         * Programming Volume 3, under the topic of sorting and search
         * chapter 6.4.
         * (The 5/27 shift pair is a rotate-left by 5 when unsigned is
         * 32 bits wide.)
         */
        hash = ((hash << 5) ^ (hash >> 27)) ^ c;
    }
    return hash;
}

// Call site changed by the same commit, in
// CAnalyzerKerneloops::GetLocalUUID(const char *pDebugDumpDir):
// the inline loop which seeded the hash with oops.length() and hashed
// every raw character of `oops` is replaced with
//     unsigned hash = hash_oops_str(oops.c_str());
// followed, as before, by
//     hash &= 0x7FFFFFFF;
//     return to_string(hash);