summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorKarel Klic <kklic@redhat.com>2010-03-26 17:55:51 +0100
committerKarel Klic <kklic@redhat.com>2010-03-26 18:11:09 +0100
commit7ccd55eb10921e94a81b699a2c96cb1dc25515d1 (patch)
tree66297ccd0633eb5b8f817c3d710c36b267b3bc61 /scripts
parent605c31f9d0897c18a900c7a4c0ad75bc439d18e5 (diff)
downloadabrt-7ccd55eb10921e94a81b699a2c96cb1dc25515d1.tar.gz
abrt-7ccd55eb10921e94a81b699a2c96cb1dc25515d1.tar.xz
abrt-7ccd55eb10921e94a81b699a2c96cb1dc25515d1.zip
Move backtrace parser from src/Backtrace to lib/Utils.
Move abrt-backtrace app from src/Backtrace/main.c to src/utils/abrt-backtrace. Move backtrace preprocessing code from abrt-backtrace to the parser. Implemented new backtrace rating algorithm. Added old bt rating algorithm to backtrace.c. Move strbuf to lib/Utils, and updated it to use xfuncs. Created separate header for xfuncs. Some functions in xfuncs marked as extern "C", so they can be used in C code. Merged backtrace fallback (independent_backtrace) "parser" into backtrace.{h/c}. Added option --rate to abrt-backtrace, to be able to use the new backtrace rating algorithm in scripts.
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/abrt-bz-downloader82
-rwxr-xr-xscripts/abrt-bz-dupchecker272
-rwxr-xr-xscripts/abrt-bz-hashchecker59
-rwxr-xr-xscripts/check-bt-parsability20
4 files changed, 433 insertions, 0 deletions
diff --git a/scripts/abrt-bz-downloader b/scripts/abrt-bz-downloader
new file mode 100755
index 00000000..7f294257
--- /dev/null
+++ b/scripts/abrt-bz-downloader
@@ -0,0 +1,82 @@
+#!/usr/bin/python
+# -*- mode:python -*-
+# ABRT Bugzilla Backtrace Downloader
+# Downloads all backtraces reported by ABRT from Bugzilla.
+#
+# Please do not run this script unless it's necessary to do so.
+# It forces Bugzilla to send data related to thousands of bug reports.
+
+from bugzilla import RHBugzilla
+from optparse import OptionParser
+import sys
+import os.path
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-u", "--user", dest="user",
+ help="Bugzilla user name (REQUIRED)", metavar="USERNAME")
+parser.add_option("-p", "--password", dest="password",
+ help="Bugzilla password (REQUIRED)", metavar="PASSWORD")
+parser.add_option("-b", "--bugzilla", dest="bugzilla",
+ help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL")
+parser.add_option("-f", "--fields",
+ action="store_true", dest="fields", default=False,
+ help="Print possible bug fields and exit.")
+
+(options, args) = parser.parse_args()
+
+if not options.user or len(options.user) == 0:
+ parser.error("User name is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.password or len(options.password) == 0:
+ parser.error("Password is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.bugzilla or len(options.bugzilla) == 0:
+ options.bugzilla = "https://bugzilla.redhat.com/xmlrpc.cgi"
+
+bz = RHBugzilla()
+bz.connect(options.bugzilla)
+bz.login(options.user, options.password)
+
+if options.fields:
+ print bz.bugfields
+ exit(0)
+
+buginfos = bz.query({'status_whiteboard_type':'allwordssubstr','status_whiteboard':'abrt_hash'})
+
+print "{0} bugs found.".format(len(buginfos))
+
+for buginfo in buginfos:
+ # Skip bugs with already downloaded backtraces.
+ filename = "{0}.bt".format(buginfo.bug_id)
+ if os.path.isfile(filename):
+ print "Skipping {0} (already exists).".format(filename)
+ continue
+
+ # Skip bugs with broken or Python backtraces
+ broken_backtrace_bugs = [ 517116, # binary file :)
+ 518516, # not a backtrace, GDB fail
+ 524259, # multiple backtraces in single file
+ 524427, # multiple backtraces in single file
+ 528529, # just [New Thread xx] lines
+ #528915, 10000 frames, out of memory, to be fixed
+ #529422, 10000 frames, out of memory, to be fixed
+ #530239, 10000 frames, out of memory, to be fixed
+ 532264, # no header
+ 533475, # no backtrace
+ #537819, 50000 frames, out of memory, to be fixed
+ #539699, to be fixed, parser bug
+ 539992] # completely broken backtrace
+ if buginfo.bug_id in broken_backtrace_bugs:
+ continue
+
+ # Get backtrace from bug and store it as a file.
+ bug = bz.getbug(buginfo.bug_id)
+ for attachment in bug.attachments:
+     if attachment['filename'] != 'backtrace':
+         continue
+     data = bz.openattachment(attachment['id'])
+     with open(filename, 'wb') as f:  # binary mode; closed even if the write fails
+         f.write(data.read())
+     print "Attachment {0} downloaded.".format(filename)
+
+bz.logout()
diff --git a/scripts/abrt-bz-dupchecker b/scripts/abrt-bz-dupchecker
new file mode 100755
index 00000000..654a3702
--- /dev/null
+++ b/scripts/abrt-bz-dupchecker
@@ -0,0 +1,272 @@
+#!/usr/bin/python
+# -*- mode:python -*-
+# ABRT Bugzilla Duplication Checker
+# Downloads all backtraces reported by ABRT from Bugzilla,
+# and search for duplicates using the newest ABRT duplication
+# checker.
+#
+# Some bugs in Bugzilla were reported by older ABRT
+# versions, which had poor duplication detection.
+#
+# Please do not run this script unless it's necessary to do so.
+# It forces Bugzilla to send data related to thousands of bug reports.
+#
+#
+# Useful text to be pasted to Bugzilla:
+"""
+This bug appears to have been filed using a buggy version of ABRT, because
+it contains an unusable backtrace. Sorry for the inconvenience.
+Closing as INSUFFICIENT_DATA.
+"""
+
+from bugzilla import RHBugzilla
+from optparse import OptionParser
+import sys
+import os.path
+import subprocess
+import cPickle
+import urllib
+import json
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-u", "--user", dest="user",
+ help="Bugzilla user name (REQUIRED)", metavar="USERNAME")
+parser.add_option("-p", "--password", dest="password",
+ help="Bugzilla password (REQUIRED)", metavar="PASSWORD")
+parser.add_option("-b", "--bugzilla", dest="bugzilla", default="https://bugzilla.redhat.com/xmlrpc.cgi",
+ help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL")
+parser.add_option("-v", "--verbose", dest="verbose",  # boolean flag, takes no argument
+                  action="store_true", default=False, help="Detailed output")
+parser.add_option("-c", "--close", help="Close some of the bugs in Bugzilla (DANGEROUS)",
+ action="store_true", default=False, dest="close")
+parser.add_option("-i", "--wiki", help="Generate output in wiki syntax",
+ action="store_true", default=False, dest="wiki")
+
+(options, args) = parser.parse_args()
+
+if not options.user or len(options.user) == 0:
+ parser.error("User name is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.password or len(options.password) == 0:
+ parser.error("Password is required.\nTry {0} --help".format(sys.argv[0]))
+
+bz = RHBugzilla()
+bz.connect(options.bugzilla)
+bz.login(options.user, options.password)
+
+buginfos = bz.query({'status_whiteboard_type':'allwordssubstr','status_whiteboard':'abrt_hash'})
+
+print "{0} bugs found.".format(len(buginfos))
+
+#
+# Load the cache from a previous run. This speeds up recovery when Bugzilla
+# closes the connection. The cache should be manually removed after a day or
+# so, because the data in it is no longer valid.
+#
+database = {}
+ids = {}
+CACHE_FILE = "abrt-bz-dupchecker-cache.tmp"
+if os.path.isfile(CACHE_FILE):
+    # Binary mode: the cache is written with pickle protocol 2 (a binary format).
+    with open(CACHE_FILE, 'rb') as f:
+        database = cPickle.load(f)
+        ids = cPickle.load(f)
+
+def save_to_cache():
+    """Pickle `database`, then `ids`, into CACHE_FILE (same order as the load)."""
+    # Binary mode is required for protocol 2; `with` closes the file on error.
+    with open(CACHE_FILE, 'wb') as f:
+        cPickle.dump(database, f, 2)
+        cPickle.dump(ids, f, 2)
+
+count = 0
+for buginfo in buginfos:
+ count += 1
+ print "{0}/{1}".format(count, len(buginfos))
+ if count % 100 == 0:
+ save_to_cache()
+
+ if ids.has_key(buginfo.bug_id):
+ continue
+
+ ids[buginfo.bug_id] = True
+
+ if not buginfo.bug_status in ["NEW", "ASSIGNED", "MODIFIED", "VERIFIED"]:
+ if options.verbose:
+ print "Bug {0} has status {1}, skipping.".format(buginfo.bug_id, buginfo.bug_status)
+ continue
+
+ bug = bz.getbug(buginfo.bug_id)
+
+ # Skip bugs with already downloaded backtraces.
+ filename = "{0}.bt".format(buginfo.bug_id)
+ if os.path.isfile(filename):
+ if options.verbose:
+ print "Skipping {0} (already exists).".format(filename)
+ else:
+ # Get backtrace from bug and store it as a file.
+ downloaded = False
+ for attachment in bug.attachments:
+ if attachment['filename'] == 'backtrace':
+ data = bz.openattachment(attachment['id'])
+ f = open(filename, 'w')
+ f.write(data.read())
+ f.close()
+ downloaded = True
+ if options.verbose:
+ print "Attachment {0} downloaded.".format(filename)
+
+ # Silently skip bugs without backtrace.
+ # Those are usually duplicates of bugs; the duplication copies
+ # abrt_hash, but it does not copy the attachment.
+ if not downloaded:
+ continue
+
+ command = ["./abrt-backtrace"]
+ command.append(filename)
+ command.append("--single-thread")
+ command.append("--frame-depth=5")
+ command.append("--remove-exit-handlers")
+ command.append("--remove-noncrash-frames")
+
+ helper = subprocess.Popen(command, stdout=subprocess.PIPE)
+ backtrace, err = helper.communicate()
+ helper.wait()
+
+ if helper.returncode != 0:
+ print "Problems parsing {0}".format(filename)
+ continue
+
+ # Empty backtrace is provided by Python apps.
+ if len(backtrace) == 0:
+ continue
+
+ bugitem = {'id':buginfo.bug_id, 'comments':len(bug.longdescs)}
+ if backtrace in database:
+     components = database[backtrace]  # maps component -> list of bug items
+     if buginfo.component in components:
+         components[buginfo.component].append(bugitem)
+         if options.verbose:  # report the first bug recorded for this backtrace/component
+             print "Duplicate found: {0}".format(components[buginfo.component][0]['id'])
+             print "Backtrace: {0}".format(backtrace)
+     else:
+         components[buginfo.component] = [ bugitem ]
+ else:
+     database[backtrace] = { buginfo.component: [ bugitem ] }
+
+# The number of duplicates.
+dupcount = 0
+# The number of duplicates that can be closed.
+dupclosecount = 0
+for backtrace, components in database.items():
+ for component, bugitems in components.items():
+ dupcount += len(bugitems) - 1
+ dupclosecount += min(len(filter(lambda x: x <= 2,
+ map(lambda x: x["comments"],
+ bugitems))),
+ len(bugitems) - 1)
+
+# Get the component owner.
+# Sort the duplicates by the component owner, and
+# filter out those which should not be printed.
+dups = []
+for backtrace, components in database.items():
+ for component, bugitems in components.items():
+ if len(bugitems) <= 1:
+ continue
+
+ # Get the component owner; on any lookup failure keep the placeholder text.
+ owner = "Failed to get component owner"
+ try:
+     component_info = json.load(urllib.urlopen("https://admin.fedoraproject.org/pkgdb/packages/name/{0}?tg_format=json".format(component)))
+     component_packages = component_info['packageListings']
+     component_f12 = filter(lambda x:x["collection"]["version"]=="12", component_packages)
+     if len(component_f12) == 1:
+         owner = component_f12[0]["owner"]
+ except (KeyError, IOError, ValueError):  # missing field, network error, invalid JSON
+     pass
+
+ dups.append((component, owner, bugitems, backtrace))
+ print "."
+
+# Close all bugs where it is appropriate.
+if options.close:
+ LIMIT = 1000
+ counter = 0
+ for (component, owner, bugitems, backtrace) in dups:
+ # Find the master bug item
+ # It's the one with the most comments.
+
+ # Sort function sorting by comment count.
+ def commentCmp(x, y):
+ if x['comments'] < y['comments']:
+ return 1
+ elif x['comments'] == y['comments']:
+ # Sort by bug id, older bugs should become the master bug
+ if x['id'] > y['id']:
+ return 1
+ elif x['id'] == y['id']:
+ return 0
+ else:
+ return -1
+ else:
+ return -1
+
+ sorteditems = sorted(bugitems, commentCmp)
+
+ master = sorteditems[0]
+ for item in sorteditems[1:]:
+ if item['comments'] > 2:
+ continue
+
+ bug = bz.getbug(int(item['id']))
+ # Check the bug status AGAIN to make sure the bug is still opened.
+ if not bug.bug_status in ["NEW", "ASSIGNED"]:
+ continue
+
+ print "Closing bug #{0} with {1} comments as a duplicate of #{2}.".format(item['id'], item['comments'], master['id'])
+ bug.close("DUPLICATE", int(master['id']), "",
+ ("This bug appears to have been filled using a buggy version of ABRT, because\n" +
+ "it contains a backtrace which is a duplicate of backtrace from bug #{0}.\n\n" +
+ "Sorry for the inconvenience.").format(master['id']))
+
+ counter += 1
+ if counter > LIMIT:
+ sys.exit(0)
+
+bz.logout()
+
+print
+print "SUMMARY"
+print "=========================================================================="
+print "Total number of duplicate bugs detected: {0}".format(dupcount)
+print "Number of duplicate bugs that will be closed : {0}".format(dupclosecount)
+print "------------------------------"
+
+# Print the duplicates sorted by package owner.
+def cmp(x, y):
+ if x[1] < y[1]:
+ return -1
+ elif x[1] == y[1]:
+ return 0
+ else:
+ return 1
+
+for (component, owner, bugitems, backtrace) in sorted(dups, cmp):
+ if options.wiki:
+ print "----"
+ print "* component: '''{0}''' ({1})".format(component, owner)
+ print "* duplicates: {0}".format(
+ reduce(lambda x,y: x+", "+y,
+ map(lambda x: "#[https://bugzilla.redhat.com/show_bug.cgi?id={0} {0}] ({1} comments)".format(x['id'],x['comments']),
+ bugitems)))
+ print "* backtrace:"
+ for line in backtrace.replace("Thread\n", "").splitlines():
+ print "*# {0}".format(line)
+ else:
+ print "Component: {0} ({1})".format(component, owner)
+ print "Duplicates: {0}".format(
+ reduce(lambda x,y: x+", "+y,
+ map(lambda x: "{0} ({1})".format(x['id'],x['comments']),
+ bugitems)))
+ print "Backtrace: {0}".format(backtrace)
diff --git a/scripts/abrt-bz-hashchecker b/scripts/abrt-bz-hashchecker
new file mode 100755
index 00000000..ec7ce1a6
--- /dev/null
+++ b/scripts/abrt-bz-hashchecker
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+# -*- mode:python -*-
+# Checks how many bugs in Bugzilla have the same hash.
+#
+# Please do not run this script unless it's necessary to do so.
+# It forces Bugzilla to send data related to thousands of bug reports.
+
+from bugzilla import RHBugzilla
+from optparse import OptionParser
+import sys
+import os.path
+import subprocess
+import re
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-u", "--user", dest="user",
+ help="Bugzilla user name (REQUIRED)", metavar="USERNAME")
+parser.add_option("-p", "--password", dest="password",
+ help="Bugzilla password (REQUIRED)", metavar="PASSWORD")
+parser.add_option("-b", "--bugzilla", dest="bugzilla",
+ help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL")
+
+(options, args) = parser.parse_args()
+
+if not options.user or len(options.user) == 0:
+ parser.error("User name is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.password or len(options.password) == 0:
+ parser.error("Password is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.bugzilla or len(options.bugzilla) == 0:
+ options.bugzilla = "https://bugzilla.redhat.com/xmlrpc.cgi"
+
+bz = RHBugzilla()
+bz.connect(options.bugzilla)
+bz.login(options.user, options.password)
+
+buginfos = bz.query({'status_whiteboard_type':'allwordssubstr','status_whiteboard':'abrt_hash'})
+
+print "{0} bugs found.".format(len(buginfos))
+
+hashes = {}
+for buginfo in buginfos:
+ match = re.search("abrt_hash:([^ ]+)", buginfo.status_whiteboard)
+ if not match:
+ continue
+ hash = match.group(1)
+ if not hash:
+ continue
+ if hash in hashes:
+ hashes[hash].append(buginfo.bug_id)
+ else:
+ hashes[hash] = [ buginfo.bug_id ]
+ print hash
+bz.logout()
+
+for hash, ids in hashes.items():
+ if len(ids) > 1:
+ print "Duplicates found: ", reduce(lambda x,y: str(x)+", "+str(y), ids)
diff --git a/scripts/check-bt-parsability b/scripts/check-bt-parsability
new file mode 100755
index 00000000..a5018bfa
--- /dev/null
+++ b/scripts/check-bt-parsability
@@ -0,0 +1,20 @@
+#!/bin/bash
+# -*- mode: bash -*-
+
+PASS=0
+FAIL=0
+for file in *.bt
+do
+    [ -e "$file" ] || continue  # no *.bt files: the glob stays literal, skip it
+    ./abrt-backtrace "$file" 1> /dev/null  # quoted so names with spaces stay one argument
+    if [ "$?" -eq "0" ]
+    then
+        echo -n "."
+        PASS=$(($PASS+1))
+    else
+        echo "-$file"
+        FAIL=$(($FAIL+1))
+    fi
+done
+echo ""
+echo "Passed $PASS and failed $FAIL."