summary refs log tree commit diff stats
path: root/scripts/abrt-bz-dupchecker
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/abrt-bz-dupchecker')
-rwxr-xr-x scripts/abrt-bz-dupchecker 272
1 files changed, 272 insertions, 0 deletions
diff --git a/scripts/abrt-bz-dupchecker b/scripts/abrt-bz-dupchecker
new file mode 100755
index 00000000..654a3702
--- /dev/null
+++ b/scripts/abrt-bz-dupchecker
@@ -0,0 +1,272 @@
+#!/usr/bin/python
+# -*- mode:python -*-
+# ABRT Bugzilla Duplication Checker
+# Downloads all backtraces reported by ABRT from Bugzilla
+# and searches them for duplicates using the newest ABRT duplication
+# checker.
+#
+# Some bugs in Bugzilla were reported by older ABRT
+# versions, which had poor duplication detection.
+#
+# Please do not run this script unless it's necessary to do so.
+# It forces Bugzilla to send data related to thousands of bug reports.
+#
+#
+# Useful text to be pasted to Bugzilla:
+"""
+This bug appears to have been filled using a buggy version of ABRT, because
+it contains unusable backtrace. Sorry for the inconvenience.
+Closing as INSUFFICIENT_DATA.
+"""
+
+from bugzilla import RHBugzilla
+from optparse import OptionParser
+import sys
+import os.path
+import subprocess
+import cPickle
+import urllib
+import json
+
+# Command-line interface. The user name and the password are mandatory
+# (this is checked after parsing); every other option has a default.
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-u", "--user", dest="user",
+                  help="Bugzilla user name (REQUIRED)", metavar="USERNAME")
+parser.add_option("-p", "--password", dest="password",
+                  help="Bugzilla password (REQUIRED)", metavar="PASSWORD")
+parser.add_option("-b", "--bugzilla", dest="bugzilla",
+                  default="https://bugzilla.redhat.com/xmlrpc.cgi",
+                  help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL")
+# --verbose is a plain flag. Without action="store_true" optparse uses the
+# default "store" action, which requires a value, so a bare "-v" is rejected.
+parser.add_option("-v", "--verbose", help="Detailed output",
+                  action="store_true", default=False, dest="verbose")
+parser.add_option("-c", "--close", help="Close some of the bugs in Bugzilla (DANGEROUS)",
+                  action="store_true", default=False, dest="close")
+parser.add_option("-i", "--wiki", help="Generate output in wiki syntax",
+                  action="store_true", default=False, dest="wiki")
+
+options, args = parser.parse_args()
+
+# Both credentials are mandatory. parser.error() reports the message
+# together with the usage text and terminates the script.
+help_hint = "Try {0} --help".format(sys.argv[0])
+if not options.user:
+    parser.error("User name is required.\n" + help_hint)
+if not options.password:
+    parser.error("Password is required.\n" + help_hint)
+
+# Open an authenticated Bugzilla session.
+bz = RHBugzilla()
+bz.connect(options.bugzilla)
+bz.login(options.user, options.password)
+
+# Select the bugs whose status whiteboard contains "abrt_hash".
+whiteboard_query = {
+    'status_whiteboard_type': 'allwordssubstr',
+    'status_whiteboard': 'abrt_hash',
+}
+buginfos = bz.query(whiteboard_query)
+
+print "{0} bugs found.".format(len(buginfos))
+
+#
+# Load the cache from a previous run, so that a rerun after Bugzilla has
+# closed the connection does not have to start from scratch.
+# The cache should be removed manually after a day or so, because the data
+# in it is no longer valid by then.
+#
+# database: backtrace text -> {component: [{'id': ..., 'comments': ...}, ...]}
+# ids:      bug ids that were already processed (the values are always True)
+#
+database = {}
+ids = {}
+CACHE_FILE = "abrt-bz-dupchecker-cache.tmp"
+if os.path.isfile(CACHE_FILE):
+    # NOTE: unpickling can execute code stored in the file. Run the script
+    # only in a directory whose cache file was created by this script.
+    # The cache is pickled with binary protocol 2, so read it in binary mode.
+    # The with statement closes the file even if unpickling fails.
+    with open(CACHE_FILE, 'rb') as f:
+        database = cPickle.load(f)
+        ids = cPickle.load(f)
+
+def save_to_cache():
+    """Write the backtrace database and the processed bug ids to CACHE_FILE.
+
+    Both objects are pickled one after the other into the same file, in the
+    order in which the start-up code reads them back (database, then ids).
+    """
+    # Protocol 2 is a binary format, hence the binary mode. The with
+    # statement closes the file even if pickling raises.
+    with open(CACHE_FILE, 'wb') as f:
+        cPickle.dump(database, f, 2)
+        cPickle.dump(ids, f, 2)
+
+# Download the backtrace of every still-open bug, normalize it with the
+# abrt-backtrace helper and group the bugs by normalized backtrace and
+# component. Bugs that end up in the same group are duplicates.
+count = 0
+for buginfo in buginfos:
+    count += 1
+    print "{0}/{1}".format(count, len(buginfos))
+    # Checkpoint the cache every 100 bugs.
+    if count % 100 == 0:
+        save_to_cache()
+
+    # Already handled, either earlier in this run or in a cached run.
+    if buginfo.bug_id in ids:
+        continue
+
+    ids[buginfo.bug_id] = True
+
+    if buginfo.bug_status not in ["NEW", "ASSIGNED", "MODIFIED", "VERIFIED"]:
+        if options.verbose:
+            print "Bug {0} has status {1}, skipping.".format(buginfo.bug_id, buginfo.bug_status)
+        continue
+
+    bug = bz.getbug(buginfo.bug_id)
+
+    # Do not download again a backtrace that is already stored on disk;
+    # the stored file is still parsed below.
+    filename = "{0}.bt".format(buginfo.bug_id)
+    if os.path.isfile(filename):
+        if options.verbose:
+            print "Skipping {0} (already exists).".format(filename)
+    else:
+        # Get backtrace from bug and store it as a file.
+        downloaded = False
+        for attachment in bug.attachments:
+            if attachment['filename'] == 'backtrace':
+                data = bz.openattachment(attachment['id'])
+                # The with statement closes the file even if the download
+                # fails half-way.
+                with open(filename, 'w') as f:
+                    f.write(data.read())
+                downloaded = True
+                if options.verbose:
+                    print "Attachment {0} downloaded.".format(filename)
+
+        # Silently skip bugs without backtrace.
+        # Those are usually duplicates of bugs; the duplication copies
+        # abrt_hash, but it does not copy the attachment.
+        if not downloaded:
+            continue
+
+    command = [
+        "./abrt-backtrace",
+        filename,
+        "--single-thread",
+        "--frame-depth=5",
+        "--remove-exit-handlers",
+        "--remove-noncrash-frames",
+    ]
+
+    helper = subprocess.Popen(command, stdout=subprocess.PIPE)
+    # communicate() waits for the helper to finish, so returncode is set
+    # afterwards. stderr is not piped, so only stdout is of interest.
+    backtrace = helper.communicate()[0]
+
+    if helper.returncode != 0:
+        print "Problems parsing {0}".format(filename)
+        continue
+
+    # Empty backtrace is provided by Python apps.
+    if len(backtrace) == 0:
+        continue
+
+    bugitem = {'id': buginfo.bug_id, 'comments': len(bug.longdescs)}
+    components = database.setdefault(backtrace, {})
+    known_bugs = components.setdefault(buginfo.component, [])
+    if known_bugs and options.verbose:
+        # Report the first bug that was recorded with the same backtrace
+        # for the same component.
+        print "Duplicate found: {0}".format(known_bugs[0]['id'])
+        print "Backtrace: {0}".format(backtrace)
+    known_bugs.append(bugitem)
+
+# dupcount:      the number of duplicates.
+# dupclosecount: the number of duplicates that can be closed.
+dupcount = 0
+dupclosecount = 0
+for components in database.values():
+    for bugitems in components.values():
+        # All bugs of a group except one count as duplicates.
+        surplus = len(bugitems) - 1
+        # Only bugs with at most two comments are candidates for closing,
+        # and never more of them than there are duplicates in the group.
+        few_comments = sum(1 for entry in bugitems if entry["comments"] <= 2)
+        dupcount += surplus
+        dupclosecount += min(few_comments, surplus)
+
+# Look up the owner of every component that has duplicates and collect
+# (component, owner, bugitems, backtrace) tuples in dups.
+# Groups with a single bug are filtered out, they are not printed.
+dups = []
+for backtrace, components in database.items():
+    for component, bugitems in components.items():
+        if len(bugitems) <= 1:
+            continue
+
+        # Get the component owner from the Fedora package database.
+        # Any failure leaves the placeholder text in place, so a lookup
+        # problem does not abort the run after the long download phase.
+        owner = "Failed to get component owner"
+        try:
+            response = urllib.urlopen("https://admin.fedoraproject.org/pkgdb/packages/name/{0}?tg_format=json".format(component))
+            try:
+                component_info = json.load(response)
+            finally:
+                response.close()
+            component_packages = component_info['packageListings']
+            # Only the listing of collection version "12" is consulted.
+            component_f12 = filter(lambda x: x["collection"]["version"] == "12", component_packages)
+            if len(component_f12) == 1:
+                owner = component_f12[0]["owner"]
+        except (IOError, ValueError, KeyError):
+            # IOError: network failure, ValueError: invalid JSON,
+            # KeyError: the answer does not have the expected layout.
+            pass
+
+        dups.append((component, owner, bugitems, backtrace))
+        # Progress indicator, one dot per duplicate group.
+        print "."
+
+# Close all bugs where it is appropriate.
+if options.close:
+    # Safety net: at most LIMIT bugs are closed in a single run.
+    LIMIT = 1000
+    counter = 0
+
+    def commentKey(item):
+        """Return the sort key that puts the master bug first.
+
+        Bugs with more comments sort first; among bugs with the same
+        number of comments the older one (lower bug id) sorts first.
+        """
+        return (-item['comments'], item['id'])
+
+    for (component, owner, bugitems, backtrace) in dups:
+        # Find the master bug item.
+        # It's the one with the most comments.
+        sorteditems = sorted(bugitems, key=commentKey)
+
+        master = sorteditems[0]
+        for item in sorteditems[1:]:
+            # Bugs with more than two comments are left open.
+            if item['comments'] > 2:
+                continue
+
+            bug = bz.getbug(int(item['id']))
+            # Check the bug status AGAIN to make sure the bug is still opened.
+            if bug.bug_status not in ["NEW", "ASSIGNED"]:
+                continue
+
+            print "Closing bug #{0} with {1} comments as a duplicate of #{2}.".format(item['id'], item['comments'], master['id'])
+            bug.close("DUPLICATE", int(master['id']), "",
+                      ("This bug appears to have been filled using a buggy version of ABRT, because\n" +
+                       "it contains a backtrace which is a duplicate of backtrace from bug #{0}.\n\n" +
+                       "Sorry for the inconvenience.").format(master['id']))
+
+            counter += 1
+            # ">=" stops after exactly LIMIT closed bugs; ">" would allow
+            # one more.
+            if counter >= LIMIT:
+                print "Limit of {0} closed bugs reached, exiting.".format(LIMIT)
+                # Do not leave the Bugzilla session open on early exit.
+                bz.logout()
+                sys.exit(0)
+
+# The Bugzilla session is not needed for the report.
+bz.logout()
+
+# The summary starts with an empty line.
+summary_lines = [
+    "",
+    "SUMMARY",
+    "==========================================================================",
+    "Total number of duplicate bugs detected: {0}".format(dupcount),
+    "Number of duplicate bugs that will be closed : {0}".format(dupclosecount),
+    "------------------------------",
+]
+print "\n".join(summary_lines)
+
+# Print the duplicates sorted by package owner.
+# Each entry of dups is (component, owner, bugitems, backtrace). A key
+# function is used for sorting, so no comparison function named cmp has to
+# be defined (it would shadow the built-in of the same name).
+for (component, owner, bugitems, backtrace) in sorted(dups, key=lambda dup: dup[1]):
+    if options.wiki:
+        # Wiki syntax: bullet list with links to the individual bugs.
+        print "----"
+        print "* component: '''{0}''' ({1})".format(component, owner)
+        print "* duplicates: {0}".format(", ".join(
+            "#[https://bugzilla.redhat.com/show_bug.cgi?id={0} {0}] ({1} comments)".format(x['id'], x['comments'])
+            for x in bugitems))
+        print "* backtrace:"
+        # Lines consisting only of "Thread" are dropped, the remaining
+        # lines become a numbered list.
+        for line in backtrace.replace("Thread\n", "").splitlines():
+            print "*# {0}".format(line)
+    else:
+        print "Component: {0} ({1})".format(component, owner)
+        print "Duplicates: {0}".format(", ".join(
+            "{0} ({1})".format(x['id'], x['comments'])
+            for x in bugitems))
+        print "Backtrace: {0}".format(backtrace)