diff options
Diffstat (limited to 'scripts/abrt-bz-dupchecker')
-rwxr-xr-x | scripts/abrt-bz-dupchecker | 281 |
1 files changed, 0 insertions, 281 deletions
diff --git a/scripts/abrt-bz-dupchecker b/scripts/abrt-bz-dupchecker deleted file mode 100755 index 65e11531..00000000 --- a/scripts/abrt-bz-dupchecker +++ /dev/null @@ -1,281 +0,0 @@ -#!/usr/bin/python -# -*- mode:python -*- -# ABRT Bugzilla Duplication Checker -# Downloads all backtraces reported by ABRT from Bugzilla, -# and search for duplicates using the newest ABRT duplication -# checker. -# -# Some bugs in Bugzilla were reported by older ABRT -# versions, which had poor duplication detection. -# -# Please do not run this script unless it's neccessary to do so. -# It forces Bugzilla to send data related to thousands of bug reports. -# -# -# Useful text to be pasted to Bugzilla: -""" -This bug appears to have been filled using a buggy version of ABRT, because -it contains unusable backtrace. Sorry for the inconvenience. -Closing as INSUFFICIENT_DATA. -""" - -from bugzilla import RHBugzilla -from optparse import OptionParser -import sys -import os.path -import subprocess -import cPickle -import urllib -import json - -parser = OptionParser(version="%prog 1.0") -parser.add_option("-u", "--user", dest="user", - help="Bugzilla user name (REQUIRED)", metavar="USERNAME") -parser.add_option("-p", "--password", dest="password", - help="Bugzilla password (REQUIRED)", metavar="PASSWORD") -parser.add_option("-b", "--bugzilla", dest="bugzilla", default="https://bugzilla.redhat.com/xmlrpc.cgi", - help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL") -parser.add_option("-v", "--verbose", dest="verbose", - help="Detailed output") -parser.add_option("-c", "--close", help="Close some of the bugs in Bugzilla (DANGEROUS)", - action="store_true", default=False, dest="close") -parser.add_option("-i", "--wiki", help="Generate output in wiki syntax", - action="store_true", default=False, dest="wiki") - -(options, args) = parser.parse_args() - -if not options.user or len(options.user) == 0: - parser.error("User name is required.\nTry {0} --help".format(sys.argv[0])) - -if not options.password or len(options.password) == 0: - parser.error("Password is required.\nTry {0} --help".format(sys.argv[0])) - -bz = RHBugzilla() -bz.connect(options.bugzilla) -bz.login(options.user, options.password) - -buginfos = bz.query({'status_whiteboard_type':'allwordssubstr','status_whiteboard':'abrt_hash', 'product':'Fedora'}) - -print "{0} bugs found.".format(len(buginfos)) - -# -# Load cache from previous run. Speeds up the case Bugzilla closes connection. -# The cache should be manually removed after a day or so, because the data in it -# are no longer valid. -# -database = {} -ids = {} -CACHE_FILE = "abrt-bz-dupchecker-cache.tmp" -if os.path.isfile(CACHE_FILE): - f = open(CACHE_FILE, 'r') - database = cPickle.load(f) - ids = cPickle.load(f) - f.close() - -def save_to_cache(): - global database - f = open(CACHE_FILE, 'w') - cPickle.dump(database, f, 2) - cPickle.dump(ids, f, 2) - f.close() - -count = 0 -for buginfo in buginfos: - count += 1 - print "{0}/{1}".format(count, len(buginfos)) - if count % 100 == 0: - save_to_cache() - - if ids.has_key(buginfo.bug_id): - continue - - ids[buginfo.bug_id] = True - - if not buginfo.bug_status in ["NEW", "ASSIGNED", "MODIFIED", "VERIFIED"]: - if options.verbose: - print "Bug {0} has status {1}, skipping.".format(buginfo.bug_id, buginfo.bug_status) - continue - - bug = bz.getbug(buginfo.bug_id) - - # Skip bugs with already downloaded backtraces. - filename = "{0}.bt".format(buginfo.bug_id) - if os.path.isfile(filename): - if options.verbose: - print "Skipping {0} (already exists).".format(filename) - else: - # Get backtrace from bug and store it as a file. - downloaded = False - for attachment in bug.attachments: - if attachment['filename'] == 'backtrace': - data = bz.openattachment(attachment['id']) - f = open(filename, 'w') - f.write(data.read()) - f.close() - downloaded = True - if options.verbose: - print "Attachment {0} downloaded.".format(filename) - - # Silently skip bugs without backtrace. - # Those are usually duplicates of bugs; the duplication copies - # abrt_hash, but it does not copy the attachment. - if not downloaded: - continue - - command = ["abrt-backtrace"] - command.append(filename) - command.append("--single-thread") - command.append("--frame-depth=5") - command.append("--remove-exit-handlers") - command.append("--remove-noncrash-frames") - - helper = subprocess.Popen(command, stdout=subprocess.PIPE) - backtrace, err = helper.communicate() - helper.wait() - - if helper.returncode != 0: - print "Problems parsing {0}".format(filename) - continue - - # Empty backtrace is provided by Python apps. - if len(backtrace) == 0: - continue - - bugitem = {'id':buginfo.bug_id, 'comments':len(bug.longdescs)} - if backtrace in database: - components = database[backtrace] - if buginfo.component in components: - components[buginfo.component].append(bugitem) - if options.verbose: - print "Duplicate found: {0}".format(database[out]['id']) - print "Backtrace: {0}".format(out) - else: - components[buginfo.component] = [ bugitem ] - else: - database[backtrace] = { buginfo.component: [ bugitem ] } - -# The number of duplicates. -dupcount = 0 -# The number of duplicates that can be closed. -dupclosecount = 0 -for backtrace, components in database.items(): - for component, bugitems in components.items(): - dupcount += len(bugitems) - 1 - dupclosecount += min(len(filter(lambda x: x <= 2, - map(lambda x: x["comments"], - bugitems))), - len(bugitems) - 1) - -# Get the component owner. -# Sort the duplicates by the component owner, and -# filter out those which should not be printed. -dups = [] -for backtrace, components in database.items(): - for component, bugitems in components.items(): - if len(bugitems) <= 1: - continue - - # Get the component owner - owner = "Failed to get component owner" - try: - component_info = json.load(urllib.urlopen("https://admin.fedoraproject.org/pkgdb/acls/name/{0}?tg_format=json".format(component))) - component_packages = component_info['packageListings'] - component_f12 = filter(lambda x:x["collection"]["version"]=="12", component_packages) - if len(component_f12) == 1: - owner = component_f12[0]["owner"] - except KeyError: - pass - - dups.append((component, owner, bugitems, backtrace)) - print "." - -# Close all bugs where it is appropriate. -if options.close: - LIMIT = 10000 # infinite - counter = 0 - for (component, owner, bugitems, backtrace) in dups: - # Find the master bug item - # Its the one with the most comments. - - # Sort function sorting by comment count. - def commentCmp(x, y): - if x['comments'] < y['comments']: - return 1 - elif x['comments'] == y['comments']: - # Sort by bug id, older bugs should became the master bug - if x['id'] > y['id']: - return 1 - elif x['id'] == y['id']: - return 0 - else: - return -1 - else: - return -1 - - # Sort the duplicates by the number of comments. - # Select the bug with the highest number of comments as the master bug. - # All other bugs without user comments will be closed as a duplicate of - # the master bug. - sorteditems = sorted(bugitems, commentCmp) - master = sorteditems[0] - - # Check the master bug status AGAIN to make sure the bug is still opened. - bug = bz.getbug(int(master['id'])) - if not bug.bug_status in ["NEW", "ASSIGNED"]: - continue - - for item in sorteditems[1:]: - if item['comments'] > 2: - continue - - # Check the bug status AGAIN to make sure the bug is still opened. - bug = bz.getbug(int(item['id'])) - if not bug.bug_status in ["NEW", "ASSIGNED"]: - continue - - print "Closing bug #{0} with {1} comments as a duplicate of #{2}.".format(item['id'], item['comments'], master['id']) - bug.close("DUPLICATE", int(master['id']), "", - ("This bug appears to have been filled using a buggy version of ABRT, because\n" + - "it contains a backtrace which is a duplicate of backtrace from bug #{0}.\n\n" + - "Sorry for the inconvenience.").format(master['id'])) - - counter += 1 - if counter > LIMIT: - sys.exit(0) - -bz.logout() - -print -print "SUMMARY" -print "==========================================================================" -print "Total number of duplicate bugs detected: {0}".format(dupcount) -print "Number of duplicate bugs that will be closed : {0}".format(dupclosecount) -print "------------------------------" - -# Print the duplicates sorted by package owner. -def cmp(x, y): - if x[1] < y[1]: - return -1 - elif x[1] == y[1]: - return 0 - else: - return 1 - -for (component, owner, bugitems, backtrace) in sorted(dups, cmp): - if options.wiki: - print "----" - print "* component: '''{0}''' ({1})".format(component, owner) - print "* duplicates: {0}".format( - reduce(lambda x,y: x+", "+y, - map(lambda x: "#[https://bugzilla.redhat.com/show_bug.cgi?id={0} {0}] ({1} comments)".format(x['id'],x['comments']), - bugitems))) - print "* backtrace:" - for line in backtrace.replace("Thread\n", "").splitlines(): - print "*# {0}".format(line) - else: - print "Component: {0} ({1})".format(component, owner) - print "Duplicates: {0}".format( - reduce(lambda x,y: x+", "+y, - map(lambda x: "{0} ({1})".format(x['id'],x['comments']), - bugitems))) - print "Backtrace: {0}".format(backtrace) |