#!/usr/bin/python # -*- mode:python -*- # ABRT Bugzilla Duplication Checker # Downloads all backtraces reported by ABRT from Bugzilla, # and search for duplicates using the newest ABRT duplication # checker. # # Some bugs in Bugzilla were reported by older ABRT # versions, which had poor duplication detection. # # Please do not run this script unless it's neccessary to do so. # It forces Bugzilla to send data related to thousands of bug reports. # # # Useful text to be pasted to Bugzilla: """ This bug appears to have been filled using a buggy version of ABRT, because it contains unusable backtrace. Sorry for the inconvenience. Closing as INSUFFICIENT_DATA. """ from bugzilla import RHBugzilla from optparse import OptionParser import sys import os.path import subprocess import cPickle import urllib import json parser = OptionParser(version="%prog 1.0") parser.add_option("-u", "--user", dest="user", help="Bugzilla user name (REQUIRED)", metavar="USERNAME") parser.add_option("-p", "--password", dest="password", help="Bugzilla password (REQUIRED)", metavar="PASSWORD") parser.add_option("-b", "--bugzilla", dest="bugzilla", default="https://bugzilla.redhat.com/xmlrpc.cgi", help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL") parser.add_option("-v", "--verbose", dest="verbose", help="Detailed output") parser.add_option("-c", "--close", help="Close some of the bugs in Bugzilla (DANGEROUS)", action="store_true", default=False, dest="close") parser.add_option("-i", "--wiki", help="Generate output in wiki syntax", action="store_true", default=False, dest="wiki") (options, args) = parser.parse_args() if not options.user or len(options.user) == 0: parser.error("User name is required.\nTry {0} --help".format(sys.argv[0])) if not options.password or len(options.password) == 0: parser.error("Password is required.\nTry {0} --help".format(sys.argv[0])) bz = RHBugzilla() bz.connect(options.bugzilla) bz.login(options.user, options.password) buginfos = bz.query({'status_whiteboard_type':'allwordssubstr','status_whiteboard':'abrt_hash'}) print "{0} bugs found.".format(len(buginfos)) # # Load cache from previous run. Speeds up the case Bugzilla closes connection. # The cache should be manually removed after a day or so, because the data in it # are no longer valid. # database = {} ids = {} CACHE_FILE = "abrt-bz-dupchecker-cache.tmp" if os.path.isfile(CACHE_FILE): f = open(CACHE_FILE, 'r') database = cPickle.load(f) ids = cPickle.load(f) f.close() def save_to_cache(): global database f = open(CACHE_FILE, 'w') cPickle.dump(database, f, 2) cPickle.dump(ids, f, 2) f.close() count = 0 for buginfo in buginfos: count += 1 print "{0}/{1}".format(count, len(buginfos)) if count % 100 == 0: save_to_cache() if ids.has_key(buginfo.bug_id): continue ids[buginfo.bug_id] = True if not buginfo.bug_status in ["NEW", "ASSIGNED", "MODIFIED", "VERIFIED"]: if options.verbose: print "Bug {0} has status {1}, skipping.".format(buginfo.bug_id, buginfo.bug_status) continue bug = bz.getbug(buginfo.bug_id) # Skip bugs with already downloaded backtraces. filename = "{0}.bt".format(buginfo.bug_id) if os.path.isfile(filename): if options.verbose: print "Skipping {0} (already exists).".format(filename) else: # Get backtrace from bug and store it as a file. downloaded = False for attachment in bug.attachments: if attachment['filename'] == 'backtrace': data = bz.openattachment(attachment['id']) f = open(filename, 'w') f.write(data.read()) f.close() downloaded = True if options.verbose: print "Attachment {0} downloaded.".format(filename) # Silently skip bugs without backtrace. # Those are usually duplicates of bugs; the duplication copies # abrt_hash, but it does not copy the attachment. if not downloaded: continue command = ["./abrt-backtrace"] command.append(filename) command.append("--single-thread") command.append("--frame-depth=5") command.append("--remove-exit-handlers") command.append("--remove-noncrash-frames") helper = subprocess.Popen(command, stdout=subprocess.PIPE) backtrace, err = helper.communicate() helper.wait() if helper.returncode != 0: print "Problems parsing {0}".format(filename) continue # Empty backtrace is provided by Python apps. if len(backtrace) == 0: continue bugitem = {'id':buginfo.bug_id, 'comments':len(bug.longdescs)} if backtrace in database: components = database[backtrace] if buginfo.component in components: components[buginfo.component].append(bugitem) if options.verbose: print "Duplicate found: {0}".format(database[out]['id']) print "Backtrace: {0}".format(out) else: components[buginfo.component] = [ bugitem ] else: database[backtrace] = { buginfo.component: [ bugitem ] } # The number of duplicates. dupcount = 0 # The number of duplicates that can be closed. dupclosecount = 0 for backtrace, components in database.items(): for component, bugitems in components.items(): dupcount += len(bugitems) - 1 dupclosecount += min(len(filter(lambda x: x <= 2, map(lambda x: x["comments"], bugitems))), len(bugitems) - 1) # Get the component owner. # Sort the duplicates by the component owner, and # filter out those which should not be printed. dups = [] for backtrace, components in database.items(): for component, bugitems in components.items(): if len(bugitems) <= 1: continue # Get the component owner owner = "Failed to get component owner" try: component_info = json.load(urllib.urlopen("https://admin.fedoraproject.org/pkgdb/packages/name/{0}?tg_format=json".format(component))) component_packages = component_info['packageListings'] component_f12 = filter(lambda x:x["collection"]["version"]=="12", component_packages) if len(component_f12) == 1: owner = component_f12[0]["owner"] except KeyError: pass dups.append((component, owner, bugitems, backtrace)) print "." # Close all bugs where it is appropriate. if options.close: LIMIT = 1000 counter = 0 for (component, owner, bugitems, backtrace) in dups: # Find the master bug item # Its the one with the most comments. # Sort function sorting by comment count. def commentCmp(x, y): if x['comments'] < y['comments']: return 1 elif x['comments'] == y['comments']: # Sort by bug id, older bugs should became the master bug if x['id'] > y['id']: return 1 elif x['id'] == y['id']: return 0 else: return -1 else: return -1 sorteditems = sorted(bugitems, commentCmp) master = sorteditems[0] for item in sorteditems[1:]: if item['comments'] > 2: continue bug = bz.getbug(int(item['id'])) # Check the bug status AGAIN to make sure the bug is still opened. if not bug.bug_status in ["NEW", "ASSIGNED"]: continue print "Closing bug #{0} with {1} comments as a duplicate of #{2}.".format(item['id'], item['comments'], master['id']) bug.close("DUPLICATE", int(master['id']), "", ("This bug appears to have been filled using a buggy version of ABRT, because\n" + "it contains a backtrace which is a duplicate of backtrace from bug #{0}.\n\n" + "Sorry for the inconvenience.").format(master['id'])) counter += 1 if counter > LIMIT: sys.exit(0) bz.logout() print print "SUMMARY" print "==========================================================================" print "Total number of duplicate bugs detected: {0}".format(dupcount) print "Number of duplicate bugs that will be closed : {0}".format(dupclosecount) print "------------------------------" # Print the duplicates sorted by package owner. def cmp(x, y): if x[1] < y[1]: return -1 elif x[1] == y[1]: return 0 else: return 1 for (component, owner, bugitems, backtrace) in sorted(dups, cmp): if options.wiki: print "----" print "* component: '''{0}''' ({1})".format(component, owner) print "* duplicates: {0}".format( reduce(lambda x,y: x+", "+y, map(lambda x: "#[https://bugzilla.redhat.com/show_bug.cgi?id={0} {0}] ({1} comments)".format(x['id'],x['comments']), bugitems))) print "* backtrace:" for line in backtrace.replace("Thread\n", "").splitlines(): print "*# {0}".format(line) else: print "Component: {0} ({1})".format(component, owner) print "Duplicates: {0}".format( reduce(lambda x,y: x+", "+y, map(lambda x: "{0} ({1})".format(x['id'],x['comments']), bugitems))) print "Backtrace: {0}".format(backtrace)