path: root/scripts/abrt-bz-dupchecker
diff options
Diffstat (limited to 'scripts/abrt-bz-dupchecker')
1 files changed, 0 insertions, 281 deletions
diff --git a/scripts/abrt-bz-dupchecker b/scripts/abrt-bz-dupchecker
deleted file mode 100755
index 65e11531..00000000
--- a/scripts/abrt-bz-dupchecker
+++ /dev/null
@@ -1,281 +0,0 @@
-# -*- mode:python -*-
-# ABRT Bugzilla Duplication Checker
-# Downloads all backtraces reported by ABRT from Bugzilla
-# and searches for duplicates using the newest ABRT duplication
-# checker.
-# Some bugs in Bugzilla were reported by older ABRT
-# versions, which had poor duplication detection.
-# Please do not run this script unless it's necessary to do so.
-# It forces Bugzilla to send data related to thousands of bug reports.
-# Useful text to be pasted to Bugzilla:
-# This bug appears to have been filed using a buggy version of ABRT, because
-# it contains an unusable backtrace. Sorry for the inconvenience.
-from bugzilla import RHBugzilla
-from optparse import OptionParser
-import sys
-import os.path
-import subprocess
-import cPickle
-import urllib
-import json
-# Command-line interface: user name and password are mandatory, every
-# other option is optional.
-parser = OptionParser(version="%prog 1.0")
-parser.add_option("-u", "--user", dest="user",
-                  help="Bugzilla user name (REQUIRED)", metavar="USERNAME")
-parser.add_option("-p", "--password", dest="password",
-                  help="Bugzilla password (REQUIRED)", metavar="PASSWORD")
-# NOTE(review): options.bugzilla is parsed here but never read by the
-# visible part of the script -- confirm whether it should be passed on.
-parser.add_option("-b", "--bugzilla", dest="bugzilla", default="",
-                  help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL")
-# A plain flag: without action="store_true" optparse falls back to the
-# "store" action and -v would swallow the next command-line argument.
-parser.add_option("-v", "--verbose", dest="verbose",
-                  action="store_true", default=False,
-                  help="Detailed output")
-parser.add_option("-c", "--close", help="Close some of the bugs in Bugzilla (DANGEROUS)",
-                  action="store_true", default=False, dest="close")
-parser.add_option("-i", "--wiki", help="Generate output in wiki syntax",
-                  action="store_true", default=False, dest="wiki")
-(options, args) = parser.parse_args()
-# An unset option is None and an empty one is "", both of which are falsy.
-if not options.user:
-    parser.error("User name is required.\nTry {0} --help".format(sys.argv[0]))
-if not options.password:
-    parser.error("Password is required.\nTry {0} --help".format(sys.argv[0]))
-# Log in and ask for every Fedora bug whose status whiteboard contains
-# "abrt_hash".
-bz = RHBugzilla()
-bz.login(options.user, options.password)
-buginfos = bz.query({
-    'status_whiteboard_type': 'allwordssubstr',
-    'status_whiteboard': 'abrt_hash',
-    'product': 'Fedora',
-})
-print "{0} bugs found.".format(len(buginfos))
-# Load cache from previous run. Speeds up the case Bugzilla closes connection.
-# The cache should be manually removed after a day or so, because the data in
-# it is no longer valid by then.
-#   database: backtrace text -> {component: [bug items]}
-#   ids:      bug id -> True for every bug that was already examined
-database = {}
-ids = {}
-CACHE_FILE = "abrt-bz-dupchecker-cache.tmp"
-if os.path.isfile(CACHE_FILE):
-    # The cache is pickled with binary protocol 2, so it is read in binary
-    # mode; "with" closes the file even when unpickling fails.
-    # NOTE: unpickling can execute arbitrary code -- only load a cache file
-    # that this script wrote itself.
-    with open(CACHE_FILE, 'rb') as f:
-        database = cPickle.load(f)
-        ids = cPickle.load(f)
-def save_to_cache():
-    """Write the module-level `database` and `ids` dictionaries to CACHE_FILE.
-
-    Both objects are pickled back to back with protocol 2; `database` is
-    written first, then `ids`.
-    """
-    # Protocol 2 is a binary format, hence 'wb'. "with" guarantees the file
-    # is closed even if pickling raises.
-    with open(CACHE_FILE, 'wb') as f:
-        cPickle.dump(database, f, 2)
-        cPickle.dump(ids, f, 2)
-# Walk over all bugs, download their backtraces and group the bugs first by
-# normalized backtrace and then by component.
-count = 0
-for buginfo in buginfos:
-    count += 1
-    print "{0}/{1}".format(count, len(buginfos))
-    # Checkpoint every 100 bugs so that an interrupted run can be resumed.
-    if count % 100 == 0:
-        save_to_cache()
-    # Bugs recorded in `ids` were already examined (possibly in a cached run).
-    if buginfo.bug_id in ids:
-        continue
-    ids[buginfo.bug_id] = True
-    if buginfo.bug_status not in ["NEW", "ASSIGNED", "MODIFIED", "VERIFIED"]:
-        if options.verbose:
-            print "Bug {0} has status {1}, skipping.".format(buginfo.bug_id, buginfo.bug_status)
-        continue
-    bug = bz.getbug(buginfo.bug_id)
-    # Skip bugs with already downloaded backtraces.
-    filename = "{0}.bt".format(buginfo.bug_id)
-    if os.path.isfile(filename):
-        if options.verbose:
-            print "Skipping {0} (already exists).".format(filename)
-    else:
-        # Get backtrace from bug and store it as a file.
-        downloaded = False
-        for attachment in bug.attachments:
-            if attachment['filename'] == 'backtrace':
-                data = bz.openattachment(attachment['id'])
-                # NOTE(review): the original statement is truncated in this
-                # listing; this assumes openattachment() returns a file-like
-                # object -- confirm against the python-bugzilla version used.
-                with open(filename, 'w') as f:
-                    f.write(data.read())
-                downloaded = True
-                if options.verbose:
-                    print "Attachment {0} downloaded.".format(filename)
-        # Silently skip bugs without backtrace.
-        # Those are usually duplicates of bugs; the duplication copies
-        # abrt_hash, but it does not copy the attachment.
-        if not downloaded:
-            continue
-    # Reduce the backtrace to the top frames of the crash thread so that
-    # equal crashes produce equal text.
-    command = ["abrt-backtrace",
-               filename,
-               "--single-thread",
-               "--frame-depth=5",
-               "--remove-exit-handlers",
-               "--remove-noncrash-frames"]
-    helper = subprocess.Popen(command, stdout=subprocess.PIPE)
-    # communicate() already waits for the process to terminate.
-    backtrace, _ = helper.communicate()
-    if helper.returncode != 0:
-        print "Problems parsing {0}".format(filename)
-        continue
-    # Empty backtrace is provided by Python apps.
-    if not backtrace:
-        continue
-    bugitem = {'id': buginfo.bug_id, 'comments': len(bug.longdescs)}
-    components = database.setdefault(backtrace, {})
-    if buginfo.component in components:
-        # Same backtrace in the same component: report the bug that was
-        # recorded first for this group.
-        if options.verbose:
-            print "Duplicate found: {0}".format(components[buginfo.component][0]['id'])
-            print "Backtrace: {0}".format(backtrace)
-    components.setdefault(buginfo.component, []).append(bugitem)
-# The number of duplicates.
-dupcount = 0
-# The number of duplicates that can be closed.
-dupclosecount = 0
-for components in database.values():
-    for bugitems in components.values():
-        # Every bug of a group except one counts as a duplicate.
-        surplus = len(bugitems) - 1
-        # Bugs with at most two comments can be closed, but never more of
-        # them than there are duplicates in the group.
-        quiet = sum(1 for entry in bugitems if entry["comments"] <= 2)
-        dupcount += surplus
-        dupclosecount += min(quiet, surplus)
-# Get the component owner.
-# Collect every duplicate group together with its component owner; groups
-# made of a single bug are not duplicates and are left out.
-dups = []
-for backtrace, components in database.items():
-    for component, bugitems in components.items():
-        if len(bugitems) <= 1:
-            continue
-        # Best-effort lookup: the placeholder below is kept when it fails.
-        owner = "Failed to get component owner"
-        try:
-            # NOTE(review): this URL has no scheme or host; presumably the
-            # package database base URL was lost -- confirm the intended URL.
-            component_info = json.load(urllib.urlopen("{0}?tg_format=json".format(component)))
-            component_packages = component_info['packageListings']
-            component_f12 = [listing for listing in component_packages
-                             if listing["collection"]["version"] == "12"]
-            if len(component_f12) == 1:
-                owner = component_f12[0]["owner"]
-        except (KeyError, IOError, ValueError):
-            # KeyError: unexpected JSON layout; IOError: the resource could
-            # not be opened; ValueError: the response is not valid JSON.
-            pass
-        dups.append((component, owner, bugitems, backtrace))
-        # Progress indicator.
-        print "."
-# Close all bugs where it is appropriate.
-if options.close:
-    LIMIT = 10000 # infinite
-    counter = 0
-    for (component, owner, bugitems, backtrace) in dups:
-        # Find the master bug item: the one with the most comments. When the
-        # comment counts are equal the lowest bug id wins, so that the older
-        # bug becomes the master. A key function replaces the comparison
-        # function that used to be redefined on every iteration.
-        sorteditems = sorted(bugitems,
-                             key=lambda entry: (-entry['comments'], entry['id']))
-        master = sorteditems[0]
-        # Check the master bug status AGAIN to make sure the bug is still opened.
-        bug = bz.getbug(int(master['id']))
-        if bug.bug_status not in ["NEW", "ASSIGNED"]:
-            continue
-        # All other bugs without user comments will be closed as a duplicate
-        # of the master bug.
-        for item in sorteditems[1:]:
-            if item['comments'] > 2:
-                continue
-            # Check the bug status AGAIN to make sure the bug is still opened.
-            bug = bz.getbug(int(item['id']))
-            if bug.bug_status not in ["NEW", "ASSIGNED"]:
-                continue
-            print "Closing bug #{0} with {1} comments as a duplicate of #{2}.".format(item['id'], item['comments'], master['id'])
-            bug.close("DUPLICATE", int(master['id']), "",
-                      ("This bug appears to have been filled using a buggy version of ABRT, because\n" +
-                       "it contains a backtrace which is a duplicate of backtrace from bug #{0}.\n\n" +
-                       "Sorry for the inconvenience.").format(master['id']))
-            counter += 1
-            # Stop once LIMIT bugs were closed (">" allowed LIMIT + 1).
-            if counter >= LIMIT:
-                sys.exit(0)
-# Print the summary header with both duplicate counters.
-print "\n".join([
-    "SUMMARY",
-    "==========================================================================",
-    "Total number of duplicate bugs detected: {0}".format(dupcount),
-    "Number of duplicate bugs that will be closed : {0}".format(dupclosecount),
-    "------------------------------",
-])
-# Print the duplicates sorted by package owner.
-def cmp(x, y):
-    """Compare two duplicate records by their second element (the owner).
-
-    Returns -1, 0 or 1 when x's owner sorts before, equal to or after y's.
-    """
-    owner_x, owner_y = x[1], y[1]
-    if owner_x == owner_y:
-        return 0
-    return -1 if owner_x < owner_y else 1
-# One report entry per duplicate group, ordered by owner (the second element
-# of each record); sorted() is stable, so equal owners keep their order.
-for (component, owner, bugitems, backtrace) in sorted(dups, key=lambda dup: dup[1]):
-    # NOTE(review): the condition is truncated in this listing; the --wiki
-    # option is the only switch between the two output formats -- confirm.
-    if options.wiki:
-        print "----"
-        print "* component: '''{0}''' ({1})".format(component, owner)
-        print "* duplicates: {0}".format(
-            ", ".join("#[{0} {0}] ({1} comments)".format(x['id'], x['comments'])
-                      for x in bugitems))
-        print "* backtrace:"
-        # Drop the "Thread" header lines and print each remaining line as a
-        # numbered wiki list item.
-        for line in backtrace.replace("Thread\n", "").splitlines():
-            print "*# {0}".format(line)
-    else:
-        print "Component: {0} ({1})".format(component, owner)
-        print "Duplicates: {0}".format(
-            ", ".join("{0} ({1})".format(x['id'], x['comments'])
-                      for x in bugitems))
-        print "Backtrace: {0}".format(backtrace)