4 files changed, 433 insertions, 0 deletions
diff --git a/scripts/abrt-bz-downloader b/scripts/abrt-bz-downloader
new file mode 100755
index 00000000..7f294257
--- /dev/null
+++ b/scripts/abrt-bz-downloader
@@ -0,0 +1,82 @@
+#!/usr/bin/python
+# -*- mode:python -*-
+# ABRT Bugzilla Backtrace Downloader
+# Downloads all backtraces reported by ABRT from Bugzilla.
+#
+# Please do not run this script unless it's necessary to do so.
+# It forces Bugzilla to send data related to thousands of bug reports.
+
+from bugzilla import RHBugzilla
+from optparse import OptionParser
+import sys
+import os.path
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-u", "--user", dest="user",
+                  help="Bugzilla user name (REQUIRED)", metavar="USERNAME")
+parser.add_option("-p", "--password", dest="password",
+                  help="Bugzilla password (REQUIRED)", metavar="PASSWORD")
+parser.add_option("-b", "--bugzilla", dest="bugzilla",
+                  help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL")
+parser.add_option("-f", "--fields",
+                  action="store_true", dest="fields", default=False,
+                  help="Print possible bug fields and exit.")
+
+(options, args) = parser.parse_args()
+
+if not options.user or len(options.user) == 0:
+  parser.error("User name is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.password or len(options.password) == 0:
+  parser.error("Password is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.bugzilla or len(options.bugzilla) == 0:
+  options.bugzilla = "https://bugzilla.redhat.com/xmlrpc.cgi"
+
+bz = RHBugzilla()
+bz.connect(options.bugzilla)
+bz.login(options.user, options.password)
+
+if options.fields:
+  print bz.bugfields
+  exit(0)
+
+buginfos = bz.query({'status_whiteboard_type':'allwordssubstr','status_whiteboard':'abrt_hash'})
+
+print "{0} bugs found.".format(len(buginfos))
+
+for buginfo in buginfos:
+  # Skip bugs with already downloaded backtraces.
+  filename = "{0}.bt".format(buginfo.bug_id)
+  if os.path.isfile(filename):
+    print "Skipping {0} (already exists).".format(filename)
+    continue
+
+  # Skip bugs with broken or Python backtraces
+  broken_backtrace_bugs = [ 517116, # binary file :)
+                            518516, # not a backtrace, GDB fail
+                            524259, # multiple backtraces in single file
+                            524427, # multiple backtraces in single file
+                            528529, # just [New Thread xx] lines
+                            #528915, 10000 frames, out of memory, to be fixed
+                            #529422, 10000 frames, out of memory, to be fixed
+                            #530239, 10000 frames, out of memory, to be fixed
+                            532264, # no header
+                            533475, # no backtrace
+                            #537819, 50000 frames, out of memory, to be fixed
+                            #539699, to be fixed, parser bug
+                            539992] # completely broken backtrace
+  if buginfo.bug_id in broken_backtrace_bugs:
+    continue
+
+  # Get backtrace from bug and store it as a file.
+  bug = bz.getbug(buginfo.bug_id)
+  for attachment in bug.attachments:
+    if attachment['filename'] == 'backtrace':
+      data = bz.openattachment(attachment['id'])
+      f = open(filename, 'w')
+      f.write(data.read())
+      f.close()
+      print "Attachment {0} downloaded.".format(filename)
+
+bz.logout()
diff --git a/scripts/abrt-bz-dupchecker b/scripts/abrt-bz-dupchecker
new file mode 100755
index 00000000..654a3702
--- /dev/null
+++ b/scripts/abrt-bz-dupchecker
@@ -0,0 +1,272 @@
+#!/usr/bin/python
+# -*- mode:python -*-
+# ABRT Bugzilla Duplication Checker
+# Downloads all backtraces reported by ABRT from Bugzilla,
+# and searches for duplicates using the newest ABRT duplication
+# checker.
+#
+# Some bugs in Bugzilla were reported by older ABRT
+# versions, which had poor duplication detection.
+#
+# Please do not run this script unless it's necessary to do so.
+# It forces Bugzilla to send data related to thousands of bug reports.
+#
+#
+# Useful text to be pasted to Bugzilla:
+"""
+This bug appears to have been filed using a buggy version of ABRT, because
+it contains an unusable backtrace. Sorry for the inconvenience.
+Closing as INSUFFICIENT_DATA.
+"""
+
+from bugzilla import RHBugzilla
+from optparse import OptionParser
+import sys
+import os.path
+import subprocess
+import cPickle
+import urllib
+import json
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-u", "--user", dest="user",
+                  help="Bugzilla user name (REQUIRED)", metavar="USERNAME")
+parser.add_option("-p", "--password", dest="password",
+                  help="Bugzilla password (REQUIRED)", metavar="PASSWORD")
+parser.add_option("-b", "--bugzilla", dest="bugzilla", default="https://bugzilla.redhat.com/xmlrpc.cgi",
+                  help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL")
+parser.add_option("-v", "--verbose", dest="verbose",
+                  help="Detailed output")
+parser.add_option("-c", "--close", help="Close some of the bugs in Bugzilla (DANGEROUS)", 
+                  action="store_true", default=False, dest="close")
+parser.add_option("-i", "--wiki", help="Generate output in wiki syntax", 
+                  action="store_true", default=False, dest="wiki")
+
+(options, args) = parser.parse_args()
+
+if not options.user or len(options.user) == 0:
+  parser.error("User name is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.password or len(options.password) == 0:
+  parser.error("Password is required.\nTry {0} --help".format(sys.argv[0]))
+
+bz = RHBugzilla()
+bz.connect(options.bugzilla)
+bz.login(options.user, options.password)
+
+buginfos = bz.query({'status_whiteboard_type':'allwordssubstr','status_whiteboard':'abrt_hash'})
+
+print "{0} bugs found.".format(len(buginfos))
+
+#
+# Load the cache from a previous run. It speeds up recovery when Bugzilla
+# closes the connection. Remove the cache manually after a day or so, because
+# by then the data in it is no longer valid.
+#
+database = {}
+ids = {}
+CACHE_FILE = "abrt-bz-dupchecker-cache.tmp"
+if os.path.isfile(CACHE_FILE):
+  f = open(CACHE_FILE, 'r')
+  database = cPickle.load(f)
+  ids = cPickle.load(f)
+  f.close()
+
+def save_to_cache():
+  global database
+  f = open(CACHE_FILE, 'w')
+  cPickle.dump(database, f, 2)
+  cPickle.dump(ids, f, 2)
+  f.close()  
+
+count = 0
+for buginfo in buginfos:
+  count += 1
+  print "{0}/{1}".format(count, len(buginfos))
+  if count % 100 == 0:
+    save_to_cache()
+
+  if ids.has_key(buginfo.bug_id):
+    continue
+
+  ids[buginfo.bug_id] = True
+
+  if not buginfo.bug_status in ["NEW", "ASSIGNED", "MODIFIED", "VERIFIED"]:
+    if options.verbose:
+      print "Bug {0} has status {1}, skipping.".format(buginfo.bug_id, buginfo.bug_status)
+    continue
+
+  bug = bz.getbug(buginfo.bug_id)
+
+  # Skip bugs with already downloaded backtraces.
+  filename = "{0}.bt".format(buginfo.bug_id)
+  if os.path.isfile(filename):
+    if options.verbose:
+      print "Skipping {0} (already exists).".format(filename)
+  else:
+    # Get backtrace from bug and store it as a file.
+    downloaded = False
+    for attachment in bug.attachments:
+      if attachment['filename'] == 'backtrace':
+        data = bz.openattachment(attachment['id'])
+        f = open(filename, 'w')
+        f.write(data.read())
+        f.close()
+        downloaded = True
+        if options.verbose:
+          print "Attachment {0} downloaded.".format(filename)
+
+    # Silently skip bugs without backtrace.
+    # Those are usually duplicates of bugs; the duplication copies
+    # abrt_hash, but it does not copy the attachment.
+    if not downloaded:
+      continue
+  
+  command = ["./abrt-backtrace"]
+  command.append(filename)
+  command.append("--single-thread")
+  command.append("--frame-depth=5")
+  command.append("--remove-exit-handlers")
+  command.append("--remove-noncrash-frames")
+
+  helper = subprocess.Popen(command, stdout=subprocess.PIPE)
+  backtrace, err = helper.communicate()
+  helper.wait()
+
+  if helper.returncode != 0:
+    print "Problems parsing {0}".format(filename)
+    continue
+
+  # Empty backtrace is provided by Python apps.
+  if len(backtrace) == 0:
+    continue
+
+  bugitem = {'id':buginfo.bug_id, 'comments':len(bug.longdescs)}
+  if backtrace in database:
+    components = database[backtrace]
+    if buginfo.component in components:
+      components[buginfo.component].append(bugitem)
+      if options.verbose:
+        print "Duplicate found: {0}".format(database[out]['id'])
+        print "Backtrace: {0}".format(out)
+    else:
+      components[buginfo.component] = [ bugitem ]
+  else:
+    database[backtrace] = { buginfo.component: [ bugitem ] }
+
+# The number of duplicates.
+dupcount = 0
+# The number of duplicates that can be closed.
+dupclosecount = 0
+for backtrace, components in database.items():
+  for component, bugitems in components.items():
+    dupcount += len(bugitems) - 1
+    dupclosecount += min(len(filter(lambda x: x <= 2, 
+                                    map(lambda x: x["comments"], 
+                                        bugitems))),
+                         len(bugitems) - 1)
+
+# Get the component owner.    
+# Sort the duplicates by the component owner, and
+# filter out those which should not be printed.
+dups = []
+for backtrace, components in database.items():
+  for component, bugitems in components.items():
+    if len(bugitems) <= 1:
+      continue
+
+    # Get the component owner
+    owner = "Failed to get component owner"
+    try:
+      component_info = json.load(urllib.urlopen("https://admin.fedoraproject.org/pkgdb/packages/name/{0}?tg_format=json".format(component)))
+      component_packages = component_info['packageListings']
+      component_f12 = filter(lambda x:x["collection"]["version"]=="12", component_packages)
+      if len(component_f12) == 1:
+        owner = component_f12[0]["owner"]
+    except KeyError:
+      pass
+      
+    dups.append((component, owner, bugitems, backtrace))
+    print "."
+
+# Close all bugs where it is appropriate.
+if options.close:
+  LIMIT = 1000
+  counter = 0
+  for (component, owner, bugitems, backtrace) in dups:
+    # Find the master bug item
+    # Its the one with the most comments.
+    
+    # Sort function sorting by comment count.
+    def commentCmp(x, y):
+      if x['comments'] < y['comments']:
+        return 1
+      elif x['comments'] == y['comments']: 
+        # Sort by bug id, older bugs should became the master bug
+        if x['id'] > y['id']:
+          return 1
+        elif x['id'] == y['id']:
+          return 0
+        else:
+          return -1
+      else:
+        return -1
+
+    sorteditems = sorted(bugitems, commentCmp)
+
+    master = sorteditems[0]
+    for item in sorteditems[1:]:
+      if item['comments'] > 2:
+        continue
+
+      bug = bz.getbug(int(item['id']))
+      # Check the bug status AGAIN to make sure the bug is still opened.
+      if not bug.bug_status in ["NEW", "ASSIGNED"]:
+        continue
+
+      print "Closing bug #{0} with {1} comments as a duplicate of #{2}.".format(item['id'], item['comments'], master['id'])
+      bug.close("DUPLICATE", int(master['id']), "", 
+                ("This bug appears to have been filled using a buggy version of ABRT, because\n" +
+                "it contains a backtrace which is a duplicate of backtrace from bug #{0}.\n\n" +
+                "Sorry for the inconvenience.").format(master['id']))
+
+      counter += 1
+      if counter > LIMIT:
+        sys.exit(0)
+      
+bz.logout()
+
+print
+print "SUMMARY"
+print "=========================================================================="
+print "Total number of duplicate bugs detected: {0}".format(dupcount)
+print "Number of duplicate bugs that will be closed : {0}".format(dupclosecount)
+print "------------------------------"
+
+# Print the duplicates sorted by package owner.
+def cmp(x, y):
+  if x[1] < y[1]:
+    return -1
+  elif x[1] == y[1]:
+    return 0
+  else:
+    return 1
+
+for (component, owner, bugitems, backtrace) in sorted(dups, cmp):
+  if options.wiki:
+    print "----"
+    print "* component: '''{0}''' ({1})".format(component, owner)
+    print "* duplicates: {0}".format(
+      reduce(lambda x,y: x+", "+y, 
+             map(lambda x: "#[https://bugzilla.redhat.com/show_bug.cgi?id={0} {0}] ({1} comments)".format(x['id'],x['comments']), 
+                 bugitems)))
+    print "* backtrace:"
+    for line in backtrace.replace("Thread\n", "").splitlines():
+      print "*# {0}".format(line)
+  else:
+    print "Component: {0} ({1})".format(component, owner)
+    print "Duplicates: {0}".format(
+      reduce(lambda x,y: x+", "+y, 
+             map(lambda x: "{0} ({1})".format(x['id'],x['comments']), 
+                 bugitems)))
+    print "Backtrace: {0}".format(backtrace)
diff --git a/scripts/abrt-bz-hashchecker b/scripts/abrt-bz-hashchecker
new file mode 100755
index 00000000..ec7ce1a6
--- /dev/null
+++ b/scripts/abrt-bz-hashchecker
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+# -*- mode:python -*-
+# Checks how many bugs in Bugzilla have the same hash.
+#
+# Please do not run this script unless it's necessary to do so.
+# It forces Bugzilla to send data related to thousands of bug reports.
+
+from bugzilla import RHBugzilla
+from optparse import OptionParser
+import sys
+import os.path
+import subprocess
+import re
+
+parser = OptionParser(version="%prog 1.0")
+parser.add_option("-u", "--user", dest="user",
+                  help="Bugzilla user name (REQUIRED)", metavar="USERNAME")
+parser.add_option("-p", "--password", dest="password",
+                  help="Bugzilla password (REQUIRED)", metavar="PASSWORD")
+parser.add_option("-b", "--bugzilla", dest="bugzilla",
+                  help="Bugzilla URL (defaults to Red Hat Bugzilla)", metavar="URL")
+
+(options, args) = parser.parse_args()
+
+if not options.user or len(options.user) == 0:
+  parser.error("User name is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.password or len(options.password) == 0:
+  parser.error("Password is required.\nTry {0} --help".format(sys.argv[0]))
+
+if not options.bugzilla or len(options.bugzilla) == 0:
+  options.bugzilla = "https://bugzilla.redhat.com/xmlrpc.cgi"
+
+bz = RHBugzilla()
+bz.connect(options.bugzilla)
+bz.login(options.user, options.password)
+
+buginfos = bz.query({'status_whiteboard_type':'allwordssubstr','status_whiteboard':'abrt_hash'})
+
+print "{0} bugs found.".format(len(buginfos))
+
+hashes = {}
+for buginfo in buginfos:
+    match = re.search("abrt_hash:([^ ]+)", buginfo.status_whiteboard)
+    if not match:
+      continue
+    hash = match.group(1)
+    if not hash:
+      continue
+    if hash in hashes:
+        hashes[hash].append(buginfo.bug_id)
+    else:
+        hashes[hash] = [ buginfo.bug_id ]
+    print hash
+bz.logout()
+
+for hash, ids in hashes.items():
+    if len(ids) > 1:
+      print "Duplicates found: ", reduce(lambda x,y: str(x)+", "+str(y), ids)
diff --git a/scripts/check-bt-parsability b/scripts/check-bt-parsability
new file mode 100755
index 00000000..a5018bfa
--- /dev/null
+++ b/scripts/check-bt-parsability
@@ -0,0 +1,20 @@
+#!/bin/bash
+# -*- mode: bash -*-
+
+PASS=0
+FAIL=0
+for file in *.bt
+do
+  #echo "$file"
+  ./abrt-backtrace $file 1> /dev/null
+  if [ "$?" -eq "0" ]
+  then
+      echo -n "."
+      PASS=$(($PASS+1))
+  else
+      echo "-$file"
+      FAIL=$(($FAIL+1))
+  fi
+done 
+echo ""
+echo "Passed $PASS and failed $FAIL."