From 3dc290bb06ab719f56c4aed249594dd744fd5678 Mon Sep 17 00:00:00 2001
From: James Laska
Date: Tue, 29 Mar 2011 12:16:38 -0400
Subject: Add repoclosure-bz script that auto-files repoclosure bugs

---
 repoclosure-bz | 407 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 407 insertions(+)
 create mode 100755 repoclosure-bz

diff --git a/repoclosure-bz b/repoclosure-bz
new file mode 100755
index 0000000..2b50441
--- /dev/null
+++ b/repoclosure-bz
@@ -0,0 +1,407 @@
#!/usr/bin/python

import os
import sys
import re
import subprocess
import logging
import hashlib
import optparse
import urlparse
import urlgrabber
import rpmUtils.miscutils
import bugzilla

# Initial simple logging stuff
logging.basicConfig()
log = logging.getLogger("bugzilla")
if '--debug' in sys.argv:
    log.setLevel(logging.DEBUG)
elif '--verbose' in sys.argv:
    log.setLevel(logging.INFO)

default_bz = 'https://bugzilla.redhat.com/xmlrpc.cgi'
bzclass = bugzilla.RHBugzilla3
#bzclass = bugzilla.Bugzilla3
bz = bzclass(url=default_bz)

# Extract and return a package name when supplied a string in the format of
# %{name}-%{version}-%{release}
def get_package_name(envra):
    (name, version, release, epoch, arch) = rpmUtils.miscutils.splitFilename(envra)
    return name

# Given a list of strings, return a single hash token that characterizes them
def hash_data(buffer):
    # Sort the buffer so the hash is order-independent
    buffer.sort()

    # Convert to a string
    buffer = ' '.join(buffer)

    # Hash and return the whiteboard token
    return "repoclosure_hash:%s" % hashlib.sha256(buffer).hexdigest()

# Given a list of repoclosure output lines, return a list of hashes that
# characterize the repoclosure failures
def generate_hash_list(lines):
    buffer = list()
    hashes = dict()  # use a dictionary to avoid duplicates
    name = ""

    for line in lines:
        # If this is a 'package:' line, note the package name and move on
        if line.startswith("package:"):
            # Determine binary package name for use with hashing
            envra = re.search("^package: ([^ ]*) from.*", line).group(1)
            assert isinstance(envra, str) and envra != ""
            name = get_package_name(envra)
            continue
        # If this is an 'unresolved deps:' line, process it
        elif "unresolved deps:" in line:
            # If buffered information exists, hash it
            if len(buffer) > 0:
                hashes[hash_data(buffer)] = True
            # Clear the buffer, seeding it with the binary package name
            assert isinstance(name, str) and name != ""
            buffer = [name]
        # Otherwise, continue updating the buffer
        else:
            # Remove leading+trailing whitespace
            line = line.strip()

            # Remove any arch-specific data: "()(64bit)"
            line = line.replace("()(64bit)", "")

            # Update the buffer with data
            assert isinstance(buffer, list)
            buffer.append(line)

    # If any buffered data remains, hash it
    if len(buffer) > 0:
        hashes[hash_data(buffer)] = True
        buffer = list()

    return hashes.keys()
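# For illustration only (hypothetical package names): given repoclosure
# output lines such as
#
#   package: foo-1.0-1.fc15.x86_64 from repo-1
#     unresolved deps:
#        libbar.so.2
#
# generate_hash_list() buffers ['foo', 'libbar.so.2'] and reduces the
# failure to a stable token of the form 'repoclosure_hash:<sha256 hexdigest>'.
# The tokens are stored in each bug's status whiteboard, which is how later
# runs recognize an already-filed failure instead of filing a duplicate.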
# Using a supplied product, version and list of hashes, return a list of
# bugs matching any of the supplied hashes
def bz_find_matching_hashes(bz_product, bz_version, hashes):
    # Force a list
    if isinstance(hashes, str):
        hashes = [hashes]

    # Build the query parameters
    q = dict()
    q['product'] = bz_product
    q['version'] = bz_version
    q['status_whiteboard'] = ' '.join(hashes)
    q['status_whiteboard_type'] = 'anywords'
    q['column_list'] = ['bug_id', 'bug_status', 'assigned_to', 'component',
                        'short_desc', 'keywords', 'blockedby']
    log.debug("bz.query: %s", q)
    buglist = bz.query(q)

    return buglist

# File a bug in bugzilla using the provided information
def bz_file_bug(product, version, component, summary, hashes, description,
                blocked=[]):
    data = dict(product=product, version=version, component=component,
                short_desc=summary, comment=description,
                rep_platform='All', bug_severity='medium', priority='medium',
                op_sys='Linux', bug_file_loc='',
                status_whiteboard=' '.join(hashes))
    if len(blocked) > 0:
        data['blocked'] = map(int, blocked)  # force a list of ints
    b = bz.createbug(**data)
    b.refresh()
    return b

# Add the supplied 'blocks' information to the given bug
def bz_add_to_blocker(bug, blocks):
    # Ensure arguments are of the expected type
    assert isinstance(bug, bugzilla.base._Bug)
    # Convert to a list
    if isinstance(blocks, int):
        blocks = [blocks]
    # If no data was provided, simply return
    if len(blocks) == 0:
        return

    # Get the list of old blocks
    old_blocks = bug.blocked
    log.debug("Updating bug#%s blocks - from '%s'" % (bug.bug_id, old_blocks))

    # Create a dict of the current list of blocks
    blocks_dict = {int(b): True for b in old_blocks}

    # Update blocks_dict with any new bug ids
    for b in blocks:
        if not isinstance(b, int):
            b = int(b)
        if b not in blocks_dict:
            print "\t- adding to blocks: %s" % b
            blocks_dict[b] = True

    # Convert back to a list
    new_blocks = blocks_dict.keys()

    # Update the bug
    log.debug("Updating bug#%s blocks - to '%s'" % (bug.bug_id, new_blocks))
    bz._updatedeps(bug.bug_id, new_blocks, [], 'add')

# Update the 'status_whiteboard' with new hashes
def bz_update_whiteboard(bug, hashes):
    # Ensure arguments are of the expected type
    assert isinstance(bug, bugzilla.base._Bug)
    # Force a hash list
    if isinstance(hashes, str):
        hashes = [hashes]
    # If no data was provided, simply return
    if len(hashes) == 0:
        return

    old_whiteboard = bug.getwhiteboard()
    # Strip leading+trailing whitespace
    old_whiteboard = old_whiteboard.strip()
    log.debug("Updating bug#%s whiteboard - from '%s'" % (bug.bug_id, old_whiteboard))

    # Create a dict of the current whiteboard entries
    whiteboard_dict = {k: True for k in old_whiteboard.split(' ')}

    # Update whiteboard_dict with any new hashes
    for hash in hashes:
        if hash not in whiteboard_dict:
            print "\t- adding to whiteboard: %s" % hash
            whiteboard_dict[hash] = True

    # Flatten to a string
    new_whiteboard = ' '.join(whiteboard_dict.keys())
    # Remove leading+trailing whitespace
    new_whiteboard = new_whiteboard.strip()

    # Update the whiteboard to include any new hashes
    log.debug("Updating bug#%s whiteboard - to '%s'" % (bug.bug_id, new_whiteboard))
    bug.setwhiteboard(new_whiteboard)

def get_basearch():
    '''like get_arch, but returns the basearch (as used by yum etc.)'''
    arch = os.uname()[4]
    if arch in ('i486', 'i586', 'i686'):
        arch = 'i386'
    elif arch == 'ppc64':
        arch = 'ppc'
    return arch
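# Example usage (illustrative only; the repository URL and blocker bug id
# below are made up):
#
#   ./repoclosure-bz --repo http://example.com/fedora/rawhide/x86_64/os/ \
#       --product Fedora --version rawhide --blocks 123456
#
# Results from a previous run can be reused with -l|--logurl instead of
# re-running repoclosure.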
"--logurl", action="store", default="", + help="URL for existing repoclosure results. When provided, it will" + \ + "just process existing results and not re-run repoclosure (useful" + \ + "when running multiple times)") + optgrp.add_option('-r', '--repo', action='append', default=[], dest="repos", + help="Repository URL; can be used multiple times (default: use system " + \ + "repos)") + parser.add_option_group(optgrp) + + # Bugzilla options + optgrp = optparse.OptionGroup(parser, "Bugzilla options") + optgrp.add_option('-b', '--blocks', action='append', + default=[], help="Set blocks field when filing bugs") + optgrp.add_option('--product', dest='bz_product', action='store', + default='Fedora', help='Bugzilla product (default: %default)') + optgrp.add_option('--version', dest='bz_version', action='store', + default='rawhide', help='Bugzilla version (default: %default)') + parser.add_option_group(optgrp) + + (opts, args) = parser.parse_args() + + if len(opts.repos) == 0 and opts.logurl == "": + parser.error("Must provide either one -r|--repo or -l|--logurl") + sys.exit(1) + + return (opts, args) + +# Validate that specific applications are available on this system +def system_sanity(): + '''Check the system to be sure the required binaries/services are there''' + try: + cmds = [ "repoclosure --help", \ + "repoquery --help", \ + ] + for cmd in cmds: + msg = "'%s' failed. Be sure %s is installed." % (cmd, cmd.split(' ')[0]) + assert subprocess.call(cmd.split(' '), stdout=subprocess.PIPE) == 0 + return True + except (IOError, OSError, AssertionError): + print msg + return False + +if __name__ == "__main__": + + # Parse arguments + (opts, args) = parse_args() + + # Validate environment + if not system_sanity(): + sys.exit(1) + + # Attempt to authenticate to bugzilla + if os.path.exists(bz.cookiefile): + log.info('Using cookies in %s for bugzilla authentication', bz.cookiefile) + else: + # FIXME check to see if .bugzillarc is in use + log.info("No authentication info provided. Have you run `bugzilla login`?") + + # Build shared repoclosure and repoquery argument string + repo_opts = "--tempcache " + count = 1 + for url in opts.repos: + repo_opts += " --repofrompath=repo-%s,%s --repoid=repo-%s" % (count, + url, count) + count += 1 + + # Has repoclosure already been run? + if opts.logurl: + try: + data = urlgrabber.urlread(opts.logurl) + except Exception, e: + log.error("Failed to read '%s'" % opts.logurl) + log.debug(e) + sys.exit(1) + + else: + # Run repoclosure + print "Running repoclosure (this may take a few minutes) ..." + cmd = 'repoclosure --newest %s' % (repo_opts,) + logging.debug(cmd) + data = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, + stderr=subprocess.STDOUT).communicate()[0].strip() + print data + # Output the results for debugging + log.debug(data) + + # Parse results + print "Parsing results ..." + problems = dict() + package = "" + src = "" + for line in data.split('\n'): + + log.debug("Parsing line '%s'" % line) + if line.startswith("package:"): + package = re.search("^package: ([^ ]*) from.*", line).group(1) + + # Keep track of problems by src-rpm name (not binary) + cmd = "repoquery -q --qf %%{sourcerpm},%%{PACKAGER} %s %s" % (repo_opts, package) + result = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, + stderr=open('/dev/null', 'w')).communicate()[0].strip() + (src, packager) = result.split(',',1) + + # Ignore anything that isn't packaged by Fedora (e.g. 
    # Parse results
    print "Parsing results ..."
    problems = dict()
    package = ""
    src = ""
    for line in data.split('\n'):

        log.debug("Parsing line '%s'" % line)
        if line.startswith("package:"):
            package = re.search("^package: ([^ ]*) from.*", line).group(1)

            # Keep track of problems by src-rpm name (not binary)
            cmd = "repoquery -q --qf %%{sourcerpm},%%{PACKAGER} %s %s" % (repo_opts, package)
            result = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE,
                stderr=open('/dev/null', 'w')).communicate()[0].strip()
            (src, packager) = result.split(',', 1)

            # Ignore anything that isn't packaged by Fedora (e.g. rpmfusion)
            if packager != "Fedora Project":
                log.warn("Unsupported packager (%s), skipping '%s'" % (packager, package))
                src = ""
                continue

            # Strip off '.src.rpm'
            if src.endswith(".src.rpm"):
                src = src[:-8]

            # Log the result
            if src == "":
                log.warn("Unable to locate src.rpm for '%s'" % package)
                continue
            else:
                log.info("Identified failure with package '%s', src '%s'" % \
                    (package, src))

            # Update the dictionary of src names
            if src not in problems:
                problems[src] = dict(output=list(), packages=[package])
            else:
                problems[src]['packages'].append(package)

        # Always save the output
        if src != "":
            problems[src]['output'].append(line)

    # Print a summary
    print "%d dependency problems affecting %d source packages" % \
        (data.count("unresolved deps:"), len(problems))

    # Hash the dependency data
    for (src, problem) in problems.items():

        output = problem['output']
        # Generate a list of unique hashes for each problem in the output
        hashes = generate_hash_list(output)
        problem['hashes'] = hashes

        # Strip off the %{version}-%{release}, leaving just %{name}
        component = get_package_name(src)

        # Create a description that consists of the error, and the stdout
        # leading up to it (not the full repoclosure stdout)
        description = data[:data.index("package:")] + "\n".join(output)

        # Attempt to make a meaningful summary
        # XXX - this relies on pattern matching repoclosure output ... likely
        # to break
        match = re.search('package: ([^ ]*) from [^ ]*\n[ ]*unresolved deps:[ ]*\n(.*)',
            '\n'.join(output), re.MULTILINE)
        if match:
            summary = "Broken dependency: %s requires %s" % (match.group(1).strip(),
                match.group(2).strip())
        else:
            summary = "Broken dependencies for %s" % src

        # Print a per-package summary
        print "\n== %s ==" % src
        print "Summary: %s" % summary
        print "Product: %s" % opts.bz_product
        print "Version: %s" % opts.bz_version
        print "Component: %s" % component
        print "Whiteboard:\n\t%s" % "\n\t".join(hashes)
        print "Binaries affected:\n\t%s" % "\n\t".join(problem['packages'])
        print "Description:\n%s\n" % description

        # Attempt to find an existing bug matching any of the hashes
        matching_bugs = bz_find_matching_hashes(opts.bz_product, opts.bz_version, hashes)
        if len(matching_bugs) > 0:
            for b in matching_bugs:
                print "Found existing bug: https://%s/%s (%s)" % \
                    (urlparse.urlparse(default_bz).netloc, str(b.bug_id), b.bug_status)
                # If --blocks were provided, add them now
                if opts.blocks:
                    bz_add_to_blocker(b, opts.blocks)
                # Update the whiteboard with any new hashes
                bz_update_whiteboard(b, hashes)

        # No matching bug was found, let's create one
        else:
            yesno = opts.assumeyes
            # If we are at a terminal, and weren't told to assume 'yes'
            if not yesno and sys.stdin.isatty():
                yesno = raw_input("File a new bug (y|n): ").lower() == 'y'
            if yesno:
                # File a new bug
                b = bz_file_bug(product=opts.bz_product, version=opts.bz_version,
                    component=component, summary=summary, hashes=hashes,
                    description=description, blocked=opts.blocks)
                print "Filed new bug: https://%s/%s" % \
                    (urlparse.urlparse(default_bz).netloc, str(b.bug_id))
            else:
                log.warn("Not filing a bug at user request")
--
cgit