#!/usr/bin/python

import os
import sys
import re
import subprocess
import logging
import hashlib
import optparse
import urlparse
import urlgrabber
import rpmUtils.miscutils
import bugzilla

# Bootstrap logging straight from sys.argv (before any option parsing is
# done) so that --debug / --verbose also apply to the "bugzilla" logger.
logging.basicConfig()
log = logging.getLogger()
if '--debug' in sys.argv:
    _cli_loglevel = logging.DEBUG
elif '--verbose' in sys.argv:
    _cli_loglevel = logging.INFO
else:
    # Neither flag given: leave both loggers at their default level
    _cli_loglevel = None
if _cli_loglevel is not None:
    logging.getLogger("bugzilla").setLevel(_cli_loglevel)
    log.setLevel(_cli_loglevel)

# Shared bugzilla client used by all bz_* helpers below.
# (bugzilla.Bugzilla3 is the alternative client class.)
default_bz = 'https://bugzilla.redhat.com/xmlrpc.cgi'
bzclass = bugzilla.RHBugzilla3
#bzclass = bugzilla.Bugzilla3
bz = bzclass(url=default_bz)

def get_package_name(envra):
    """Return the package name contained in an RPM package identifier.

    envra is a string such as %{name}-%{version}-%{release}.  It is handed to
    rpmUtils.miscutils.splitFilename() and only the first element of the
    result (the name) is returned.
    """
    return rpmUtils.miscutils.splitFilename(envra)[0]

def hash_data(buffer):
    """Return one whiteboard token that fingerprints a list of strings.

    The strings are sorted, joined with single spaces and hashed with
    SHA-256.  The hex digest is returned prefixed with "repoclosure_hash:".
    Because of the sort, the result does not depend on the input order.

    The caller's list is not modified; a sorted copy is used instead of
    sorting in place.  Text that is not already a byte string is encoded as
    UTF-8 before hashing.  For byte-string input the digest is the same as
    hashing the joined string directly, so previously generated tokens
    remain comparable.
    """
    # sorted() copies, so the argument is left exactly as the caller passed it
    joined = ' '.join(sorted(buffer))

    # hashlib needs bytes; only encode when we were given text
    if not isinstance(joined, bytes):
        joined = joined.encode('utf-8')

    return "repoclosure_hash:%s" % hashlib.sha256(joined).hexdigest()

def generate_hash_list(lines):
    """Return the unique hashes that characterize repoclosure failures.

    lines is an iterable of repoclosure output lines.  Each line is handled
    in one of three ways:

    * a line starting with "package:" supplies the binary package name used
      to seed the next group of dependencies; the line itself is not hashed
    * a line containing "unresolved deps:" closes the group collected so far
      (hashing it if it is non-empty) and starts a new group holding only
      the current package name
    * any other line is stripped, has "()(64bit)" removed, and is added to
      the current group

    Whatever group is still open when the input ends is hashed too.  The
    keys of a dict are returned so that identical hashes appear only once.
    """
    seen = dict()     # hash -> True; a dict is used to drop duplicates
    pending = list()  # members of the group currently being collected
    pkg_name = ""

    for raw in lines:
        if raw.startswith("package:"):
            # Remember the binary package name for the upcoming group
            envra = re.search("^package: ([^ ]*) from.*", raw).group(1)
            assert isinstance(envra, str) and envra != ""
            pkg_name = get_package_name(envra)
            continue

        if "unresolved deps:" in raw:
            # Flush the previous group before starting a new one
            if pending:
                seen[hash_data(pending)] = True
            assert isinstance(pkg_name, str) and pkg_name != ""
            pending = [pkg_name]
            continue

        # Plain dependency line: normalise it (whitespace and the
        # arch-specific "()(64bit)" marker) and add it to the group
        assert isinstance(pending, list)
        pending.append(raw.strip().replace("()(64bit)", ""))

    # Flush whatever is left once the input is exhausted
    if pending:
        seen[hash_data(pending)] = True

    return seen.keys()

def bz_find_matching_hashes(bz_product, bz_version, hashes):
    """Query bugzilla for bugs whose whiteboard mentions the given hashes.

    hashes may be a single string or a list of strings.  The query is
    restricted to bz_product / bz_version and sends the space-joined hashes
    as 'status_whiteboard' with match type 'anywords'.  Returns whatever
    bz.query() returns for that query.
    """
    # Accept a bare string as a one-element list
    wanted = [hashes] if isinstance(hashes, str) else hashes

    query = {
        'product': bz_product,
        'version': bz_version,
        'status_whiteboard': ' '.join(wanted),
        'status_whiteboard_type': 'anywords',
        'column_list': ['bug_id', 'bug_status', 'assigned_to', 'component',
                        'short_desc', 'keywords', 'blockedby'],
    }
    log.debug("bz.query: %s", query)
    return bz.query(query)

def bz_file_bug(product, version, component, summary, hashes, description, blocked=[]):
    """Create a new bug through the shared bugzilla client and return it.

    summary becomes the short description, description the initial comment,
    and the space-joined hashes the status whiteboard.  When blocked is
    non-empty its entries are converted to ints and sent as the 'blocked'
    field.  The new bug object is refreshed before it is returned.
    """
    fields = {
        'product': product,
        'version': version,
        'component': component,
        'short_desc': summary,
        'comment': description,
        'rep_platform': 'All',
        'bug_severity': 'medium',
        'priority': 'medium',
        'op_sys': 'Linux',
        'bug_file_loc': '',
        'status_whiteboard': ' '.join(hashes),
    }
    if len(blocked):
        # force as a list of int's
        fields['blocked'] = [int(bug_id) for bug_id in blocked]

    new_bug = bz.createbug(**fields)
    new_bug.refresh()
    return new_bug

def bz_add_to_blocker(bug, blocks):
    """Add bug ids to the list of bugs that the given bug blocks.

    bug must be a bugzilla.base._Bug.  blocks is a single int or a sequence
    of values convertible to int.  Nothing happens when blocks is empty.
    Otherwise the bug's current 'blocked' list is merged with the new ids
    (each new id is announced on stdout) and the merged list is passed to
    bz._updatedeps() with the 'add' action.
    """
    # Ensure arguments are of the expected type
    assert isinstance(bug, bugzilla.base._Bug)

    # A lone int is treated as a one-element list
    if isinstance(blocks, int):
        blocks = [blocks]
    if not len(blocks):
        return

    current = bug.blocked
    log.debug("Updating bug#%s blocks - from '%s'" % (bug.bug_id, current))

    # Keyed by bug id so that ids already present are not added twice
    merged = dict((int(existing), True) for existing in current)
    for candidate in blocks:
        bug_id = candidate if isinstance(candidate, int) else int(candidate)
        if bug_id in merged:
            continue
        print("\t- adding to blocks: %s" % bug_id)
        merged[bug_id] = True

    updated = merged.keys()

    log.debug("Updating bug#%s blocks - to '%s'" % (bug.bug_id, updated))
    bz._updatedeps(bug.bug_id, updated, [], 'add')

def bz_update_whiteboard(bug, hashes):
    """Append any missing hashes to the bug's status whiteboard.

    bug must be a bugzilla.base._Bug.  hashes is a single string or a list
    of strings.  Nothing happens when hashes is empty.

    The current whiteboard (read with bug.getwhiteboard() and stripped) is
    kept verbatim.  Hashes that are not already present as
    whitespace-separated tokens are announced on stdout and appended,
    separated by single spaces.  The result is written back with
    bug.setwhiteboard().

    The existing text is deliberately not rebuilt from a dict or set: doing
    so reorders and de-duplicates words that may have been written by a
    person, and splitting an empty whiteboard on ' ' yields a stray empty
    token.
    """
    # Ensure arguments are of the expected type
    assert isinstance(bug, bugzilla.base._Bug)
    # Accept a bare string as a one-element list
    if isinstance(hashes, str):
        hashes = [hashes]
    # If no data was provided, simply return
    if len(hashes) == 0:
        return

    old_whiteboard = bug.getwhiteboard().strip()
    log.debug("Updating bug#%s whiteboard - from '%s'" % (bug.bug_id, old_whiteboard))

    # split() with no argument ignores runs of whitespace and returns []
    # for an empty whiteboard, so no empty token can sneak in
    present = set(old_whiteboard.split())

    additions = []
    for token in hashes:
        if token not in present:
            print("\t- adding to whiteboard: %s" % token)
            present.add(token)
            additions.append(token)

    # Existing text first, new hashes after; strip() covers the case of an
    # empty original whiteboard
    new_whiteboard = ' '.join([old_whiteboard] + additions).strip()

    log.debug("Updating bug#%s whiteboard - to '%s'" % (bug.bug_id, new_whiteboard))
    bug.setwhiteboard(new_whiteboard)

def get_basearch():
    '''Return this machine's base architecture (as used by yum etc.).

    The machine field of os.uname() is folded as follows: i486, i586 and
    i686 become i386; ppc64 becomes ppc; anything else is returned as is.
    '''
    folded = {
        'i486': 'i386',
        'i586': 'i386',
        'i686': 'i386',
        'ppc64': 'ppc',
    }
    machine = os.uname()[4]
    return folded.get(machine, machine)

def parse_args():
    """Parse the command line and return the (opts, args) pair from optparse.

    Three groups of options are defined: general (--verbose, --debug,
    -y/--assumeyes), repoclosure (-a/--arch, -l/--logurl, --repoid,
    --repofrompath, --ignoreid) and bugzilla (-b/--blocks, --product,
    --component, --version).

    At least one --repofrompath or a --logurl is required.  If neither is
    given, parser.error() prints the usage message and exits the process.
    """
    parser = optparse.OptionParser()
    parser.add_option('--verbose', action='store_true',
        help="give more info about what's going on")
    parser.add_option('--debug', action='store_true',
        help="output bunches of debugging info")
    parser.add_option("-y", "--assumeyes", dest="assumeyes", default=False,
        action="store_true", help="Answer yes for all questions")

    # Repoclosure options
    optgrp = optparse.OptionGroup(parser, "Repoclosure options")
    archlist = ['i386', 'x86_64', 'ppc']
    optgrp.add_option("-a", "--arch", action="store", type="choice",
        choices=archlist, default=get_basearch(),
        help="target architecture (%default)")
    # Each fragment ends with a space so the joined help text reads properly
    optgrp.add_option("-l", "--logurl", action="store", default="",
        help="URL for existing repoclosure results.  When provided, it will "
             "just process existing results and not re-run repoclosure (useful "
             "when running multiple times)")
    optgrp.add_option('--repoid', action='append', default=[], dest="repoids",
        help="specify repo ids to query, can be specified multiple"
             " times (default is all enabled) ")
    optgrp.add_option('--repofrompath', action='append', default=[], dest="repos",
        help="specify repoid & paths of additional repositories - "
             "unique repoid and path required, can be specified "
             "multiple times. Example. "
             "  --repofrompath=myrepo,/path/to/repo")
    optgrp.add_option('--ignoreid', action='append', default=[], dest="ignoreids",
        help="specify repo ids to ignore any dependency conflicts (may be used more than once).")
    parser.add_option_group(optgrp)

    # Bugzilla options
    optgrp = optparse.OptionGroup(parser, "Bugzilla options")
    optgrp.add_option('-b', '--blocks', action='append',
        default=[], help="Set blocks field when filing bugs")
    optgrp.add_option('--product', dest='bz_product', action='store',
        default='Fedora', help='Bugzilla product (default: %default)')
    optgrp.add_option('--component', dest='bz_component', action='store',
        default=None, help='Bugzilla component (optional)')
    optgrp.add_option('--version', dest='bz_version', action='store',
        default='rawhide', help='Bugzilla version (default: %default)')
    parser.add_option_group(optgrp)

    (opts, args) = parser.parse_args()

    # There must be something to work on: either a repository to run
    # repoclosure against or a log of a previous run.  parser.error() does
    # not return, so no explicit exit is needed after it.
    if len(opts.repos) == 0 and opts.logurl == "":
        parser.error("Must provide at least one --repofrompath or -l|--logurl")

    return (opts, args)

def system_sanity(cmds=None):
    '''Check that the external commands this script relies on can be run.

    cmds is an optional list of command lines (split on single spaces); it
    defaults to "repoclosure --help" and "repoquery --help".  Each command
    is run with its stdout discarded.  On the first command that cannot be
    started or that exits non-zero, a hint is printed and False is
    returned.  True is returned when every command succeeds.

    The exit status is tested with a plain comparison rather than an assert
    so the check still runs under "python -O".  Output goes to the null
    device rather than an unread pipe, which could block a chatty command.
    '''
    if cmds is None:
        cmds = ["repoclosure --help", "repoquery --help"]

    for cmd in cmds:
        argv = cmd.split(' ')
        try:
            with open(os.devnull, 'w') as devnull:
                status = subprocess.call(argv, stdout=devnull)
        except (IOError, OSError):
            # Typically: the binary does not exist or is not executable
            status = None
        if status != 0:
            print("'%s' failed. Be sure %s is installed." % (cmd, argv[0]))
            return False
    return True

if __name__ == "__main__":
    # Script entry point:
    #   1. obtain repoclosure output (run it, or download an existing log)
    #   2. group the reported failures by source package
    #   3. for each source package, update a matching bug or offer to file one

    # Parse arguments
    (opts, args) = parse_args()

    # Validate environment
    if not system_sanity():
        sys.exit(1)

    # Report which bugzilla credentials (if any) are available
    if os.path.exists(bz.cookiefile):
        log.info('Using cookies in %s for bugzilla authentication', bz.cookiefile)
    else:
        # FIXME check to see if .bugzillarc is in use
        log.info("No authentication info provided.  Have you run `bugzilla login`?")

    # Repository arguments shared by repoclosure and repoquery.  They are
    # kept as an argument list (not a string that is split later) so that no
    # empty arguments are produced and values containing spaces stay intact.
    repo_args = ["--tempcache"]
    repo_args.extend("--repoid=%s" % (repoid,) for repoid in opts.repoids)
    repo_args.extend("--repofrompath=%s" % (rpath,) for rpath in opts.repos)

    # Has repoclosure already been run?
    if opts.logurl:
        try:
            data = urlgrabber.urlread(opts.logurl)
        except Exception as e:
            log.error("Failed to read '%s'" % opts.logurl)
            log.debug(e)
            sys.exit(1)
    else:
        # Run repoclosure, capturing stdout and stderr together
        print("Running repoclosure (this may take a few minutes) ...")
        cmd = ['repoclosure', '--newest'] + repo_args
        log.debug(' '.join(cmd))
        data = subprocess.Popen(cmd, stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT).communicate()[0].strip()
        print(data)
    # Output the results for debugging
    log.debug(data)

    # Packagers whose packages we are willing to file bugs against
    supported_packagers = ["Fedora Project",
        "Red Hat, Inc. <http://bugzilla.redhat.com/bugzilla>"]

    # Parse results
    print("Parsing results ...")
    problems = dict()   # src name -> dict(output=[lines], packages=[binaries])
    package = ""
    src = ""            # source package currently collecting output ("" = none)
    broken_dep_count = 0
    ignore_dep_count = 0
    for line in data.split('\n'):

        log.debug("Parsing line '%s'" % line)
        match = re.search("^package: ([^ ]*) from ([^ ]*)$", line)
        if match:
            broken_dep_count += 1
            package = match.group(1)
            repo = match.group(2)

            # Were we asked to skip packages
            if repo in opts.ignoreids:
                ignore_dep_count += 1
                log.warning("Ignoring package %s from repo %s" % (package, repo))
                src = ""
                continue

            # Keep track of problems by src-rpm name (not binary)
            cmd = ['repoquery', '-q', '--qf', '%{sourcerpm},%{PACKAGER}'] \
                + repo_args + [package]
            log.debug(' '.join(cmd))
            # The with-block closes the null device again after every query
            with open(os.devnull, 'w') as devnull:
                result = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                    stderr=devnull).communicate()[0].strip()

            # repoquery may print nothing (unknown package) or several lines
            # (several matches); only the first line is used, and a missing
            # "src,packager" pair is reported instead of crashing the unpack
            answer = result.split('\n', 1)[0].strip()
            if ',' not in answer:
                log.warning("Unable to locate src.rpm for '%s'" % package)
                src = ""
                continue
            (src, packager) = answer.split(',', 1)

            # Ignore anything that isn't packaged by Fedora (e.g. rpmfusion)
            if packager not in supported_packagers:
                log.warning("Unsupported packager (%s), skipping '%s'" % (packager, package))
                src = ""
                continue

            # Strip off '.src.rpm'
            if src.endswith(".src.rpm"):
                src = src[:-len(".src.rpm")]

            # Log result
            if src == "":
                log.warning("Unable to locate src.rpm for '%s'" % package)
                continue
            log.info("Identified failure with package '%s', src '%s'" %
                (package, src))

            # Update dictionary of src names
            if src not in problems:
                problems[src] = dict(output=list(), packages=[package])
            else:
                problems[src]['packages'].append(package)

        # Save every line (including the 'package:' line itself) that belongs
        # to a source package we are tracking
        if src != "":
            problems[src]['output'].append(line)

    # Print summary
    print("Found %d dependency problems (ignoring %s) affecting %s source packages" %
        (broken_dep_count, ignore_dep_count, len(problems)))

    # Host part of the bugzilla URL, used when printing links to bugs
    bz_host = urlparse.urlparse(default_bz).netloc

    # Hash the dependency data
    for (src, problem) in problems.items():

        output = problem['output']
        # Generate a list of unique hashes for each problem in the output
        hashes = generate_hash_list(output)
        problem['hashes'] = hashes

        # Strip off the %{version}-%{release}, leaving just %{name}
        if opts.bz_component is None:
            component = get_package_name(src)
        else:
            component = opts.bz_component

        # Create a description that consists of the error, and stdout leading up to
        # it (not full repoclosure stdout)
        description = data[:data.index("package:")] + "\n".join(output)

        # Attempt to make a meaningful summary
        # XXX - this relies on pattern matching repoclosure output ... likely
        # to break
        match = re.search('package: ([^ ]*) from [^ ]*\n[ ]*unresolved deps:[ ]*\n(.*)', '\n'.join(output), re.MULTILINE)
        if match:
            summary = "Broken dependency: %s requires %s" % (match.group(1).strip(), match.group(2).strip())
        else:
            summary = "Broken dependencies for %s" % src

        # Print summary
        print("\n== %s ==" % src)
        print("Summary: %s" % summary)
        print("Product: %s" % opts.bz_product)
        print("Version: %s" % opts.bz_version)
        print("Component: %s" % component)
        print("Whiteboard:\n\t%s" % "\n\t".join(hashes))
        print("Binaries affected:\n\t%s" % "\n\t".join(problem['packages']))
        print("Description:\n%s\n" % description)

        # Attempt to find an existing bug matching any of the hashes
        matching_bugs = bz_find_matching_hashes(opts.bz_product, opts.bz_version, hashes)
        if len(matching_bugs) > 0:
            for b in matching_bugs:
                print("Found existing bug: http://%s/%s (%s)" % (bz_host, str(b.bug_id), b.bug_status))
                # If --blocks were provided, add them now
                if opts.blocks:
                    bz_add_to_blocker(b, opts.blocks)
                # Update whiteboard with any new hashes
                bz_update_whiteboard(b, hashes)

        # No matching bug was found, let's create one
        else:
            yesno = opts.assumeyes
            # If we are at a terminal, and weren't told to assume 'yes'
            if not yesno and sys.stdin.isatty():
                yesno = raw_input("File a new bug (y|n): ").lower() == 'y'
            if yesno:
                # File a new bug
                b = bz_file_bug(product=opts.bz_product, version=opts.bz_version,
                    component=component, summary=summary, hashes=hashes,
                    description=description, blocked=opts.blocks)
                print("Filed new bug: http://%s/%s" % (bz_host, str(b.bug_id)))
            else:
                log.warning("Not filing bug at user request")