From 7a732c56fc31ab12cab0932f6450d38db4aa09f1 Mon Sep 17 00:00:00 2001 From: Todd Zullinger Date: Tue, 22 Jan 2008 23:36:56 -0500 Subject: initial commit --- check-licenses.py | 231 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100755 check-licenses.py (limited to 'check-licenses.py') diff --git a/check-licenses.py b/check-licenses.py new file mode 100755 index 0000000..8894cdd --- /dev/null +++ b/check-licenses.py @@ -0,0 +1,231 @@ +#!/usr/bin/python + +"""Report problems with License: tags in Fedora spec files. + +This script checks the License: tag in spec files. The default is to check spec +files in a checkout of the devel branch. Download a daily checkout seed from: + +http://cvs.fedoraproject.org/webfiles/ + +You will need to fix up the CVS/Root entries: + +find devel -wholename '*/CVS/Root' | \ + xargs sed -i 's/^/:pserver:anonymous@cvs.fedora.redhat.com:/' + +To generate a report sorted by owner, you will need to grab PackageOwners.py +from the extras-repoclosure module in fedora CVS: + +http://cvs.fedoraproject.org/viewcvs/extras-repoclosure/?root=fedora + +You also need to create an FASauth.py file that contains your Fedora Account +System username and password. Something like: + +username = 'username' +password = 'password' +""" + +# TODO +# +# display the full license tag in the output as well as the bad licenses. this +# will make it easier to spot problems in the script or license tags that are +# mostly valid but just use the wrong syntax for specifying multiple licenses. + +import os +import re +import sys +import glob +import string +import optparse +from time import gmtime, strftime +from FASauth import username, password +from PackageOwners import PackageOwners + +blacklist = ['glibc32', 'glibc64', 'olpc-logos'] +no_owner = '_No_Owner_in_PackageDB' +# which of these is least ugly? suggestions to improve readability welcome. 
regex = re.compile(r'\(([^)]+)\)|\s(?:and|or)\s')
#regex = re.compile(r'[(]([^)]+)[)]|\s(?:and|or)\s')
# Either form splits a License: value on " and " / " or " and on parenthesised
# groups.  The capture group makes re.split() hand back the text found inside
# the parentheses so that it can be split again on a later pass.

# these packages have spec file names which don't match the cvs module
# the key is the specfile name, the value is the cvs module
cvs_module_map = {
    'audacious-docklet': 'audacious-plugins-docklet',
    'gcc41': 'gcc',
    'gconfmm': 'gconfmm26',
    'glibmm': 'glibmm24',
    'gnome-vfsmm': 'gnome-vfsmm26',
    'gtkmm': 'gtkmm24',
    'kernel-xen': 'kernel-xen-2.6',
    'libglademm': 'libglademm24',
    'libgnomecanvasmm': 'libgnomecanvasmm26',
    'libgnomemm': 'libgnomemm26',
    'libgnomeuimm': 'libgnomeuimm26',
    'libsigc++': 'libsigc++20',
}

# NOTE: every print below is given exactly one parenthesised expression so the
# output is the same whether "print" is the Python 2 statement or a function.


def find_bad_licenses(license, valid_licenses):
    """Return the parts of a License: value that are not valid licenses.

    license is the text of one License: tag and valid_licenses is a container
    of accepted license names.  The value is split repeatedly (see
    split_license), dropping every valid part, until either nothing is left
    or a pass no longer changes the remainder.  Splitting again is what
    unpacks parenthesised groups such as "A and (B or C)".

    Returns a list of the unrecognised parts; empty when everything is valid.
    """
    if license in valid_licenses:
        return []
    previous = []
    while True:
        licenses = split_license(license)
        if licenses == previous:
            # another pass would not split anything further
            break
        licenses = [part for part in licenses if part not in valid_licenses]
        if not licenses:
            break
        previous = licenses
        # re-join what is left so that the next pass can split nested groups
        license = ' or '.join(licenses)
    return licenses


def get_valid_licenses():
    """Return rpmlint's 'ValidLicenses' option.

    Adds /usr/share/rpmlint to sys.path, imports rpmlint's Config module,
    executes /usr/share/rpmlint/config and then queries Config.
    """
    rpmlint_path = '/usr/share/rpmlint'
    rpmlint_conf = os.path.join(rpmlint_path, 'config')
    sys.path.append(rpmlint_path)

    import Config
    # execfile is a Python 2 builtin; it runs the system rpmlint configuration
    # file (presumably so that its settings end up in Config -- confirm
    # against the installed rpmlint).
    execfile(rpmlint_conf)
    return Config.getOption('ValidLicenses')


def split_license(license):
    """Split a License: value into a list of stripped component strings.

    Empty and None pieces produced by re.split() are discarded before the
    remaining pieces are stripped of surrounding whitespace.
    """
    return [part.strip() for part in regex.split(license) if part]


def get_owners():
    """Fetch package owner data and return (owners, addressmap).

    owners is the PackageOwners instance and addressmap is owners.usermap
    with keys and values swapped.  Exits with status 1 when the owner data
    cannot be retrieved.
    """
    owners = PackageOwners()
    if not owners.FromURL(username=username, password=password):
        print('Unable to retrieve package owner data')
        raise SystemExit(1)
    addressmap = dict((v, k) for k, v in owners.usermap.items())
    return owners, addressmap


def _owner_name(srpm, owners, addressmap):
    """Return the addressmap entry for srpm's owner, or no_owner if unowned."""
    owner = owners.GetOwner(srpm)
    if owner:
        return addressmap[owner]
    return no_owner


def _quote_licenses(licenses):
    """Return the licenses double-quoted and joined by single spaces."""
    return ' '.join(['"%s"' % l for l in licenses])


def status_by_owner(bogus, owners, addressmap, show_addrs):
    """Print the bad licenses grouped by package owner.

    bogus maps package name -> list of bad licenses.  When show_addrs is true
    the owner's usermap entry is printed as well, with '@' replaced by ' at '.
    """
    bad = {}
    for srpm, licenses in bogus.items():
        owner = _owner_name(srpm, owners, addressmap)
        bad.setdefault(owner, {})[srpm] = licenses

    for owner, data in sorted(bad.items()):
        header = [owner]
        if show_addrs and owner != no_owner:
            header.append('(%s)' % owners.usermap[owner].replace('@', ' at '))
        header.append('[%d]:' % len(data))
        print(' '.join(header))
        for srpm, licenses in sorted(data.items()):
            print('\t%s: %s' % (srpm, _quote_licenses(licenses)))
        # NOTE(review): blank line after each owner's group; the indentation
        # of this statement was lost in the source -- confirm.
        print('')


def status_by_package(bogus, owners, addressmap):
    """Print one line per package: name, owner and its bad licenses."""
    for srpm, licenses in sorted(bogus.items()):
        owner = _owner_name(srpm, owners, addressmap)
        print('%s (%s): %s' % (srpm, owner, _quote_licenses(licenses)))
    # NOTE(review): single blank line after the listing; the indentation of
    # this statement was lost in the source -- confirm.
    print('')


def _parse_args():
    """Parse the command line and return (opts, args, parser)."""
    parser = optparse.OptionParser()
    parser.add_option('-a', '--show-addrs', dest='addrs',
                      action='store_true', default=False,
                      help='print email addresses for owners [%default]')
    parser.add_option('--glob', dest='glob_pattern', action='store',
                      default='devel/*/*.spec', metavar='pattern',
                      help='glob pattern for finding spec files [%default]')
    parser.add_option('--license', dest='license_regex', action='store',
                      default=None, metavar='regex',
                      help='find licenses matching the regex [%default]')
    parser.add_option('-o', '--owners', dest='owners',
                      action='store_true', default=False,
                      help='group output by owner [%default]')
    # store_true with default=True: passing -p changes nothing by itself,
    # only --owners or --quiet switch the per-package output off.
    parser.add_option('-p', '--package', dest='packages',
                      action='store_true', default=True,
                      help='group output by package [%default]')
    parser.add_option('-q', '--quiet', dest='quiet',
                      action='store_true', default=False,
                      help='only print a summary [%default]')
    opts, args = parser.parse_args()
    # opts.owners trumps opts.packages
    if opts.owners:
        opts.packages = False
    # quiet trumps both owners and packages
    if opts.quiet:
        opts.owners = opts.packages = False
    return opts, args, parser


def _read_license_tags(spec):
    """Return the stripped values of all License: lines in the spec file."""
    spec_file = open(spec)
    # try/finally instead of "with" so the file is closed even on
    # interpreters that predate the with statement.
    try:
        lines = spec_file.readlines()
    finally:
        spec_file.close()
    return [re.sub(r'License:\s*', '', line).strip('\n').strip('\\').strip()
            for line in lines if line.startswith('License:')]


def main():
    """Check every spec file matched by --glob and print a report.

    Returns 1 when no spec files are found, otherwise None.
    """
    opts, _args, _parser = _parse_args()
    specs = sorted(glob.glob(opts.glob_pattern))
    bogus = {}
    valid_licenses = get_valid_licenses()

    if not specs:
        print('No specfiles found.')
        return 1

    # Count the specs that are really examined rather than deleting skipped
    # entries from the list being iterated: removing during iteration made
    # the loop jump over the spec following every skipped one.
    checked = 0
    for spec in specs:
        srpm = os.path.splitext(os.path.basename(spec))[0]
        srpm = cvs_module_map.get(srpm, srpm)
        dead = os.path.join(os.path.dirname(spec), 'dead.package')
        if srpm in blacklist or os.path.exists(dead):
            continue
        checked += 1

        licenses = _read_license_tags(spec)
        bad_licenses = []
        for license in licenses:
            if not opts.license_regex:
                bad_licenses.extend(find_bad_licenses(license, valid_licenses))
            elif re.search(opts.license_regex, license):
                bad_licenses.append(license)
        if not licenses:
            bad_licenses.append('NO LICENSE TAG IN SPEC FILE')

        if bad_licenses:
            bogus[srpm] = bad_licenses

    if bogus:
        bad = len(bogus)
        total = checked
        percent = (bad / float(total)) * 100
        owners, addressmap = get_owners()
        if opts.owners:
            status_by_owner(bogus, owners, addressmap, opts.addrs)
        elif opts.packages:
            status_by_package(bogus, owners, addressmap)

        if not opts.license_regex:
            datestr = strftime('%Y-%m-%d %H:%M UTC', gmtime())
            summary = 'Invalid licenses: %d out of %d (%.2f%%) [as of %s]' % (
                bad, total, percent, datestr)
            print(summary)
    elif opts.license_regex:
        print('No licenses matched "%s"' % opts.license_regex)
    else:
        # check the script, there must be an error. :)
        print('All license tags are valid.')


if __name__ == '__main__':
    try:
        status = main()
    except KeyboardInterrupt:
        status = 1
    raise SystemExit(status)