summary refs log tree commit diff stats
path: root/check-licenses.py
diff options
context:
space:
mode:
Diffstat (limited to 'check-licenses.py')
-rwxr-xr-x check-licenses.py 231
1 files changed, 231 insertions, 0 deletions
diff --git a/check-licenses.py b/check-licenses.py
new file mode 100755
index 0000000..8894cdd
--- /dev/null
+++ b/check-licenses.py
@@ -0,0 +1,231 @@
+#!/usr/bin/python
+
+"""Report problems with License: tags in Fedora spec files.
+
+This script checks the License: tag in spec files. The default is to check spec
+files in a checkout of the devel branch. Download a daily checkout seed from:
+
+http://cvs.fedoraproject.org/webfiles/
+
+You will need to fix up the CVS/Root entries:
+
+find devel -wholename '*/CVS/Root' | \
+ xargs sed -i 's/^/:pserver:anonymous@cvs.fedora.redhat.com:/'
+
+To generate a report sorted by owner, you will need to grab PackageOwners.py
+from the extras-repoclosure module in fedora CVS:
+
+http://cvs.fedoraproject.org/viewcvs/extras-repoclosure/?root=fedora
+
+You also need to create an FASauth.py file that contains your Fedora Account
+System username and password. Something like:
+
+username = 'username'
+password = 'password'
+"""
+
+# TODO
+#
+# display the full license tag in the output as well as the bad licenses. this
+# will make it easier to spot problems in the script or license tags that are
+# mostly valid but just use the wrong syntax for specifying multiple licenses.
+
+import os
+import re
+import sys
+import glob
+import string
+import optparse
+from time import gmtime, strftime
+from FASauth import username, password
+from PackageOwners import PackageOwners
+
+# packages that main() skips without checking their License: tag
+blacklist = ['glibc32', 'glibc64', 'olpc-logos']
+# name reported in place of an owner when the owner lookup returns nothing
+no_owner = '_No_Owner_in_PackageDB'
+# Splits a License: value into candidate license names.  The first
+# alternative captures the text inside a parenthesised group, the second
+# matches the words "and"/"or" when surrounded by whitespace.  Written as a
+# raw string so the backslashes reach the regex engine untouched.
+regex = re.compile(r'\(([^)]+)\)|\s(?:and|or)\s')
+
+# these packages have spec file names which don't match the cvs module
+# the key is the specfile name, the value is the cvs module
+cvs_module_map = {
+    'audacious-docklet': 'audacious-plugins-docklet',
+    'gcc41': 'gcc',
+    'gconfmm': 'gconfmm26',
+    'glibmm': 'glibmm24',
+    'gnome-vfsmm': 'gnome-vfsmm26',
+    'gtkmm': 'gtkmm24',
+    'kernel-xen': 'kernel-xen-2.6',
+    'libglademm': 'libglademm24',
+    'libgnomecanvasmm': 'libgnomecanvasmm26',
+    'libgnomemm': 'libgnomemm26',
+    'libgnomeuimm': 'libgnomeuimm26',
+    'libsigc++': 'libsigc++20',
+}
+
+def find_bad_licenses(license, valid_licenses):
+    """Return the parts of *license* that are not in *valid_licenses*.
+
+    A tag that is valid as a whole yields an empty list.  Otherwise the tag
+    is split with split_license(), the valid pieces are dropped, and what is
+    left is re-joined and split again until another pass changes nothing or
+    no pieces remain.
+    """
+    if license in valid_licenses:
+        return []
+
+    previous = []
+    while True:
+        pieces = split_license(license)
+        if pieces == previous:
+            # splitting again produced nothing new: these are the leftovers
+            return pieces
+        pieces = [piece for piece in pieces if piece not in valid_licenses]
+        if not pieces:
+            return pieces
+        previous = pieces
+        # feed the remaining pieces back through the splitter
+        license = ' or '.join(pieces)
+
+
+def get_valid_licenses():
+ """Return rpmlint's 'ValidLicenses' option after loading its config file.
+
+ Appends /usr/share/rpmlint to sys.path, imports rpmlint's Config module
+ from there and executes /usr/share/rpmlint/config.
+ """
+ rpmlint_path = '/usr/share/rpmlint'
+ rpmlint_conf = os.path.join(rpmlint_path, 'config')
+ # make rpmlint's own modules importable
+ sys.path.append(rpmlint_path)
+
+ import Config
+ # execfile() is given no namespaces, so the config file runs in this
+ # function's scope; keep the local names here as they are.
+ # NOTE(review): presumably the config file sets the options that
+ # Config.getOption() reads back -- confirm against rpmlint.
+ execfile(rpmlint_conf)
+ return Config.getOption('ValidLicenses')
+
+
+def split_license(license):
+    """Split a License: value into a list of individual license names.
+
+    The value is split on the module-level ``regex`` (parenthesised groups
+    and the words and/or).  Every piece is stripped of surrounding
+    whitespace, and pieces that are empty after stripping are discarded, so
+    a fragment made only of spaces is never returned as a license name.
+    """
+    # regex.split() yields None for the capture group when the and/or
+    # alternative matched, and '' between adjacent separators; skip both.
+    pieces = [piece.strip() for piece in regex.split(license) if piece]
+    # a piece that was only whitespace is empty now; drop it as well
+    return [piece for piece in pieces if piece]
+
+
+def get_owners():
+    """Fetch package owner data and build a reverse lookup of its usermap.
+
+    Returns a tuple (owners, addressmap) where owners is the PackageOwners
+    object and addressmap maps each value of owners.usermap back to its key.
+    Prints a message and raises SystemExit(1) when FromURL() reports failure.
+    """
+    owners = PackageOwners()
+    fetched = owners.FromURL(username=username, password=password)
+    if not fetched:
+        print 'Unable to retrieve package owner data'
+        raise SystemExit(1)
+
+    # invert usermap: value -> key
+    addressmap = {}
+    for key, value in owners.usermap.iteritems():
+        addressmap[value] = key
+    return owners, addressmap
+
+
+def status_by_owner(bogus, owners, addressmap, show_addrs):
+ """Print the flagged packages grouped by owner.
+
+ bogus maps a package name to the list of licenses flagged for it.
+ owners supplies GetOwner() and usermap; addressmap translates the value
+ returned by GetOwner() into the name printed for the owner.  When
+ show_addrs is true, owners.usermap[owner] is printed after the name with
+ '@' replaced by ' at '.
+
+ NOTE(review): the indentation of this block was flattened, so the
+ nesting of the final bare print cannot be read from the text; it
+ presumably emits one blank line after each owner's group -- confirm.
+ """
+
+ # owner name -> {package: flagged licenses}
+ bad = {}
+ for srpm, licenses in bogus.items():
+ owner = owners.GetOwner(srpm)
+ if owner:
+ owner = addressmap[owner]
+ else:
+ # GetOwner() returned nothing usable; file under the placeholder
+ owner = no_owner
+ bad.setdefault(owner, {}).setdefault(srpm, licenses)
+
+ for owner, data in sorted(bad.items()):
+ # the trailing commas keep name, address and count on one line
+ print owner,
+ if show_addrs and owner != no_owner:
+ print '(%s)' % owners.usermap[owner].replace('@', ' at '),
+ print '[%d]:' % len(data)
+ for srpm, licenses in sorted(data.items()):
+ print '\t%s: %s' % (srpm, ' '.join(['"%s"' % l for l in licenses]))
+ print
+
+
+def status_by_package(bogus, owners, addressmap):
+ """Print one line per flagged package: name, owner and flagged licenses.
+
+ bogus maps a package name to the list of licenses flagged for it; the
+ packages are printed in sorted order.  The owner shown is
+ addressmap[owners.GetOwner(package)], or the no_owner placeholder when
+ GetOwner() returns a false value.
+
+ NOTE(review): the indentation of this block was flattened, so the
+ nesting of the final bare print cannot be read from the text; it
+ presumably emits a single blank line after the whole list -- confirm.
+ """
+ for srpm, licenses in sorted(bogus.items()):
+ owner = owners.GetOwner(srpm)
+ if owner:
+ owner = addressmap[owner]
+ else:
+ owner = no_owner
+ # each license is wrapped in double quotes, separated by spaces
+ print '%s (%s): %s' % (srpm, owner,
+ ' '.join(['"%s"' % l for l in licenses]))
+ print
+
+
+def _parse_args():
+    """Parse the command line and settle conflicting output options.
+
+    Returns a tuple (opts, args, parser): the option values, the leftover
+    positional arguments and the OptionParser instance.  --quiet turns off
+    both report groupings; otherwise --owners turns off the per-package
+    grouping, which is on by default.
+    """
+    parser = optparse.OptionParser()
+    add = parser.add_option
+
+    add('-a', '--show-addrs', action='store_true', dest='addrs',
+        default=False,
+        help='print email addresses for owners [%default]')
+    add('--glob', action='store', dest='glob_pattern', metavar='pattern',
+        default='devel/*/*.spec',
+        help='glob pattern for finding spec files [%default]')
+    add('--license', action='store', dest='license_regex', metavar='regex',
+        default=None,
+        help='find licenses matching the regex [%default]')
+    add('-o', '--owners', action='store_true', dest='owners',
+        default=False,
+        help='group output by owner [%default]')
+    add('-p', '--package', action='store_true', dest='packages',
+        default=True,
+        help='group output by package [%default]')
+    add('-q', '--quiet', action='store_true', dest='quiet',
+        default=False,
+        help='only print a summary [%default]')
+
+    opts, args = parser.parse_args()
+    if opts.quiet:
+        # quiet trumps both owners and packages
+        opts.owners = False
+        opts.packages = False
+    elif opts.owners:
+        # opts.owners trumps opts.packages
+        opts.packages = False
+    return opts, args, parser
+
+
+def _read_license_tags(spec):
+    """Return the value of every License: line in the spec file *spec*.
+
+    Each value is the line with the 'License:' label removed and with
+    newlines and backslashes stripped from both ends.  The list is empty
+    when the file contains no line starting with 'License:'.
+    """
+    spec_file = open(spec)
+    try:
+        lines = spec_file.readlines()
+    finally:
+        # close the handle here rather than leaving one open per spec file
+        spec_file.close()
+    return [re.sub(r'License:\s*', '', line).strip('\n').strip('\\')
+            for line in lines if line.startswith('License:')]
+
+
+def main():
+    """Check the License: tags of all matching spec files and print a report.
+
+    Spec files are found with the --glob pattern.  Blacklisted packages and
+    directories containing a 'dead.package' file are skipped and are not
+    counted in the summary.  Without --license, every tag is validated
+    against rpmlint's list of valid licenses; with --license, tags matching
+    the given regex are reported instead.
+
+    Returns 1 when no spec files are found, otherwise None.
+    """
+    opts = _parse_args()[0]
+    specs = sorted(glob.glob(opts.glob_pattern))
+    bogus = {}
+    valid_licenses = get_valid_licenses()
+
+    if not specs:
+        print 'No specfiles found.'
+        return 1
+
+    # Count the spec files that are really examined.  Skipped packages used
+    # to be removed from `specs` while it was being iterated, which made the
+    # loop jump over the spec file following every skipped one.
+    checked = 0
+    for spec in specs:
+        base = os.path.basename(spec)
+        srpm = os.path.splitext(base)[0]
+        dead = os.path.join(os.path.dirname(spec), 'dead.package')
+        # translate spec file names that differ from their cvs module
+        srpm = cvs_module_map.get(srpm, srpm)
+        if srpm in blacklist or os.path.exists(dead):
+            continue
+        checked += 1
+
+        licenses = _read_license_tags(spec)
+        bad_licenses = []
+        for tag in licenses:
+            tag = tag.strip()
+            if not opts.license_regex:
+                bad_licenses.extend(find_bad_licenses(tag, valid_licenses))
+            elif re.search(opts.license_regex, tag):
+                bad_licenses.append(tag)
+        if not licenses:
+            bad_licenses.append('NO LICENSE TAG IN SPEC FILE')
+
+        if bad_licenses:
+            bogus[srpm] = bad_licenses
+
+    if bogus:
+        bad = len(bogus)
+        # bogus is non-empty, so at least one spec file was checked
+        percent = (bad / float(checked)) * 100
+        owners, addressmap = get_owners()
+        if opts.owners:
+            status_by_owner(bogus, owners, addressmap, opts.addrs)
+        elif opts.packages:
+            status_by_package(bogus, owners, addressmap)
+
+        if not opts.license_regex:
+            datestr = strftime('%Y-%m-%d %H:%M UTC', gmtime())
+            summary = 'Invalid licenses: %d out of %d (%.2f%%) [as of %s]' % (
+                bad, checked, percent, datestr)
+            print summary
+    elif opts.license_regex:
+        print 'No licenses matched "%s"' % opts.license_regex
+    else:
+        # check the script, there must be an error. :)
+        print 'All license tags are valid.'
+
+
+if __name__ == '__main__':
+    # Exit with whatever main() returns; an interrupt from the keyboard
+    # ends the run with status 1 instead of a traceback.
+    try:
+        raise SystemExit(main())
+    except KeyboardInterrupt:
+        raise SystemExit(1)