#!/usr/bin/python
"""Report problems with License: tags in Fedora spec files.

This script checks the License: tag in spec files.  The default is to check
spec files in a checkout of the devel branch.  You can use the get-cvs-tree
script to get a current checkout.  This requires approximately 700MB of disk
space.

In order to gather the package and owner information, you need to create a
file named FASauth.py in the same dir as this script.  It should contain your
Fedora Account System username and password.  For example:

    username = 'tmz'
    password = 'secret'
"""

# TODO
#
# display the full license tag in the output as well as the bad licenses.  this
# will make it easier to spot problems in the script or license tags that are
# mostly valid but just use the wrong syntax for specifying multiple licenses.

import os
import re
import sys
import glob
import string
import optparse
from time import gmtime, strftime

from FASauth import username, password
from fedora.client import BaseClient, AuthError, ServerError

# cvs modules that are never checked
blacklist = ['glibc32', 'glibc64', 'olpc-logos']
no_owner = '_No_Owner_in_PackageDB'

# Splits a license tag on parenthesized groups and on the words "and" / "or".
# The capturing group makes re.split() return the text inside the parentheses
# so that it can be split again on a later pass.
#
# which of these is least ugly?  suggestions to improve readability welcome.
regex = re.compile(r'\(([^)]+)\)|\s(?:and|or)\s')
#regex = re.compile('[(]([^)]+)[)]|\s(?:and|or)\s')

# these packages have spec file names which don't match the cvs module
# the key is the specfile name, the value is the cvs module
cvs_module_map = {
    'audacious-docklet': 'audacious-plugins-docklet',
    'gcc43': 'gcc',
    'gconfmm': 'gconfmm26',
    'glibmm': 'glibmm24',
    'gnome-vfsmm': 'gnome-vfsmm26',
    'gtkmm': 'gtkmm24',
    'kernel': 'kernel-xen-2.6',
    'libglademm': 'libglademm24',
    'libgnomecanvasmm': 'libgnomecanvasmm26',
    'libgnomemm': 'libgnomemm26',
    'libgnomeuimm': 'libgnomeuimm26',
    'libsigc++': 'libsigc++20',
}


def find_bad_licenses(license, valid_licenses):
    """Return the parts of a license tag that are not valid licenses.

    The tag is accepted as a whole when it appears in valid_licenses.
    Otherwise it is split repeatedly (see split_license), dropping every
    valid part after each pass, until either nothing is left or another
    split no longer changes the remaining parts.

    license -- the text of a License: tag
    valid_licenses -- container of acceptable license strings

    Returns a list of the parts that could not be validated; the list is
    empty when the whole tag is acceptable.
    """
    if license in valid_licenses:
        return []

    previous = []
    while True:
        licenses = split_license(license)
        # A pass that yields exactly what the previous pass left behind
        # cannot make further progress: the remainder is reported as bad.
        if licenses == previous:
            break
        licenses = [l for l in licenses if l not in valid_licenses]
        if not licenses:
            break
        previous = licenses
        # Re-join so that text freed from parentheses by this pass gets
        # split on "and" / "or" by the next one.
        license = ' or '.join(licenses)
    return licenses


def get_valid_licenses():
    """Return the 'ValidLicenses' option from the rpmlint configuration.

    A file named rpmlint.config in the current directory takes precedence
    over the system-wide /usr/share/rpmlint/config.
    """
    rpmlint_path = '/usr/share/rpmlint'
    if os.path.exists('rpmlint.config'):
        rpmlint_conf = 'rpmlint.config'
    else:
        rpmlint_conf = os.path.join(rpmlint_path, 'config')
    # rpmlint's Config module is only importable from its own directory.
    sys.path.append(rpmlint_path)
    import Config
    # NOTE(review): this executes the configuration file as Python code, so
    # only run the script in a directory whose rpmlint.config you trust.
    # execfile() exists only in Python 2.
    execfile(rpmlint_conf)
    return Config.getOption('ValidLicenses')


def split_license(license):
    """Split a license tag into a list of stripped, non-empty parts.

    The tag is split with the module-level regex; empty strings and the None
    entries re.split() produces for an unmatched group are dropped.
    """
    return [part.strip() for part in regex.split(license) if part]


def get_packages():
    """Fetch the Fedora package data from the package database.

    Returns the ['bugzillaAcls']['Fedora'] entry of the 'acls/bugzilla'
    response.  Raises SystemExit on authentication or server errors.
    """
    try:
        pkgdb_url = 'https://admin.fedoraproject.org/pkgdb'
        pkgdb_client = BaseClient(pkgdb_url)
        pkgdb_data = pkgdb_client.send_request('acls/bugzilla')
        packages = pkgdb_data['bugzillaAcls']['Fedora']
    except AuthError as e:
        raise SystemExit('Authentication error: %s' % e)
    except ServerError as e:
        raise SystemExit('Server error: %s' % e)
    return packages


def get_owners():
    """Fetch account data from the Fedora Account System.

    Authenticates with the username and password from FASauth.py.

    Returns a dict mapping each user name to {'addr': ..., 'name': ...},
    built from the first three fields of every entry in the 'people' list of
    the 'group/dump' response.  Raises SystemExit on authentication or server
    errors.
    """
    try:
        acct_url = 'https://admin.fedoraproject.org/accounts/'
        acct_client = BaseClient(acct_url, username=username,
                                 password=password)
        acct_data = acct_client.send_request('group/dump', auth=True)
        owners = {}
        for person in acct_data['people']:
            user, addr, name = person[0:3]
            owners[user] = {'addr': addr, 'name': name}
    except AuthError as e:
        raise SystemExit('Authentication error: %s' % e)
    except ServerError as e:
        raise SystemExit('Server error: %s' % e)
    return owners


def status_by_owner(bogus, owners, packages, show_addrs):
    """Print the bad licenses grouped by package owner.

    bogus -- dict mapping package name to its list of bad licenses
    owners -- dict as returned by get_owners(); only read when show_addrs
              is true
    packages -- dict mapping package name to a dict with an 'owner' key
    show_addrs -- when true, print each owner's (obfuscated) email address
    """
    bad = {}
    for srpm, licenses in bogus.items():
        owner = packages[srpm]['owner']
        bad.setdefault(owner, {}).setdefault(srpm, licenses)

    for owner, data in sorted(bad.items()):
        header = [owner]
        if show_addrs and owner != no_owner:
            header.append('(%s)' % owners[owner]['addr'].replace('@', ' at '))
        header.append('[%d]:' % len(data))
        print(' '.join(header))
        for srpm, licenses in sorted(data.items()):
            print('\t%s: %s' % (srpm,
                                ' '.join(['"%s"' % l for l in licenses])))
        # NOTE(review): blank line after each owner's group; placement
        # reconstructed from a source that had lost its indentation.
        print('')


def status_by_package(bogus, packages):
    """Print the bad licenses as one line per package, sorted by name.

    bogus -- dict mapping package name to its list of bad licenses
    packages -- dict mapping package name to a dict with an 'owner' key
    """
    for srpm, licenses in sorted(bogus.items()):
        owner = packages[srpm]['owner']
        print('%s (%s): %s' % (srpm, owner,
                               ' '.join(['"%s"' % l for l in licenses])))
    # NOTE(review): single blank line after the listing; placement
    # reconstructed from a source that had lost its indentation.
    print('')


def _parse_args():
    """Parse the command line.

    Returns a tuple (opts, args, parser).  On return at most one of
    opts.owners and opts.packages is true, and neither is when opts.quiet is
    set.
    """
    parser = optparse.OptionParser()
    parser.add_option('-a', '--show-addrs', dest='addrs', action='store_true',
                      default=False,
                      help='print email addresses for owners [%default]')
    parser.add_option('--glob', dest='glob_pattern', action='store',
                      default='devel/*/*.spec', metavar='pattern',
                      help='glob pattern for finding spec files [%default]')
    parser.add_option('--license', dest='license_regex', action='store',
                      default=None, metavar='regex',
                      help='find licenses matching the regex [%default]')
    parser.add_option('-o', '--owners', dest='owners', action='store_true',
                      default=False,
                      help='group output by owner [%default]')
    parser.add_option('-p', '--package', dest='packages', action='store_true',
                      default=True,
                      help='group output by package [%default]')
    parser.add_option('-q', '--quiet', dest='quiet', action='store_true',
                      default=False,
                      help='only print a summary [%default]')
    opts, args = parser.parse_args()

    # opts.owners trumps opts.packages
    if opts.owners:
        opts.packages = False

    # quiet trumps both owners and packages
    if opts.quiet:
        opts.owners = opts.packages = False

    return opts, args, parser


def _read_license_tags(spec):
    """Return the values of all License: lines in the spec file at `spec`."""
    with open(spec) as spec_file:
        # the \ strip is needed for the mk_pkg define in baekmuk-ttf-fonts
        return [re.sub(r'License:\s*', '', line).strip('\n').strip('\\')
                for line in spec_file if line.startswith('License:')]


def main():
    """Check the License: tags of all matching spec files and report.

    Returns 1 when no spec files match the glob pattern, None otherwise.
    """
    opts, args, parser = _parse_args()
    specs = sorted(glob.glob(opts.glob_pattern))
    if not specs:
        print('No specfiles found.')
        return 1

    valid_licenses = get_valid_licenses()
    bogus = {}
    # Number of spec files actually examined.  Skipped specs are counted out
    # here instead of being removed from `specs`: removing from the list
    # while iterating over it made the loop skip the following spec file.
    checked = 0

    for spec in specs:
        spec_dir, base = os.path.split(spec)
        srpm = os.path.splitext(base)[0]
        srpm = cvs_module_map.get(srpm, srpm)
        dead = os.path.join(spec_dir, 'dead.package')
        if srpm in blacklist or os.path.exists(dead):
            continue
        checked += 1

        licenses = _read_license_tags(spec)
        bad_licenses = []
        for tag in licenses:
            tag = tag.strip()
            if not opts.license_regex:
                bad_licenses.extend(find_bad_licenses(tag, valid_licenses))
            elif re.search(opts.license_regex, tag):
                # with --license, "bad" means "matches the given regex"
                bad_licenses.append(tag)
        if not licenses:
            bad_licenses.append('NO LICENSE TAG IN SPEC FILE')
        if bad_licenses:
            bogus[srpm] = bad_licenses

    if bogus:
        # The remote lookups are only made when their results are printed,
        # so --quiet needs neither credentials nor network access.
        if opts.owners:
            status_by_owner(bogus, get_owners(), get_packages(), opts.addrs)
        elif opts.packages:
            status_by_package(bogus, get_packages())
        if not opts.license_regex:
            bad = len(bogus)
            percent = (bad / float(checked)) * 100
            datestr = strftime('%Y-%m-%d %H:%M UTC', gmtime())
            summary = 'Invalid licenses: %d out of %d (%.2f%%) [as of %s]' % (
                bad, checked, percent, datestr)
            print(summary)
    elif opts.license_regex:
        print('No licenses matched "%s"' % opts.license_regex)
    else:
        # check the script, there must be an error. :)
        print('All license tags are valid.')


if __name__ == '__main__':
    try:
        status = main()
    except KeyboardInterrupt:
        status = 1
    raise SystemExit(status)