#!/usr/bin/python -t
# primary functions and glue for generating the repository metadata
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2004 Duke University
# Copyright 2007 Jonathan Dieter

import os
import sys
import getopt
import rpm
import libxml2
import string
import fnmatch
import urlgrabber

import dumpMetadata
from dumpMetadata import _gzipOpen

__version__ = '0.4.3'


def errorprint(stuff):
    """Write `stuff` followed by a newline to standard error."""
    sys.stderr.write('%s\n' % (stuff,))


def _(args):
    """Stub function for translation"""
    return args


def usage(retval=1):
    """Print the command-line help text and exit with status `retval`."""
    print(_("""
    createrepo [options] directory-of-packages

    Options:
     -u, --baseurl = optional base url location for all files
     -o, --outputdir = optional directory to output to
     -x, --exclude = files globs to exclude, can be specified multiple times
     -q, --quiet = run quietly
     -v, --verbose = run verbosely
     -c, --cachedir = specify which dir to use for the checksum cache
     -h, --help = show this help
     -V, --version = output version
     -p, --pretty = output xml files in pretty format.
    """))
    sys.exit(retval)


class MetaDataGenerator:
    """Generate presto metadata for the packages under a single directory.

    `cmds` is the option dictionary built by parseArgs(); the methods below
    read (at least) its 'basedir', 'outputdir', 'tempdir', 'prestofile',
    'prestomdfile', 'excludes', 'quiet', 'verbose', 'pretty' and 'sumtype'
    entries.
    """

    def __init__(self, cmds):
        self.cmds = cmds
        self.ts = rpm.TransactionSet()
        self.pkgcount = 0
        self.newrpms = {}
        self.files = []

    def getFileList(self, basepath, path, ext, filelist):
        """Collect every file below basepath/path whose name ends in `ext`.

        Directories are searched recursively.  Matching files are appended
        to `filelist` as paths relative to `basepath`; the extension test
        lower-cases the file name but not `ext`.

        Returns `filelist` (the same list object that was passed in).
        Exits the process with status 1 if a directory cannot be listed.
        Raises ValueError if the directory being scanned does not lie
        under `basepath`.
        """
        extlen = len(ext)
        totalpath = os.path.normpath(os.path.join(basepath, path))

        try:
            dir_list = os.listdir(totalpath)
        except OSError as e:
            errorprint(_('Error accessing directory %s, %s') % (totalpath, e))
            sys.exit(1)

        for d in dir_list:
            if os.path.isdir(os.path.join(totalpath, d)):
                filelist = self.getFileList(basepath, os.path.join(path, d),
                                            ext, filelist)
            elif d[-extlen:].lower() == ext:
                # totalpath is normalised; a basepath given with e.g. a
                # trailing slash is not, so fall back to its normalised
                # form before deciding that the prefix is missing.
                prefix = basepath
                if not totalpath.startswith(prefix):
                    prefix = os.path.normpath(basepath)
                if not totalpath.startswith(prefix):
                    # String exceptions are a TypeError on Python >= 2.6.
                    raise ValueError("basepath '%s' not found in path '%s'"
                                     % (basepath, totalpath))
                relativepath = totalpath[len(prefix):].lstrip("/")
                filelist.append(os.path.join(relativepath, d))

        return filelist

    def trimRpms(self, files):
        """Remove from `files` every entry matching an exclude glob.

        `files` is filtered IN PLACE (SplitMetaDataGenerator relies on this
        and ignores the return value) and is also returned.
        """
        excludes = self.cmds['excludes']
        files[:] = [name for name in files
                    if not any(fnmatch.fnmatch(name, glob)
                               for glob in excludes)]
        return files

    def doPkgMetadata(self, directory):
        """all the heavy lifting for the package metadata"""
        # rpms we're going to be dealing with
        files = self.getFileList(self.cmds['basedir'], directory, '.drpm', [])
        files = self.trimRpms(files)
        self.pkgcount = len(files)
        self.openMetadataDocs()
        self.writeMetadataDocs(files)
        self.closeMetadataDocs()

    def openMetadataDocs(self):
        """Create the in-memory document and open the output file."""
        self._setupPresto()

    def _setupPresto(self):
        """Build the empty metadata tree and start the gzipped output file."""
        # setup the base metadata doc
        self.prestodoc = libxml2.newDoc("1.0")
        self.prestoroot = self.prestodoc.newChild(None, "metadata", None)
        basens = self.prestoroot.newNs(
            'http://linux.duke.edu/metadata/common', None)
        self.formatns = self.prestoroot.newNs(
            'http://linux.duke.edu/metadata/rpm', 'rpm')
        self.prestoroot.setNs(basens)

        prestofilepath = os.path.join(self.cmds['outputdir'],
                                      self.cmds['tempdir'],
                                      self.cmds['prestofile'])
        self.prestofile = _gzipOpen(prestofilepath, 'w')

        # The opening tag is written by hand so that it can carry the
        # package count; closeMetadataDocs() drops the first line of the
        # serialized tree and appends the remainder after this header.
        # NOTE(review): these two literals had lost their markup (the second
        # was '\n' % pkgcount, a TypeError).  They are rebuilt from the root
        # element and namespaces declared above; confirm the `packages`
        # attribute name against the consumers of this file.
        self.prestofile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        self.prestofile.write(
            '<metadata xmlns="http://linux.duke.edu/metadata/common" '
            'xmlns:rpm="http://linux.duke.edu/metadata/rpm" '
            'packages="%s">\n' % self.pkgcount)

    def writeMetadataDocs(self, files, current=0):
        """Add the metadata of every file in `files` to the document.

        `current` is the number of packages already processed by earlier
        calls; it is incremented per file and the new total is returned so
        that split runs can keep one running counter.  Files whose metadata
        cannot be read or serialized are reported and skipped.
        """
        quiet = self.cmds['quiet']
        verbose = self.cmds['verbose']

        for pkgfile in files:
            current += 1
            try:
                mdobj = dumpMetadata.RpmMetaData(self.ts,
                                                 self.cmds['basedir'],
                                                 pkgfile, self.cmds, True)
            except dumpMetadata.MDError as e:
                errorprint('\n%s - %s' % (e, pkgfile))
                continue

            if not quiet:
                # Always report against the overall package count: in split
                # mode `current` spans all directories, so len(files) would
                # be the wrong total.
                if verbose:
                    print('%d/%d - %s' % (current, self.pkgcount, pkgfile))
                else:
                    sys.stdout.write('\r' + ' ' * 80)
                    sys.stdout.write("\r%d/%d - %s"
                                     % (current, self.pkgcount, pkgfile))
                    sys.stdout.flush()

            try:
                dumpMetadata.generateXML(self.prestodoc, self.prestoroot,
                                         self.formatns, mdobj,
                                         self.cmds['sumtype'], self.newrpms)
            except dumpMetadata.MDError as e:
                errorprint(
                    _('\nAn error occurred creating presto metadata: %s') % e)
                continue

        return current

    def closeMetadataDocs(self):
        """Serialize the tree to the output file and release both."""
        if not self.cmds['quiet']:
            print('')
            # save them up to the tmp locations:
            print(_('Saving Presto metadata'))

        output = self.prestoroot.serialize('UTF-8', self.cmds['pretty'])
        # Drop everything up to and including the first newline.  When the
        # serialization contains no newline, find() returns -1 and nothing
        # is dropped.
        output = output[output.find("\n") + 1:]
        self.prestofile.write(output)
        self.prestofile.write("\n")
        self.prestofile.close()
        self.prestodoc.freeDoc()

    def doRepoMetadata(self):
        """wrapper to generate the prestomd.xml file that stores the info
        on the other files

        Exits the process with status 1 if the index cannot be generated
        or saved.
        """
        repodoc = libxml2.newDoc("1.0")
        reporoot = repodoc.newChild(None, "repomd", None)
        repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None)
        reporoot.setNs(repons)
        repofilepath = os.path.join(self.cmds['outputdir'],
                                    self.cmds['tempdir'],
                                    self.cmds['prestomdfile'])

        try:
            dumpMetadata.repoXML(reporoot, self.cmds)
        except dumpMetadata.MDError as e:
            errorprint(_('Error generating repo xml file: %s') % e)
            sys.exit(1)

        try:
            written = repodoc.saveFormatFileEnc(repofilepath, 'UTF-8', 1)
        except Exception:
            written = -1
        # libxml2 reports a failed save through a negative return value
        # rather than an exception, so check both.
        if isinstance(written, int) and written < 0:
            errorprint(
                _('Error saving temp file for rep xml: %s') % repofilepath)
            sys.exit(1)

        # libxml2 documents are not reclaimed by `del`; free explicitly.
        repodoc.freeDoc()


class SplitMetaDataGenerator(MetaDataGenerator):
    """Generate one set of metadata for packages spread over several
    directories, tagging the base url of each directory with its 1-based
    position as the URL fragment."""

    def __init__(self, cmds):
        MetaDataGenerator.__init__(self, cmds)
        self.initialdir = self.cmds['basedir']

    def _getFragmentUrl(self, url, fragment):
        """Return `url` with its fragment replaced by str(fragment).

        A false `url` (None or empty) is returned unchanged.
        """
        import urlparse
        # Register the scheme once instead of growing the list on each call.
        if 'media' not in urlparse.uses_fragment:
            urlparse.uses_fragment.append('media')
        if not url:
            return url
        (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
        return urlparse.urlunsplit((scheme, netloc, path, query,
                                    str(fragment)))

    def doPkgMetadata(self, directories):
        """all the heavy lifting for the package metadata

        `directories` is either a single directory name (handled exactly
        like MetaDataGenerator.doPkgMetadata) or a sequence of directory
        names relative to the initial base directory.
        """
        if isinstance(directories, str):
            MetaDataGenerator.doPkgMetadata(self, directories)
            return

        filematrix = {}
        for mydir in directories:
            # NOTE(review): this scans for '.rpm' while the single-directory
            # path scans for '.drpm' -- confirm which one is intended.
            filematrix[mydir] = self.getFileList(
                os.path.join(self.initialdir, mydir), '.', '.rpm', [])
            self.trimRpms(filematrix[mydir])  # filters the list in place
            self.pkgcount += len(filematrix[mydir])

        mediano = 1
        current = 0
        self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'],
                                                    mediano)
        self.cmds['basedir'] = os.path.join(self.initialdir, directories[0])
        self.openMetadataDocs()

        for mydir in directories:
            self.cmds['basedir'] = os.path.join(self.initialdir, mydir)
            self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'],
                                                        mediano)
            current = self.writeMetadataDocs(filematrix[mydir], current)
            mediano += 1

        # Point basedir/baseurl back at the first directory before closing.
        self.cmds['basedir'] = os.path.join(self.initialdir, directories[0])
        self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], 1)
        self.closeMetadataDocs()


def checkAndMakeDir(dir):
    """Make sure `dir` is a writable directory, creating it if missing.

    Returns True when the directory exists and is writable or was created,
    False otherwise (an explanation is printed to stderr).
    """
    if os.path.exists(dir):
        if not os.path.isdir(dir):
            errorprint(_('%s is not a dir') % dir)
            return False
        if not os.access(dir, os.W_OK):
            errorprint(_('%s is not writable') % dir)
            return False
        return True

    try:
        os.mkdir(dir)
    except OSError as e:
        errorprint(_('Error creating dir %s: %s') % (dir, e))
        return False
    return True


def parseArgs(args):
    """
       Parse the command line args return a commands dict and directory.
       Sanity check all the things being passed in.

       Returns (cmds, directories); directories[0] is rewritten to '.'
       unless --split was given, because cmds['basedir'] then already
       points at the package directory.  Calls usage() -- which exits --
       on any option error.
    """
    cmds = {
        'quiet': 0,
        'verbose': 0,
        'excludes': [],
        'baseurl': None,
        'sumtype': 'sha',
        'pretty': 0,
        'cachedir': None,
        'basedir': os.getcwd(),
        'cache': False,
        'split': False,
        'outputdir': "",
        'file-pattern-match': [r'.*bin\/.*', r'^\/etc\/.*',
                               r'^\/usr\/lib\/sendmail$'],
        'dir-pattern-match': [r'.*bin\/.*', r'^\/etc\/.*'],
    }

    try:
        gopts, argsleft = getopt.getopt(
            args, 'phqVvs:x:u:c:o:',
            ['help', 'exclude=', 'quiet', 'verbose', 'cachedir=', 'basedir=',
             'baseurl=', 'checksum=', 'version', 'pretty', 'split',
             'outputdir='])
    except getopt.GetoptError as e:
        errorprint(_('Options Error: %s.') % e)
        usage()

    # First pass: options that end the run, plus --split, which changes how
    # the positional arguments are validated below.
    for arg, a in gopts:
        if arg in ['-h', '--help']:
            usage(retval=0)
        elif arg in ['-V', '--version']:
            print('%s' % __version__)
            sys.exit(0)
        elif arg == '--split':
            cmds['split'] = True

    # make sure our dir makes sense before we continue
    if len(argsleft) > 1 and not cmds['split']:
        errorprint(_('Error: Only one directory allowed per run.'))
        usage()
    elif len(argsleft) == 0:
        errorprint(_('Error: Must specify a directory to index.'))
        usage()
    else:
        directories = argsleft

    # Second pass: everything else.
    # NOTE(review): -s/--checksum is accepted by getopt above but never
    # applied, so cmds['sumtype'] always stays 'sha'.
    for arg, a in gopts:
        if arg in ['-v', '--verbose']:
            cmds['verbose'] = 1
        elif arg in ['-q', '--quiet']:
            cmds['quiet'] = 1
        elif arg in ['-u', '--baseurl']:
            if cmds['baseurl'] is not None:
                errorprint(_('Error: Only one baseurl allowed.'))
                usage()
            else:
                cmds['baseurl'] = a
        elif arg in ['-x', '--exclude']:
            cmds['excludes'].append(a)
        elif arg in ['-p', '--pretty']:
            cmds['pretty'] = 1
        elif arg in ['-c', '--cachedir']:
            cmds['cache'] = True
            cmds['cachedir'] = a
        elif arg == '--basedir':
            cmds['basedir'] = a
        elif arg in ['-o', '--outputdir']:
            cmds['outputdir'] = a

    # Fix paths
    directory = os.path.normpath(directories[0])
    if cmds['split']:
        pass
    elif os.path.isabs(directory):
        cmds['basedir'] = directory
        directory = '.'
    else:
        cmds['basedir'] = os.path.realpath(
            os.path.join(cmds['basedir'], directory))
        directory = '.'

    if not cmds['outputdir']:
        cmds['outputdir'] = cmds['basedir']

    if cmds['cachedir']:
        a = cmds['cachedir']
        if not os.path.isabs(a):
            a = os.path.join(cmds['basedir'], a)
        if not checkAndMakeDir(a):
            errorprint(_('Error: cannot open/write to cache dir %s') % a)
            usage()
        cmds['cachedir'] = a

    # setup some defaults
    cmds['prestofile'] = 'presto.xml.gz'
    cmds['prestomdfile'] = 'prestomd.xml'
    cmds['tempdir'] = '.repodata'
    cmds['finaldir'] = 'repodata'
    cmds['olddir'] = '.olddata'

    # Fixup first directory
    directories[0] = directory
    return cmds, directories


def main(args):
    """Generate the metadata described by the command-line `args`.

    Metadata is written to the temp dir, the current final dir is moved
    aside to the old dir, the temp dir is renamed into place and the old
    dir is then removed.  Exits with status 1 on any fatal error.
    """
    cmds, directories = parseArgs(args)
    directory = directories[0]

    outputdir = cmds['outputdir']
    temppath = os.path.join(outputdir, cmds['tempdir'])
    finalpath = os.path.join(outputdir, cmds['finaldir'])
    oldpath = os.path.join(outputdir, cmds['olddir'])
    metadata_keys = ['prestofile', 'prestomdfile']

    # start the sanity/stupidity checks
    pkgdir = os.path.join(cmds['basedir'], directory)
    if not os.path.exists(pkgdir):
        errorprint(_('Directory must exist'))
        sys.exit(1)

    if not os.path.isdir(pkgdir):
        errorprint(_('Directory of packages must be a directory.'))
        sys.exit(1)

    if not os.access(outputdir, os.W_OK):
        errorprint(_('Directory must be writable.'))
        sys.exit(1)

    if cmds['split']:
        oldbase = cmds['basedir']
        cmds['basedir'] = os.path.join(cmds['basedir'], directory)

    if not checkAndMakeDir(temppath):
        sys.exit(1)

    if not checkAndMakeDir(finalpath):
        sys.exit(1)

    if os.path.exists(oldpath):
        errorprint(_('Old data directory exists, please remove: %s')
                   % cmds['olddir'])
        sys.exit(1)

    # make sure we can write to where we want to write to:
    for direc in ['tempdir', 'finaldir']:
        for key in metadata_keys:
            filepath = os.path.join(outputdir, cmds[direc], cmds[key])
            if os.path.exists(filepath) and not os.access(filepath, os.W_OK):
                errorprint(_('error in must be able to write to metadata files:\n -> %s') % filepath)
                usage()

    if cmds['split']:
        cmds['basedir'] = oldbase
        mdgen = SplitMetaDataGenerator(cmds)
        mdgen.doPkgMetadata(directories)
    else:
        mdgen = MetaDataGenerator(cmds)
        mdgen.doPkgMetadata(directory)
    mdgen.doRepoMetadata()

    if os.path.exists(finalpath):
        try:
            os.rename(finalpath, oldpath)
        except OSError:
            errorprint(_('Error moving final %s to old dir %s')
                       % (finalpath, oldpath))
            sys.exit(1)

    try:
        os.rename(temppath, finalpath)
    except OSError:
        errorprint(_('Error moving final metadata into place'))
        # put the old stuff back
        if os.path.exists(oldpath):
            os.rename(oldpath, finalpath)
        sys.exit(1)

    for key in metadata_keys:
        if not cmds[key]:
            continue
        oldfile = os.path.join(oldpath, os.path.basename(cmds[key]))
        if os.path.exists(oldfile):
            try:
                os.remove(oldfile)
            except OSError as e:
                errorprint(
                    _('Could not remove old metadata file: %s') % oldfile)
                errorprint(_('Error was %s') % e)
                sys.exit(1)

    # XXX: fix to remove tree as we mung basedir
    try:
        os.rmdir(oldpath)
    except OSError as e:
        errorprint(_('Could not remove old metadata dir: %s') % cmds['olddir'])
        errorprint(_('Error was %s') % e)
        errorprint(_('Please clean up this directory manually.'))


if __name__ == "__main__":
    # `createprestorepo profile <args...>` runs the tool under hotshot.
    if len(sys.argv) > 1 and sys.argv[1] == 'profile':
        import hotshot
        p = hotshot.Profile(os.path.expanduser("~/createprestorepo.prof"))
        p.run('main(sys.argv[2:])')
        p.close()
    else:
        main(sys.argv[1:])