summaryrefslogtreecommitdiffstats
path: root/makerepo/genprestometadata.py
diff options
context:
space:
mode:
Diffstat (limited to 'makerepo/genprestometadata.py')
-rw-r--r--makerepo/genprestometadata.py496
1 files changed, 496 insertions, 0 deletions
diff --git a/makerepo/genprestometadata.py b/makerepo/genprestometadata.py
new file mode 100644
index 0000000..402af25
--- /dev/null
+++ b/makerepo/genprestometadata.py
@@ -0,0 +1,496 @@
+#!/usr/bin/python -t
+# primary functions and glue for generating the repository metadata
+#
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2004 Duke University
+# Copyright 2007 Jonathan Dieter
+
+
+import os
+import sys
+import getopt
+import rpm
+import libxml2
+import string
+import fnmatch
+import urlgrabber
+
+import dumpMetadata
+from dumpMetadata import _gzipOpen
+__version__ = '0.4.3'
+
+def errorprint(stuff):
+ print >> sys.stderr, stuff
+
+def _(args):
+ """Stub function for translation"""
+ return args
+
+def usage(retval=1):
+ print _("""
+ createrepo [options] directory-of-packages
+
+ Options:
+ -u, --baseurl <url> = optional base url location for all files
+ -o, --outputdir <dir> = optional directory to output to
+ -x, --exclude = files globs to exclude, can be specified multiple times
+ -q, --quiet = run quietly
+ -v, --verbose = run verbosely
+ -c, --cachedir <dir> = specify which dir to use for the checksum cache
+ -h, --help = show this help
+ -V, --version = output version
+ -p, --pretty = output xml files in pretty format.
+ """)
+
+ sys.exit(retval)
+
+class MetaDataGenerator:
+ def __init__(self, cmds):
+ self.cmds = cmds
+ self.ts = rpm.TransactionSet()
+ self.pkgcount = 0
+ self.files = []
+
+ def getFileList(self, basepath, path, ext, filelist):
+ """Return all files in path matching ext, store them in filelist,
+ recurse dirs. Returns a list object"""
+
+ extlen = len(ext)
+ totalpath = os.path.normpath(os.path.join(basepath, path))
+ try:
+ dir_list = os.listdir(totalpath)
+ except OSError, e:
+ errorprint(_('Error accessing directory %s, %s') % (totalpath, e))
+ sys.exit(1)
+
+ for d in dir_list:
+ if os.path.isdir(totalpath + '/' + d):
+ filelist = self.getFileList(basepath, os.path.join(path, d), ext, filelist)
+ else:
+ if string.lower(d[-extlen:]) == '%s' % (ext):
+ if totalpath.find(basepath) == 0:
+ relativepath = totalpath.replace(basepath, "", 1)
+ relativepath = relativepath.lstrip("/")
+ filelist.append(os.path.join(relativepath, d))
+ else:
+ raise "basepath '%s' not found in path '%s'" % (basepath, totalpath)
+
+ return filelist
+
+
+ def trimRpms(self, files):
+ badrpms = []
+ for file in files:
+ for glob in self.cmds['excludes']:
+ if fnmatch.fnmatch(file, glob):
+ # print 'excluded: %s' % file
+ if file not in badrpms:
+ badrpms.append(file)
+ for file in badrpms:
+ if file in files:
+ files.remove(file)
+ return files
+
+ def doPkgMetadata(self, directory):
+ """all the heavy lifting for the package metadata"""
+
+ # rpms we're going to be dealing with
+ files = self.getFileList(self.cmds['basedir'], directory, '.dpm', [])
+ files = self.trimRpms(files)
+ self.pkgcount = len(files)
+ self.openMetadataDocs()
+ self.writeMetadataDocs(files)
+ self.closeMetadataDocs()
+
+
+ def openMetadataDocs(self):
+ self._setupPresto()
+
+ def _setupPresto(self):
+ # setup the base metadata doc
+ self.prestodoc = libxml2.newDoc("1.0")
+ self.prestoroot = self.prestodoc.newChild(None, "metadata", None)
+ basens = self.prestoroot.newNs('http://linux.duke.edu/metadata/common', None)
+ self.formatns = self.prestoroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
+ self.prestoroot.setNs(basens)
+ prestofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestofile'])
+ self.prestofile = _gzipOpen(prestofilepath, 'w')
+ self.prestofile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ self.prestofile.write('<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">\n' %
+ self.pkgcount)
+
+
+ def writeMetadataDocs(self, files, current=0):
+ for file in files:
+ current+=1
+ try:
+ mdobj = dumpMetadata.RpmMetaData(self.ts, self.cmds['basedir'], file, self.cmds)
+ if not self.cmds['quiet']:
+ if self.cmds['verbose']:
+ print '%d/%d - %s' % (current, len(files), file)
+ else:
+ sys.stdout.write('\r' + ' ' * 80)
+ sys.stdout.write("\r%d/%d - %s" % (current, self.pkgcount, file))
+ sys.stdout.flush()
+ except dumpMetadata.MDError, e:
+ errorprint('\n%s - %s' % (e, file))
+ continue
+ else:
+ try:
+ node = dumpMetadata.generateXML(self.prestodoc, self.prestoroot, self.formatns, mdobj, self.cmds['sumtype'])
+ except dumpMetadata.MDError, e:
+ errorprint(_('\nAn error occurred creating presto metadata: %s') % e)
+ continue
+ else:
+ output = node.serialize('UTF-8', self.cmds['pretty'])
+ self.prestofile.write(output)
+ self.prestofile.write('\n')
+ node.unlinkNode()
+ node.freeNode()
+ del node
+
+ return current
+
+
+ def closeMetadataDocs(self):
+ if not self.cmds['quiet']:
+ print ''
+
+ # save them up to the tmp locations:
+ if not self.cmds['quiet']:
+ print _('Saving Presto metadata')
+ self.prestofile.write('\n</metadata>')
+ self.prestofile.close()
+ self.prestodoc.freeDoc()
+
+ def doRepoMetadata(self):
+ """wrapper to generate the prestomd.xml file that stores the info on the other files"""
+ repodoc = libxml2.newDoc("1.0")
+ reporoot = repodoc.newChild(None, "repomd", None)
+ repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None)
+ reporoot.setNs(repons)
+ repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestomdfile'])
+
+ try:
+ dumpMetadata.repoXML(reporoot, self.cmds)
+ except dumpMetadata.MDError, e:
+ errorprint(_('Error generating repo xml file: %s') % e)
+ sys.exit(1)
+
+ try:
+ repodoc.saveFormatFileEnc(repofilepath, 'UTF-8', 1)
+ except:
+ errorprint(_('Error saving temp file for rep xml: %s') % repofilepath)
+ sys.exit(1)
+
+ del repodoc
+
+class SplitMetaDataGenerator(MetaDataGenerator):
+
+ def __init__(self, cmds):
+ MetaDataGenerator.__init__(self, cmds)
+ self.initialdir = self.cmds['basedir']
+
+ def _getFragmentUrl(self, url, fragment):
+ import urlparse
+ urlparse.uses_fragment.append('media')
+ if not url:
+ return url
+ (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
+ return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))
+
+ def doPkgMetadata(self, directories):
+ """all the heavy lifting for the package metadata"""
+ import types
+ if type(directories) == types.StringType:
+ MetaDataGenerator.doPkgMetadata(self, directories)
+ return
+ filematrix = {}
+ for mydir in directories:
+ filematrix[mydir] = self.getFileList(os.path.join(self.initialdir, mydir), '.', '.rpm', [])
+ self.trimRpms(filematrix[mydir])
+ self.pkgcount += len(filematrix[mydir])
+
+ mediano = 1
+ current = 0
+ self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
+ self.cmds['basedir'] = os.path.join(self.initialdir, directories[0])
+ self.openMetadataDocs()
+ for mydir in directories:
+ self.cmds['basedir'] = os.path.join(self.initialdir, mydir)
+ self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
+ current = self.writeMetadataDocs(filematrix[mydir], current)
+ mediano += 1
+ self.cmds['basedir'] = os.path.join(self.initialdir, directories[0])
+ self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], 1)
+ self.closeMetadataDocs()
+
+
+def checkAndMakeDir(dir):
+ """
+ check out the dir and make it, if possible, return 1 if done, else return 0
+ """
+ if os.path.exists(dir):
+ if not os.path.isdir(dir):
+ errorprint(_('%s is not a dir') % dir)
+ result = False
+ else:
+ if not os.access(dir, os.W_OK):
+ errorprint(_('%s is not writable') % dir)
+ result = False
+ else:
+ result = True
+ else:
+ try:
+ os.mkdir(dir)
+ except OSError, e:
+ errorprint(_('Error creating dir %s: %s') % (dir, e))
+ result = False
+ else:
+ result = True
+ return result
+
+def parseArgs(args):
+ """
+ Parse the command line args return a commands dict and directory.
+ Sanity check all the things being passed in.
+ """
+ cmds = {}
+ cmds['quiet'] = 0
+ cmds['verbose'] = 0
+ cmds['excludes'] = []
+ cmds['baseurl'] = None
+ cmds['sumtype'] = 'sha'
+ cmds['pretty'] = 0
+ cmds['cachedir'] = None
+ cmds['basedir'] = os.getcwd()
+ cmds['cache'] = False
+ cmds['split'] = False
+ cmds['outputdir'] = ""
+ cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
+ cmds['dir-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*']
+
+ try:
+ gopts, argsleft = getopt.getopt(args, 'phqVvs:x:u:c:o:', ['help', 'exclude=',
+ 'quiet', 'verbose', 'cachedir=', 'basedir=',
+ 'baseurl=', 'checksum=',
+ 'version', 'pretty', 'split', 'outputdir='])
+ except getopt.error, e:
+ errorprint(_('Options Error: %s.') % e)
+ usage()
+
+ try:
+ for arg,a in gopts:
+ if arg in ['-h','--help']:
+ usage(retval=0)
+ elif arg in ['-V', '--version']:
+ print '%s' % __version__
+ sys.exit(0)
+ elif arg == '--split':
+ cmds['split'] = True
+ except ValueError, e:
+ errorprint(_('Options Error: %s') % e)
+ usage()
+
+
+ # make sure our dir makes sense before we continue
+ if len(argsleft) > 1 and not cmds['split']:
+ errorprint(_('Error: Only one directory allowed per run.'))
+ usage()
+ elif len(argsleft) == 0:
+ errorprint(_('Error: Must specify a directory to index.'))
+ usage()
+ else:
+ directories = argsleft
+
+ try:
+ for arg,a in gopts:
+ if arg in ['-v', '--verbose']:
+ cmds['verbose'] = 1
+ elif arg in ["-q", '--quiet']:
+ cmds['quiet'] = 1
+ elif arg in ['-u', '--baseurl']:
+ if cmds['baseurl'] is not None:
+ errorprint(_('Error: Only one baseurl allowed.'))
+ usage()
+ else:
+ cmds['baseurl'] = a
+ elif arg in ['-x', '--exclude']:
+ cmds['excludes'].append(a)
+ elif arg in ['-p', '--pretty']:
+ cmds['pretty'] = 1
+ elif arg in ['-c', '--cachedir']:
+ cmds['cache'] = True
+ cmds['cachedir'] = a
+ elif arg == '--basedir':
+ cmds['basedir'] = a
+ elif arg in ['-o','--outputdir']:
+ cmds['outputdir'] = a
+
+ except ValueError, e:
+ errorprint(_('Options Error: %s') % e)
+ usage()
+
+ directory = directories[0]
+# Fix paths
+ directory = os.path.normpath(directory)
+ if cmds['split']:
+ pass
+ elif os.path.isabs(directory):
+ cmds['basedir'] = directory
+ directory = '.'
+ else:
+ cmds['basedir'] = os.path.realpath(os.path.join(cmds['basedir'], directory))
+ directory = '.'
+ if not cmds['outputdir']:
+ cmds['outputdir'] = cmds['basedir']
+ if cmds['groupfile']:
+ a = cmds['groupfile']
+ if cmds['split']:
+ a = os.path.join(cmds['basedir'], directory, cmds['groupfile'])
+ elif not os.path.isabs(a):
+ a = os.path.join(cmds['basedir'], cmds['groupfile'])
+ if not os.path.exists(a):
+ errorprint(_('Error: groupfile %s cannot be found.' % a))
+ usage()
+ cmds['groupfile'] = a
+ if cmds['cachedir']:
+ a = cmds ['cachedir']
+ if not os.path.isabs(a):
+ a = os.path.join(cmds['basedir'] ,a)
+ if not checkAndMakeDir(a):
+ errorprint(_('Error: cannot open/write to cache dir %s' % a))
+ usage()
+ cmds['cachedir'] = a
+
+ #setup some defaults
+ cmds['prestofile'] = 'presto.xml.gz'
+ cmds['prestomdfile'] = 'prestomd.xml'
+ cmds['tempdir'] = '.repodata'
+ cmds['finaldir'] = 'repodata'
+ cmds['olddir'] = '.olddata'
+
+ # Fixup first directory
+ directories[0] = directory
+ return cmds, directories
+
+def main(args):
+ cmds, directories = parseArgs(args)
+ directory = directories[0]
+ # start the sanity/stupidity checks
+ if not os.path.exists(os.path.join(cmds['basedir'], directory)):
+ errorprint(_('Directory must exist'))
+ sys.exit(1)
+
+ if not os.path.isdir(os.path.join(cmds['basedir'], directory)):
+ errorprint(_('Directory of packages must be a directory.'))
+ sys.exit(1)
+
+ if not os.access(cmds['outputdir'], os.W_OK):
+ errorprint(_('Directory must be writable.'))
+ sys.exit(1)
+
+ if cmds['split']:
+ oldbase = cmds['basedir']
+ cmds['basedir'] = os.path.join(cmds['basedir'], directory)
+ if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['tempdir'])):
+ sys.exit(1)
+
+ if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['finaldir'])):
+ sys.exit(1)
+
+ if os.path.exists(os.path.join(cmds['outputdir'], cmds['olddir'])):
+ errorprint(_('Old data directory exists, please remove: %s') % cmds['olddir'])
+ sys.exit(1)
+
+ # make sure we can write to where we want to write to:
+ for direc in ['tempdir', 'finaldir']:
+ for file in ['prestofile', 'prestomdfile']:
+ filepath = os.path.join(cmds['outputdir'], cmds[direc], cmds[file])
+ if os.path.exists(filepath):
+ if not os.access(filepath, os.W_OK):
+ errorprint(_('error in must be able to write to metadata files:\n -> %s') % filepath)
+ usage()
+
+ if cmds['split']:
+ cmds['basedir'] = oldbase
+ mdgen = SplitMetaDataGenerator(cmds)
+ mdgen.doPkgMetadata(directories)
+ else:
+ mdgen = MetaDataGenerator(cmds)
+ mdgen.doPkgMetadata(directory)
+ mdgen.doRepoMetadata()
+
+ if os.path.exists(os.path.join(cmds['outputdir'], cmds['finaldir'])):
+ try:
+ os.rename(os.path.join(cmds['outputdir'], cmds['finaldir']),
+ os.path.join(cmds['outputdir'], cmds['olddir']))
+ except:
+ errorprint(_('Error moving final %s to old dir %s' % (os.path.join(cmds['outputdir'], cmds['finaldir']),
+ os.path.join(cmds['outputdir'], cmds['olddir']))))
+ sys.exit(1)
+
+ try:
+ os.rename(os.path.join(cmds['outputdir'], cmds['tempdir']),
+ os.path.join(cmds['outputdir'], cmds['finaldir']))
+ except:
+ errorprint(_('Error moving final metadata into place'))
+ # put the old stuff back
+ os.rename(os.path.join(cmds['outputdir'], cmds['olddir']),
+ os.path.join(cmds['outputdir'], cmds['finaldir']))
+ sys.exit(1)
+
+ for file in ['prestofile', 'prestomdfile']:
+ if cmds[file]:
+ fn = os.path.basename(cmds[file])
+ else:
+ continue
+ oldfile = os.path.join(cmds['outputdir'], cmds['olddir'], fn)
+ if os.path.exists(oldfile):
+ try:
+ os.remove(oldfile)
+ except OSError, e:
+ errorprint(_('Could not remove old metadata file: %s') % oldfile)
+ errorprint(_('Error was %s') % e)
+ sys.exit(1)
+
+ # Clean up any update metadata
+ mdpath = os.path.join(cmds['basedir'], cmds['olddir'], cmds['update-info-dir'])
+ if os.path.isdir(mdpath):
+ for file in os.listdir(mdpath):
+ os.remove(os.path.join(mdpath, file))
+ os.rmdir(mdpath)
+
+
+#XXX: fix to remove tree as we mung basedir
+ try:
+ os.rmdir(os.path.join(cmds['outputdir'], cmds['olddir']))
+ except OSError, e:
+ errorprint(_('Could not remove old metadata dir: %s') % cmds['olddir'])
+ errorprint(_('Error was %s') % e)
+ errorprint(_('Please clean up this directory manually.'))
+
+if __name__ == "__main__":
+ if len(sys.argv) > 1:
+ if sys.argv[1] == 'profile':
+ import hotshot
+ p = hotshot.Profile(os.path.expanduser("~/createprestorepo.prof"))
+ p.run('main(sys.argv[2:])')
+ p.close()
+ else:
+ main(sys.argv[1:])
+ else:
+ main(sys.argv[1:])