summary | refs | log | tree | commit | diff | stats
path: root/createprestorepo/genprestometadata.py
diff options
context:
space:
mode:
author: Jonathan Dieter <jdieter@gmail.com> 2007-06-19 20:58:07 +0300
committer: Jonathan Dieter <jdieter@gmail.com> 2007-06-19 20:58:07 +0300
commit: dce0600bc64c793ba6e8f67c56c286d8d97e7c4c (patch)
tree: 71c559e031b3c10ba56a187e0a017f09d4d25137 /createprestorepo/genprestometadata.py
parent: 93b2295180471308e969640472bdc601d1f10015 (diff)
download: presto-dce0600bc64c793ba6e8f67c56c286d8d97e7c4c.tar.gz
presto-dce0600bc64c793ba6e8f67c56c286d8d97e7c4c.tar.xz
presto-dce0600bc64c793ba6e8f67c56c286d8d97e7c4c.zip
Many bugfixes and a few enhancements
Signed-off-by: Jonathan Dieter <jdieter@gmail.com>
Diffstat (limited to 'createprestorepo/genprestometadata.py')
-rwxr-xr-x createprestorepo/genprestometadata.py 475
1 files changed, 0 insertions, 475 deletions
diff --git a/createprestorepo/genprestometadata.py b/createprestorepo/genprestometadata.py
deleted file mode 100755
index bbce09d..0000000
--- a/createprestorepo/genprestometadata.py
+++ /dev/null
@@ -1,475 +0,0 @@
-#!/usr/bin/python -t
-# primary functions and glue for generating the repository metadata
-#
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-# Copyright 2004 Duke University
-# Copyright 2007 Jonathan Dieter
-
-
-import os
-import sys
-import getopt
-import rpm
-import libxml2
-import string
-import fnmatch
-import urlgrabber
-
-import dumpMetadata
-from dumpMetadata import _gzipOpen
-__version__ = '0.4.3'
-
-def errorprint(stuff):
- print >> sys.stderr, stuff
-
-def _(args):
- """Stub function for translation"""
- return args
-
-def usage(retval=1):
- print _("""
- createrepo [options] directory-of-packages
-
- Options:
- -u, --baseurl <url> = optional base url location for all files
- -o, --outputdir <dir> = optional directory to output to
- -x, --exclude = files globs to exclude, can be specified multiple times
- -q, --quiet = run quietly
- -v, --verbose = run verbosely
- -c, --cachedir <dir> = specify which dir to use for the checksum cache
- -h, --help = show this help
- -V, --version = output version
- -p, --pretty = output xml files in pretty format.
- """)
-
- sys.exit(retval)
-
-class MetaDataGenerator:
- def __init__(self, cmds):
- self.cmds = cmds
- self.ts = rpm.TransactionSet()
- self.pkgcount = 0
- self.newrpms = {}
- self.files = []
-
- def getFileList(self, basepath, path, ext, filelist):
- """Return all files in path matching ext, store them in filelist,
- recurse dirs. Returns a list object"""
-
- extlen = len(ext)
- totalpath = os.path.normpath(os.path.join(basepath, path))
- try:
- dir_list = os.listdir(totalpath)
- except OSError, e:
- errorprint(_('Error accessing directory %s, %s') % (totalpath, e))
- sys.exit(1)
-
- for d in dir_list:
- if os.path.isdir(totalpath + '/' + d):
- filelist = self.getFileList(basepath, os.path.join(path, d), ext, filelist)
- else:
- if string.lower(d[-extlen:]) == '%s' % (ext):
- if totalpath.find(basepath) == 0:
- relativepath = totalpath.replace(basepath, "", 1)
- relativepath = relativepath.lstrip("/")
- filelist.append(os.path.join(relativepath, d))
- else:
- raise "basepath '%s' not found in path '%s'" % (basepath, totalpath)
-
- return filelist
-
-
- def trimRpms(self, files):
- badrpms = []
- for file in files:
- for glob in self.cmds['excludes']:
- if fnmatch.fnmatch(file, glob):
- # print 'excluded: %s' % file
- if file not in badrpms:
- badrpms.append(file)
- for file in badrpms:
- if file in files:
- files.remove(file)
- return files
-
- def doPkgMetadata(self, directory):
- """all the heavy lifting for the package metadata"""
-
- # rpms we're going to be dealing with
- files = self.getFileList(self.cmds['basedir'], directory, '.drpm', [])
- files = self.trimRpms(files)
- self.pkgcount = len(files)
- self.openMetadataDocs()
- self.writeMetadataDocs(files)
- self.closeMetadataDocs()
-
-
- def openMetadataDocs(self):
- self._setupPresto()
-
- def _setupPresto(self):
- # setup the base metadata doc
- self.prestodoc = libxml2.newDoc("1.0")
- self.prestoroot = self.prestodoc.newChild(None, "metadata", None)
- basens = self.prestoroot.newNs('http://linux.duke.edu/metadata/common', None)
- self.formatns = self.prestoroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
- self.prestoroot.setNs(basens)
- prestofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestofile'])
- self.prestofile = _gzipOpen(prestofilepath, 'w')
- self.prestofile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
- self.prestofile.write('<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">\n' %
- self.pkgcount)
-
-
- def writeMetadataDocs(self, files, current=0):
- for file in files:
- current+=1
- try:
- mdobj = dumpMetadata.RpmMetaData(self.ts, self.cmds['basedir'], file, self.cmds, True)
- if not self.cmds['quiet']:
- if self.cmds['verbose']:
- print '%d/%d - %s' % (current, len(files), file)
- else:
- sys.stdout.write('\r' + ' ' * 80)
- sys.stdout.write("\r%d/%d - %s" % (current, self.pkgcount, file))
- sys.stdout.flush()
- except dumpMetadata.MDError, e:
- errorprint('\n%s - %s' % (e, file))
- continue
- else:
- try:
- dumpMetadata.generateXML(self.prestodoc, self.prestoroot, self.formatns, mdobj, self.cmds['sumtype'], self.newrpms)
- except dumpMetadata.MDError, e:
- errorprint(_('\nAn error occurred creating presto metadata: %s') % e)
- continue
- return current
-
-
- def closeMetadataDocs(self):
- if not self.cmds['quiet']:
- print ''
-
- # save them up to the tmp locations:
- if not self.cmds['quiet']:
- print _('Saving Presto metadata')
- output = self.prestoroot.serialize('UTF-8', self.cmds['pretty'])
- output = output[output.find("\n")+1:]
- self.prestofile.write(output)
- self.prestofile.write("\n")
- self.prestofile.close()
- self.prestodoc.freeDoc()
-
- def doRepoMetadata(self):
- """wrapper to generate the prestomd.xml file that stores the info on the other files"""
- repodoc = libxml2.newDoc("1.0")
- reporoot = repodoc.newChild(None, "repomd", None)
- repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None)
- reporoot.setNs(repons)
- repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestomdfile'])
-
- try:
- dumpMetadata.repoXML(reporoot, self.cmds)
- except dumpMetadata.MDError, e:
- errorprint(_('Error generating repo xml file: %s') % e)
- sys.exit(1)
-
- try:
- repodoc.saveFormatFileEnc(repofilepath, 'UTF-8', 1)
- except:
- errorprint(_('Error saving temp file for rep xml: %s') % repofilepath)
- sys.exit(1)
-
- del repodoc
-
-class SplitMetaDataGenerator(MetaDataGenerator):
-
- def __init__(self, cmds):
- MetaDataGenerator.__init__(self, cmds)
- self.initialdir = self.cmds['basedir']
-
- def _getFragmentUrl(self, url, fragment):
- import urlparse
- urlparse.uses_fragment.append('media')
- if not url:
- return url
- (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
- return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))
-
- def doPkgMetadata(self, directories):
- """all the heavy lifting for the package metadata"""
- import types
- if type(directories) == types.StringType:
- MetaDataGenerator.doPkgMetadata(self, directories)
- return
- filematrix = {}
- for mydir in directories:
- filematrix[mydir] = self.getFileList(os.path.join(self.initialdir, mydir), '.', '.rpm', [])
- self.trimRpms(filematrix[mydir])
- self.pkgcount += len(filematrix[mydir])
-
- mediano = 1
- current = 0
- self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
- self.cmds['basedir'] = os.path.join(self.initialdir, directories[0])
- self.openMetadataDocs()
- for mydir in directories:
- self.cmds['basedir'] = os.path.join(self.initialdir, mydir)
- self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
- current = self.writeMetadataDocs(filematrix[mydir], current)
- mediano += 1
- self.cmds['basedir'] = os.path.join(self.initialdir, directories[0])
- self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], 1)
- self.closeMetadataDocs()
-
-
-def checkAndMakeDir(dir):
- """
- check out the dir and make it, if possible, return 1 if done, else return 0
- """
- if os.path.exists(dir):
- if not os.path.isdir(dir):
- errorprint(_('%s is not a dir') % dir)
- result = False
- else:
- if not os.access(dir, os.W_OK):
- errorprint(_('%s is not writable') % dir)
- result = False
- else:
- result = True
- else:
- try:
- os.mkdir(dir)
- except OSError, e:
- errorprint(_('Error creating dir %s: %s') % (dir, e))
- result = False
- else:
- result = True
- return result
-
-def parseArgs(args):
- """
- Parse the command line args return a commands dict and directory.
- Sanity check all the things being passed in.
- """
- cmds = {}
- cmds['quiet'] = 0
- cmds['verbose'] = 0
- cmds['excludes'] = []
- cmds['baseurl'] = None
- cmds['sumtype'] = 'sha'
- cmds['pretty'] = 0
- cmds['cachedir'] = None
- cmds['basedir'] = os.getcwd()
- cmds['cache'] = False
- cmds['split'] = False
- cmds['outputdir'] = ""
- cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
- cmds['dir-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*']
-
- try:
- gopts, argsleft = getopt.getopt(args, 'phqVvs:x:u:c:o:', ['help', 'exclude=',
- 'quiet', 'verbose', 'cachedir=', 'basedir=',
- 'baseurl=', 'checksum=',
- 'version', 'pretty', 'split', 'outputdir='])
- except getopt.error, e:
- errorprint(_('Options Error: %s.') % e)
- usage()
-
- try:
- for arg,a in gopts:
- if arg in ['-h','--help']:
- usage(retval=0)
- elif arg in ['-V', '--version']:
- print '%s' % __version__
- sys.exit(0)
- elif arg == '--split':
- cmds['split'] = True
- except ValueError, e:
- errorprint(_('Options Error: %s') % e)
- usage()
-
-
- # make sure our dir makes sense before we continue
- if len(argsleft) > 1 and not cmds['split']:
- errorprint(_('Error: Only one directory allowed per run.'))
- usage()
- elif len(argsleft) == 0:
- errorprint(_('Error: Must specify a directory to index.'))
- usage()
- else:
- directories = argsleft
-
- try:
- for arg,a in gopts:
- if arg in ['-v', '--verbose']:
- cmds['verbose'] = 1
- elif arg in ["-q", '--quiet']:
- cmds['quiet'] = 1
- elif arg in ['-u', '--baseurl']:
- if cmds['baseurl'] is not None:
- errorprint(_('Error: Only one baseurl allowed.'))
- usage()
- else:
- cmds['baseurl'] = a
- elif arg in ['-x', '--exclude']:
- cmds['excludes'].append(a)
- elif arg in ['-p', '--pretty']:
- cmds['pretty'] = 1
- elif arg in ['-c', '--cachedir']:
- cmds['cache'] = True
- cmds['cachedir'] = a
- elif arg == '--basedir':
- cmds['basedir'] = a
- elif arg in ['-o','--outputdir']:
- cmds['outputdir'] = a
-
- except ValueError, e:
- errorprint(_('Options Error: %s') % e)
- usage()
-
- directory = directories[0]
-# Fix paths
- directory = os.path.normpath(directory)
- if cmds['split']:
- pass
- elif os.path.isabs(directory):
- cmds['basedir'] = directory
- directory = '.'
- else:
- cmds['basedir'] = os.path.realpath(os.path.join(cmds['basedir'], directory))
- directory = '.'
- if not cmds['outputdir']:
- cmds['outputdir'] = cmds['basedir']
- if cmds['cachedir']:
- a = cmds ['cachedir']
- if not os.path.isabs(a):
- a = os.path.join(cmds['basedir'] ,a)
- if not checkAndMakeDir(a):
- errorprint(_('Error: cannot open/write to cache dir %s' % a))
- usage()
- cmds['cachedir'] = a
-
- #setup some defaults
- cmds['prestofile'] = 'presto.xml.gz'
- cmds['prestomdfile'] = 'prestomd.xml'
- cmds['tempdir'] = '.repodata'
- cmds['finaldir'] = 'repodata'
- cmds['olddir'] = '.olddata'
-
- # Fixup first directory
- directories[0] = directory
- return cmds, directories
-
-def main(args):
- cmds, directories = parseArgs(args)
- directory = directories[0]
- # start the sanity/stupidity checks
- if not os.path.exists(os.path.join(cmds['basedir'], directory)):
- errorprint(_('Directory must exist'))
- sys.exit(1)
-
- if not os.path.isdir(os.path.join(cmds['basedir'], directory)):
- errorprint(_('Directory of packages must be a directory.'))
- sys.exit(1)
-
- if not os.access(cmds['outputdir'], os.W_OK):
- errorprint(_('Directory must be writable.'))
- sys.exit(1)
-
- if cmds['split']:
- oldbase = cmds['basedir']
- cmds['basedir'] = os.path.join(cmds['basedir'], directory)
- if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['tempdir'])):
- sys.exit(1)
-
- if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['finaldir'])):
- sys.exit(1)
-
- if os.path.exists(os.path.join(cmds['outputdir'], cmds['olddir'])):
- errorprint(_('Old data directory exists, please remove: %s') % cmds['olddir'])
- sys.exit(1)
-
- # make sure we can write to where we want to write to:
- for direc in ['tempdir', 'finaldir']:
- for file in ['prestofile', 'prestomdfile']:
- filepath = os.path.join(cmds['outputdir'], cmds[direc], cmds[file])
- if os.path.exists(filepath):
- if not os.access(filepath, os.W_OK):
- errorprint(_('error in must be able to write to metadata files:\n -> %s') % filepath)
- usage()
-
- if cmds['split']:
- cmds['basedir'] = oldbase
- mdgen = SplitMetaDataGenerator(cmds)
- mdgen.doPkgMetadata(directories)
- else:
- mdgen = MetaDataGenerator(cmds)
- mdgen.doPkgMetadata(directory)
- mdgen.doRepoMetadata()
-
- if os.path.exists(os.path.join(cmds['outputdir'], cmds['finaldir'])):
- try:
- os.rename(os.path.join(cmds['outputdir'], cmds['finaldir']),
- os.path.join(cmds['outputdir'], cmds['olddir']))
- except:
- errorprint(_('Error moving final %s to old dir %s' % (os.path.join(cmds['outputdir'], cmds['finaldir']),
- os.path.join(cmds['outputdir'], cmds['olddir']))))
- sys.exit(1)
-
- try:
- os.rename(os.path.join(cmds['outputdir'], cmds['tempdir']),
- os.path.join(cmds['outputdir'], cmds['finaldir']))
- except:
- errorprint(_('Error moving final metadata into place'))
- # put the old stuff back
- os.rename(os.path.join(cmds['outputdir'], cmds['olddir']),
- os.path.join(cmds['outputdir'], cmds['finaldir']))
- sys.exit(1)
-
- for file in ['prestofile', 'prestomdfile']:
- if cmds[file]:
- fn = os.path.basename(cmds[file])
- else:
- continue
- oldfile = os.path.join(cmds['outputdir'], cmds['olddir'], fn)
- if os.path.exists(oldfile):
- try:
- os.remove(oldfile)
- except OSError, e:
- errorprint(_('Could not remove old metadata file: %s') % oldfile)
- errorprint(_('Error was %s') % e)
- sys.exit(1)
-
-
-#XXX: fix to remove tree as we mung basedir
- try:
- os.rmdir(os.path.join(cmds['outputdir'], cmds['olddir']))
- except OSError, e:
- errorprint(_('Could not remove old metadata dir: %s') % cmds['olddir'])
- errorprint(_('Error was %s') % e)
- errorprint(_('Please clean up this directory manually.'))
-
-if __name__ == "__main__":
- if len(sys.argv) > 1:
- if sys.argv[1] == 'profile':
- import hotshot
- p = hotshot.Profile(os.path.expanduser("~/createprestorepo.prof"))
- p.run('main(sys.argv[2:])')
- p.close()
- else:
- main(sys.argv[1:])
- else:
- main(sys.argv[1:])