Diffstat (limited to 'createprestorepo')
-rw-r--r--   createprestorepo/Makefile             |   6
-rwxr-xr-x   createprestorepo/createprestorepo.py  | 308
-rwxr-xr-x   createprestorepo/dumpMetadata.py      | 456
-rw-r--r--   createprestorepo/genprestometadata.py | 496
4 files changed, 1266 insertions, 0 deletions
diff --git a/createprestorepo/Makefile b/createprestorepo/Makefile
new file mode 100644
index 0000000..38fbfc6
--- /dev/null
+++ b/createprestorepo/Makefile
@@ -0,0 +1,6 @@
+clean:
+	rm -f *.pyc *.pyo *~
+
+install:
+#	mkdir -p $(DESTDIR)/usr/share/createprestorepo
+#	install -m 644 presto.py $(DESTDIR)/usr/lib/yum-plugins
diff --git a/createprestorepo/createprestorepo.py b/createprestorepo/createprestorepo.py
new file mode 100755
index 0000000..a217045
--- /dev/null
+++ b/createprestorepo/createprestorepo.py
@@ -0,0 +1,308 @@
+#!/usr/bin/python -t
+# -*- mode: Python; indent-tabs-mode: nil; -*-
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import errno, os, sys, gzip
+import fnmatch, re
+import rpmUtils.transaction, rpmUtils.miscutils
+import commands, libxml2
+import dumpMetadata
+from dumpMetadata import _gzipOpen, getChecksum
+#### import Utils
+
+DEBUG = 0
+#### Utils.setdebug(DEBUG)
+
+SUFFIX='drpm'
+DRPMWORTHKEEPINGTHRESH=0.5
+REPODATA="repodata"
+REPOFILE="presto.xml"
+REPOMDFILE="prestomd.xml"
+SUM_TYPE="sha"
+
+def XML_start_newrpm(node, (f, n, e, v, r, a), srcdir_len):
+    newrpm_node = node.newChild(None, "package", None)
+    newrpm_node.newProp("type", "rpm")
+    newrpm_node.newChild(None, "name", n)
+    newrpm_node.newChild(None, "arch", str(a))
+    version = newrpm_node.newChild(None, "version", None)
+    version.newProp("epoch", str(e))
+    version.newProp("ver", str(v))
+    version.newProp("rel", str(r))
+    deltas = newrpm_node.newChild(None, "deltas", None)
+    return deltas
+
+def XML_oldrpm(newrpm_node, drpm_file, oldrpm, newrpm, sequence, size):
+    (f, n, e, v, r, a) = oldrpm
+    (nf, nn, ne, nv, nr, na) = newrpm
+    oldrpm_node = newrpm_node.newChild(None, "oldrpm", None)
+    checksum = getChecksum(SUM_TYPE, drpm_file)
+    if n != nn:
+        oldrpm_node.newChild(None, "name", n)
+    if a != na:
+        oldrpm_node.newChild(None, "arch", str(a))
+    version = oldrpm_node.newChild(None, "version", None)
+    if e != ne:
+        version.newProp("epoch", str(e))
+    if v != nv:
+        version.newProp("ver", str(v))
+    version.newProp("rel", str(r))
+    oldrpm_node.newChild(None, "drpm_filename", drpm_file)
+    oldrpm_node.newChild(None, "size", str(size))
+    oldrpm_node.newChild(None, "sequence", str(sequence))
+    cs_node = oldrpm_node.newChild(None, "checksum", str(checksum))
+    cs_node.newProp("type", SUM_TYPE)
+
+def startXML():
+    basedoc = libxml2.newDoc("1.0")
+    baseroot = basedoc.newChild(None, "metadata", None)
+    basens = baseroot.newNs('http://linux.duke.edu/metadata/common', None)
+    formatns = baseroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
+    baseroot.setNs(basens)
+    return (basedoc, baseroot)
+
+def endXML(xmldoc, filename, srcdir, compressed=True):
+    if compressed:
+        outfile = _gzipOpen("%s%s/%s.gz" % (srcdir, REPODATA, filename), "w")
+        output = xmldoc.serialize('UTF-8', 1)
+        outfile.write(output)
+        outfile.close()
+    else:
+        xmldoc.saveFormatFileEnc("%s%s/%s" % (srcdir, REPODATA, filename), 'UTF-8', 1)
+    xmldoc.freeDoc()
+
+def repoXML(srcdir):
+    """generate the repomd.xml file that stores the info on the other files"""
+    repodoc = libxml2.newDoc("1.0")
+    reporoot = repodoc.newChild(None, "repomd", None)
+    repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None)
+    reporoot.setNs(repons)
+    repofilepath = "%s%s/%s" % (srcdir, REPODATA, REPOMDFILE)
+    filename = "%s%s/%s.gz" % (srcdir, REPODATA, REPOFILE)
+    filetype = "deltas"
+    zfo = _gzipOpen(filename, "rb")
+    uncsum = getChecksum(SUM_TYPE, zfo)
+    zfo.close()
+    csum = getChecksum(SUM_TYPE, filename)
+    timestamp = os.stat(filename)[8]
+    data = reporoot.newChild(None, 'data', None)
+    data.newProp('type', filetype)
+    location = data.newChild(None, 'location', None)
+    location.newProp('href', "%s/%s.gz" % (REPODATA, REPOFILE))
+    checksum = data.newChild(None, 'checksum', csum)
+    checksum.newProp('type', SUM_TYPE)
+    timestamp = data.newChild(None, 'timestamp', str(timestamp))
+    unchecksum = data.newChild(None, 'open-checksum', uncsum)
+    unchecksum.newProp('type', SUM_TYPE)
+    endXML(repodoc, REPOMDFILE, srcdir, False)
+
+def genDeltaRPM(ts, newrpm, oldrpm, is_new_package, srcdir, dstdir, locroot):
+    (f1,n1,e1,v1,r1,a1) = newrpm
+    (f2,n2,e2,v2,r2,a2) = oldrpm
+    hdr = rpmUtils.miscutils.hdrFromPackage(ts,f1)
+    arch = hdr['arch']
+    v12 = "_".join([v1,v2])
+    r12 = "_".join([r1,r2])
+    deltaRPMName= '%s/%s.%s.%s' % (dstdir, "-".join([n1,v12,r12]), a1, SUFFIX)
+    # If the drpm doesn't exist, make it, else skip it
+    if os.path.exists("%s%s" % (srcdir, deltaRPMName)):
+        dsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName))
+        if e1 == e2:
+            print 'Using pre-generated delta rpm for %s.%s - %s.%s => %s.%s' % (n1, a1, v2, r2, v1, r1)
+        else:
+            print 'Using pre-generated delta rpm for %s.%s - %s:%s.%s => %s:%s.%s' % (n1, a1, e2, v2, r2, e1, v1, r1)
+        # Get checksum
+        seqfile = open("%s%s.seq" % (srcdir, deltaRPMName), "r")
+        sequence = seqfile.read()[:-1]
+        sequence = sequence[sequence.rfind("-")+1:]
+        seqfile.close()
+        if is_new_package:
+            locroot = XML_start_newrpm(locroot, newrpm, len(srcdir))
+            is_new_package = False
+        XML_oldrpm(locroot, deltaRPMName, oldrpm, newrpm, sequence, dsize)
+        if DEBUG:
+            print "DEBUG skipping %s" % (deltaRPMName)
+    elif os.path.exists("%s%s.dontdelta" % (srcdir, deltaRPMName)):
+        pass
+    else:
+        deltaCommand = 'makedeltarpm -s %s%s.seq %s %s %s%s' % (srcdir, deltaRPMName, f2, f1, srcdir, deltaRPMName)
+        if DEBUG:
+            print "DEBUG " + deltaCommand
+        (code, out) = commands.getstatusoutput(deltaCommand)
+        if code:
+            #raise Exception("genDeltaRPM: exitcode was %s - Reported Error: %s" % (code, out))
+            print "Error genDeltaRPM for %s: exitcode was %s - Reported Error: %s" % (n1, code, out)
+            # makedeltarpm failed, so there is no drpm to measure or record
+            return (is_new_package, locroot)
+
+        # Get size
+        dsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName))
+
+        # Get checksum
+        seqfile = open("%s%s.seq" % (srcdir, deltaRPMName), "r")
+        sequence = seqfile.read()[:-1]
+        sequence = sequence[sequence.rfind("-")+1:]
+        seqfile.close()
+
+        # Check whether or not we should keep the drpm
+        if not drpmIsWorthKeeping(deltaRPMName, f1, srcdir):
+            if DEBUG:
+                print 'deleting %s' % (deltaRPMName)
+            try:
+                os.unlink("%s%s" % (srcdir, deltaRPMName))
+            except Exception, e:
+                print "Error deleting deltarpm %s" % (deltaRPMName), str(e)
+            try:
+                os.unlink("%s%s.seq" % (srcdir, deltaRPMName))
+            except Exception, e:
+                print "Error deleting checksum %s.seq" % (deltaRPMName), str(e)
+            f = open("%s%s.dontdelta" % (srcdir, deltaRPMName), "w")
+            f.close()
+        else:
+            if e1 == e2:
+                print 'Generated delta rpm for %s.%s - %s.%s => %s.%s' % (n1, a1, v2, r2, v1, r1)
+            else:
+                print 'Generated delta rpm for %s.%s - %s:%s.%s => %s:%s.%s' % (n1, a1, e2, v2, r2, e1, v1, r1)
+
+            if is_new_package:
+                locroot = XML_start_newrpm(locroot, newrpm, len(srcdir))
+                is_new_package = False
+            XML_oldrpm(locroot, deltaRPMName, oldrpm, newrpm, sequence, dsize)
+    return (is_new_package, locroot)
+
+def drpmIsWorthKeeping(deltaRPMName, newrpm, srcdir):
+    newsize = os.path.getsize(newrpm)
+    drpmsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName))
+    # Delete the drpm if it's too large
+    if drpmsize > DRPMWORTHKEEPINGTHRESH * newsize:
+        return False
+    return True
+
+def createPrestoRepo(srcdir, dstdir):
+    ts = rpmUtils.transaction.initReadOnlyTransaction()
+    changed = False
+
+    # Create list of .rpm files.
+    # We don't use "glob", so sub-directories are supported.
+    print 'Using source dir: %s' % srcdir
+    print 'Using destination dir: %s' % dstdir
+    if dstdir[-1] == "/":
+        dstdir = dstdir[:-1]
+    srcfiles = []
+    for root, dirs, files in os.walk(srcdir):
+        for f in fnmatch.filter(files,'*.rpm'):
+            srcfiles.append(os.path.join(root,f))
+    if not len(srcfiles):
+        print '  Nothing found.'
+        return changed
+    assert srcfiles[0].startswith(srcdir)
+
+    # Check whether dstdir exists, and if it doesn't, create it
+    if not os.access(dstdir, os.F_OK):
+        os.makedirs(dstdir, 0755)
+    elif not os.access(dstdir, os.W_OK):
+        print 'ERROR: Unable to write to %s' % dstdir
+        sys.exit(1)
+
+    # Check whether REPODATA exists, and if it doesn't, create it
+    if not os.access("%s%s" % (srcdir, REPODATA), os.F_OK):
+        os.makedirs("%s%s" % (srcdir, REPODATA), 0755)
+    elif not os.access("%s%s" % (srcdir, REPODATA), os.W_OK):
+        print 'ERROR: Unable to write to %s' % REPODATA
+        sys.exit(1)
+
+    # Create XML document
+#    xmldoc = libxml2.newDoc("1.0")
+#    xmlroot = xmldoc
+    (xmldoc, xmlroot) = startXML()
+
+    # Create map: rpm %name.%arch -> list of tuples (filename,name,e,v,r,a)
+    newestsrcrpms = {}
+    for f in srcfiles:
+        hdr = rpmUtils.miscutils.hdrFromPackage(ts, f)
+        nm = hdr['name'] + "." + hdr['arch']
+        n = hdr['name']
+        a = hdr['arch']
+        v = hdr['version']
+        r = hdr['release']
+        e = hdr['epoch']
+        if e is None:
+            e = 0
+        newestsrcrpms.setdefault(nm,[])
+        newestsrcrpms[nm].append((f,n,e,v,r,a))
+
+    # For every %name.%arch with more than one version, generate a delta
+    # rpm from each older version to the newest one.
+    for l in newestsrcrpms.itervalues():
+        x = len(l)
+
+        if x > 1:
+            def sortByEVR(fnevr1, fnevr2):
+                (f1,n1,e1,v1,r1,a1) = fnevr1
+                (f2,n2,e2,v2,r2,a2) = fnevr2
+                rc = rpmUtils.miscutils.compareEVR((e1,v1,r1),(e2,v2,r2))
+                if rc == 0:
+                    return 0
+                if rc > 0:
+                    return -1
+                if rc < 0:
+                    return 1
+
+            l.sort(sortByEVR) # highest first in list
+
+            # Generate delta rpm
+            is_new_package = True
+            locroot = xmlroot
+            for rpm in l[1:]:
+                (is_new_package, locroot) = genDeltaRPM(ts, l[0], rpm, is_new_package, srcdir, dstdir, locroot)
+
+    if not len(srcfiles):
+        print 'WARNING: No .rpms left. Stopping here.'
+        return changed
+
+    # Write out end of deltas.xml file
+    endXML(xmldoc, REPOFILE, srcdir, True)
+    repoXML(srcdir)
+
+    return changed
+
+
+def main(bin_rpm_path, delta_rpm_path):
+    assert rpmUtils.miscutils.compareEVR((1,2,3),(1,2,0)) > 0
+    assert rpmUtils.miscutils.compareEVR((0,1,2),(0,1,2)) == 0
+    assert rpmUtils.miscutils.compareEVR((1,2,3),(4,0,99)) < 0
+
+    return createPrestoRepo(bin_rpm_path, delta_rpm_path)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        print 'Usage: %s <bin_rpm_dir> <delta_rpm_dir>\n' % os.path.basename(sys.argv[0])
+        sys.exit(errno.EINVAL)
+    bin_rpm_path = sys.argv[1]
+    delta_rpm_path = sys.argv[2]
+
+    #### cfg = Utils.load_config_module(sys.argv[1])
+
+    #### Utils.signer_gid_check(cfg.signersgid)
+    #### os.umask(cfg.signersumask)
+
+    #### for dist in sys.argv[2:]:
+    ####     if not cfg.archdict.has_key(dist):
+    ####         print "No distribution release named '%s' found" % dist
+    ####         sys.exit(errno.EINVAL)
+    main(bin_rpm_path, delta_rpm_path)
+    sys.exit(0)
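createprestorepo.py keeps each package's versions newest-first by sorting on (epoch, version, release) with rpmUtils.miscutils.compareEVR, then generates a deltarpm from every older release to the newest. A minimal sketch of that ordering, using a hypothetical list of EVR tuples:

    import rpmUtils.miscutils

    # compareEVR returns > 0 when the first (epoch, version, release) tuple is
    # newer; negating it gives the newest-first order sortByEVR produces above.
    evrs = [(0, '1.0', '1'), (0, '1.0', '2'), (0, '0.9', '3')]
    evrs.sort(lambda a, b: -rpmUtils.miscutils.compareEVR(a, b))
    print evrs    # [(0, '1.0', '2'), (0, '1.0', '1'), (0, '0.9', '3')]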
diff --git a/createprestorepo/dumpMetadata.py b/createprestorepo/dumpMetadata.py
new file mode 100755
index 0000000..2f074da
--- /dev/null
+++ b/createprestorepo/dumpMetadata.py
@@ -0,0 +1,456 @@
+#!/usr/bin/python -t
+# base classes and functions for dumping out package Metadata
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2004 Duke University
+
+# $Id: dumpMetadata.py,v 1.36 2006/02/21 20:10:08 pnasrat Exp $
+
+import os
+import rpm
+import exceptions
+import md5
+import sha
+import types
+import struct
+import re
+import stat
+
+# done to fix gzip randomly changing the checksum
+import gzip
+from zlib import error as zlibError
+from gzip import write32u, FNAME
+
+__all__ = ["GzipFile","open"]
+
+class GzipFile(gzip.GzipFile):
+    def _write_gzip_header(self):
+        self.fileobj.write('\037\213')             # magic header
+        self.fileobj.write('\010')                 # compression method
+        fname = self.filename[:-3]
+        flags = 0
+        if fname:
+            flags = FNAME
+        self.fileobj.write(chr(flags))
+        write32u(self.fileobj, long(0))
+        self.fileobj.write('\002')
+        self.fileobj.write('\377')
+        if fname:
+            self.fileobj.write(fname + '\000')
+
+
+def _gzipOpen(filename, mode="rb", compresslevel=9):
+    return GzipFile(filename, mode, compresslevel)
+
+
+def returnFD(filename):
+    try:
+        fdno = os.open(filename, os.O_RDONLY)
+    except OSError:
+        raise MDError, "Error opening file"
+    return fdno
+
+def returnHdr(ts, package):
+    """hand back the rpm header or raise an Error if the pkg is fubar"""
+    opened_here = 0
+    try:
+        if type(package) is types.StringType:
+            opened_here = 1
+            fdno = os.open(package, os.O_RDONLY)
+        else:
+            fdno = package # let's assume this is an fdno and go with it :)
+    except OSError:
+        raise MDError, "Error opening file"
+    ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
+    try:
+        hdr = ts.hdrFromFdno(fdno)
+    except rpm.error:
+        raise MDError, "Error opening package"
+    if type(hdr) != rpm.hdr:
+        raise MDError, "Error opening package"
+    ts.setVSFlags(0)
+
+    if opened_here:
+        os.close(fdno)
+        del fdno
+
+    return hdr
+
+def getChecksum(sumtype, file, CHUNK=2**16):
+    """takes filename, hand back Checksum of it
+       sumtype = md5 or sha
+       filename = /path/to/file
+       CHUNK=65536 by default"""
+
+    # chunking brazenly lifted from Ryan Tomayko
+    opened_here = 0
+    try:
+        if type(file) is not types.StringType:
+            fo = file # assume it's a file-like-object
+        else:
+            opened_here = 1
+            fo = open(file, 'rb', CHUNK)
+
+        if sumtype == 'md5':
+            sum = md5.new()
+        elif sumtype == 'sha':
+            sum = sha.new()
+        else:
+            raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype
+        chunk = fo.read
+        while chunk:
+            chunk = fo.read(CHUNK)
+            sum.update(chunk)
+
+        if opened_here:
+            fo.close()
+            del fo
+
+        return sum.hexdigest()
+    except:
+        raise MDError, 'Error opening file for checksum: %s' % file
+
+
+def utf8String(string):
+    """hands back a unicoded string"""
+    if string is None:
+        return ''
+    elif isinstance(string, unicode):
+        return string
+    try:
+        x = unicode(string, 'ascii')
+        return string
+    except UnicodeError:
+        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
+        for enc in encodings:
+            try:
+                x = unicode(string, enc)
+            except UnicodeError:
+                pass
+            else:
+                if x.encode(enc) == string:
+                    return x.encode('utf-8')
+    newstring = ''
+    for char in string:
+        if ord(char) > 127:
+            newstring = newstring + '?'
+        else:
+            newstring = newstring + char
+    return newstring
+
+
+def byteranges(file):
+    """takes an rpm file or fileobject and returns byteranges for location of the header"""
+    opened_here = 0
+    if type(file) is not types.StringType:
+        fo = file
+    else:
+        opened_here = 1
+        fo = open(file, 'r')
+    #read in past lead and first 8 bytes of sig header
+    fo.seek(104)
+    # 104 bytes in
+    binindex = fo.read(4)
+    # 108 bytes in
+    (sigindex, ) = struct.unpack('>I', binindex)
+    bindata = fo.read(4)
+    # 112 bytes in
+    (sigdata, ) = struct.unpack('>I', bindata)
+    # each index is 4 32bit segments - so each is 16 bytes
+    sigindexsize = sigindex * 16
+    sigsize = sigdata + sigindexsize
+    # we have to round off to the next 8 byte boundary
+    disttoboundary = (sigsize % 8)
+    if disttoboundary != 0:
+        disttoboundary = 8 - disttoboundary
+    # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data
+    hdrstart = 112 + sigsize + disttoboundary
+
+    fo.seek(hdrstart) # go to the start of the header
+    fo.seek(8,1) # read past the magic number and reserved bytes
+
+    binindex = fo.read(4)
+    (hdrindex, ) = struct.unpack('>I', binindex)
+    bindata = fo.read(4)
+    (hdrdata, ) = struct.unpack('>I', bindata)
+
+    # each index is 4 32bit segments - so each is 16 bytes
+    hdrindexsize = hdrindex * 16
+    # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
+    # end of the sig and the header.
+    hdrsize = hdrdata + hdrindexsize + 16
+
+    # header end is hdrstart + hdrsize
+    hdrend = hdrstart + hdrsize
+    if opened_here:
+        fo.close()
+        del fo
+    return (hdrstart, hdrend)
+
+
+class MDError(exceptions.Exception):
+    def __init__(self, args=None):
+        exceptions.Exception.__init__(self)
+        self.args = args
+
+
+class RpmMetaData:
+    """each drpm is one object, you pass it an rpm file
+       it opens the file, and pulls the information out in bite-sized chunks :)
+    """
+
+    mode_cache = {}
+
+    def __init__(self, ts, basedir, filename, options):
+        try:
+            stats = os.stat(os.path.join(basedir, filename))
+            self.size = stats[6]
+            self.mtime = stats[8]
+            del stats
+        except OSError, e:
+            raise MDError, "Error Stat'ing file %s %s" % (basedir, filename)
+        self.options = options
+        self.localurl = options['baseurl']
+        self.relativepath = filename
+        fd = returnFD(os.path.join(basedir, filename))
+        self.hdr = returnHdr(ts, fd)
+        os.lseek(fd, 0, 0)
+        fo = os.fdopen(fd, 'rb')
+        self.pkgid = self.doChecksumCache(fo)
+        fo.seek(0)
+        (self.rangestart, self.rangeend) = byteranges(fo)
+        fo.close()
+        del fo
+        del fd
+
+    def arch(self):
+        if self.tagByName('sourcepackage') == 1:
+            return 'src'
+        else:
+            return self.tagByName('arch')
+
+    def _correctVersion(self, vers):
+        returnvers = []
+        vertuple = (None, None, None)
+        if vers is None:
+            returnvers.append(vertuple)
+            return returnvers
+
+        if type(vers) is not types.ListType:
+            if vers is not None:
+                vertuple = self._stringToVersion(vers)
+            else:
+                vertuple = (None, None, None)
+            returnvers.append(vertuple)
+        else:
+            for ver in vers:
+                if ver is not None:
+                    vertuple = self._stringToVersion(ver)
+                else:
+                    vertuple = (None, None, None)
+                returnvers.append(vertuple)
+        return returnvers
+
+
+    def _stringToVersion(self, strng):
+        i = strng.find(':')
+        if i != -1:
+            epoch = strng[:i]
+        else:
+            epoch = '0'
+        j = strng.find('-')
+        if j != -1:
+            if strng[i + 1:j] == '':
+                version = None
+            else:
+                version = strng[i + 1:j]
+            release = strng[j + 1:]
+        else:
+            if strng[i + 1:] == '':
+                version = None
+            else:
+                version = strng[i + 1:]
+            release = None
+        return (epoch, version, release)
+
+    ###########
+    # Title: Remove duplicates from a sequence
+    # Submitter: Tim Peters
+    # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560
+
+    def _uniq(self,s):
+        """Return a list of the elements in s, but without duplicates.
+
+        For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+        unique("abcabc") some permutation of ["a", "b", "c"], and
+        unique(([1, 2], [2, 3], [1, 2])) some permutation of
+        [[2, 3], [1, 2]].
+
+        For best speed, all sequence elements should be hashable.  Then
+        unique() will usually work in linear time.
+
+        If not possible, the sequence elements should enjoy a total
+        ordering, and if list(s).sort() doesn't raise TypeError it's
+        assumed that they do enjoy a total ordering.  Then unique() will
+        usually work in O(N*log2(N)) time.
+
+        If that's not possible either, the sequence elements must support
+        equality-testing.  Then unique() will usually work in quadratic
+        time.
+        """
+
+        n = len(s)
+        if n == 0:
+            return []
+
+        # Try using a dict first, as that's the fastest and will usually
+        # work.  If it doesn't work, it will usually fail quickly, so it
+        # usually doesn't cost much to *try* it.  It requires that all the
+        # sequence elements be hashable, and support equality comparison.
+        u = {}
+        try:
+            for x in s:
+                u[x] = 1
+        except TypeError:
+            del u  # move on to the next method
+        else:
+            return u.keys()
+
+        # We can't hash all the elements.  Second fastest is to sort,
+        # which brings the equal elements together; then duplicates are
+        # easy to weed out in a single pass.
+        # NOTE:  Python's list.sort() was designed to be efficient in the
+        # presence of many duplicate elements.  This isn't true of all
+        # sort functions in all languages or libraries, so this approach
+        # is more effective in Python than it may be elsewhere.
+        try:
+            t = list(s)
+            t.sort()
+        except TypeError:
+            del t  # move on to the next method
+        else:
+            assert n > 0
+            last = t[0]
+            lasti = i = 1
+            while i < n:
+                if t[i] != last:
+                    t[lasti] = last = t[i]
+                    lasti += 1
+                i += 1
+            return t[:lasti]
+
+        # Brute force is all that's left.
+        u = []
+        for x in s:
+            if x not in u:
+                u.append(x)
+        return u
+
+    def tagByName(self, tag):
+        data = self.hdr[tag]
+        if type(data) is types.ListType:
+            if len(data) > 0:
+                return data[0]
+            else:
+                return ''
+        else:
+            return data
+
+    def listTagByName(self, tag):
+        """take a tag that should be a list and make sure it is one"""
+        lst = []
+        data = self.hdr[tag]
+        if data is None:
+            return lst
+
+        if type(data) is types.ListType:
+            lst.extend(data)
+        else:
+            lst.append(data)
+        return lst
+
+    def epoch(self):
+        if self.hdr['epoch'] is None:
+            return 0
+        else:
+            return self.tagByName('epoch')
+
+    def doChecksumCache(self, fo):
+        """return a checksum for a package:
+           - check if the checksum cache is enabled
+             if not - return the checksum
+             if so - check to see if it has a cache file
+               if so, open it and return the first line's contents
+               if not, grab the checksum and write it to a file for this pkg
+        """
+        if not self.options['cache']:
+            return getChecksum(self.options['sumtype'], fo)
+
+        csumtag = '%s-%s' % (self.hdr['name'] , self.hdr[rpm.RPMTAG_SHA1HEADER])
+        csumfile = '%s/%s' % (self.options['cachedir'], csumtag)
+        if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[8]:
+            csumo = open(csumfile, 'r')
+            checksum = csumo.readline()
+            csumo.close()
+
+        else:
+            checksum = getChecksum(self.options['sumtype'], fo)
+            csumo = open(csumfile, 'w')
+            csumo.write(checksum)
+            csumo.close()
+
+        return checksum
+
+
+def generateXML(doc, node, formatns, drpmObj, sumtype):
+    """takes an xml doc object and a package metadata entry node, populates a
+       package node with the md information"""
+    ns = node.ns()
+    pkgNode = node.newChild(None, "package", None)
+    pkgNode.newProp('type', 'rpm')
+    pkgNode.newChild(None, 'name', drpmObj.tagByName('name'))
+    pkgNode.newChild(None, 'arch', drpmObj.arch())
+    version = pkgNode.newChild(None, 'version', None)
+    version.newProp('epoch', str(drpmObj.epoch()))
+    version.newProp('ver', str(drpmObj.tagByName('version')))
+    version.newProp('rel', str(drpmObj.tagByName('release')))
+    return pkgNode
+
+def repoXML(node, cmds):
+    """generate the repomd.xml file that stores the info on the other files"""
+    sumtype = cmds['sumtype']
+    workfiles = [(cmds['prestofile'], 'deltas')]
+
+    for (file, ftype) in workfiles:
+        zfo = _gzipOpen(os.path.join(cmds['outputdir'], cmds['tempdir'], file))
+        uncsum = getChecksum(sumtype, zfo)
+        zfo.close()
+        csum = getChecksum(sumtype, os.path.join(cmds['outputdir'], cmds['tempdir'], file))
+        timestamp = os.stat(os.path.join(cmds['outputdir'], cmds['tempdir'], file))[8]
+        data = node.newChild(None, 'data', None)
+        data.newProp('type', ftype)
+        location = data.newChild(None, 'location', None)
+        if cmds['baseurl'] is not None:
+            location.newProp('xml:base', cmds['baseurl'])
+        location.newProp('href', os.path.join(cmds['finaldir'], file))
+        checksum = data.newChild(None, 'checksum', csum)
+        checksum.newProp('type', sumtype)
+        timestamp = data.newChild(None, 'timestamp', str(timestamp))
+        unchecksum = data.newChild(None, 'open-checksum', uncsum)
+        unchecksum.newProp('type', sumtype)
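dumpMetadata.py above is a trimmed-down copy of createrepo's module of the same name. The two helpers the presto scripts lean on are getChecksum(), which accepts either a filename or a file-like object, and byteranges(), which parses the RPM lead and signature header to locate the byte offsets of the main header. A minimal sketch of calling them, with a hypothetical package path:

    import dumpMetadata

    pkg = '/tmp/foo-1.0-2.i386.rpm'                     # hypothetical path
    csum = dumpMetadata.getChecksum('sha', pkg)         # 'sha' is the sumtype these scripts use
    (hdrstart, hdrend) = dumpMetadata.byteranges(pkg)   # byte offsets of the rpm header
    print csum, hdrstart, hdrend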
diff --git a/createprestorepo/genprestometadata.py b/createprestorepo/genprestometadata.py
new file mode 100644
index 0000000..402af25
--- /dev/null
+++ b/createprestorepo/genprestometadata.py
@@ -0,0 +1,496 @@
+#!/usr/bin/python -t
+# primary functions and glue for generating the repository metadata
+#
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2004 Duke University
+# Copyright 2007 Jonathan Dieter
+
+
+import os
+import sys
+import getopt
+import rpm
+import libxml2
+import string
+import fnmatch
+import urlgrabber
+
+import dumpMetadata
+from dumpMetadata import _gzipOpen
+__version__ = '0.4.3'
+
+def errorprint(stuff):
+    print >> sys.stderr, stuff
+
+def _(args):
+    """Stub function for translation"""
+    return args
+
+def usage(retval=1):
+    print _("""
+    genprestometadata.py [options] directory-of-packages
+
+    Options:
+     -u, --baseurl <url> = optional base url location for all files
+     -o, --outputdir <dir> = optional directory to output to
+     -x, --exclude = file globs to exclude, can be specified multiple times
+     -q, --quiet = run quietly
+     -v, --verbose = run verbosely
+     -c, --cachedir <dir> = specify which dir to use for the checksum cache
+     -h, --help = show this help
+     -V, --version = output version
+     -p, --pretty = output xml files in pretty format.
+    """)
+
+    sys.exit(retval)
+
+class MetaDataGenerator:
+    def __init__(self, cmds):
+        self.cmds = cmds
+        self.ts = rpm.TransactionSet()
+        self.pkgcount = 0
+        self.files = []
+
+    def getFileList(self, basepath, path, ext, filelist):
+        """Return all files in path matching ext, store them in filelist,
+           recurse dirs. Returns a list object"""
+
+        extlen = len(ext)
+        totalpath = os.path.normpath(os.path.join(basepath, path))
+        try:
+            dir_list = os.listdir(totalpath)
+        except OSError, e:
+            errorprint(_('Error accessing directory %s, %s') % (totalpath, e))
+            sys.exit(1)
+
+        for d in dir_list:
+            if os.path.isdir(totalpath + '/' + d):
+                filelist = self.getFileList(basepath, os.path.join(path, d), ext, filelist)
+            else:
+                if string.lower(d[-extlen:]) == '%s' % (ext):
+                    if totalpath.find(basepath) == 0:
+                        relativepath = totalpath.replace(basepath, "", 1)
+                        relativepath = relativepath.lstrip("/")
+                        filelist.append(os.path.join(relativepath, d))
+                    else:
+                        raise dumpMetadata.MDError, "basepath '%s' not found in path '%s'" % (basepath, totalpath)
+
+        return filelist
+
+
+    def trimRpms(self, files):
+        badrpms = []
+        for file in files:
+            for glob in self.cmds['excludes']:
+                if fnmatch.fnmatch(file, glob):
+                    # print 'excluded: %s' % file
+                    if file not in badrpms:
+                        badrpms.append(file)
+        for file in badrpms:
+            if file in files:
+                files.remove(file)
+        return files
+
+    def doPkgMetadata(self, directory):
+        """all the heavy lifting for the package metadata"""
+
+        # deltarpms we're going to be dealing with
+        files = self.getFileList(self.cmds['basedir'], directory, '.drpm', [])
+        files = self.trimRpms(files)
+        self.pkgcount = len(files)
+        self.openMetadataDocs()
+        self.writeMetadataDocs(files)
+        self.closeMetadataDocs()
+
+
+    def openMetadataDocs(self):
+        self._setupPresto()
+
+    def _setupPresto(self):
+        # setup the base metadata doc
+        self.prestodoc = libxml2.newDoc("1.0")
+        self.prestoroot = self.prestodoc.newChild(None, "metadata", None)
+        basens = self.prestoroot.newNs('http://linux.duke.edu/metadata/common', None)
+        self.formatns = self.prestoroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
+        self.prestoroot.setNs(basens)
+        prestofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestofile'])
+        self.prestofile = _gzipOpen(prestofilepath, 'w')
+        self.prestofile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        self.prestofile.write('<metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">\n' %
+                              self.pkgcount)
+
+
+    def writeMetadataDocs(self, files, current=0):
+        for file in files:
+            current+=1
+            try:
+                mdobj = dumpMetadata.RpmMetaData(self.ts, self.cmds['basedir'], file, self.cmds)
+                if not self.cmds['quiet']:
+                    if self.cmds['verbose']:
+                        print '%d/%d - %s' % (current, len(files), file)
+                    else:
+                        sys.stdout.write('\r' + ' ' * 80)
+                        sys.stdout.write("\r%d/%d - %s" % (current, self.pkgcount, file))
+                        sys.stdout.flush()
+            except dumpMetadata.MDError, e:
+                errorprint('\n%s - %s' % (e, file))
+                continue
+            else:
+                try:
+                    node = dumpMetadata.generateXML(self.prestodoc, self.prestoroot, self.formatns, mdobj, self.cmds['sumtype'])
+                except dumpMetadata.MDError, e:
+                    errorprint(_('\nAn error occurred creating presto metadata: %s') % e)
+                    continue
+                else:
+                    output = node.serialize('UTF-8', self.cmds['pretty'])
+                    self.prestofile.write(output)
+                    self.prestofile.write('\n')
+                    node.unlinkNode()
+                    node.freeNode()
+                    del node
+
+        return current
+
+
+    def closeMetadataDocs(self):
+        if not self.cmds['quiet']:
+            print ''
+
+        # save them up to the tmp locations:
+        if not self.cmds['quiet']:
+            print _('Saving Presto metadata')
+        self.prestofile.write('\n</metadata>')
+        self.prestofile.close()
+        self.prestodoc.freeDoc()
+
+    def doRepoMetadata(self):
+        """wrapper to generate the prestomd.xml file that stores the info on the other files"""
+        repodoc = libxml2.newDoc("1.0")
+        reporoot = repodoc.newChild(None, "repomd", None)
+        repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None)
+        reporoot.setNs(repons)
+        repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestomdfile'])
+
+        try:
+            dumpMetadata.repoXML(reporoot, self.cmds)
+        except dumpMetadata.MDError, e:
+            errorprint(_('Error generating repo xml file: %s') % e)
+            sys.exit(1)
+
+        try:
+            repodoc.saveFormatFileEnc(repofilepath, 'UTF-8', 1)
+        except:
+            errorprint(_('Error saving temp file for repo xml: %s') % repofilepath)
+            sys.exit(1)
+
+        del repodoc
+
+class SplitMetaDataGenerator(MetaDataGenerator):
+
+    def __init__(self, cmds):
+        MetaDataGenerator.__init__(self, cmds)
+        self.initialdir = self.cmds['basedir']
+
+    def _getFragmentUrl(self, url, fragment):
+        import urlparse
+        urlparse.uses_fragment.append('media')
+        if not url:
+            return url
+        (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
+        return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))
+
+    def doPkgMetadata(self, directories):
+        """all the heavy lifting for the package metadata"""
+        import types
+        if type(directories) == types.StringType:
+            MetaDataGenerator.doPkgMetadata(self, directories)
+            return
+        filematrix = {}
+        for mydir in directories:
+            filematrix[mydir] = self.getFileList(os.path.join(self.initialdir, mydir), '.', '.rpm', [])
+            self.trimRpms(filematrix[mydir])
+            self.pkgcount += len(filematrix[mydir])
+
+        mediano = 1
+        current = 0
+        self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
+        self.cmds['basedir'] = os.path.join(self.initialdir, directories[0])
+        self.openMetadataDocs()
+        for mydir in directories:
+            self.cmds['basedir'] = os.path.join(self.initialdir, mydir)
+            self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
+            current = self.writeMetadataDocs(filematrix[mydir], current)
+            mediano += 1
+        self.cmds['basedir'] = os.path.join(self.initialdir, directories[0])
+        self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], 1)
+        self.closeMetadataDocs()
+
+
+def checkAndMakeDir(dir):
+    """
+       check out the dir and make it, if possible, return 1 if done, else return 0
+    """
+    if os.path.exists(dir):
+        if not os.path.isdir(dir):
+            errorprint(_('%s is not a dir') % dir)
+            result = False
+        else:
+            if not os.access(dir, os.W_OK):
+                errorprint(_('%s is not writable') % dir)
+                result = False
+            else:
+                result = True
+    else:
+        try:
+            os.mkdir(dir)
+        except OSError, e:
+            errorprint(_('Error creating dir %s: %s') % (dir, e))
+            result = False
+        else:
+            result = True
+    return result
+
+def parseArgs(args):
+    """
+       Parse the command line args. Return a commands dict and directory.
+       Sanity check all the things being passed in.
+    """
+    cmds = {}
+    cmds['quiet'] = 0
+    cmds['verbose'] = 0
+    cmds['excludes'] = []
+    cmds['baseurl'] = None
+    cmds['groupfile'] = None
+    cmds['sumtype'] = 'sha'
+    cmds['pretty'] = 0
+    cmds['cachedir'] = None
+    cmds['basedir'] = os.getcwd()
+    cmds['cache'] = False
+    cmds['split'] = False
+    cmds['outputdir'] = ""
+    cmds['update-info-dir'] = ""
+    cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
+    cmds['dir-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*']
+
+    try:
+        gopts, argsleft = getopt.getopt(args, 'phqVvs:x:u:c:o:', ['help', 'exclude=',
+                                                                  'quiet', 'verbose', 'cachedir=', 'basedir=',
+                                                                  'baseurl=', 'checksum=',
+                                                                  'version', 'pretty', 'split', 'outputdir='])
+    except getopt.error, e:
+        errorprint(_('Options Error: %s.') % e)
+        usage()
+
+    try:
+        for arg,a in gopts:
+            if arg in ['-h','--help']:
+                usage(retval=0)
+            elif arg in ['-V', '--version']:
+                print '%s' % __version__
+                sys.exit(0)
+            elif arg == '--split':
+                cmds['split'] = True
+    except ValueError, e:
+        errorprint(_('Options Error: %s') % e)
+        usage()
+
+
+    # make sure our dir makes sense before we continue
+    if len(argsleft) > 1 and not cmds['split']:
+        errorprint(_('Error: Only one directory allowed per run.'))
+        usage()
+    elif len(argsleft) == 0:
+        errorprint(_('Error: Must specify a directory to index.'))
+        usage()
+    else:
+        directories = argsleft
+
+    try:
+        for arg,a in gopts:
+            if arg in ['-v', '--verbose']:
+                cmds['verbose'] = 1
+            elif arg in ["-q", '--quiet']:
+                cmds['quiet'] = 1
+            elif arg in ['-u', '--baseurl']:
+                if cmds['baseurl'] is not None:
+                    errorprint(_('Error: Only one baseurl allowed.'))
+                    usage()
+                else:
+                    cmds['baseurl'] = a
+            elif arg in ['-x', '--exclude']:
+                cmds['excludes'].append(a)
+            elif arg in ['-p', '--pretty']:
+                cmds['pretty'] = 1
+            elif arg in ['-c', '--cachedir']:
+                cmds['cache'] = True
+                cmds['cachedir'] = a
+            elif arg == '--basedir':
+                cmds['basedir'] = a
+            elif arg in ['-o','--outputdir']:
+                cmds['outputdir'] = a
+
+    except ValueError, e:
+        errorprint(_('Options Error: %s') % e)
+        usage()
+
+    directory = directories[0]
+    # Fix paths
+    directory = os.path.normpath(directory)
+    if cmds['split']:
+        pass
+    elif os.path.isabs(directory):
+        cmds['basedir'] = directory
+        directory = '.'
+    else:
+        cmds['basedir'] = os.path.realpath(os.path.join(cmds['basedir'], directory))
+        directory = '.'
+    if not cmds['outputdir']:
+        cmds['outputdir'] = cmds['basedir']
+    if cmds['groupfile']:
+        a = cmds['groupfile']
+        if cmds['split']:
+            a = os.path.join(cmds['basedir'], directory, cmds['groupfile'])
+        elif not os.path.isabs(a):
+            a = os.path.join(cmds['basedir'], cmds['groupfile'])
+        if not os.path.exists(a):
+            errorprint(_('Error: groupfile %s cannot be found.' % a))
+            usage()
+        cmds['groupfile'] = a
+    if cmds['cachedir']:
+        a = cmds['cachedir']
+        if not os.path.isabs(a):
+            a = os.path.join(cmds['basedir'], a)
+        if not checkAndMakeDir(a):
+            errorprint(_('Error: cannot open/write to cache dir %s' % a))
+            usage()
+        cmds['cachedir'] = a
+
+    # setup some defaults
+    cmds['prestofile'] = 'presto.xml.gz'
+    cmds['prestomdfile'] = 'prestomd.xml'
+    cmds['tempdir'] = '.repodata'
+    cmds['finaldir'] = 'repodata'
+    cmds['olddir'] = '.olddata'
+
+    # Fixup first directory
+    directories[0] = directory
+    return cmds, directories
+
+def main(args):
+    cmds, directories = parseArgs(args)
+    directory = directories[0]
+    # start the sanity/stupidity checks
+    if not os.path.exists(os.path.join(cmds['basedir'], directory)):
+        errorprint(_('Directory must exist'))
+        sys.exit(1)
+
+    if not os.path.isdir(os.path.join(cmds['basedir'], directory)):
+        errorprint(_('Directory of packages must be a directory.'))
+        sys.exit(1)
+
+    if not os.access(cmds['outputdir'], os.W_OK):
+        errorprint(_('Directory must be writable.'))
+        sys.exit(1)
+
+    if cmds['split']:
+        oldbase = cmds['basedir']
+        cmds['basedir'] = os.path.join(cmds['basedir'], directory)
+    if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['tempdir'])):
+        sys.exit(1)
+
+    if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['finaldir'])):
+        sys.exit(1)
+
+    if os.path.exists(os.path.join(cmds['outputdir'], cmds['olddir'])):
+        errorprint(_('Old data directory exists, please remove: %s') % cmds['olddir'])
+        sys.exit(1)
+
+    # make sure we can write to where we want to write to:
+    for direc in ['tempdir', 'finaldir']:
+        for file in ['prestofile', 'prestomdfile']:
+            filepath = os.path.join(cmds['outputdir'], cmds[direc], cmds[file])
+            if os.path.exists(filepath):
+                if not os.access(filepath, os.W_OK):
+                    errorprint(_('error: must be able to write to metadata files:\n  -> %s') % filepath)
+                    usage()
+
+    if cmds['split']:
+        cmds['basedir'] = oldbase
+        mdgen = SplitMetaDataGenerator(cmds)
+        mdgen.doPkgMetadata(directories)
+    else:
+        mdgen = MetaDataGenerator(cmds)
+        mdgen.doPkgMetadata(directory)
+    mdgen.doRepoMetadata()
+
+    if os.path.exists(os.path.join(cmds['outputdir'], cmds['finaldir'])):
+        try:
+            os.rename(os.path.join(cmds['outputdir'], cmds['finaldir']),
+                      os.path.join(cmds['outputdir'], cmds['olddir']))
+        except:
+            errorprint(_('Error moving final %s to old dir %s' % (os.path.join(cmds['outputdir'], cmds['finaldir']),
+                                                                  os.path.join(cmds['outputdir'], cmds['olddir']))))
+            sys.exit(1)
+
+    try:
+        os.rename(os.path.join(cmds['outputdir'], cmds['tempdir']),
+                  os.path.join(cmds['outputdir'], cmds['finaldir']))
+    except:
+        errorprint(_('Error moving final metadata into place'))
+        # put the old stuff back
+        os.rename(os.path.join(cmds['outputdir'], cmds['olddir']),
+                  os.path.join(cmds['outputdir'], cmds['finaldir']))
+        sys.exit(1)
+
+    for file in ['prestofile', 'prestomdfile']:
+        if cmds[file]:
+            fn = os.path.basename(cmds[file])
+        else:
+            continue
+        oldfile = os.path.join(cmds['outputdir'], cmds['olddir'], fn)
+        if os.path.exists(oldfile):
+            try:
+                os.remove(oldfile)
+            except OSError, e:
+                errorprint(_('Could not remove old metadata file: %s') % oldfile)
+                errorprint(_('Error was %s') % e)
+                sys.exit(1)
+
+    # Clean up any update metadata
+    if cmds['update-info-dir']:
+        mdpath = os.path.join(cmds['basedir'], cmds['olddir'], cmds['update-info-dir'])
+        if os.path.isdir(mdpath):
+            for file in os.listdir(mdpath):
+                os.remove(os.path.join(mdpath, file))
+            os.rmdir(mdpath)
+
+
+    #XXX: fix to remove tree as we mung basedir
+    try:
+        os.rmdir(os.path.join(cmds['outputdir'], cmds['olddir']))
+    except OSError, e:
+        errorprint(_('Could not remove old metadata dir: %s') % cmds['olddir'])
+        errorprint(_('Error was %s') % e)
+        errorprint(_('Please clean up this directory manually.'))
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        if sys.argv[1] == 'profile':
+            import hotshot
+            p = hotshot.Profile(os.path.expanduser("~/createprestorepo.prof"))
+            p.run('main(sys.argv[2:])')
+            p.close()
+        else:
+            main(sys.argv[1:])
+    else:
+        main(sys.argv[1:])
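genprestometadata.py regenerates the presto metadata from an existing tree of deltarpms, independently of createprestorepo.py. A minimal sketch of driving it from Python rather than the shell, assuming a hypothetical directory /srv/repo/drpms containing *.drpm files:

    import genprestometadata

    # Equivalent to: python genprestometadata.py --quiet /srv/repo/drpms
    # Builds presto.xml.gz in a temporary .repodata directory, then renames
    # that directory to repodata/ alongside prestomd.xml.
    genprestometadata.main(['--quiet', '/srv/repo/drpms'])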