From ec53c5c2b2f6ea31b22c47e4ae8f71c4eff2cd37 Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Wed, 11 Jul 2007 18:37:03 +0300 Subject: Complete rewrite of yum-presto. New xml format. Signed-off-by: Jonathan Dieter --- presto-utils/ChangeLog | 4 + presto-utils/Makefile | 20 ++- presto-utils/README | 17 ++- presto-utils/createdeltarpms | 2 +- presto-utils/createprestorepo | 2 - presto-utils/createprestorepo-0.2 | 2 + presto-utils/createprestorepo-0.3 | 2 + presto-utils/doprunedrpms.py | 110 ++++++++++++++ presto-utils/dumpMetadata.py | 1 - presto-utils/genpresto.py | 313 ++++++++++++++++++++++++++++++++++++++ presto-utils/prunedrpms | 2 + presto-utils/relaxng/presto.rnc | 20 +++ 12 files changed, 478 insertions(+), 17 deletions(-) delete mode 100755 presto-utils/createprestorepo create mode 100755 presto-utils/createprestorepo-0.2 create mode 100755 presto-utils/createprestorepo-0.3 create mode 100755 presto-utils/doprunedrpms.py create mode 100755 presto-utils/genpresto.py create mode 100755 presto-utils/prunedrpms create mode 100644 presto-utils/relaxng/presto.rnc (limited to 'presto-utils') diff --git a/presto-utils/ChangeLog b/presto-utils/ChangeLog index 1640457..8bc2ac3 100644 --- a/presto-utils/ChangeLog +++ b/presto-utils/ChangeLog @@ -1,3 +1,7 @@ +* Wed Jul 11 2007 Jonathan Dieter - 0.3.0 + - New XML format by Jeremy Katz + - Added prunedrpm script + * Tue Jun 19 2007 Jonathan Dieter - 0.2.0 - Now works with createrepo and modifyrepo - Many bugfixes diff --git a/presto-utils/Makefile b/presto-utils/Makefile index 9e8c6eb..643539c 100644 --- a/presto-utils/Makefile +++ b/presto-utils/Makefile @@ -2,10 +2,18 @@ clean: rm -f *.pyc *.pyo *~ install: - install -m 755 createprestorepo $(DESTDIR)/usr/bin/ + mkdir -p $(DESTDIR)/usr/bin + install -m 755 createprestorepo-0.2 $(DESTDIR)/usr/bin/ + install -m 755 createprestorepo-0.3 $(DESTDIR)/usr/bin/ + ln -s createprestorepo-0.3 $(DESTDIR)/usr/bin/createprestorepo install -m 755 createdeltarpms $(DESTDIR)/usr/bin/ - mkdir -p $(DESTDIR)/usr/share/createprestorepo - install -m 755 genprestometadata.py $(DESTDIR)/usr/share/createprestorepo - install -m 644 dumpMetadata.py $(DESTDIR)/usr/share/createprestorepo - install -m 755 gendeltarpms.py $(DESTDIR)/usr/share/createprestorepo - install -m 644 packagelist.py $(DESTDIR)/usr/share/createprestorepo + install -m 755 prunedrpms $(DESTDIR)/usr/bin/ + mkdir -p $(DESTDIR)/usr/share/presto-utils + install -m 755 genprestometadata.py $(DESTDIR)/usr/share/presto-utils/ + install -m 755 genpresto.py $(DESTDIR)/usr/share/presto-utils/ + install -m 644 dumpMetadata.py $(DESTDIR)/usr/share/presto-utils/ + install -m 755 gendeltarpms.py $(DESTDIR)/usr/share/presto-utils/ + install -m 644 packagelist.py $(DESTDIR)/usr/share/presto-utils/ + install -m 755 doprunedrpms.py $(DESTDIR)/usr/share/presto-utils/ + mkdir -p $(DESTDIR)/usr/share/presto-utils/relaxng + install -m 644 relaxng/presto.rnc $(DESTDIR)/usr/share/presto-utils/relaxng/ diff --git a/presto-utils/README b/presto-utils/README index a81187a..ddb3faa 100644 --- a/presto-utils/README +++ b/presto-utils/README @@ -2,21 +2,24 @@ Presto: A project to add delta rpm support into yum for Fedora users http://hosted.fedoraproject.org/projects/presto. A list of presto-enabled repositories is available there. -createprestorepo: The presto repository creater +presto-utils: Tools for working on presto repositories Installation: ============= 1- Untar the package 2- Run 'make install' -Running: +To create a presto repository: 1- First run 'createdeltarpms ' where is the base directory for your repository and is the subdirectory you want to create the deltarpms into 2- Run 'createprestorepo ' where is the base directory for - your repository. + your repository. Run 'createprestorepo -m ' instead if you want to + merge the presto information into the normal repository information. + +To remove old deltarpms: +1- First remove old rpms from your repository using the tools available in + yum-utils +2- Run 'prunedrpms ' where is the base directory for your + repository. -WARNING: createprestorepo does *NOT* yet know how to deal with the metadata - created by createrepo. You will have to manually move metadata from - .olddata to repodata after running both createprestorepo and - createrepo. diff --git a/presto-utils/createdeltarpms b/presto-utils/createdeltarpms index 99bedcc..18cee00 100755 --- a/presto-utils/createdeltarpms +++ b/presto-utils/createdeltarpms @@ -1,2 +1,2 @@ #!/bin/sh -exec /usr/share/createprestorepo/gendeltarpms.py "$@" +exec /usr/share/presto-utils/gendeltarpms.py "$@" diff --git a/presto-utils/createprestorepo b/presto-utils/createprestorepo deleted file mode 100755 index 5334a80..0000000 --- a/presto-utils/createprestorepo +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -exec /usr/share/createprestorepo/genprestometadata.py "$@" diff --git a/presto-utils/createprestorepo-0.2 b/presto-utils/createprestorepo-0.2 new file mode 100755 index 0000000..ed25c95 --- /dev/null +++ b/presto-utils/createprestorepo-0.2 @@ -0,0 +1,2 @@ +#!/bin/sh +exec /usr/share/presto-utils/genprestometadata.py "$@" diff --git a/presto-utils/createprestorepo-0.3 b/presto-utils/createprestorepo-0.3 new file mode 100755 index 0000000..a8f5200 --- /dev/null +++ b/presto-utils/createprestorepo-0.3 @@ -0,0 +1,2 @@ +#!/bin/sh +exec /usr/share/presto-utils/genpresto.py "$@" diff --git a/presto-utils/doprunedrpms.py b/presto-utils/doprunedrpms.py new file mode 100755 index 0000000..629970a --- /dev/null +++ b/presto-utils/doprunedrpms.py @@ -0,0 +1,110 @@ +#!/usr/bin/python -t +# -*- mode: Python; indent-tabs-mode: nil; -*- +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import errno, os, sys +import fnmatch, re +import rpmUtils.transaction, rpmUtils.miscutils +import string + +DEBUG = False + +def getFileList(path, ext, filelist=[]): + """Return all files in path matching ext, store them in filelist, + recurse dirs. Returns a list object""" + + extlen = len(ext) + totalpath = os.path.normpath(path) + try: + dir_list = os.listdir(totalpath) + except OSError, e: + errorprint(_('Error accessing directory %s, %s') % (totalpath, e)) + sys.exit(1) + + for d in dir_list: + if os.path.isdir(totalpath + '/' + d): + filelist = getFileList(os.path.join(totalpath, d), ext, filelist) + else: + if string.lower(d[-extlen:]) == '%s' % (ext): + filelist.append(os.path.join(totalpath, d)) + + return filelist + +def pruneRepo(srcdir): + ts = rpmUtils.transaction.initReadOnlyTransaction() + changed = False + + # Create list of src.rpm files. + # We don't use "glob", so sub-directories are supported. + print 'Expiring :', srcdir + + rpmfiles = getFileList(srcdir, ".rpm") + foundrpms = {} + for f in rpmfiles: + try: + hdr = rpmUtils.miscutils.hdrFromPackage(ts,f) + except: + print "Unable to open %s" % f + else: + n = hdr['name'] + v = hdr['version'] + r = hdr['release'] + e = hdr['epoch'] + a = hdr['arch'] + if e is None: + e = 0 + foundrpms.setdefault((n,e,v,r,a), True) + + drpmfiles = getFileList(srcdir, ".drpm") + + rm_files = [] + for f in drpmfiles: + try: + hdr = rpmUtils.miscutils.hdrFromPackage(ts,f) + except: + print "Unable to open %s" % f + else: + n = hdr['name'] + v = hdr['version'] + r = hdr['release'] + e = hdr['epoch'] + a = hdr['arch'] + if e is None: + e = 0 + if not foundrpms.has_key((n,e,v,r,a)): + rm_files.append(f) + + for f in rm_files: + print ' Removing', os.path.basename(f) + if not DEBUG: + os.remove(f) + + +def main(srcdir): + assert rpmUtils.miscutils.compareEVR((1,2,3),(1,2,0)) > 0 + assert rpmUtils.miscutils.compareEVR((0,1,2),(0,1,2)) == 0 + assert rpmUtils.miscutils.compareEVR((1,2,3),(4,0,99)) < 0 + + return pruneRepo(srcdir) + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print 'Usage: %s \n' % os.path.basename(sys.argv[0]) + sys.exit(errno.EINVAL) + + main(sys.argv[1]) + sys.exit(0) diff --git a/presto-utils/dumpMetadata.py b/presto-utils/dumpMetadata.py index 0ec2c20..61b30d6 100644 --- a/presto-utils/dumpMetadata.py +++ b/presto-utils/dumpMetadata.py @@ -1,4 +1,3 @@ -#!/usr/bin/python -t # base classes and functions for dumping out package Metadata # # This program is free software; you can redistribute it and/or modify diff --git a/presto-utils/genpresto.py b/presto-utils/genpresto.py new file mode 100755 index 0000000..24330a1 --- /dev/null +++ b/presto-utils/genpresto.py @@ -0,0 +1,313 @@ +#!/usr/bin/python +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# Copyright 2007 Red Hat, Inc. -- Jeremy Katz +# Based on genprestometadata.py which was based on genmetadata.py +# Copyright 2007 Jonathan Dieter, Copyright 2004 Duke University + +import os, sys, string +import optparse +import gzip +import rpm +import types +import sha +import struct +import libxml2 + +__version__ = '0.3.0' + +class MDError(Exception): + pass + +def getFileList(directory, ext): + extlen = len(ext) + + def extension_visitor(arg, dirname, names): + for fn in names: + if os.path.isdir(fn): + continue + elif string.lower(fn[-extlen:]) == '%s' % (ext): + reldir = os.path.basename(dirname) + if reldir == os.path.basename(directory): + reldir = "" + arg.append(os.path.join(reldir,fn)) + + rpmlist = [] + startdir = os.path.join(directory) + os.path.walk(startdir, extension_visitor, rpmlist) + return rpmlist + +def generateXML(doc, node, drpmObj, sumtype, pkgDeltas): + """takes an xml doc object and a package metadata entry node, populates a + package node with the md information""" + name = drpmObj.tagByName('name') + arch = drpmObj.tagByName('arch') + epoch = str(drpmObj.epoch()) + ver = str(drpmObj.tagByName('version')) + rel = str(drpmObj.tagByName('release')) + if not pkgDeltas.has_key('%s-%s:%s-%s.%s' % (name, epoch, ver, rel, arch)): + pkgNode = node.newChild(None, "newpackage", None) + pkgNode.newProp('name', name) + pkgNode.newProp('epoch', epoch) + pkgNode.newProp('version', ver) + pkgNode.newProp('release', rel) + pkgNode.newProp('arch', arch) + pkgDeltas['%s-%s:%s-%s.%s' % (name, epoch, ver, rel, arch)] = pkgNode + else: + pkgNode = pkgDeltas['%s-%s:%s-%s.%s' % (name, epoch, ver, rel, arch)] + delta = pkgNode.newChild(None, "delta", None) + delta.newChild(None, 'filename', drpmObj.relativepath) + delta.newChild(None, 'sequence', "%s-%s" %(drpmObj.oldnevrstring, drpmObj.sequence)) + delta.newChild(None, 'size', str(drpmObj.size)) + sum = delta.newChild(None, 'checksum', drpmObj.pkgid) + sum.newProp('type', 'sha') + (oldname, oldepoch, oldver, oldrel) = drpmObj.oldnevr + delta.newProp('oldepoch', oldepoch) + delta.newProp('oldversion', oldver) + delta.newProp('oldrelease', oldrel) + + +def byteranges(file): + """takes an rpm file or fileobject and returns byteranges for location of the header""" + opened_here = 0 + if type(file) is not types.StringType: + fo = file + else: + opened_here = 1 + fo = open(file, 'r') + #read in past lead and first 8 bytes of sig header + fo.seek(104) + # 104 bytes in + binindex = fo.read(4) + # 108 bytes in + (sigindex, ) = struct.unpack('>I', binindex) + bindata = fo.read(4) + # 112 bytes in + (sigdata, ) = struct.unpack('>I', bindata) + # each index is 4 32bit segments - so each is 16 bytes + sigindexsize = sigindex * 16 + sigsize = sigdata + sigindexsize + # we have to round off to the next 8 byte boundary + disttoboundary = (sigsize % 8) + if disttoboundary != 0: + disttoboundary = 8 - disttoboundary + # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data + hdrstart = 112 + sigsize + disttoboundary + + fo.seek(hdrstart) # go to the start of the header + fo.seek(8,1) # read past the magic number and reserved bytes + + binindex = fo.read(4) + (hdrindex, ) = struct.unpack('>I', binindex) + bindata = fo.read(4) + (hdrdata, ) = struct.unpack('>I', bindata) + + # each index is 4 32bit segments - so each is 16 bytes + hdrindexsize = hdrindex * 16 + # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the + # end of the sig and the header. + hdrsize = hdrdata + hdrindexsize + 16 + + # header end is hdrstart + hdrsize + hdrend = hdrstart + hdrsize + if opened_here: + fo.close() + del fo + return (hdrstart, hdrend) + +class DrpmMetaData: + """each drpm is one object, you pass it an rpm file + it opens the file, and pulls the information out in bite-sized chunks :) + """ + + mode_cache = {} + + def __init__(self, ts, basedir, filename): + try: + stats = os.stat(os.path.join(basedir, filename)) + self.size = stats[6] + self.mtime = stats[8] + del stats + except OSError, e: + raise MDError, "Error Stat'ing file %s %s" % (basedir, filename) + self.relativepath = filename + fd = os.open(os.path.join(basedir, filename), os.O_RDONLY) + self.hdr = ts.hdrFromFdno(fd) + os.lseek(fd, 0, 0) + fo = os.fdopen(fd, 'rb') + self.pkgid = self.getChecksum("sha", fo) + fo.seek(0) + (start, end) = byteranges(fo) + fo.seek(end) + self._getOldInfo(fo) + del fo + del fd + + def arch(self): + if self.tagByName('sourcepackage') == 1: + return 'src' + else: + return self.tagByName('arch') + + def _stringToNEVR(self, string): + i = string.rfind("-", 0, string.rfind("-")-1) + name = string[:i] + (epoch, ver, rel) = self._stringToVersion(string[i+1:]) + return (name, epoch, ver, rel) + + def _getLength(self, in_data): + length = 0 + for val in in_data: + length = length * 256 + length += ord(val) + return length + + def _getOldInfo(self, fo): + try: + compobj = gzip.GzipFile("", "rb", 9, fo) + except: + raise zlibError("Data not stored in gzip format") + + if compobj.read(4)[:3] != "DLT": + raise Exception("Not a deltarpm") + + nevr_length = self._getLength(compobj.read(4)) + nevr = compobj.read(nevr_length).strip("\x00") + seq_length = self._getLength(compobj.read(4)) + seq = compobj.read(seq_length) + hex_seq = "" + for char in seq: + hex_seq += str("%02x" % ord(char)) + self.oldnevrstring = nevr + self.oldnevr = self._stringToNEVR(nevr) + self.sequence = hex_seq + compobj.close() + + def _stringToVersion(self, strng): + i = strng.find(':') + if i != -1: + epoch = strng[:i] + else: + epoch = '0' + j = strng.find('-') + if j != -1: + if strng[i + 1:j] == '': + version = None + else: + version = strng[i + 1:j] + release = strng[j + 1:] + else: + if strng[i + 1:] == '': + version = None + else: + version = strng[i + 1:] + release = None + return (epoch, version, release) + + def tagByName(self, tag): + data = self.hdr[tag] + if type(data) is types.ListType: + if len(data) > 0: + return data[0] + else: + return '' + else: + return data + + def epoch(self): + if self.hdr['epoch'] is None: + return 0 + else: + return self.tagByName('epoch') + + def getChecksum(self, sumtype, file, CHUNK=2**16): + """takes filename, hand back Checksum of it + sumtype = md5 or sha + filename = /path/to/file + CHUNK=65536 by default""" + + # chunking brazenly lifted from Ryan Tomayko + opened_here = 0 + try: + if type(file) is not types.StringType: + fo = file # assume it's a file-like-object + else: + opened_here = 1 + fo = open(file, 'rb', CHUNK) + + if sumtype == 'md5': + sum = md5.new() + elif sumtype == 'sha': + sum = sha.new() + else: + raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype + chunk = fo.read + while chunk: + chunk = fo.read(CHUNK) + sum.update(chunk) + + if opened_here: + fo.close() + del fo + + return sum.hexdigest() + except Exception, e: + print e + raise MDError, 'Error opening file for checksum: %s' % file + +def writePrestoData(deltadir, outputdir): + files = getFileList(deltadir, ".drpm") + + doc = libxml2.newDoc("1.0") + root = doc.newChild(None, "prestodeltas", None) + + deltas = {} + ts = rpm.TransactionSet() + ts.setVSFlags(-1) + for f in files: + drpmobj = DrpmMetaData(ts, deltadir, f) + generateXML(doc, root, drpmobj, "sha", deltas) + + prestofile = open("%s/prestodelta.xml" %(outputdir,), "w") + prestofile.write('\n') + prestofile.write(root.serialize("UTF-8", True)) + prestofile.close() + +def usage(): + print >> sys.stderr, "Usage: %s " %(sys.argv[0]) + +def main(args): + if len(args) == 0: + usage() + sys.exit(1) + + deltadir = args[0] + if len(args) > 1: + outputdir = args[1] + else: + outputdir = "%s/repodata" %(deltadir,) + + if not os.path.isdir(deltadir): + print >> sys.stderr, "Delta directory must exist." + sys.exit(1) + if not os.access(outputdir, os.W_OK): + print >> sys.stderr, "Output directory must be writable." + sys.exit(1) + + writePrestoData(deltadir, outputdir) + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/presto-utils/prunedrpms b/presto-utils/prunedrpms new file mode 100755 index 0000000..65ee9dc --- /dev/null +++ b/presto-utils/prunedrpms @@ -0,0 +1,2 @@ +#!/bin/sh +exec /usr/share/presto-utils/doprunedrpms.py "$@" diff --git a/presto-utils/relaxng/presto.rnc b/presto-utils/relaxng/presto.rnc new file mode 100644 index 0000000..90a4c34 --- /dev/null +++ b/presto-utils/relaxng/presto.rnc @@ -0,0 +1,20 @@ +start = element prestodeltas { newpackage+ } +newpackage = element newpackage { + attribute name { text }, + attribute epoch { text }?, + attribute version { text }, + attribute release { text }, + attribute arch { text }, + element delta { + attribute oldepoch { text }?, + attribute oldversion { text }?, + attribute oldrelease { text }?, + element filename { text }, + element sequence { text }, + element size { text }, + element checksum { + attribute type { "sha" | "md5" }, + text + } + }+ +} -- cgit