#!/usr/bin/python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Copyright 2007 Red Hat, Inc. -- Jeremy Katz
# Based on genprestometadata.py which was based on genmetadata.py
# Copyright 2007 Jonathan Dieter, Copyright 2004 Duke University

import os
import sys
import string
import optparse
import gzip
import rpm
import types
import md5
import sha
import struct
import libxml2

__version__ = '0.3.0'


class MDError(Exception):
    """Raised when a delta rpm cannot be read or its metadata generated."""
    pass


def getFileList(directory, ext):
    """Return the files below `directory` whose names end in `ext`.

    The whole tree under `directory` is walked.  The suffix comparison is
    case-insensitive.  Each returned path is relative to `directory`, so
    it can be joined back onto `directory` to open the file, whatever its
    nesting depth.  Directories themselves are never returned.
    """
    ext = ext.lower()
    startdir = os.path.join(directory)
    found = []
    for dirname, subdirs, names in os.walk(startdir):
        # Strip the walk root (and the separator that follows it) so the
        # stored path is relative to `directory`.
        reldir = dirname[len(startdir):].lstrip(os.sep)
        for fn in names:
            if fn.lower().endswith(ext):
                found.append(os.path.join(reldir, fn))
    return found


def generateXML(doc, node, drpmObj, sumtype, pkgDeltas):
    """Add the metadata of one delta rpm below `node`.

    doc       -- the xml document object (not used by this function)
    node      -- parent node that receives the "newpackage" elements
    drpmObj   -- DrpmMetaData instance describing the delta rpm
    sumtype   -- not used here; the checksum type attribute is always
                 written as 'sha'
    pkgDeltas -- dict mapping "name-epoch:version-release.arch" to the
                 "newpackage" node already created for that package; it
                 is updated in place so that several deltas targeting
                 the same package share one "newpackage" node
    """
    name = drpmObj.tagByName('name')
    arch = drpmObj.tagByName('arch')
    epoch = str(drpmObj.epoch())
    ver = str(drpmObj.tagByName('version'))
    rel = str(drpmObj.tagByName('release'))

    key = '%s-%s:%s-%s.%s' % (name, epoch, ver, rel, arch)
    if key not in pkgDeltas:
        pkgNode = node.newChild(None, "newpackage", None)
        pkgNode.newProp('name', name)
        pkgNode.newProp('epoch', epoch)
        pkgNode.newProp('version', ver)
        pkgNode.newProp('release', rel)
        pkgNode.newProp('arch', arch)
        pkgDeltas[key] = pkgNode
    else:
        pkgNode = pkgDeltas[key]

    delta = pkgNode.newChild(None, "delta", None)
    delta.newChild(None, 'filename', drpmObj.relativepath)
    delta.newChild(None, 'sequence',
                   "%s-%s" % (drpmObj.oldnevrstring, drpmObj.sequence))
    delta.newChild(None, 'size', str(drpmObj.size))
    checksum = delta.newChild(None, 'checksum', drpmObj.pkgid)
    checksum.newProp('type', 'sha')

    # The old package's name is not written out, only its epoch/version/
    # release.
    (oldepoch, oldver, oldrel) = drpmObj.oldnevr[1:]
    delta.newProp('oldepoch', oldepoch)
    delta.newProp('oldversion', oldver)
    delta.newProp('oldrelease', oldrel)


def _readUInt32(fo):
    """Read 4 bytes from `fo` and return them as a big-endian unsigned int.

    struct.error is raised if fewer than 4 bytes are available.
    """
    (value, ) = struct.unpack('>I', fo.read(4))
    return value


def byteranges(file):
    """Return (hdrstart, hdrend), the byte range of the rpm header.

    `file` is either a path (str), which is opened in binary mode and
    closed again here, or an already open file object, which is left
    open with its position changed.
    """
    opened_here = isinstance(file, str)
    if opened_here:
        fo = open(file, 'rb')
    else:
        fo = file

    try:
        # read in past lead and first 8 bytes of sig header
        fo.seek(104)
        sigindex = _readUInt32(fo)  # 108 bytes in
        sigdata = _readUInt32(fo)   # 112 bytes in
        # each index is 4 32bit segments - so each is 16 bytes
        sigsize = sigdata + sigindex * 16
        # we have to round off to the next 8 byte boundary
        disttoboundary = sigsize % 8
        if disttoboundary != 0:
            disttoboundary = 8 - disttoboundary
        # 112 bytes - 96 == lead, 8 = magic and reserved,
        # 8 == sig header data
        hdrstart = 112 + sigsize + disttoboundary

        fo.seek(hdrstart)  # go to the start of the header
        fo.seek(8, 1)      # read past the magic number and reserved bytes
        hdrindex = _readUInt32(fo)
        hdrdata = _readUInt32(fo)
        # each index is 4 32bit segments - so each is 16 bytes; add 16 to
        # the hdrsize to account for the 16 bytes of misc data b/t the
        # end of the sig and the header.
        hdrsize = hdrdata + hdrindex * 16 + 16
        # header end is hdrstart + hdrsize
        hdrend = hdrstart + hdrsize
    finally:
        if opened_here:
            fo.close()

    return (hdrstart, hdrend)


class DrpmMetaData:
    """each drpm is one object, you pass it an rpm file
       it opens the file, and pulls the information out in bite-sized chunks :)

       Attributes set by the constructor:
         size, mtime   -- taken from os.stat() of the file
         relativepath  -- the `filename` argument as given
         hdr           -- header returned by ts.hdrFromFdno()
         pkgid         -- sha checksum (hex) of the whole file
         oldnevrstring -- NEVR string stored in the delta payload
         oldnevr       -- (name, epoch, version, release) parsed from it
         sequence      -- hex encoding of the stored sequence bytes
    """
    mode_cache = {}

    def __init__(self, ts, basedir, filename):
        """Read the delta rpm `filename` located below `basedir`.

        ts -- rpm transaction set used to parse the header

        Raises MDError if the file cannot be stat'ed or is not a usable
        delta rpm.
        """
        path = os.path.join(basedir, filename)
        try:
            stats = os.stat(path)
        except OSError:
            raise MDError("Error Stat'ing file %s %s" % (basedir, filename))
        self.size = stats[6]   # st_size
        self.mtime = stats[8]  # st_mtime
        self.relativepath = filename

        fd = os.open(path, os.O_RDONLY)
        fo = None
        try:
            self.hdr = ts.hdrFromFdno(fd)
            os.lseek(fd, 0, 0)
            fo = os.fdopen(fd, 'rb')
            self.pkgid = self.getChecksum("sha", fo)
            fo.seek(0)
            # The delta payload starts right after the rpm header.
            hdrend = byteranges(fo)[1]
            fo.seek(hdrend)
            self._getOldInfo(fo)
        finally:
            # Closing the file object also closes fd; fall back to closing
            # the raw descriptor if the file object was never created.
            if fo is not None:
                fo.close()
            else:
                os.close(fd)

    def arch(self):
        """Return 'src' for a source package, else the header's arch tag."""
        if self.tagByName('sourcepackage') == 1:
            return 'src'
        else:
            return self.tagByName('arch')

    def _stringToNEVR(self, string):
        """Split "name-[epoch:]version-release" into its four parts.

        Returns (name, epoch, version, release).  The name may itself
        contain dashes; only the last two dashes act as separators.
        Raises MDError if the string holds fewer than two dashes.
        """
        parts = string.rsplit("-", 2)
        if len(parts) != 3:
            raise MDError("Malformed package NEVR: %s" % (string,))
        name = parts[0]
        (epoch, ver, rel) = self._stringToVersion("%s-%s" % (parts[1],
                                                              parts[2]))
        return (name, epoch, ver, rel)

    def _getLength(self, in_data):
        """Interpret the byte string `in_data` as a big-endian integer."""
        length = 0
        for val in in_data:
            length = length * 256
            length += ord(val)
        return length

    def _getOldInfo(self, fo):
        """Read the old package's NEVR and sequence from the delta payload.

        `fo` must be positioned at the start of the gzip-compressed
        payload.  Sets self.oldnevrstring, self.oldnevr and self.sequence.
        `fo` itself is not closed.

        Raises MDError if the payload is not gzip data or does not start
        with the "DLT" magic.
        """
        compobj = gzip.GzipFile("", "rb", 9, fo)
        try:
            # GzipFile only parses the gzip header on the first read, so
            # that is where a non-gzip payload is detected.
            try:
                magic = compobj.read(4)
            except IOError:
                raise MDError("Data not stored in gzip format")
            if magic[:3] != "DLT":
                raise MDError("Not a deltarpm")

            nevr_length = self._getLength(compobj.read(4))
            nevr = compobj.read(nevr_length).strip("\x00")
            seq_length = self._getLength(compobj.read(4))
            seq = compobj.read(seq_length)
        finally:
            compobj.close()

        self.oldnevrstring = nevr
        self.oldnevr = self._stringToNEVR(nevr)
        self.sequence = "".join(["%02x" % ord(char) for char in seq])

    def _stringToVersion(self, strng):
        """Split "[epoch:]version[-release]" into (epoch, version, release).

        epoch defaults to '0' when no ':' is present; an empty version
        and a missing release are returned as None.
        """
        i = strng.find(':')
        if i != -1:
            epoch = strng[:i]
        else:
            epoch = '0'

        j = strng.find('-')
        if j != -1:
            version = strng[i + 1:j] or None
            release = strng[j + 1:]
        else:
            version = strng[i + 1:] or None
            release = None
        return (epoch, version, release)

    def tagByName(self, tag):
        """Return header tag `tag`; for list values, the first item.

        An empty list is returned as ''.
        """
        data = self.hdr[tag]
        if isinstance(data, list):
            if len(data) > 0:
                return data[0]
            else:
                return ''
        else:
            return data

    def epoch(self):
        """Return the header's epoch, or 0 when the header has none."""
        if self.hdr['epoch'] is None:
            return 0
        else:
            return self.tagByName('epoch')

    def getChecksum(self, sumtype, file, CHUNK=2**16):
        """Return the hex checksum of `file`.

        sumtype -- 'md5' or 'sha'
        file    -- a path (str), opened and closed here, or an open file
                   object, which is read from its current position to the
                   end and left open
        CHUNK   -- read size in bytes, 65536 by default

        Raises MDError for an unknown `sumtype` or when the file cannot
        be opened or read.
        """
        if sumtype == 'md5':
            digest = md5.new()
        elif sumtype == 'sha':
            digest = sha.new()
        else:
            raise MDError('Error Checksumming file, wrong checksum type %s'
                          % sumtype)

        opened_here = isinstance(file, str)
        try:
            if opened_here:
                fo = open(file, 'rb', CHUNK)
            else:
                fo = file  # assume it's a file-like-object
            try:
                # chunking brazenly lifted from Ryan Tomayko
                while True:
                    chunk = fo.read(CHUNK)
                    if not chunk:
                        break
                    digest.update(chunk)
            finally:
                if opened_here:
                    fo.close()
        except Exception:
            # Report the underlying cause in the message instead of
            # printing it to stdout.
            raise MDError('Error opening file for checksum: %s (%s)'
                          % (file, sys.exc_info()[1]))
        return digest.hexdigest()


def writePrestoData(deltadir, outputdir):
    """Write prestodelta.xml into `outputdir` for the drpms in `deltadir`.

    Every file below `deltadir` ending in ".drpm" is read and described
    in the generated document.
    """
    files = getFileList(deltadir, ".drpm")

    doc = libxml2.newDoc("1.0")
    try:
        root = doc.newChild(None, "prestodelta", None)

        deltas = {}
        ts = rpm.TransactionSet()
        # NOTE(review): presumably -1 turns off signature/digest checking
        # while reading headers - confirm against the rpm bindings.
        ts.setVSFlags(-1)
        for f in files:
            drpmobj = DrpmMetaData(ts, deltadir, f)
            generateXML(doc, root, drpmobj, "sha", deltas)

        prestofile = open("%s/prestodelta.xml" % (outputdir,), "w")
        try:
            # NOTE(review): only a newline precedes the root element; no
            # XML declaration is written - confirm this is intended.
            prestofile.write('\n')
            prestofile.write(root.serialize("UTF-8", True))
        finally:
            prestofile.close()
    finally:
        # libxml2 documents are not garbage collected by Python.
        doc.freeDoc()


def usage():
    """Print the command line synopsis to stderr."""
    sys.stderr.write("Usage: %s <deltadir> [<outputdir>]\n" % (sys.argv[0],))


def main(args):
    """Command line entry point.

    args[0] is the delta directory; args[1], when given, is the output
    directory and defaults to "<deltadir>/repodata".  Exits with status 1
    on missing arguments, a missing delta directory or an unwritable
    output directory.
    """
    if len(args) == 0:
        usage()
        sys.exit(1)

    deltadir = args[0]
    if len(args) > 1:
        outputdir = args[1]
    else:
        outputdir = "%s/repodata" % (deltadir,)

    if not os.path.isdir(deltadir):
        sys.stderr.write("Delta directory must exist.\n")
        sys.exit(1)

    if not os.access(outputdir, os.W_OK):
        sys.stderr.write("Output directory must be writable.\n")
        sys.exit(1)

    writePrestoData(deltadir, outputdir)


if __name__ == "__main__":
    main(sys.argv[1:])