summary refs log tree commit diff stats
path: root/presto-utils/genpresto.py
diff options
context:
space:
mode:
author Jonathan Dieter <jdieter@gmail.com> 2007-07-11 18:37:03 +0300
committer Jonathan Dieter <jdieter@gmail.com> 2007-07-11 18:37:03 +0300
commit ec53c5c2b2f6ea31b22c47e4ae8f71c4eff2cd37 (patch)
tree 8f6deaef1c82c121fc71d5b7b10fa2b0ac0fa853 /presto-utils/genpresto.py
parent dce0600bc64c793ba6e8f67c56c286d8d97e7c4c (diff)
download presto-ec53c5c2b2f6ea31b22c47e4ae8f71c4eff2cd37.tar.gz
presto-ec53c5c2b2f6ea31b22c47e4ae8f71c4eff2cd37.tar.xz
presto-ec53c5c2b2f6ea31b22c47e4ae8f71c4eff2cd37.zip
Complete rewrite of yum-presto. New xml format.
Signed-off-by: Jonathan Dieter <jdieter@gmail.com>
Diffstat (limited to 'presto-utils/genpresto.py')
-rwxr-xr-x presto-utils/genpresto.py 313
1 files changed, 313 insertions, 0 deletions
diff --git a/presto-utils/genpresto.py b/presto-utils/genpresto.py
new file mode 100755
index 0000000..24330a1
--- /dev/null
+++ b/presto-utils/genpresto.py
@@ -0,0 +1,313 @@
+#!/usr/bin/python
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Copyright 2007 Red Hat, Inc. -- Jeremy Katz <katzj@redhat.com>
+# Based on genprestometadata.py which was based on genmetadata.py
+# Copyright 2007 Jonathan Dieter, Copyright 2004 Duke University
+
+import os, sys, string
+import optparse
+import gzip
+import rpm
+import types
+import sha
+import struct
+import libxml2
+
+__version__ = '0.3.0'
+
+class MDError(Exception):
+    """Error raised by this script when a file cannot be stat'ed or
+    checksummed."""
+
+def getFileList(directory, ext):
+    """Return every file below directory whose name ends in ext.
+
+    directory -- top of the tree to search; subdirectories are searched too
+    ext       -- filename suffix to look for (e.g. ".drpm"); the comparison
+                 ignores case, and an empty suffix matches every file
+
+    The result is a list of paths relative to directory, so a file found at
+    directory/a/b/x.drpm is returned as "a/b/x.drpm".  Directories are never
+    returned, whatever their name.  Entries are produced in sorted order
+    within each directory so that repeated runs give the same list.
+    """
+    wanted = ext.lower()
+    # os.walk() builds every dirpath by joining names onto `directory`, so
+    # slicing off that prefix leaves the path relative to the top.
+    rootlen = len(directory)
+
+    found = []
+    for (dirpath, dirnames, filenames) in os.walk(directory):
+        # Sorting dirnames in place also fixes the order of the descent.
+        dirnames.sort()
+        filenames.sort()
+        reldir = dirpath[rootlen:].lstrip(os.sep)
+        for fn in filenames:
+            if fn.lower().endswith(wanted):
+                found.append(os.path.join(reldir, fn))
+    return found
+
+def generateXML(doc, node, drpmObj, sumtype, pkgDeltas):
+    """Add a "delta" element describing drpmObj to the metadata tree.
+
+    Every delta that produces the same new package is grouped under one
+    "newpackage" element, created as a child of node the first time that
+    package is seen.  pkgDeltas maps "name-epoch:version-release.arch" to
+    those elements and is updated in place.
+
+    doc and sumtype are accepted but not used; the checksum element is
+    always written with type "sha".
+    """
+    name = drpmObj.tagByName('name')
+    arch = drpmObj.tagByName('arch')
+    epoch = str(drpmObj.epoch())
+    ver = str(drpmObj.tagByName('version'))
+    rel = str(drpmObj.tagByName('release'))
+
+    # Build the cache key once and reuse the package element if we have it.
+    pkgKey = '%s-%s:%s-%s.%s' % (name, epoch, ver, rel, arch)
+    if pkgKey in pkgDeltas:
+        pkgNode = pkgDeltas[pkgKey]
+    else:
+        pkgNode = node.newChild(None, "newpackage", None)
+        for (attr, value) in (('name', name),
+                              ('epoch', epoch),
+                              ('version', ver),
+                              ('release', rel),
+                              ('arch', arch)):
+            pkgNode.newProp(attr, value)
+        pkgDeltas[pkgKey] = pkgNode
+
+    deltaNode = pkgNode.newChild(None, "delta", None)
+    deltaNode.newChild(None, 'filename', drpmObj.relativepath)
+    seqText = "%s-%s" %(drpmObj.oldnevrstring, drpmObj.sequence)
+    deltaNode.newChild(None, 'sequence', seqText)
+    deltaNode.newChild(None, 'size', str(drpmObj.size))
+    checksumNode = deltaNode.newChild(None, 'checksum', drpmObj.pkgid)
+    checksumNode.newProp('type', 'sha')
+
+    # The old package's name is implied by the enclosing element, so only
+    # its epoch/version/release are recorded on the delta.
+    (oldname, oldepoch, oldver, oldrel) = drpmObj.oldnevr
+    deltaNode.newProp('oldepoch', oldepoch)
+    deltaNode.newProp('oldversion', oldver)
+    deltaNode.newProp('oldrelease', oldrel)
+
+
+
+def byteranges(file):
+    """Return (hdrstart, hdrend), the byte offsets of the main rpm header.
+
+    file -- a path, or an already open, seekable file-like object.  Anything
+            without a read() method is treated as a path and is opened (in
+            binary mode) and closed here; a file object passed in is left
+            open, positioned just after the last field read.
+
+    Layout relied on: a 96 byte lead, then the signature header (8 bytes of
+    magic/reserved, a 4 byte index count, a 4 byte data size, the index
+    entries at 16 bytes each, the data, padding to an 8 byte boundary), then
+    the main header with the same 16 byte preamble.
+
+    Raises struct.error if the file is too short to hold those fields.
+    """
+    opened_here = not hasattr(file, 'read')
+    if opened_here:
+        fo = open(file, 'rb')
+    else:
+        fo = file
+
+    def read_u32():
+        # All counts and sizes are stored as big-endian 32 bit integers.
+        (value, ) = struct.unpack('>I', fo.read(4))
+        return value
+
+    try:
+        # Skip the lead (96) and the signature magic/reserved bytes (8).
+        fo.seek(104)
+        sigindex = read_u32()
+        sigdata = read_u32()
+        # each index is 4 32bit segments - so each is 16 bytes
+        sigsize = sigdata + sigindex * 16
+        # The signature is padded up to the next 8 byte boundary.
+        padding = (8 - sigsize % 8) % 8
+        # 112 bytes - 96 == lead, 8 == magic and reserved, 8 == sig header data
+        hdrstart = 112 + sigsize + padding
+
+        # Step over the main header's magic number and reserved bytes.
+        fo.seek(hdrstart + 8)
+        hdrindex = read_u32()
+        hdrdata = read_u32()
+        # The extra 16 covers the header's own magic/reserved/count/size
+        # fields, which sit between hdrstart and the index entries.
+        hdrend = hdrstart + hdrdata + hdrindex * 16 + 16
+    finally:
+        if opened_here:
+            fo.close()
+    return (hdrstart, hdrend)
+
+class DrpmMetaData:
+    """Metadata for a single delta rpm (.drpm) file.
+
+    Constructing an instance stats the file, reads its rpm header, checksums
+    it and decodes the start of the delta payload.  Afterwards the instance
+    carries:
+
+      size, mtime    -- taken from os.stat()
+      relativepath   -- the filename argument, exactly as given
+      hdr            -- the header returned by ts.hdrFromFdno()
+      pkgid          -- hex SHA-1 digest of the whole file
+      oldnevrstring  -- NEVR string stored at the start of the delta payload
+      oldnevr        -- (name, epoch, version, release) parsed from it
+      sequence       -- the payload's sequence bytes, hex encoded
+    """
+
+    # Not used by any method of this class; kept for compatibility.
+    mode_cache = {}
+
+    def __init__(self, ts, basedir, filename):
+        """Read all metadata from basedir/filename.
+
+        ts -- rpm transaction set (anything providing hdrFromFdno())
+
+        Raises MDError if the file cannot be stat'ed or checksummed, or if
+        its payload is not a gzip-compressed delta.
+        """
+        path = os.path.join(basedir, filename)
+        try:
+            stats = os.stat(path)
+        except OSError:
+            raise MDError("Error Stat'ing file %s %s" % (basedir, filename))
+        self.size = stats[6]    # st_size
+        self.mtime = stats[8]   # st_mtime
+        self.relativepath = filename
+
+        fd = os.open(path, os.O_RDONLY)
+        try:
+            self.hdr = ts.hdrFromFdno(fd)
+            # Rewind in case reading the header advanced the file offset.
+            os.lseek(fd, 0, 0)
+            fo = os.fdopen(fd, 'rb')
+        except:
+            # No file object owns fd yet, so it has to be closed by hand.
+            os.close(fd)
+            raise
+        try:
+            self.pkgid = self.getChecksum("sha", fo)
+            fo.seek(0)
+            (start, end) = byteranges(fo)
+            # The delta information is read from just past the rpm header.
+            fo.seek(end)
+            self._getOldInfo(fo)
+        finally:
+            # Closing fo also closes the underlying descriptor.
+            fo.close()
+
+    def arch(self):
+        """Return 'src' for a source package, otherwise the header's arch."""
+        if self.tagByName('sourcepackage') == 1:
+            return 'src'
+        return self.tagByName('arch')
+
+    def _stringToNEVR(self, string):
+        """Split "name-[epoch:]version-release" into (name, epoch, ver, rel).
+
+        The name may contain dashes itself, so the string is cut at the
+        dash in front of the version rather than at the first dash.
+        """
+        lastdash = string.rfind("-")
+        i = string.rfind("-", 0, lastdash - 1)
+        name = string[:i]
+        (epoch, ver, rel) = self._stringToVersion(string[i+1:])
+        return (name, epoch, ver, rel)
+
+    def _getLength(self, in_data):
+        """Decode the byte string in_data as a big-endian unsigned integer."""
+        length = 0
+        for val in in_data:
+            length = length * 256 + ord(val)
+        return length
+
+    def _getOldInfo(self, fo):
+        """Read the old package's NEVR and the sequence from the payload.
+
+        fo must be positioned at the start of the gzip-compressed delta
+        data.  Sets oldnevrstring, oldnevr and sequence.  fo itself is left
+        open.
+
+        Raises MDError if the data is not gzip compressed or does not start
+        with the "DLT" magic.
+        """
+        compobj = gzip.GzipFile("", "rb", 9, fo)
+        try:
+            # GzipFile only looks at the stream on the first read, so that
+            # is where data that is not gzip compressed gets noticed.
+            try:
+                magic = compobj.read(4)
+            except IOError:
+                raise MDError("Data not stored in gzip format")
+            if magic[:3] != "DLT":
+                raise MDError("Not a deltarpm")
+
+            # Both fields are stored as a 4 byte length followed by data.
+            nevr_length = self._getLength(compobj.read(4))
+            nevr = compobj.read(nevr_length).strip("\x00")
+            seq_length = self._getLength(compobj.read(4))
+            seq = compobj.read(seq_length)
+        finally:
+            compobj.close()
+
+        self.oldnevrstring = nevr
+        self.oldnevr = self._stringToNEVR(nevr)
+        self.sequence = "".join(["%02x" % ord(char) for char in seq])
+
+    def _stringToVersion(self, strng):
+        """Split "[epoch:]version[-release]" into (epoch, version, release).
+
+        epoch defaults to '0' when there is no colon; version is None when
+        empty; release is None when there is no dash.
+        """
+        i = strng.find(':')
+        if i != -1:
+            epoch = strng[:i]
+        else:
+            epoch = '0'
+        # With no colon i is -1, so i + 1 is the start of the string.
+        j = strng.find('-')
+        if j != -1:
+            version = strng[i + 1:j]
+            release = strng[j + 1:]
+        else:
+            version = strng[i + 1:]
+            release = None
+        if version == '':
+            version = None
+        return (epoch, version, release)
+
+    def tagByName(self, tag):
+        """Return header tag `tag`; for list values, the first item or ''."""
+        data = self.hdr[tag]
+        if isinstance(data, list):
+            if data:
+                return data[0]
+            return ''
+        return data
+
+    def epoch(self):
+        """Return the header's epoch, or 0 when the header has none."""
+        if self.hdr['epoch'] is None:
+            return 0
+        return self.tagByName('epoch')
+
+    def _newChecksum(self, sumtype):
+        """Return a new hash object for sumtype ('md5' or 'sha').
+
+        Raises MDError for any other sumtype.
+        """
+        try:
+            import hashlib
+            makers = {'md5': hashlib.md5, 'sha': hashlib.sha1}
+        except ImportError:
+            # Python older than 2.5 has no hashlib; use the old modules.
+            import md5
+            makers = {'md5': md5.new, 'sha': sha.new}
+        if sumtype not in makers:
+            raise MDError('Error Checksumming file, wrong checksum type %s' % sumtype)
+        return makers[sumtype]()
+
+    def getChecksum(self, sumtype, file, CHUNK=2**16):
+        """Return the hex digest of a file's contents.
+
+        sumtype -- 'md5' or 'sha'
+        file    -- a path, or a file-like object which is read from its
+                   current position to the end and left open
+        CHUNK   -- number of bytes read at a time (65536 by default)
+
+        Raises MDError for an unknown sumtype or if the file cannot be
+        opened or read.
+        """
+        # Checked before any I/O so that a bad sumtype is reported as such
+        # instead of as a failure to open the file.
+        digest = self._newChecksum(sumtype)
+
+        opened_here = not hasattr(file, 'read')
+        try:
+            if opened_here:
+                fo = open(file, 'rb', CHUNK)
+            else:
+                fo = file   # assume it's a file-like-object
+            try:
+                chunk = fo.read(CHUNK)
+                while chunk:
+                    digest.update(chunk)
+                    chunk = fo.read(CHUNK)
+            finally:
+                if opened_here:
+                    fo.close()
+        except EnvironmentError:
+            # Report the underlying cause, then raise the module's error.
+            sys.stderr.write("%s\n" % (sys.exc_info()[1],))
+            raise MDError('Error opening file for checksum: %s' % file)
+        return digest.hexdigest()
+
+def writePrestoData(deltadir, outputdir):
+    """Write outputdir/prestodelta.xml describing every .drpm in deltadir.
+
+    deltadir  -- directory tree searched for files ending in ".drpm"
+    outputdir -- existing directory that receives prestodelta.xml
+
+    The XML is generated completely before the output file is opened, so a
+    delta rpm that cannot be read does not leave a truncated file behind.
+    Errors from reading the delta rpms or from writing the file propagate
+    to the caller.
+    """
+    files = getFileList(deltadir, ".drpm")
+
+    doc = libxml2.newDoc("1.0")
+    try:
+        root = doc.newChild(None, "prestodeltas", None)
+
+        deltas = {}
+        ts = rpm.TransactionSet()
+        # NOTE(review): -1 presumably switches off all signature/digest
+        # verification when headers are read - confirm against rpm-python.
+        ts.setVSFlags(-1)
+        for f in files:
+            drpmobj = DrpmMetaData(ts, deltadir, f)
+            generateXML(doc, root, drpmobj, "sha", deltas)
+        xml = root.serialize("UTF-8", True)
+    finally:
+        # libxml2 documents are not garbage collected; free it explicitly.
+        doc.freeDoc()
+
+    prestofile = open(os.path.join(outputdir, "prestodelta.xml"), "w")
+    try:
+        prestofile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        prestofile.write(xml)
+    finally:
+        prestofile.close()
+
+def usage():
+    """Write a one-line usage summary to standard error."""
+    program = sys.argv[0]
+    sys.stderr.write("Usage: %s <deltadir> <outputdir>" %(program) + "\n")
+
+def main(args):
+    """Command line entry point.
+
+    args -- the arguments after the program name: the delta directory and,
+            optionally, the output directory (default: deltadir/repodata)
+
+    Exits with status 1 after printing a message to standard error when no
+    arguments are given, when the delta directory does not exist, or when
+    the output directory is not writable.
+    """
+    if not args:
+        usage()
+        sys.exit(1)
+
+    deltadir = args[0]
+    # A second argument overrides the default output location.
+    outputdir = "%s/repodata" %(deltadir,)
+    if len(args) > 1:
+        outputdir = args[1]
+
+    # Check the inputs in the same order the messages are documented.
+    checks = (
+        (os.path.isdir(deltadir), "Delta directory must exist."),
+        (os.access(outputdir, os.W_OK), "Output directory must be writable."),
+    )
+    for (ok, complaint) in checks:
+        if not ok:
+            sys.stderr.write(complaint + "\n")
+            sys.exit(1)
+
+    writePrestoData(deltadir, outputdir)
+
+# Run only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main(sys.argv[1:])