From dce0600bc64c793ba6e8f67c56c286d8d97e7c4c Mon Sep 17 00:00:00 2001
From: Jonathan Dieter
Date: Tue, 19 Jun 2007 20:58:07 +0300
Subject: Many bugfixes and a few enhancements

Signed-off-by: Jonathan Dieter
---
 createprestorepo/dumpMetadata.py | 496 ---------------------------------------
 1 file changed, 496 deletions(-)
 delete mode 100644 createprestorepo/dumpMetadata.py

diff --git a/createprestorepo/dumpMetadata.py b/createprestorepo/dumpMetadata.py
deleted file mode 100644
index 6969400..0000000
--- a/createprestorepo/dumpMetadata.py
+++ /dev/null
@@ -1,496 +0,0 @@
-#!/usr/bin/python -t
-# base classes and functions for dumping out package Metadata
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-# Copyright 2004 Duke University
-
-# $Id: dumpMetadata.py,v 1.36 2006/02/21 20:10:08 pnasrat Exp $
-
-import os
-import rpm
-import exceptions
-import md5
-import sha
-import types
-import struct
-import re
-import stat
-
-# done to fix gzip randomly changing the checksum
-import gzip
-from zlib import error as zlibError
-from gzip import write32u, FNAME
-
-__all__ = ["GzipFile","open"]
-
-class GzipFile(gzip.GzipFile):
-    def _write_gzip_header(self):
-        self.fileobj.write('\037\213')   # magic header
-        self.fileobj.write('\010')       # compression method
-        fname = self.filename[:-3]
-        flags = 0
-        if fname:
-            flags = FNAME
-        self.fileobj.write(chr(flags))
-        write32u(self.fileobj, long(0))
-        self.fileobj.write('\002')
-        self.fileobj.write('\377')
-        if fname:
-            self.fileobj.write(fname + '\000')
-
-
-def _gzipOpen(filename, mode="rb", compresslevel=9):
-    return GzipFile(filename, mode, compresslevel)
-
-
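The GzipFile subclass above exists to make compression deterministic: stock gzip writes the current mtime into every header, so compressing identical metadata twice yields different bytes and a different checksum, while write32u(self.fileobj, long(0)) pins the timestamp to zero. Modern Python exposes this directly, so no subclass is needed; a minimal sketch, assuming only the standard gzip module (deterministic_gzip_write is a hypothetical helper name):

    import gzip

    def deterministic_gzip_write(path, data, level=9):
        # mtime=0 pins the header timestamp, so identical input always
        # produces byte-identical output and a stable checksum
        with gzip.GzipFile(path, 'wb', compresslevel=level, mtime=0) as zfo:
            zfo.write(data)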
-def returnFD(filename):
-    try:
-        fdno = os.open(filename, os.O_RDONLY)
-    except OSError:
-        raise MDError, "Error opening file"
-    return fdno
-
-def returnHdr(ts, package):
-    """hand back the rpm header or raise an Error if the pkg is fubar"""
-    opened_here = 0
-    try:
-        if type(package) is types.StringType:
-            opened_here = 1
-            fdno = os.open(package, os.O_RDONLY)
-        else:
-            fdno = package # let's assume this is an fdno and go with it :)
-    except OSError:
-        raise MDError, "Error opening file"
-    ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
-    try:
-        hdr = ts.hdrFromFdno(fdno)
-    except rpm.error:
-        raise MDError, "Error opening package"
-    if type(hdr) != rpm.hdr:
-        raise MDError, "Error opening package"
-    ts.setVSFlags(0)
-
-    if opened_here:
-        os.close(fdno)
-        del fdno
-
-    return hdr
-
-def getChecksum(sumtype, file, CHUNK=2**16):
-    """takes filename, hand back Checksum of it
-       sumtype = md5 or sha
-       filename = /path/to/file
-       CHUNK=65536 by default"""
-
-    # chunking brazenly lifted from Ryan Tomayko
-    opened_here = 0
-    try:
-        if type(file) is not types.StringType:
-            fo = file # assume it's a file-like-object
-        else:
-            opened_here = 1
-            fo = open(file, 'rb', CHUNK)
-
-        if sumtype == 'md5':
-            sum = md5.new()
-        elif sumtype == 'sha':
-            sum = sha.new()
-        else:
-            raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype
-        chunk = fo.read
-        while chunk:
-            chunk = fo.read(CHUNK)
-            sum.update(chunk)
-
-        if opened_here:
-            fo.close()
-            del fo
-
-        return sum.hexdigest()
-    except:
-        raise MDError, 'Error opening file for checksum: %s' % file
-
-
-def utf8String(string):
-    """hands back a unicoded string"""
-    if string is None:
-        return ''
-    elif isinstance(string, unicode):
-        return string
-    try:
-        x = unicode(string, 'ascii')
-        return string
-    except UnicodeError:
-        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
-        for enc in encodings:
-            try:
-                x = unicode(string, enc)
-            except UnicodeError:
-                pass
-            else:
-                if x.encode(enc) == string:
-                    return x.encode('utf-8')
-        newstring = ''
-        for char in string:
-            if ord(char) > 127:
-                newstring = newstring + '?'
-            else:
-                newstring = newstring + char
-        return newstring
-
-
-def byteranges(file):
-    """takes an rpm file or fileobject and returns byteranges for location of the header"""
-    opened_here = 0
-    if type(file) is not types.StringType:
-        fo = file
-    else:
-        opened_here = 1
-        fo = open(file, 'r')
-    #read in past lead and first 8 bytes of sig header
-    fo.seek(104)
-    # 104 bytes in
-    binindex = fo.read(4)
-    # 108 bytes in
-    (sigindex, ) = struct.unpack('>I', binindex)
-    bindata = fo.read(4)
-    # 112 bytes in
-    (sigdata, ) = struct.unpack('>I', bindata)
-    # each index is 4 32bit segments - so each is 16 bytes
-    sigindexsize = sigindex * 16
-    sigsize = sigdata + sigindexsize
-    # we have to round off to the next 8 byte boundary
-    disttoboundary = (sigsize % 8)
-    if disttoboundary != 0:
-        disttoboundary = 8 - disttoboundary
-    # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data
-    hdrstart = 112 + sigsize + disttoboundary
-
-    fo.seek(hdrstart) # go to the start of the header
-    fo.seek(8,1) # read past the magic number and reserved bytes
-
-    binindex = fo.read(4)
-    (hdrindex, ) = struct.unpack('>I', binindex)
-    bindata = fo.read(4)
-    (hdrdata, ) = struct.unpack('>I', bindata)
-
-    # each index is 4 32bit segments - so each is 16 bytes
-    hdrindexsize = hdrindex * 16
-    # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
-    # end of the sig and the header.
-    hdrsize = hdrdata + hdrindexsize + 16
-
-    # header end is hdrstart + hdrsize
-    hdrend = hdrstart + hdrsize
-    if opened_here:
-        fo.close()
-        del fo
-    return (hdrstart, hdrend)
-
-
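byteranges relies on the fixed layout spelled out in its comments: a 96-byte lead, 8 bytes of signature-header magic and reserved space, two big-endian 32-bit counts, then 16 bytes per index entry plus the data blob, padded to an 8-byte boundary, and only then the real header. A worked version of that arithmetic, with invented counts:

    # Suppose the two 32-bit fields read at offsets 104 and 108 decode to:
    sigindex, sigdata = 7, 423             # hypothetical entry and data-byte counts
    sigsize = sigdata + sigindex * 16      # 423 + 112 = 535 bytes of signature
    pad = (8 - sigsize % 8) % 8            # pad to the next 8-byte boundary -> 1
    hdrstart = 96 + 8 + 8 + sigsize + pad  # lead + magic/reserved + counts -> 648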
-class MDError(exceptions.Exception):
-    def __init__(self, args=None):
-        exceptions.Exception.__init__(self)
-        self.args = args
-
-
-
-class RpmMetaData:
-    """each drpm is one object, you pass it an rpm file
-       it opens the file, and pulls the information out in bite-sized chunks :)
-    """
-
-    mode_cache = {}
-
-    def __init__(self, ts, basedir, filename, options, is_drpm):
-        try:
-            stats = os.stat(os.path.join(basedir, filename))
-            self.size = stats[6]
-            self.mtime = stats[8]
-            del stats
-        except OSError, e:
-            raise MDError, "Error Stat'ing file %s %s" % (basedir, filename)
-        self.options = options
-        self.localurl = options['baseurl']
-        self.relativepath = filename
-        fd = returnFD(os.path.join(basedir, filename))
-        self.hdr = returnHdr(ts, fd)
-        os.lseek(fd, 0, 0)
-        fo = os.fdopen(fd, 'rb')
-        self.pkgid = self.doChecksumCache(fo)
-        fo.seek(0)
-        (self.rangestart, self.rangeend) = byteranges(fo)
-        self.is_drpm = False
-        if is_drpm:
-            fo.seek(self.rangeend)
-            self._getOldInfo(fo)
-            self.is_drpm = True
-        del fo
-        del fd
-
-    def arch(self):
-        if self.tagByName('sourcepackage') == 1:
-            return 'src'
-        else:
-            return self.tagByName('arch')
-
-    def _stringToNEVR(self, string):
-        i = string.rfind("-", 0, string.rfind("-")-1)
-        name = string[:i]
-        (epoch, ver, rel) = self._stringToVersion(string[i+1:])
-        return (name, epoch, ver, rel)
-
-    def _getLength(self, in_data):
-        length = 0
-        for val in in_data:
-            length = length * 256
-            length += ord(val)
-        return length
-
-    def _getOldInfo(self, fo):
-        try:
-            compobj = gzip.GzipFile("", "rb", 9, fo)
-        except:
-            raise zlibError("Data not stored in gzip format")
-
-        if compobj.read(4)[:3] != "DLT":
-            raise Exception("Not a deltarpm")
-
-        nevr_length = self._getLength(compobj.read(4))
-        nevr = compobj.read(nevr_length).strip("\x00")
-        seq_length = self._getLength(compobj.read(4))
-        seq = compobj.read(seq_length)
-        hex_seq = ""
-        for char in seq:
-            hex_seq += str("%02x" % ord(char))
-        self.oldnevrstring = nevr
-        self.oldnevr = self._stringToNEVR(nevr)
-        self.sequence = hex_seq
-        compobj.close()
-
-    def _stringToVersion(self, strng):
-        i = strng.find(':')
-        if i != -1:
-            epoch = strng[:i]
-        else:
-            epoch = '0'
-        j = strng.find('-')
-        if j != -1:
-            if strng[i + 1:j] == '':
-                version = None
-            else:
-                version = strng[i + 1:j]
-            release = strng[j + 1:]
-        else:
-            if strng[i + 1:] == '':
-                version = None
-            else:
-                version = strng[i + 1:]
-            release = None
-        return (epoch, version, release)
-
-    ###########
-    # Title: Remove duplicates from a sequence
-    # Submitter: Tim Peters
-    # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560
-
-    def _uniq(self,s):
-        """Return a list of the elements in s, but without duplicates.
-
-        For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
-        unique("abcabc") some permutation of ["a", "b", "c"], and
-        unique(([1, 2], [2, 3], [1, 2])) some permutation of
-        [[2, 3], [1, 2]].
-
-        For best speed, all sequence elements should be hashable.  Then
-        unique() will usually work in linear time.
-
-        If not possible, the sequence elements should enjoy a total
-        ordering, and if list(s).sort() doesn't raise TypeError it's
-        assumed that they do enjoy a total ordering.  Then unique() will
-        usually work in O(N*log2(N)) time.
-
-        If that's not possible either, the sequence elements must support
-        equality-testing.  Then unique() will usually work in quadratic
-        time.
-        """
-
-        n = len(s)
-        if n == 0:
-            return []
-
-        # Try using a dict first, as that's the fastest and will usually
-        # work.  If it doesn't work, it will usually fail quickly, so it
-        # usually doesn't cost much to *try* it.  It requires that all the
-        # sequence elements be hashable, and support equality comparison.
-        u = {}
-        try:
-            for x in s:
-                u[x] = 1
-        except TypeError:
-            del u  # move on to the next method
-        else:
-            return u.keys()
-
-        # We can't hash all the elements.  Second fastest is to sort,
-        # which brings the equal elements together; then duplicates are
-        # easy to weed out in a single pass.
-        # NOTE:  Python's list.sort() was designed to be efficient in the
-        # presence of many duplicate elements.  This isn't true of all
-        # sort functions in all languages or libraries, so this approach
-        # is more effective in Python than it may be elsewhere.
-        try:
-            t = list(s)
-            t.sort()
-        except TypeError:
-            del t  # move on to the next method
-        else:
-            assert n > 0
-            last = t[0]
-            lasti = i = 1
-            while i < n:
-                if t[i] != last:
-                    t[lasti] = last = t[i]
-                    lasti += 1
-                i += 1
-            return t[:lasti]
-
-        # Brute force is all that's left.
-        u = []
-        for x in s:
-            if x not in u:
-                u.append(x)
-        return u
-
-    def tagByName(self, tag):
-        data = self.hdr[tag]
-        if type(data) is types.ListType:
-            if len(data) > 0:
-                return data[0]
-            else:
-                return ''
-        else:
-            return data
-
-    def listTagByName(self, tag):
-        """take a tag that should be a list and make sure it is one"""
-        lst = []
-        data = self.hdr[tag]
-        if data is None:
-            return lst
-
-        if type(data) is types.ListType:
-            lst.extend(data)
-        else:
-            lst.append(data)
-        return lst
-
-    def epoch(self):
-        if self.hdr['epoch'] is None:
-            return 0
-        else:
-            return self.tagByName('epoch')
-
-    def doChecksumCache(self, fo):
-        """return a checksum for a package:
-           - check if the checksum cache is enabled
-             if not - return the checksum
-             if so - check to see if it has a cache file
-               if so, open it and return the first line's contents
-               if not, grab the checksum and write it to a file for this pkg
-        """
-        if not self.options['cache']:
-            return getChecksum(self.options['sumtype'], fo)
-
-        csumtag = os.path.basename(self.relativepath) + ".cache"
-        csumfile = '%s/%s' % (self.options['cachedir'], csumtag)
-        if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[8]:
-            csumo = open(csumfile, 'r')
-            checksum = csumo.readline()
-            csumo.close()
-
-        else:
-            checksum = getChecksum(self.options['sumtype'], fo)
-            csumo = open(csumfile, 'w')
-            csumo.write(checksum)
-            csumo.close()
-
-        return checksum
-
-
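_getOldInfo above doubles as documentation of the deltarpm preamble: past the header byterange sits a gzip stream opening with a "DLT" magic, then a 4-byte big-endian length and the old package's name-epoch:version-release string, then a second length and the binary sequence id. A rough Python 3 rendering of the same read, assuming fo is an open binary file already positioned at the end of the header range (read_delta_info is a hypothetical helper name):

    import gzip
    import struct

    def read_delta_info(fo):
        z = gzip.GzipFile(fileobj=fo, mode='rb')
        if z.read(4)[:3] != b'DLT':                   # 4th byte ignored, as above
            raise ValueError('not a deltarpm')
        (nevr_len,) = struct.unpack('>I', z.read(4))  # what _getLength computes
        nevr = z.read(nevr_len).rstrip(b'\x00').decode()
        (seq_len,) = struct.unpack('>I', z.read(4))
        sequence = z.read(seq_len).hex()              # the hex string _getOldInfo builds by hand
        return nevr, sequence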
-def generateXML(doc, node, formatns, drpmObj, sumtype, pkgDeltas):
-    """takes an xml doc object and a package metadata entry node, populates a
-       package node with the md information"""
-    name = drpmObj.tagByName('name')
-    arch = drpmObj.arch()
-    epoch = str(drpmObj.epoch())
-    ver = str(drpmObj.tagByName('version'))
-    rel = str(drpmObj.tagByName('release'))
-    if not pkgDeltas.has_key('%s-%s:%s-%s.%s' % (name, epoch, ver, rel, arch)):
-        pkgNode = node.newChild(None, "package", None)
-        pkgNode.newProp('type', 'rpm')
-        pkgNode.newChild(None, 'name', name)
-        pkgNode.newChild(None, 'arch', arch)
-        version = pkgNode.newChild(None, 'version', None)
-        version.newProp('epoch', epoch)
-        version.newProp('ver', ver)
-        version.newProp('rel', rel)
-        deltas = pkgNode.newChild(None, 'deltas', None)
-        pkgDeltas['%s-%s:%s-%s.%s' % (name, epoch, ver, rel, arch)] = deltas
-    else:
-        deltas = pkgDeltas['%s-%s:%s-%s.%s' % (name, epoch, ver, rel, arch)]
-    (oldname, oldepoch, oldver, oldrel) = drpmObj.oldnevr
-    drpmNode = deltas.newChild(None, "oldrpm", None)
-    if name != oldname:
-        drpmNode.newChild(None, 'name', oldname)
-    # oldrpm arch is not stored in drpm, so we can only work within same arch
-    version = drpmNode.newChild(None, 'version', None)
-    if epoch != oldepoch:
-        version.newProp('epoch', oldepoch)
-    if ver != oldver:
-        version.newProp('ver', oldver)
-    version.newProp('rel', oldrel)
-    drpmNode.newChild(None, 'drpm_filename', drpmObj.relativepath)
-    drpmNode.newChild(None, 'size', str(drpmObj.size))
-    drpmNode.newChild(None, 'sequence', '%s-%s' % (drpmObj.oldnevrstring, drpmObj.sequence))
-    checksum = drpmNode.newChild(None, 'checksum', drpmObj.pkgid)
-    checksum.newProp('type', drpmObj.options['sumtype'])
-
-
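Traced through the newChild/newProp calls, one entry in the delta metadata comes out shaped roughly like the sketch below (all values invented). Note that <oldrpm> carries a name only when it differs from the new package, and epoch/ver attributes only when they differ, since each delta is keyed to its enclosing <package>:

    <package type="rpm">
      <name>foo</name>
      <arch>i386</arch>
      <version epoch="0" ver="1.1" rel="2.fc7"/>
      <deltas>
        <oldrpm>
          <version ver="1.0" rel="1.fc7"/>
          <drpm_filename>foo-1.0_1.1.i386.drpm</drpm_filename>
          <size>12345</size>
          <sequence>foo-0:1.0-1.fc7-0123456789abcdef</sequence>
          <checksum type="sha">3a1b...</checksum>
        </oldrpm>
      </deltas>
    </package>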
-def repoXML(node, cmds):
-    """generate the repomd.xml file that stores the info on the other files"""
-    sumtype = cmds['sumtype']
-    workfiles = [(cmds['prestofile'], 'deltas')]
-
-
-    for (file, ftype) in workfiles:
-        zfo = _gzipOpen(os.path.join(cmds['outputdir'], cmds['tempdir'], file))
-        uncsum = getChecksum(sumtype, zfo)
-        zfo.close()
-        csum = getChecksum(sumtype, os.path.join(cmds['outputdir'], cmds['tempdir'], file))
-        timestamp = os.stat(os.path.join(cmds['outputdir'], cmds['tempdir'], file))[8]
-        data = node.newChild(None, 'data', None)
-        data.newProp('type', ftype)
-        location = data.newChild(None, 'location', None)
-        if cmds['baseurl'] is not None:
-            location.newProp('xml:base', cmds['baseurl'])
-        location.newProp('href', os.path.join(cmds['finaldir'], file))
-        checksum = data.newChild(None, 'checksum', csum)
-        checksum.newProp('type', sumtype)
-        timestamp = data.newChild(None, 'timestamp', str(timestamp))
-        unchecksum = data.newChild(None, 'open-checksum', uncsum)
-        unchecksum.newProp('type', sumtype)
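repoXML reads the compressed presto file back twice on purpose: once through _gzipOpen for the open-checksum of the uncompressed contents, and once raw for the checksum of the .gz file itself, so clients can verify either form. The resulting repomd.xml entry looks roughly like this (values and filename invented; an xml:base attribute is added to <location> when a baseurl is configured):

    <data type="deltas">
      <location href="repodata/prestodelta.xml.gz"/>
      <checksum type="sha">9f21...</checksum>
      <timestamp>1182275887</timestamp>
      <open-checksum type="sha">77c0...</open-checksum>
    </data>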