From 5ea857b0e948b687785b8e55e08866c6171fb715 Mon Sep 17 00:00:00 2001
From: Jonathan Dieter
Date: Thu, 29 Mar 2007 19:43:26 +0300
Subject: Split server and client

Signed-off-by: Jonathan Dieter
---
 createprestorepo/dumpMetadata.py | 456 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 456 insertions(+)
 create mode 100755 createprestorepo/dumpMetadata.py

diff --git a/createprestorepo/dumpMetadata.py b/createprestorepo/dumpMetadata.py
new file mode 100755
index 0000000..2f074da
--- /dev/null
+++ b/createprestorepo/dumpMetadata.py
@@ -0,0 +1,456 @@
+#!/usr/bin/python -t
+# base classes and functions for dumping out package Metadata
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2004 Duke University
+
+# $Id: dumpMetadata.py,v 1.36 2006/02/21 20:10:08 pnasrat Exp $
+
+import os
+import rpm
+import exceptions
+import md5
+import sha
+import types
+import struct
+import re
+import stat
+
+# Subclass GzipFile so the gzip header carries a fixed (zero) mtime; the
+# stock header embeds the current time, which makes the checksum of the
+# compressed file change on every run even when the data is identical.
+import gzip
+from zlib import error as zlibError
+from gzip import write32u, FNAME
+
+__all__ = ["GzipFile","open"]
+
+class GzipFile(gzip.GzipFile):
+    def _write_gzip_header(self):
+        self.fileobj.write('\037\213')          # magic header
+        self.fileobj.write('\010')              # compression method
+        fname = self.filename[:-3]              # embedded name, minus '.gz'
+        flags = 0
+        if fname:
+            flags = FNAME
+        self.fileobj.write(chr(flags))
+        write32u(self.fileobj, long(0))         # mtime of 0 instead of time.time()
+        self.fileobj.write('\002')              # extra flags: maximum compression
+        self.fileobj.write('\377')              # OS byte: unknown
+        if fname:
+            self.fileobj.write(fname + '\000')
+
+
+def _gzipOpen(filename, mode="rb", compresslevel=9):
+    return GzipFile(filename, mode, compresslevel)
+
+
+def returnFD(filename):
+    try:
+        fdno = os.open(filename, os.O_RDONLY)
+    except OSError:
+        raise MDError, "Error opening file %s" % filename
+    return fdno
+
+def returnHdr(ts, package):
+    """hand back the rpm header or raise an MDError if the pkg is fubar"""
+    opened_here = 0
+    try:
+        if type(package) is types.StringType:
+            opened_here = 1
+            fdno = os.open(package, os.O_RDONLY)
+        else:
+            fdno = package # let's assume this is an fdno and go with it :)
+    except OSError:
+        raise MDError, "Error opening file"
+    ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
+    try:
+        hdr = ts.hdrFromFdno(fdno)
+    except rpm.error:
+        raise MDError, "Error opening package"
+    if type(hdr) != rpm.hdr:
+        raise MDError, "Error opening package"
+    ts.setVSFlags(0)
+
+    if opened_here:
+        os.close(fdno)
+        del fdno
+
+    return hdr
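+# Illustrative sketch only (not used by anything below): shows the effect of
+# the zeroed-mtime header.  The default path and payload are placeholders.
+def _demoDeterministicGzip(path='/tmp/demo.gz', payload='same payload'):
+    """compress the same bytes twice and compare whole-file checksums;
+       with stock gzip.GzipFile the digests usually differ because the
+       header embeds the current time, with the subclass above they match"""
+    digests = []
+    for i in (1, 2):
+        fo = _gzipOpen(path, 'wb')
+        fo.write(payload)
+        fo.close()
+        digests.append(getChecksum('md5', path))
+    return digests[0] == digests[1]      # True with the patched header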
+def getChecksum(sumtype, file, CHUNK=2**16):
+    """takes a filename or file object, hands back the checksum of it
+       sumtype = md5 or sha
+       filename = /path/to/file
+       CHUNK=65536 by default"""
+
+    # reject bad checksum types up front, so the handler below cannot
+    # swallow this error and misreport it as a file-open failure
+    if sumtype == 'md5':
+        sum = md5.new()
+    elif sumtype == 'sha':
+        sum = sha.new()
+    else:
+        raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype
+
+    # chunking brazenly lifted from Ryan Tomayko
+    opened_here = 0
+    try:
+        if type(file) is not types.StringType:
+            fo = file # assume it's a file-like-object
+        else:
+            opened_here = 1
+            fo = open(file, 'rb', CHUNK)
+
+        # hash CHUNK bytes at a time until the file is exhausted
+        chunk = fo.read(CHUNK)
+        while chunk:
+            sum.update(chunk)
+            chunk = fo.read(CHUNK)
+
+        if opened_here:
+            fo.close()
+            del fo
+
+        return sum.hexdigest()
+    except (IOError, OSError):
+        raise MDError, 'Error opening file for checksum: %s' % file
+
+
+def utf8String(string):
+    """hands back a utf-8-safe string, decoding if necessary"""
+    if string is None:
+        return ''
+    elif isinstance(string, unicode):
+        return string
+    try:
+        x = unicode(string, 'ascii')
+        return string # plain ascii passes through untouched
+    except UnicodeError:
+        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
+        for enc in encodings:
+            try:
+                x = unicode(string, enc)
+            except UnicodeError:
+                pass
+            else:
+                if x.encode(enc) == string:
+                    return x.encode('utf-8')
+        # nothing decoded cleanly; degrade every non-ascii byte to '?'
+        newstring = ''
+        for char in string:
+            if ord(char) > 127:
+                newstring = newstring + '?'
+            else:
+                newstring = newstring + char
+        return newstring
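+# A minimal sketch of driving getChecksum(); the path is a placeholder, and
+# both a filename and an already-open file object are accepted:
+#
+#     csum = getChecksum('sha', '/path/to/some.rpm')
+#
+#     fo = open('/path/to/some.rpm', 'rb')
+#     csum = getChecksum('md5', fo)
+#     fo.close()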
+def byteranges(file):
+    """takes an rpm file or fileobject and returns byteranges for location of the header"""
+    opened_here = 0
+    if type(file) is not types.StringType:
+        fo = file
+    else:
+        opened_here = 1
+        fo = open(file, 'rb')
+    # seek past the 96 byte lead and the 8 byte magic/reserved block of the
+    # signature header
+    fo.seek(104)
+    # 104 bytes in
+    binindex = fo.read(4)
+    # 108 bytes in
+    (sigindex, ) = struct.unpack('>I', binindex)
+    bindata = fo.read(4)
+    # 112 bytes in
+    (sigdata, ) = struct.unpack('>I', bindata)
+    # each index entry is 4 32bit segments - so each is 16 bytes
+    sigindexsize = sigindex * 16
+    sigsize = sigdata + sigindexsize
+    # the signature is padded out to the next 8 byte boundary
+    disttoboundary = (sigsize % 8)
+    if disttoboundary != 0:
+        disttoboundary = 8 - disttoboundary
+    # 112 bytes = 96 byte lead + 8 bytes of magic/reserved + 8 bytes of sig
+    # header counts
+    hdrstart = 112 + sigsize + disttoboundary
+
+    fo.seek(hdrstart) # go to the start of the header
+    fo.seek(8,1) # read past the magic number and reserved bytes
+
+    binindex = fo.read(4)
+    (hdrindex, ) = struct.unpack('>I', binindex)
+    bindata = fo.read(4)
+    (hdrdata, ) = struct.unpack('>I', bindata)
+
+    # each index entry is 4 32bit segments - so each is 16 bytes
+    hdrindexsize = hdrindex * 16
+    # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
+    # end of the sig and the header.
+    hdrsize = hdrdata + hdrindexsize + 16
+
+    # header end is hdrstart + hdrsize
+    hdrend = hdrstart + hdrsize
+    if opened_here:
+        fo.close()
+        del fo
+    return (hdrstart, hdrend)
+
+
+class MDError(exceptions.Exception):
+    def __init__(self, args=None):
+        exceptions.Exception.__init__(self)
+        self.args = args
+
+
+class RpmMetaData:
+    """each drpm is one object, you pass it an rpm file
+       it opens the file, and pulls the information out in bite-sized chunks :)
+    """
+
+    mode_cache = {}
+
+    def __init__(self, ts, basedir, filename, options):
+        try:
+            stats = os.stat(os.path.join(basedir, filename))
+            self.size = stats[stat.ST_SIZE]
+            self.mtime = stats[stat.ST_MTIME]
+            del stats
+        except OSError, e:
+            raise MDError, "Error stat'ing file %s" % os.path.join(basedir, filename)
+        self.options = options
+        self.localurl = options['baseurl']
+        self.relativepath = filename
+        fd = returnFD(os.path.join(basedir, filename))
+        self.hdr = returnHdr(ts, fd)
+        os.lseek(fd, 0, 0)
+        fo = os.fdopen(fd, 'rb')
+        self.pkgid = self.doChecksumCache(fo)
+        fo.seek(0)
+        (self.rangestart, self.rangeend) = byteranges(fo)
+        fo.close()
+        del fo
+        del fd
+
+    def arch(self):
+        if self.tagByName('sourcepackage') == 1:
+            return 'src'
+        else:
+            return self.tagByName('arch')
+
+    def _correctVersion(self, vers):
+        """normalize vers (a string, a list of strings, or None) into a
+           list of (epoch, version, release) tuples"""
+        returnvers = []
+        if vers is None:
+            returnvers.append((None, None, None))
+            return returnvers
+
+        if type(vers) is not types.ListType:
+            vers = [vers]
+        for ver in vers:
+            if ver is not None:
+                returnvers.append(self._stringToVersion(ver))
+            else:
+                returnvers.append((None, None, None))
+        return returnvers
+
+    def _stringToVersion(self, strng):
+        """parse an [epoch:]version[-release] string into an (e, v, r) tuple"""
+        i = strng.find(':')
+        if i != -1:
+            epoch = strng[:i]
+        else:
+            # no explicit epoch defaults to 0; note that i is -1 here, so the
+            # strng[i + 1:] slices below still cover the whole string
+            epoch = '0'
+        j = strng.find('-')
+        if j != -1:
+            if strng[i + 1:j] == '':
+                version = None
+            else:
+                version = strng[i + 1:j]
+            release = strng[j + 1:]
+        else:
+            if strng[i + 1:] == '':
+                version = None
+            else:
+                version = strng[i + 1:]
+            release = None
+        return (epoch, version, release)
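+    # A few illustrative parses (inputs made up for this note):
+    #
+    #     '1:2.0-3' -> ('1', '2.0', '3')
+    #     '2.0-3'   -> ('0', '2.0', '3')    # missing epoch defaults to '0'
+    #     '2.0'     -> ('0', '2.0', None)   # no release present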
+    ###########
+    # Title: Remove duplicates from a sequence
+    # Submitter: Tim Peters
+    # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560
+
+    def _uniq(self,s):
+        """Return a list of the elements in s, but without duplicates.
+
+        For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+        unique("abcabc") some permutation of ["a", "b", "c"], and
+        unique(([1, 2], [2, 3], [1, 2])) some permutation of
+        [[2, 3], [1, 2]].
+
+        For best speed, all sequence elements should be hashable.  Then
+        unique() will usually work in linear time.
+
+        If not possible, the sequence elements should enjoy a total
+        ordering, and if list(s).sort() doesn't raise TypeError it's
+        assumed that they do enjoy a total ordering.  Then unique() will
+        usually work in O(N*log2(N)) time.
+
+        If that's not possible either, the sequence elements must support
+        equality-testing.  Then unique() will usually work in quadratic
+        time.
+        """
+
+        n = len(s)
+        if n == 0:
+            return []
+
+        # Try using a dict first, as that's the fastest and will usually
+        # work.  If it doesn't work, it will usually fail quickly, so it
+        # usually doesn't cost much to *try* it.  It requires that all the
+        # sequence elements be hashable, and support equality comparison.
+        u = {}
+        try:
+            for x in s:
+                u[x] = 1
+        except TypeError:
+            del u  # move on to the next method
+        else:
+            return u.keys()
+
+        # We can't hash all the elements.  Second fastest is to sort,
+        # which brings the equal elements together; then duplicates are
+        # easy to weed out in a single pass.
+        # NOTE:  Python's list.sort() was designed to be efficient in the
+        # presence of many duplicate elements.  This isn't true of all
+        # sort functions in all languages or libraries, so this approach
+        # is more effective in Python than it may be elsewhere.
+        try:
+            t = list(s)
+            t.sort()
+        except TypeError:
+            del t  # move on to the next method
+        else:
+            assert n > 0
+            last = t[0]
+            lasti = i = 1
+            while i < n:
+                if t[i] != last:
+                    t[lasti] = last = t[i]
+                    lasti += 1
+                i += 1
+            return t[:lasti]
+
+        # Brute force is all that's left.
+        u = []
+        for x in s:
+            if x not in u:
+                u.append(x)
+        return u
+
+    def tagByName(self, tag):
+        data = self.hdr[tag]
+        if type(data) is types.ListType:
+            if len(data) > 0:
+                return data[0]
+            else:
+                return ''
+        else:
+            return data
+
+    def listTagByName(self, tag):
+        """take a tag that should be a list and make sure it is one"""
+        lst = []
+        data = self.hdr[tag]
+        if data is None:
+            return lst
+
+        if type(data) is types.ListType:
+            lst.extend(data)
+        else:
+            lst.append(data)
+        return lst
+
+    def epoch(self):
+        if self.hdr['epoch'] is None:
+            return 0
+        else:
+            return self.tagByName('epoch')
+
+    def doChecksumCache(self, fo):
+        """return a checksum for a package:
+           - check if the checksum cache is enabled
+             if not - return the checksum
+             if so - check to see if it has a cache file
+               if so, open it and return the first line's contents
+               if not, grab the checksum and write it to a file for this pkg
+        """
+        if not self.options['cache']:
+            return getChecksum(self.options['sumtype'], fo)
+
+        csumtag = '%s-%s' % (self.hdr['name'], self.hdr[rpm.RPMTAG_SHA1HEADER])
+        csumfile = '%s/%s' % (self.options['cachedir'], csumtag)
+        # reuse the cached checksum only if it is at least as new as the pkg
+        if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[stat.ST_MTIME]:
+            csumo = open(csumfile, 'r')
+            checksum = csumo.readline()
+            csumo.close()
+        else:
+            checksum = getChecksum(self.options['sumtype'], fo)
+            csumo = open(csumfile, 'w')
+            csumo.write(checksum)
+            csumo.close()
+
+        return checksum
+
+
+def generateXML(doc, node, formatns, drpmObj, sumtype):
+    """takes an xml doc object and a package metadata entry node, populates a
+       package node with the md information"""
+    ns = node.ns()
+    pkgNode = node.newChild(None, "package", None)
+    pkgNode.newProp('type', 'rpm')
+    pkgNode.newChild(None, 'name', drpmObj.tagByName('name'))
+    pkgNode.newChild(None, 'arch', drpmObj.arch())
+    version = pkgNode.newChild(None, 'version', None)
+    version.newProp('epoch', str(drpmObj.epoch()))
+    version.newProp('ver', str(drpmObj.tagByName('version')))
+    version.newProp('rel', str(drpmObj.tagByName('release')))
+    return pkgNode
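+# A minimal sketch of driving generateXML() with the libxml2 bindings it
+# assumes; the RpmMetaData setup is elided and the names are illustrative:
+#
+#     import libxml2
+#     doc = libxml2.newDoc('1.0')
+#     root = doc.newChild(None, 'metadata', None)
+#     generateXML(doc, root, None, drpmObj, 'sha')  # drpmObj: an RpmMetaData
+#     print doc.serialize('UTF-8', 1)
+#     doc.freeDoc()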
+def repoXML(node, cmds):
+    """generate the repomd.xml file that stores the info on the other files"""
+    sumtype = cmds['sumtype']
+    workfiles = [(cmds['prestofile'], 'deltas')]
+
+    for (file, ftype) in workfiles:
+        # checksum of the uncompressed data, read back through gzip
+        zfo = _gzipOpen(os.path.join(cmds['outputdir'], cmds['tempdir'], file))
+        uncsum = getChecksum(sumtype, zfo)
+        zfo.close()
+        # checksum and mtime of the compressed file itself
+        csum = getChecksum(sumtype, os.path.join(cmds['outputdir'], cmds['tempdir'], file))
+        timestamp = os.stat(os.path.join(cmds['outputdir'], cmds['tempdir'], file))[stat.ST_MTIME]
+        data = node.newChild(None, 'data', None)
+        data.newProp('type', ftype)
+        location = data.newChild(None, 'location', None)
+        if cmds['baseurl'] is not None:
+            location.newProp('xml:base', cmds['baseurl'])
+        location.newProp('href', os.path.join(cmds['finaldir'], file))
+        checksum = data.newChild(None, 'checksum', csum)
+        checksum.newProp('type', sumtype)
+        data.newChild(None, 'timestamp', str(timestamp))
+        unchecksum = data.newChild(None, 'open-checksum', uncsum)
+        unchecksum.newProp('type', sumtype)
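+# A rough sketch of how repoXML() might be driven; every cmds key shown is
+# taken from the code above, while the paths and values are placeholders:
+#
+#     import libxml2
+#     doc = libxml2.newDoc('1.0')
+#     repomd = doc.newChild(None, 'repomd', None)
+#     cmds = {'sumtype': 'sha', 'prestofile': 'presto.xml.gz',
+#             'outputdir': '/srv/repo', 'tempdir': '.tmp',
+#             'finaldir': 'repodata', 'baseurl': None}
+#     repoXML(repomd, cmds)
+#     doc.saveFormatFileEnc('repomd.xml', 'UTF-8', 1)
+#     doc.freeDoc()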