diff options
Diffstat (limited to 'share-presto')
-rw-r--r-- | share-presto/deltarpm.py | 86 | ||||
-rw-r--r-- | share-presto/prestoRepo.py | 588 | ||||
-rw-r--r-- | share-presto/prestoTransaction.py | 65 | ||||
-rw-r--r-- | share-presto/prestomdparser.py | 167 |
4 files changed, 906 insertions, 0 deletions
# ---------------------------------------------------------------------------
# diff --git a/share-presto/deltarpm.py b/share-presto/deltarpm.py
# new file mode 100644   index 0000000..710a8bb
# ---------------------------------------------------------------------------
# author: Jonathan Dieter <jdieter@gmail.com>
#
# mostly taken from deltarpm.py created by
# Lars Herrmann <herrmann@redhat.com>
# and modified for Presto by
# Ahmed Kamal <email.ahmedkamal@googlemail.com>
#
# license: GPL (see COPYING file in distribution)
#
# this module provides a python wrapper around deltarpm tools written by suse
#
# TODO: catch exceptions wherever possible and raise useful ones ;)
# see TODO lines in methods

# Path of the applydeltarpm binary shipped with the deltarpm package.
APPLY='/usr/bin/applydeltarpm'

import popen2   # NOTE(review): popen2 and string.join are Python-2-only
import string
import os

class Process:
    """wrapper class to execute programs and return exitcode and output (stdout and stderr combined)"""
    def __init__(self, conduit):
        # conduit: yum plugin conduit, used only for logging via info().
        self.__stdout=None
        self.__returncode=None
        self.__command=None
        self.__args=None
        self.conduit = conduit

    def run(self, command, *args):
        # Execute 'command arg1 arg2 ...' through the shell; Popen4 merges
        # the child's stdout and stderr into a single stream.
        self.__command=command
        self.__args=args
        cmdline=command+" "+string.join(args, " ")
        self.conduit.info(7, '%s.%s: executing %s' % (self.__class__, 'run', cmdline))
        pipe = popen2.Popen4(cmdline)
        self.__stdout=pipe.fromchild.read()
        retcode = pipe.wait()
        # Decode the raw wait() status: take the real exit code on a normal
        # exit, otherwise keep the raw status (e.g. killed by a signal).
        if os.WIFEXITED(retcode):
            self.__returncode = os.WEXITSTATUS(retcode)
        else:
            self.__returncode = retcode
        # fallback to old implementation - works better ?
        #stdoutp = os.popen(cmdline,'r',1)
        #self.__stdout = stdoutp.read()
        #retcode = stdoutp.close()
        #if retcode is None:
        #    self.__returncode = 0
        #else:
        #    self.__returncode = retcode

    def getOutput(self):
        # Combined stdout/stderr of the last run(); None before any run.
        return self.__stdout

    def returnCode(self):
        # Exit status of the last run(); None before any run.
        return self.__returncode

class DeltaRpmWrapper:
    """wrapper around deltarpm binaries - implement methods for applying and verifying delta rpms
    - raises exceptions if exitcode of binaries was != 0"""

    def __init__(self, conduit):
        self.conduit = conduit
        self.conduit.info(7, '%s.%s: created' % (self.__class__, '__init__'))

    def apply(self, newrpmfile, deltarpmfile):
        """wraps execution of applydeltarpm [-r oldrpm] deltarpm newrpm -
        constructs file names and paths based on given RpmDescription and instance settings for directories"""
        # TODO: test args for type == instance and __class__ == RpmDescription
        self.conduit.info(7, '%s.apply(%s,%s)' % (self.__class__, newrpmfile, deltarpmfile))
        p=Process(self.conduit)
        # targetrpm filename
        p.run(APPLY, deltarpmfile, newrpmfile)
        if p.returnCode():
            # in case of error, raise exception
            raise Exception("Could not apply deltarpm: %d" % (p.returnCode()))
        # On success the reconstructed rpm path is handed back to the caller.
        return newrpmfile

    def verifySequence(self, sequence):
        """wraps execution of applydeltarpm [-r oldrpm] -s seqfilecontent -
        constructs file names and paths based on given RpmDescription and instance settings for directories"""
        # Returns None on success; raises when applydeltarpm exits non-zero.
        self.conduit.info(7, '%s.verify(%s)' % (self.__class__, sequence))
        p = Process(self.conduit)
        p.run(APPLY, '-s', sequence)
        if p.returnCode():
            # in case of error, raise exception
            raise Exception("Could not verify sequence of deltarpm: %d" % (p.returnCode()))

# ---------------------------------------------------------------------------
# diff --git a/share-presto/prestoRepo.py b/share-presto/prestoRepo.py
# new file mode 100644   index 0000000..3b62caa
# ---------------------------------------------------------------------------
# author: Jonathan Dieter <jdieter@gmail.com>
#
# mostly taken from yumRepo.py (part of yum) with a few minor
# modifications
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2005 Duke University

import os
import re
import time
import types
import urlparse

from yum import Errors
from urlgrabber.grabber import URLGrabber
import urlgrabber.mirror
from urlgrabber.grabber import URLGrabError
from yum.repos import Repository
from yum import repoMDObject
from yum import parser
from yum import config
from yum import misc

# NOTE(review): Python 2 module (urlparse, `raise E, msg` / `except E, e`
# syntax); it shadows yum's Repository machinery for the drpm side-channel.
class PrestoRepository(Repository):
    """
    This is an actual repository object

    Configuration attributes are pulled in from config.RepoConf.
    """

    def __init__(self, repo, conduit):
        # repo: the parent yum repository this presto repo mirrors;
        # conduit: plugin conduit used for logging.
        Repository.__init__(self, repo.id)

        # If there's a specific deltarpm url, use that
        is_different = False
        if repo.deltaurl != []:
            self.baseurl = repo.deltaurl
            is_different = True
            conduit.info(5, 'Manual url set: %s' % self.baseurl)
        else:
            self.baseurl = repo.baseurl

        # If there's a specific mirrorlist, use that
        if repo.deltamirrorlist != None:
            self.mirrorlist = repo.deltamirrorlist
            is_different = True
            conduit.info(5, 'Manual mirrorlist set: %s' % self.mirrorlist)
        else:
            self.mirrorlist = repo.mirrorlist

        self.conduit = conduit
        self.urls = []
        # is_different == True means the drpms live at their own
        # url/mirrorlist, so they carry their own metadata file and cookie.
        self.is_different = is_different
        if is_different:
            self.repoMDFile = 'repodata/prestomd.xml'
            self.metadata_cookie_fn = 'presto_cachecookie'
        else:
            self.repoMDFile = 'repodata/repomd.xml'
            self.metadata_cookie_fn = 'cachecookie'
        self.repoXML = None
        self.cache = 0
        self.mirrorlistparsed = 0
        self.yumvar = {} # empty dict of yumvariables for $string replacement
        self._proxy_dict = {}
        self.http_headers = {}

        # throw in some stubs for things that will be set by the config class
        self.basecachedir = ""
        self.cachedir = ""
        self.pkgdir = ""
        self.hdrdir = ""
        self.enabled = True

        # holder for stuff we've grabbed
        self.retrieved = { 'deltas':0 }

        # callbacks
        # Mirror the transfer settings of the parent repo so downloads of
        # drpms behave like downloads of ordinary packages.
        self.keepalive = repo.keepalive
        self.bandwidth = repo.bandwidth
        self.retries = repo.retries
        self.throttle = repo.throttle
        self.proxy = repo.proxy
        self.proxy_username = repo.proxy_username
        self.proxy_password = repo.proxy_password
        self.timeout = repo.timeout
        self.http_caching = repo.http_caching
        self.failovermethod = repo.failovermethod
        self.metadata_expire = repo.metadata_expire
        self.basecachedir = repo.basecachedir
        self.callback = repo.callback
        self.failure_obj = None
        self.mirror_failure_obj = None
        self.interrupt_callback = None
        self.drpm_list = {}
        # Link the two repo objects both ways; other presto code reaches this
        # object through repo.p_repo.
        self.parent = repo
        repo.p_repo = self


    def __getProxyDict(self):
        self.doProxyDict()
        if self._proxy_dict:
            return self._proxy_dict
        return None

    # consistent access to how proxy information should look (and ensuring
    # that it's actually determined for the repo)
    proxy_dict = property(__getProxyDict)

    def ready(self):
        """Returns true if this repository is setup and ready for use."""
        return self.repoXML is not None

    def __cmp__(self, other):
        # Order repositories by id (Python 2 rich-comparison fallback).
        if self.id > other.id:
            return 1
        elif self.id < other.id:
            return -1
        else:
            return 0

    def __str__(self):
        return self.id

    def _checksum(self, sumtype, file, CHUNK=2**16):
        """takes filename, hand back Checksum of it
           sumtype = md5 or sha
           filename = /path/to/file
           CHUNK=65536 by default"""
        try:
            return misc.checksum(sumtype, file, CHUNK)
        except (Errors.MiscError, EnvironmentError), e:
            raise Errors.RepoError, 'Error opening file for checksum: %s' % e

    def dump(self):
        # Human-readable .repo-style dump of this repo's settings.
        output = '[%s]\n' % self.id
        vars = ['name', 'bandwidth', 'enabled',
                'keepalive', 'proxy',
                'proxy_password', 'proxy_username',
                'retries', 'throttle', 'timeout', 'mirrorlist',
                'cachedir', 'deltasdir' ]
        vars.sort()
        for attr in vars:
            output = output + '%s = %s\n' % (attr, getattr(self, attr))
        output = output + 'baseurl ='
        for url in self.urls:
            output = output + ' %s\n' % url

        return output

    def check(self):
        """self-check the repo information - if we don't have enough to move
           on then raise a repo error"""
        if len(self.urls) < 1:
            raise Errors.RepoError, \
                'Cannot find a valid deltaurl for repo: %s' % self.id

    def doProxyDict(self):
        # Build self._proxy_dict (scheme -> proxy url) once, embedding
        # credentials into the proxy url when configured.
        if self._proxy_dict:
            return

        self._proxy_dict = {} # zap it
        proxy_string = None
        if self.proxy not in [None, '_none_']:
            proxy_string = '%s' % self.proxy
            if self.proxy_username is not None:
                proxy_parsed = urlparse.urlsplit(self.proxy, allow_fragments=0)
                proxy_proto = proxy_parsed[0]
                proxy_host = proxy_parsed[1]
                proxy_rest = proxy_parsed[2] + '?' + proxy_parsed[3]
                proxy_string = '%s://%s@%s%s' % (proxy_proto,
                        self.proxy_username, proxy_host, proxy_rest)

                if self.proxy_password is not None:
                    proxy_string = '%s://%s:%s@%s%s' % (proxy_proto,
                              self.proxy_username, self.proxy_password,
                              proxy_host, proxy_rest)

        if proxy_string is not None:
            self._proxy_dict['http'] = proxy_string
            self._proxy_dict['https'] = proxy_string
            self._proxy_dict['ftp'] = proxy_string

    def __headersListFromDict(self):
        """Convert our dict of headers to a list of 2-tuples for urlgrabber."""
        headers = []

        keys = self.http_headers.keys()
        for key in keys:
            headers.append((key, self.http_headers[key]))

        return headers

    def setupGrab(self):
        """sets up the grabber functions with the already stocked in urls for
           the mirror groups"""

        if self.failovermethod == 'roundrobin':
            mgclass = urlgrabber.mirror.MGRandomOrder
        else:
            mgclass = urlgrabber.mirror.MirrorGroup

        headers = tuple(self.__headersListFromDict())

        self.grabfunc = URLGrabber(keepalive=self.keepalive,
                                   bandwidth=self.bandwidth,
                                   retry=self.retries,
                                   throttle=self.throttle,
                                   progress_obj=self.callback,
                                   proxies = self.proxy_dict,
                                   failure_callback=self.failure_obj,
                                   interrupt_callback=self.interrupt_callback,
                                   timeout=self.timeout,
                                   http_headers=headers,
                                   reget='simple')


        self.grab = mgclass(self.grabfunc, self.urls,
                            failure_callback=self.mirror_failure_obj)

    def dirSetup(self):
        """make the necessary dirs, if possible, raise on failure"""

        # The drpm cache lives under the PARENT repo's cache directory.
        cachedir = os.path.join(self.parent.basecachedir, self.id)
        deltasdir = os.path.join(cachedir, 'deltas')
        self.parent.setAttribute('deltasdir', deltasdir)

        cookie = cachedir + '/' + self.metadata_cookie_fn
        self.setAttribute('metadata_cookie', cookie)

        for dir in [cachedir, self.parent.deltasdir]:
            if self.cache == 0:
                if os.path.exists(dir) and os.path.isdir(dir):
                    continue
                else:
                    try:
                        os.makedirs(dir, mode=0755)
                    except OSError, e:
                        raise Errors.RepoError, \
                            "Error making cache directory: %s error was: %s" % (dir, e)
            else:
                # cache-only mode: never create, just require presence
                if not os.path.exists(dir):
                    raise Errors.RepoError, \
                        "Cannot access repository dir %s" % dir

    def baseurlSetup(self):
        """go through the baseurls and mirrorlists and populate self.urls
           with valid ones, run self.check() at the end to make sure it worked"""

        goodurls = []
        if self.mirrorlist and not self.mirrorlistparsed:
            mirrorurls = getMirrorList(self.mirrorlist, self.proxy_dict)
            self.mirrorlistparsed = 1
            for url in mirrorurls:
                url = parser.varReplace(url, self.yumvar)
                self.baseurl.append(url)

        for url in self.baseurl:
            url = parser.varReplace(url, self.yumvar)
            (s,b,p,q,f,o) = urlparse.urlparse(url)
            if s not in ['http', 'ftp', 'file', 'https']:
                print 'not using ftp, http[s], or file for repos, skipping - %s' % (url)
                continue
            else:
                goodurls.append(url)

        self.setAttribute('urls', goodurls)
        self.check()
        self.setupGrab() # update the grabber for the urls

    def __get(self, url=None, relative=None, local=None, start=None, end=None,
            copy_local=0, checkfunc=None, text=None, reget='simple', cache=True):
        """retrieve file from the mirrorgroup for the repo
           relative to local, optionally get range from
           start to end, also optionally retrieve from a specific baseurl"""

        # if local or relative is None: raise an exception b/c that shouldn't happen
        # if url is not None - then do a grab from the complete url - not through
        # the mirror, raise errors as need be
        # if url is None do a grab via the mirror group/grab for the repo
        # return the path to the local file

        # Turn our dict into a list of 2-tuples
        headers = self.__headersListFromDict()

        # We will always prefer to send no-cache.
        if not (cache or self.http_headers.has_key('Pragma')):
            headers.append(('Pragma', 'no-cache'))

        headers = tuple(headers)

        if local is None or relative is None:
            raise Errors.RepoError, \
                  "get request for Repo %s, gave no source or dest" % self.id

        if self.cache == 1:
            if os.path.exists(local): # FIXME - we should figure out a way
                return local          # to run the checkfunc from here

            else: # ain't there - raise
                raise Errors.RepoError, \
                    "Caching enabled but no local cache of %s from %s" % (local,
                           self)

        if url is not None:
            # Explicit-url path: single grabber, no mirror failover.
            ug = URLGrabber(keepalive = self.keepalive,
                            bandwidth = self.bandwidth,
                            retry = self.retries,
                            throttle = self.throttle,
                            progress_obj = self.callback,
                            copy_local = copy_local,
                            reget = reget,
                            proxies = self.proxy_dict,
                            failure_callback = self.failure_obj,
                            interrupt_callback=self.interrupt_callback,
                            timeout=self.timeout,
                            checkfunc=checkfunc,
                            http_headers=headers,
                            )

            remote = url + '/' + relative

            try:
                result = ug.urlgrab(remote, local,
                                    text=text,
                                    range=(start, end),
                                    )
            except URLGrabError, e:
                raise Errors.RepoError, \
                    "failed to retrieve %s from %s\nerror was %s" % (relative, self.id, e)

        else:
            # Normal path: grab through the mirror group with failover.
            try:
                result = self.grab.urlgrab(relative, local,
                                           text = text,
                                           range = (start, end),
                                           copy_local=copy_local,
                                           reget = reget,
                                           checkfunc=checkfunc,
                                           http_headers=headers,
                                           )
            except URLGrabError, e:
                raise Errors.RepoError, "failure: %s from %s: %s" % (relative, self.id, e)

        return result


    def metadataCurrent(self):
        """Check if there is a metadata_cookie and check its age. If the
        age of the cookie is less than metadata_expire time then return true
        else return False"""

        val = False
        if os.path.exists(self.metadata_cookie):
            cookie_info = os.stat(self.metadata_cookie)
            # cookie_info[8] is st_mtime
            if cookie_info[8] + self.metadata_expire > time.time():
                val = True
            # WE ARE FROM THE FUTURE!!!!
            elif cookie_info[8] > time.time():
                val = False
        return val

    def setMetadataCookie(self):
        """if possible, set touch the metadata_cookie file"""

        check = self.metadata_cookie
        if not os.path.exists(self.metadata_cookie):
            check = self.cachedir

        if os.access(check, os.W_OK):
            fo = open(self.metadata_cookie, 'w+')
            fo.close()
            del fo


    def setup(self, cache):
        # Entry point: resolve urls, create cache dirs, then pull repomd.
        try:
            self.cache = cache
            self.baseurlSetup()
            self.dirSetup()
        except Errors.RepoError, e:
            raise

        try:
            self._loadRepoXML(text=self)
        except Errors.RepoError, e:
            raise Errors.RepoError, ('Cannot open/read %s file for repository: %s' % (self.repoMDFile, self))


    def _loadRepoXML(self, text=None):
        """retrieve/check/read in repomd.xml from the repository"""

        remote = self.repoMDFile
        if self.is_different:
            local = self.cachedir + '/prestomd.xml'
        else:
            local = self.cachedir + '/repomd.xml'

        if self.repoXML is not None:
            return

        if self.cache or self.metadataCurrent():
            if not os.path.exists(local):
                raise Errors.RepoError, 'Cannot find %s file for %s' % (self.repoMDFile, self)
            else:
                result = local
        else:
            checkfunc = (self._checkRepoXML, (), {})
            try:
                result = self.__get(relative=remote,
                                    local=local,
                                    copy_local=1,
                                    text=text,
                                    reget=None,
                                    checkfunc=checkfunc,
                                    cache=self.http_caching == 'all')


            except URLGrabError, e:
                raise Errors.RepoError, 'Error downloading file %s: %s' % (local, e)
            # if we have a 'fresh' repomd.xml then update the cookie
            self.setMetadataCookie()

        try:
            self.repoXML = repoMDObject.RepoMD(self.id, result)
        except Errors.RepoMDError, e:
            raise Errors.RepoError, 'Error importing %s from %s: %s' % (self.repoMDFile, self, e)

    def _checkRepoXML(self, fo):
        # urlgrabber checkfunc: fo may be a grabber file object or a path.
        if type(fo) is types.InstanceType:
            filepath = fo.filename
        else:
            filepath = fo

        try:
            repoMDObject.RepoMD(self.id, filepath)
        except Errors.RepoMDError, e:
            raise URLGrabError(-1, 'Error importing %s for %s: %s' % (self.repoMDFile, self, e))


    def checkMD(self, fn, mdtype):
        """check the metadata type against its checksum"""

        thisdata = self.repoXML.getData(mdtype)

        (r_ctype, r_csum) = thisdata.checksum # get the remote checksum

        if type(fn) == types.InstanceType: # this is an urlgrabber check
            file = fn.filename
        else:
            file = fn

        try:
            l_csum = self._checksum(r_ctype, file) # get the local checksum
        except Errors.RepoError, e:
            raise URLGrabError(-3, 'Error performing checksum')

        if l_csum == r_csum:
            return 1
        else:
            raise URLGrabError(-1, 'Metadata file does not match checksum')



    def retrieveMD(self, mdtype):
        """base function to retrieve metadata files from the remote url
           returns the path to the local metadata file of a 'mdtype'
           mdtype must be 'deltas'."""
        try:
            thisdata = self.repoXML.getData(mdtype)
        except Errors.RepoMDError:
            # Repo has no drpm metadata: disable presto for it, not an error.
            self.enabled = False
            self.conduit.info(5, "No drpms available for %s" % self.id)
            return

        (r_base, remote) = thisdata.location
        fname = os.path.basename(remote)
        local = self.cachedir + '/' + fname

        if self.retrieved.has_key(mdtype):
            if self.retrieved[mdtype]: # got it, move along
                return local

        if self.cache == 1:
            if os.path.exists(local):
                try:
                    self.checkMD(local, mdtype)
                except URLGrabError, e:
                    raise Errors.RepoError, \
                        "Caching enabled and local cache: %s does not match checksum" % local
                else:
                    return local

            else: # ain't there - raise
                raise Errors.RepoError, \
                    "Caching enabled but no local cache of %s from %s" % (local,
                           self)

        if os.path.exists(local):
            try:
                self.checkMD(local, mdtype)
            except URLGrabError, e:
                pass
            else:
                self.retrieved[mdtype] = 1
                return local # it's the same return the local one

        try:
            checkfunc = (self.checkMD, (mdtype,), {})
            local = self.__get(relative=remote, local=local, copy_local=1,
                             checkfunc=checkfunc, reget=None,
                             cache=self.http_caching == 'all')
        except URLGrabError, e:
            raise Errors.RepoError, \
                "Could not retrieve %s matching remote checksum from %s" % (local, self)
        else:
            self.retrieved[mdtype] = 1
            return local


    def getPrestoXML(self):
        """this gets you the path to the primary.xml file, retrieving it if we
           need a new one"""

        return self.retrieveMD('deltas')

    def setCallback(self, callback):
        self.callback = callback
        self.setupGrab()

    def setFailureObj(self, failure_obj):
        self.failure_obj = failure_obj
        self.setupGrab()

    def setMirrorFailureObj(self, failure_obj):
        self.mirror_failure_obj = failure_obj
        self.setupGrab()

    def setInterruptCallback(self, callback):
        self.interrupt_callback = callback
        self.setupGrab()

def getMirrorList(mirrorlist, pdict = None):
    """retrieve an up2date-style mirrorlist file from a url,
       we also s/$ARCH/$BASEARCH/ and move along
       returns a list of the urls from that file"""

    returnlist = []
    if hasattr(urlgrabber.grabber, 'urlopen'):
        urlresolver = urlgrabber.grabber
    else:
        import urllib
        urlresolver = urllib

    scheme = urlparse.urlparse(mirrorlist)[0]
    if scheme == '':
        url = 'file://' + mirrorlist
    else:
        url = mirrorlist

    try:
        fo = urlresolver.urlopen(url, proxies=pdict)
    except urlgrabber.grabber.URLGrabError, e:
        print "Could not retrieve mirrorlist %s error was\n%s" % (url, e)
        fo = None

    if fo is not None:
        content = fo.readlines()
        for line in content:
            # skip blank lines and comments
            if re.match('^\s*\#.*', line) or re.match('^\s*$', line):
                continue
            mirror = re.sub('\n$', '', line) # no more trailing \n's
            (mirror, count) = re.subn('\$ARCH', '$BASEARCH', mirror)
            returnlist.append(mirror)

    return returnlist

# ---------------------------------------------------------------------------
# diff --git a/share-presto/prestoTransaction.py b/share-presto/prestoTransaction.py
# new file mode 100644   index 0000000..5b7e3d4
# ---------------------------------------------------------------------------
# author: Jonathan Dieter <jdieter@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published
# by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2005 Duke University

import os
import deltarpm

def find_available_drpms(conduit, newpkg):
    """Find the best applicable deltarpm for newpkg, or None.

    conduit -- yum plugin conduit (logging + getRpmDB())
    newpkg  -- TransactionMember for the package about to be downloaded

    Looks up every installed version of newpkg in the rpmdb, matches
    (new, old) pairs against the repo's deltalist, verifies the drpm's
    sequence with applydeltarpm, and returns the smallest verified
    deltalist entry (a dict with at least 'size' and 'sequence'), or
    None when no usable drpm exists.
    """
    rpmdb = conduit.getRpmDB()

    # Saves us some typing
    p_repo = newpkg.po.repo.p_repo
    chosen_drpm = None

    if p_repo.enabled:
        # Don't try to download deltarpm if full rpm already exists
        if not os.path.exists(newpkg.po.localpath):
            # First part of key when matching drpms
            key1 = "%s*%s*%i*%s*%s" % (newpkg.name, newpkg.arch, int(newpkg.epoch), newpkg.version, newpkg.release)

            # Find any installed packages that match the ones we want to download
            installed = rpmdb.searchNevra(newpkg.name, None, None, None, newpkg.arch)

            for oldpkg in installed:
                # Generate second part of key for matching drpms, then full key
                key2 = "%s*%s*%i*%s*%s" % (oldpkg.name, oldpkg.arch, int(oldpkg.epoch), oldpkg.version, oldpkg.release)
                key = "%s!!%s" % (key1, key2)

                # Check whether we have a matching drpm
                if key in p_repo.deltalist:
                    # Keep the smallest of any competing candidate drpms
                    if chosen_drpm is None or p_repo.deltalist[key]['size'] < chosen_drpm['size']:

                        # Get sequence code for drpm
                        sequence = p_repo.deltalist[key]['sequence']
                        # epoch 0 is conventionally omitted from the sequence id
                        if int(oldpkg.epoch) == 0:
                            seq = "%s-%s-%s-%s" % (oldpkg.name, oldpkg.version, oldpkg.release, sequence)
                        else:
                            seq = "%s-%i:%s-%s-%s" % (oldpkg.name, int(oldpkg.epoch), oldpkg.version, oldpkg.release, sequence)
                        drpm = deltarpm.DeltaRpmWrapper(conduit)

                        # Attempt to apply sequence code for drpm. If this fails, drpm
                        # will not apply cleanly, so don't even try to download it.
                        # BUGFIX: was a bare `except:`, which also swallowed
                        # KeyboardInterrupt/SystemExit; Exception is enough to
                        # catch DeltaRpmWrapper's verification failure.
                        try:
                            drpm.verifySequence(seq)
                            chosen_drpm = p_repo.deltalist[key]
                        except Exception:
                            conduit.info(5, "Verification of %s failed" % seq)
    return chosen_drpm

# ---------------------------------------------------------------------------
# diff --git a/share-presto/prestomdparser.py b/share-presto/prestomdparser.py
# new file mode 100644   index 0000000..1713531
# ---------------------------------------------------------------------------
#!/usr/bin/python -t
#
# author: Jonathan Dieter <jdieter@gmail.com>
#
# mostly taken from mdparser.py (part of yum) with a few minor modifications
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2005 Duke University + +import gzip +from cElementTree import iterparse + +from cStringIO import StringIO + +#TODO: document everything here + +class PrestoMDParser: + + def __init__(self, filename): + + # Set up mapping of meta types to handler classes + handlers = { + '{http://linux.duke.edu/metadata/common}metadata': DeltasEntry, + } + + self.total = None + self.count = 0 + self._handlercls = None + + # Read in type, set package node handler and get total number of + # packages + if filename[-3:] == '.gz': fh = gzip.open(filename, 'r') + else: fh = open(filename, 'r') + parser = iterparse(fh, events=('start', 'end')) + self.reader = parser.__iter__() + event, elem = self.reader.next() + self._handlercls = handlers.get(elem.tag, None) + if not self._handlercls: + raise ValueError('Unknown repodata type "%s" in %s' % ( + elem.tag, filename)) + + def getDeltaList(self): + for event, elem in self.reader: + if event == 'end' and elem.tag == '{http://linux.duke.edu/metadata/common}metadata': + return self._handlercls(elem) + + +class BaseEntry: + def __init__(self, elem): + self._p = {} + + def __getitem__(self, k): + return self._p[k] + + def keys(self): + return self._p.keys() + + def values(self): + return self._p.values() + + def has_key(self, k): + return self._p.has_key(k) + + def __str__(self): + out = StringIO() + keys = self.keys() + keys.sort() + for k in keys: + line = u'%s=%s\n' % (k, self[k]) + out.write(line.encode('utf8')) + return out.getvalue() + + def _bn(self, qn): + if qn.find('}') == -1: return qn + return qn.split('}')[1] + + def _prefixprops(self, elem, prefix): + ret = {} + for key in elem.attrib.keys(): + ret[prefix + '_' + self._bn(key)] = elem.attrib[key] + return ret + +class DeltasEntry(BaseEntry): + def __init__(self, deltas): + BaseEntry.__init__(self, deltas) + # Avoid excess typing :) + p = self._p + + for elem in deltas: + temp = {} + key1 = "" + key2 = "" + for child in elem: + name = self._bn(child.tag) + if name in 
('name', 'arch'): + temp[name] = child.text + + elif name == 'version': + attrib = child.attrib + try: + attrib['epoch'] = int(attrib['epoch']) + except: + attrib['epoch'] = 0 + key1 = "%s*%s*%i*%s*%s" % (temp['name'], temp['arch'], attrib['epoch'], attrib['ver'], attrib['rel']) + + elif name == 'deltas': + for oldrpm in child: + temp2 = {} + value = {} + key = None + for oldrpm_child in oldrpm: + name = self._bn(oldrpm_child.tag) + if name in ('name', 'arch'): + temp2[name] = oldrpm_child.text + + elif name == 'version': + ch_attrib = oldrpm_child.attrib + try: + ch_attrib['epoch'] = int(ch_attrib['epoch']) + except: + ch_attrib['epoch'] = attrib['epoch'] + try: + ch_attrib['ver'] = ch_attrib['ver'] + except: + ch_attrib['ver'] = attrib['ver'] + if not temp2.has_key('name'): + temp2['name'] = temp['name'] + if not temp2.has_key('arch'): + temp2['arch'] = temp['arch'] + key2 = "%s*%s*%i*%s*%s" % (temp2['name'], temp2['arch'], ch_attrib['epoch'], ch_attrib['ver'], ch_attrib['rel']) + key = "%s!!%s" % (key1, key2) + p[key] = {} + + if name in ('sequence', 'drpm_filename', 'size'): + p[key][name] = oldrpm_child.text + + if name == "checksum": + p[key][name] = oldrpm_child.text + p[key]["%s_type" % name] = oldrpm_child.attrib['type'] + deltas.clear() + +def test(): + import sys + + parser = PrestoMDParser(sys.argv[1]) + + deltalist = parser.getDeltaList() + + print '-' * 40 + print deltalist + + print 'read: %s deltarpms ' % (len(deltalist.keys())) + +if __name__ == '__main__': + test() |