author | Jonathan Dieter <jdieter@gmail.com> | 2007-03-29 19:43:26 +0300 |
---|---|---|
committer | Jonathan Dieter <jdieter@gmail.com> | 2007-03-29 19:43:26 +0300 |
commit | 5ea857b0e948b687785b8e55e08866c6171fb715 (patch) | |
tree | b819860880b50d41f31ba627d0261dd64b9667b1 /yum-presto/shared/prestoRepo.py | |
parent | b1147b441a7f4873f39c5ce337d516b2cd1483eb (diff) | |
download | presto-5ea857b0e948b687785b8e55e08866c6171fb715.tar.gz presto-5ea857b0e948b687785b8e55e08866c6171fb715.tar.xz presto-5ea857b0e948b687785b8e55e08866c6171fb715.zip |
Split server and client
Signed-off-by: Jonathan Dieter <jdieter@gmail.com>
Diffstat (limited to 'yum-presto/shared/prestoRepo.py')
-rw-r--r-- | yum-presto/shared/prestoRepo.py | 612 |
1 file changed, 612 insertions, 0 deletions
diff --git a/yum-presto/shared/prestoRepo.py b/yum-presto/shared/prestoRepo.py
new file mode 100644
index 0000000..582dc2f
--- /dev/null
+++ b/yum-presto/shared/prestoRepo.py
@@ -0,0 +1,612 @@
+# author: Jonathan Dieter <jdieter@gmail.com>
+#
+# mostly taken from yumRepo.py (part of yum) with a few minor modifications
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2005 Duke University
+
+import os
+import re
+import time
+import types
+import urlparse
+
+from yum import Errors
+from urlgrabber.grabber import URLGrabber
+import urlgrabber.mirror
+from urlgrabber.grabber import URLGrabError
+from yum.repos import Repository
+from yum import repoMDObject
+from yum import parser
+from yum import config
+from yum import misc
+
+class PrestoRepository(Repository):
+    """
+    This is an actual repository object
+
+    Configuration attributes are pulled in from config.RepoConf.
+    """
+
+    def __init__(self, repo, conduit):
+        Repository.__init__(self, repo.id)
+
+        # If there's a specific deltarpm url, use that
+        is_different = False
+        if conduit.confString(repo.id, 'deltaurl'):
+            self.baseurl = [conduit.confString(repo.id, 'deltaurl')]
+            is_different = True
+            conduit.info(5, 'Manual url set from presto.conf: %s' % self.baseurl)
+        elif repo.deltaurl != []:
+            self.baseurl = repo.deltaurl
+            is_different = True
+            conduit.info(5, 'Manual url set from repository conf file: %s' % self.baseurl)
+        else:
+            self.baseurl = repo.baseurl
+
+        # If there's a specific mirrorlist, use that
+        if conduit.confString(repo.id, 'deltamirrorlist'):
+            self.mirrorlist = conduit.confString(repo.id, 'deltamirrorlist')
+            self.baseurl = None
+            is_different = True
+            conduit.info(5, 'Manual mirrorlist set from presto.conf: %s' % self.mirrorlist)
+        elif repo.deltamirrorlist != None:
+            self.mirrorlist = repo.deltamirrorlist
+            self.baseurl = None
+            is_different = True
+            conduit.info(5, 'Manual mirrorlist set from repository conf file: %s' % self.mirrorlist)
+        else:
+            if self.baseurl == repo.baseurl:
+                self.mirrorlist = repo.mirrorlist
+            else:
+                self.mirrorlist = None
+
+        self.conduit = conduit
+        self.urls = []
+        self.is_different = is_different
+        if is_different:
+            self.repoMDFile = 'repodata/prestomd.xml'
+            self.metadata_cookie_fn = 'presto_cachecookie'
+        else:
+            self.repoMDFile = 'repodata/repomd.xml'
+            self.metadata_cookie_fn = 'cachecookie'
+        self.repoXML = None
+        self.cache = 0
+        self.mirrorlistparsed = 0
+        self.yumvar = {} # empty dict of yumvariables for $string replacement
+        self._proxy_dict = {}
+        self.http_headers = {}
+
+        # throw in some stubs for things that will be set by the config class
+        self.basecachedir = ""
+        self.cachedir = ""
+        self.pkgdir = ""
+        self.hdrdir = ""
+        self.enabled = True
+
+        # holder for stuff we've grabbed
+        self.retrieved = { 'deltas':0 }
+
+        # callbacks
+        self.keepalive = repo.keepalive
+        self.bandwidth = repo.bandwidth
+        self.retries = repo.retries
+        self.throttle = repo.throttle
+        self.proxy = repo.proxy
+        self.proxy_username = repo.proxy_username
+        self.proxy_password = repo.proxy_password
+        self.timeout = repo.timeout
+        self.http_caching = repo.http_caching
+        self.failovermethod = repo.failovermethod
+        self.metadata_expire = repo.metadata_expire
+        self.basecachedir = repo.basecachedir
+        self.callback = repo.callback
+        self.failure_obj = repo.failure_obj
+        self.mirror_failure_obj = repo.mirror_failure_obj
+        self.interrupt_callback = repo.interrupt_callback
+        self.drpm_list = {}
+        self.parent = repo
+        repo.p_repo = self
+
+
+    def __getProxyDict(self):
+        self.doProxyDict()
+        if self._proxy_dict:
+            return self._proxy_dict
+        return None
+
+    # consistent access to how proxy information should look (and ensuring
+    # that it's actually determined for the repo)
+    proxy_dict = property(__getProxyDict)
+
+    def ready(self):
+        """Returns true if this repository is setup and ready for use."""
+        return self.repoXML is not None
+
+    def __cmp__(self, other):
+        if self.id > other.id:
+            return 1
+        elif self.id < other.id:
+            return -1
+        else:
+            return 0
+
+    def __str__(self):
+        return self.id
+
+    def _checksum(self, sumtype, file, CHUNK=2**16):
+        """takes filename, hand back Checksum of it
+           sumtype = md5 or sha
+           filename = /path/to/file
+           CHUNK=65536 by default"""
+        try:
+            return misc.checksum(sumtype, file, CHUNK)
+        except (Errors.MiscError, EnvironmentError), e:
+            raise Errors.RepoError, 'Error opening file for checksum: %s' % e
+
+    def dump(self):
+        output = '[%s]\n' % self.id
+        vars = ['id', 'bandwidth', 'enabled',
+                'keepalive', 'proxy',
+                'proxy_password', 'proxy_username',
+                'retries', 'throttle', 'timeout', 'mirrorlist',
+                'cachedir' ]
+        vars.sort()
+        for attr in vars:
+            output = output + '%s = %s\n' % (attr, getattr(self, attr))
+        output = output + 'baseurl ='
+        for url in self.urls:
+            output = output + ' %s\n' % url
+
+        return output
+
+    def check(self):
+        """self-check the repo information - if we don't have enough to move
+           on then raise a repo error"""
+        if len(self.urls) < 1:
+            raise Errors.RepoError, \
+                  'Cannot find a valid deltaurl for repo: %s' % self.id
+
+    def doProxyDict(self):
+        if self._proxy_dict:
+            return
+
+        self._proxy_dict = {} # zap it
+        proxy_string = None
+        if self.proxy not in [None, '_none_']:
+            proxy_string = '%s' % self.proxy
+            if self.proxy_username is not None:
+                proxy_parsed = urlparse.urlsplit(self.proxy, allow_fragments=0)
+                proxy_proto = proxy_parsed[0]
+                proxy_host = proxy_parsed[1]
+                proxy_rest = proxy_parsed[2] + '?' + proxy_parsed[3]
+                proxy_string = '%s://%s@%s%s' % (proxy_proto,
+                        self.proxy_username, proxy_host, proxy_rest)
+
+                if self.proxy_password is not None:
+                    proxy_string = '%s://%s:%s@%s%s' % (proxy_proto,
+                              self.proxy_username, self.proxy_password,
+                              proxy_host, proxy_rest)
+
+        if proxy_string is not None:
+            self._proxy_dict['http'] = proxy_string
+            self._proxy_dict['https'] = proxy_string
+            self._proxy_dict['ftp'] = proxy_string
+
+    def __headersListFromDict(self):
+        """Convert our dict of headers to a list of 2-tuples for urlgrabber."""
+        headers = []
+
+        keys = self.http_headers.keys()
+        for key in keys:
+            headers.append((key, self.http_headers[key]))
+
+        return headers
+
+    def setupGrab(self):
+        """sets up the grabber functions with the already stocked in urls for
+           the mirror groups"""
+
+        if self.failovermethod == 'roundrobin':
+            mgclass = urlgrabber.mirror.MGRandomOrder
+        else:
+            mgclass = urlgrabber.mirror.MirrorGroup
+
+        headers = tuple(self.__headersListFromDict())
+
+        self.grabfunc = URLGrabber(keepalive=self.keepalive,
+                                   bandwidth=self.bandwidth,
+                                   retry=self.retries,
+                                   throttle=self.throttle,
+                                   progress_obj=self.callback,
+                                   proxies = self.proxy_dict,
+                                   failure_callback=self.failure_obj,
+                                   interrupt_callback=self.interrupt_callback,
+                                   timeout=self.timeout,
+                                   http_headers=headers,
+                                   reget='simple')
+
+        self.grab = mgclass(self.grabfunc, self.urls,
+                            failure_callback=self.mirror_failure_obj)
+
+    def dirSetup(self):
+        """make the necessary dirs, if possible, raise on failure"""
+
+        cachedir = os.path.join(self.parent.basecachedir, self.id)
+        deltasdir = os.path.join(cachedir, 'deltas')
+        self.parent.setAttribute('deltasdir', deltasdir)
+        self.setAttribute('cachedir', cachedir)
+
+        cookie = cachedir + '/' + self.metadata_cookie_fn
+        self.setAttribute('metadata_cookie', cookie)
+
+        for dir in [cachedir, self.parent.deltasdir]:
+            if self.cache == 0:
+                if os.path.exists(dir) and os.path.isdir(dir):
+                    continue
+                else:
+                    try:
+                        os.makedirs(dir, mode=0755)
+                    except OSError, e:
+                        raise Errors.RepoError, \
+                            "Error making cache directory: %s error was: %s" % (dir, e)
+            else:
+                if not os.path.exists(dir):
+                    raise Errors.RepoError, \
+                        "Cannot access repository dir %s" % dir
+
+    def baseurlSetup(self):
+        """go through the baseurls and mirrorlists and populate self.urls
+           with valid ones, run self.check() at the end to make sure it worked"""
+
+        goodurls = []
+        if self.mirrorlist and not self.mirrorlistparsed:
+            mirrorurls = getMirrorList(self.mirrorlist, self.proxy_dict)
+            self.mirrorlistparsed = 1
+            for url in mirrorurls:
+                url = parser.varReplace(url, self.yumvar)
+                self.baseurl.append(url)
+
+        for url in self.baseurl:
+            url = parser.varReplace(url, self.yumvar)
+            (s,b,p,q,f,o) = urlparse.urlparse(url)
+            if s not in ['http', 'ftp', 'file', 'https']:
+                print 'not using ftp, http[s], or file for repos, skipping - %s' % (url)
+                continue
+            else:
+                goodurls.append(url)
+
+        self.setAttribute('urls', goodurls)
+        self.check()
+        self.setupGrab() # update the grabber for the urls
+
+    def __get(self, url=None, relative=None, local=None, start=None, end=None,
+            copy_local=0, checkfunc=None, text=None, reget='simple', cache=True):
+        """retrieve file from the mirrorgroup for the repo
+           relative to local, optionally get range from
+           start to end, also optionally retrieve from a specific baseurl"""
+
+        # if local or relative is None: raise an exception b/c that shouldn't happen
+        # if url is not None - then do a grab from the complete url - not through
+        # the mirror, raise errors as need be
+        # if url is None do a grab via the mirror group/grab for the repo
+        # return the path to the local file
+
+        # Turn our dict into a list of 2-tuples
+        headers = self.__headersListFromDict()
+
+        # We will always prefer to send no-cache.
+        if not (cache or self.http_headers.has_key('Pragma')):
+            headers.append(('Pragma', 'no-cache'))
+
+        headers = tuple(headers)
+
+        if local is None or relative is None:
+            raise Errors.RepoError, \
+                  "get request for Repo %s, gave no source or dest" % self.id
+
+        if self.cache == 1:
+            if os.path.exists(local): # FIXME - we should figure out a way
+                return local          # to run the checkfunc from here
+
+            else: # ain't there - raise
+                raise Errors.RepoError, \
+                    "Caching enabled but no local cache of %s from %s" % (local,
+                           self)
+        if url is not None:
+            ug = URLGrabber(keepalive = self.keepalive,
+                            bandwidth = self.bandwidth,
+                            retry = self.retries,
+                            throttle = self.throttle,
+                            progress_obj = self.callback,
+                            copy_local = copy_local,
+                            reget = reget,
+                            proxies = self.proxy_dict,
+                            failure_callback = self.failure_obj,
+                            interrupt_callback=self.interrupt_callback,
+                            timeout=self.timeout,
+                            checkfunc=checkfunc,
+                            http_headers=headers,
+                            )
+
+            remote = url + '/' + relative
+
+            try:
+                result = ug.urlgrab(remote, local,
+                                    text=text,
+                                    range=(start, end),
+                                    )
+            except URLGrabError, e:
+                raise Errors.RepoError, \
+                    "failed to retrieve %s from %s\nerror was %s" % (relative, self.id, e)
+
+        else:
+            try:
+                result = self.grab.urlgrab(relative, local,
+                                           text = text,
+                                           range = (start, end),
+                                           copy_local=copy_local,
+                                           reget = reget,
+                                           checkfunc=checkfunc,
+                                           http_headers=headers,
+                                           )
+            except URLGrabError, e:
+                raise Errors.RepoError, "failure: %s from %s: %s" % (relative, self.id, e)
+
+        return result
+
+    def getPackage(self, package, checkfunc = None, text = None, cache = True):
+        remote = package.returnSimple('relativepath')
+        local = package.localPkg()
+        basepath = package.returnSimple('basepath')
+
+        return self.__get(url=basepath,
+                          relative=remote,
+                          local=local,
+                          checkfunc=checkfunc,
+                          text=text,
+                          cache=cache
+                          )
+
+    def metadataCurrent(self):
+        """Check if there is a metadata_cookie and check its age. If the
+           age of the cookie is less than metadata_expire time then return true
+           else return False"""
+
+        val = False
+        if os.path.exists(self.metadata_cookie):
+            cookie_info = os.stat(self.metadata_cookie)
+            if cookie_info[8] + self.metadata_expire > time.time():
+                val = True
+            # WE ARE FROM THE FUTURE!!!!
+            elif cookie_info[8] > time.time():
+                val = False
+        return val
+
+    def setMetadataCookie(self):
+        """if possible, set touch the metadata_cookie file"""
+
+        check = self.metadata_cookie
+        if not os.path.exists(self.metadata_cookie):
+            check = self.cachedir
+
+        if os.access(check, os.W_OK):
+            fo = open(self.metadata_cookie, 'w+')
+            fo.close()
+            del fo
+
+
+    def setup(self, cache):
+        try:
+            self.cache = cache
+            self.baseurlSetup()
+            self.dirSetup()
+        except Errors.RepoError, e:
+            raise
+
+        try:
+            self._loadRepoXML(text=self)
+        except Errors.RepoError, e:
+            raise Errors.RepoError, ('Cannot open/read %s file for repository: %s' % (self.repoMDFile, self))
+
+
+    def _loadRepoXML(self, text=None):
+        """retrieve/check/read in repomd.xml from the repository"""
+
+        remote = self.repoMDFile
+        if self.is_different:
+            local = self.cachedir + '/prestomd.xml'
+        else:
+            local = self.cachedir + '/repomd.xml'
+
+        if self.repoXML is not None:
+            return
+
+        if self.cache or self.metadataCurrent():
+            if not os.path.exists(local):
+                raise Errors.RepoError, 'Cannot find %s file for %s' % (self.repoMDFile, self)
+            else:
+                result = local
+        else:
+            checkfunc = (self._checkRepoXML, (), {})
+            try:
+                result = self.__get(relative=remote,
+                                    local=local,
+                                    copy_local=1,
+                                    text=text,
+                                    reget=None,
+                                    checkfunc=checkfunc,
+                                    cache=self.http_caching == 'all')
+
+
+            except URLGrabError, e:
+                raise Errors.RepoError, 'Error downloading file %s: %s' % (local, e)
+            # if we have a 'fresh' repomd.xml then update the cookie
+            self.setMetadataCookie()
+
+        try:
+            self.repoXML = repoMDObject.RepoMD(self.id, result)
+        except Errors.RepoMDError, e:
+            raise Errors.RepoError, 'Error importing %s from %s: %s' % (self.repoMDFile, self, e)
+
+    def _checkRepoXML(self, fo):
+        if type(fo) is types.InstanceType:
+            filepath = fo.filename
+        else:
+            filepath = fo
+
+        try:
+            repoMDObject.RepoMD(self.id, filepath)
+        except Errors.RepoMDError, e:
+            raise URLGrabError(-1, 'Error importing %s for %s: %s' % (self.repoMDFile, self, e))
+
+
+    def checkMD(self, fn, mdtype):
+        """check the metadata type against its checksum"""
+
+        thisdata = self.repoXML.getData(mdtype)
+
+        (r_ctype, r_csum) = thisdata.checksum # get the remote checksum
+
+        if type(fn) == types.InstanceType: # this is an urlgrabber check
+            file = fn.filename
+        else:
+            file = fn
+
+        try:
+            l_csum = self._checksum(r_ctype, file) # get the local checksum
+        except Errors.RepoError, e:
+            raise URLGrabError(-3, 'Error performing checksum')
+
+        if l_csum == r_csum:
+            return 1
+        else:
+            raise URLGrabError(-1, 'Metadata file does not match checksum')
+
+
+
+    def retrieveMD(self, mdtype):
+        """base function to retrieve metadata files from the remote url
+           returns the path to the local metadata file of a 'mdtype'
+           mdtype must be 'deltas'."""
+        try:
+            thisdata = self.repoXML.getData(mdtype)
+        except Errors.RepoMDError:
+            self.enabled = False
+            self.conduit.info(5, "No drpms available for %s" % self.id)
+            return
+
+        (r_base, remote) = thisdata.location
+        fname = os.path.basename(remote)
+        local = self.cachedir + '/' + fname
+
+        if self.retrieved.has_key(mdtype):
+            if self.retrieved[mdtype]: # got it, move along
+                return local
+
+        if self.cache == 1:
+            if os.path.exists(local):
+                try:
+                    self.checkMD(local, mdtype)
+                except URLGrabError, e:
+                    raise Errors.RepoError, \
+                        "Caching enabled and local cache: %s does not match checksum" % local
+                else:
+                    return local
+
+            else: # ain't there - raise
+                raise Errors.RepoError, \
+                    "Caching enabled but no local cache of %s from %s" % (local,
+                           self)
+
+        if os.path.exists(local):
+            try:
+                self.checkMD(local, mdtype)
+            except URLGrabError, e:
+                pass
+            else:
+                self.retrieved[mdtype] = 1
+                return local # it's the same return the local one
+
+        try:
+            checkfunc = (self.checkMD, (mdtype,), {})
+            local = self.__get(relative=remote, local=local, copy_local=1,
+                               checkfunc=checkfunc, reget=None,
+                               cache=self.http_caching == 'all')
+        except URLGrabError, e:
+            raise Errors.RepoError, \
+                "Could not retrieve %s matching remote checksum from %s" % (local, self)
+        else:
+            self.retrieved[mdtype] = 1
+            return local
+
+
+    def getPrestoXML(self):
+        """this gets you the path to the primary.xml file, retrieving it if we
+           need a new one"""
+
+        return self.retrieveMD('deltas')
+
+    def setCallback(self, callback):
+        self.callback = callback
+        self.setupGrab()
+
+    def setFailureObj(self, failure_obj):
+        self.failure_obj = failure_obj
+        self.setupGrab()
+
+    def setMirrorFailureObj(self, failure_obj):
+        self.mirror_failure_obj = failure_obj
+        self.setupGrab()
+
+    def setInterruptCallback(self, callback):
+        self.interrupt_callback = callback
+        self.setupGrab()
+
+def getMirrorList(mirrorlist, pdict = None):
+    """retrieve an up2date-style mirrorlist file from a url,
+       we also s/$ARCH/$BASEARCH/ and move along
+       returns a list of the urls from that file"""
+
+    returnlist = []
+    if hasattr(urlgrabber.grabber, 'urlopen'):
+        urlresolver = urlgrabber.grabber
+    else:
+        import urllib
+        urlresolver = urllib
+
+    scheme = urlparse.urlparse(mirrorlist)[0]
+    if scheme == '':
+        url = 'file://' + mirrorlist
+    else:
+        url = mirrorlist
+
+    try:
+        fo = urlresolver.urlopen(url, proxies=pdict)
+    except urlgrabber.grabber.URLGrabError, e:
+        print "Could not retrieve mirrorlist %s error was\n%s" % (url, e)
+        fo = None
+
+    if fo is not None:
+        content = fo.readlines()
+        for line in content:
+            if re.match('^\s*\#.*', line) or re.match('^\s*$', line):
+                continue
+            mirror = re.sub('\n$', '', line) # no more trailing \n's
+            (mirror, count) = re.subn('\$ARCH', '$BASEARCH', mirror)
+            returnlist.append(mirror)
+
+    return returnlist
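
For orientation, here is a minimal sketch (not part of this commit) of how a yum plugin hook might wire this class up to each enabled repository. The hook name and conduit calls follow yum's plugin API conventions, but whether yum-presto's client does exactly this is an assumption:

```python
# Hypothetical usage sketch (Python 2, matching the code above); the hook
# name and conduit methods are yum plugin API conventions, not from this diff.
from prestoRepo import PrestoRepository

def postreposetup_hook(conduit):
    for repo in conduit.getRepos().listEnabled():
        # The constructor registers the delta repository on its parent
        # as repo.p_repo (see __init__ above).
        p_repo = PrestoRepository(repo, conduit)
        # setup() resolves urls/mirrorlists, creates cache dirs, and
        # downloads/validates prestomd.xml (or falls back to repomd.xml).
        p_repo.setup(conduit.getConf().cache)
        # retrieveMD('deltas') then fetches the metadata listing the drpms.
        p_repo.getPrestoXML()
```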