author | Jonathan Dieter <jdieter@gmail.com> | 2007-03-29 19:43:26 +0300 |
---|---|---|
committer | Jonathan Dieter <jdieter@gmail.com> | 2007-03-29 19:43:26 +0300 |
commit | 5ea857b0e948b687785b8e55e08866c6171fb715 (patch) | |
tree | b819860880b50d41f31ba627d0261dd64b9667b1 /yum-presto/shared/prestoRepo.py | |
parent | b1147b441a7f4873f39c5ce337d516b2cd1483eb (diff) | |
download | presto-5ea857b0e948b687785b8e55e08866c6171fb715.tar.gz presto-5ea857b0e948b687785b8e55e08866c6171fb715.tar.xz presto-5ea857b0e948b687785b8e55e08866c6171fb715.zip |
Split server and client
Signed-off-by: Jonathan Dieter <jdieter@gmail.com>
Diffstat (limited to 'yum-presto/shared/prestoRepo.py')
-rw-r--r-- | yum-presto/shared/prestoRepo.py | 612 |
1 file changed, 612 insertions, 0 deletions
diff --git a/yum-presto/shared/prestoRepo.py b/yum-presto/shared/prestoRepo.py
new file mode 100644
index 0000000..582dc2f
--- /dev/null
+++ b/yum-presto/shared/prestoRepo.py
@@ -0,0 +1,612 @@
+# author: Jonathan Dieter <jdieter@gmail.com>
+#
+# mostly taken from yumRepo.py (part of yum) with a few minor modifications
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2005 Duke University
+
+import os
+import re
+import time
+import types
+import urlparse
+
+from yum import Errors
+from urlgrabber.grabber import URLGrabber
+import urlgrabber.mirror
+from urlgrabber.grabber import URLGrabError
+from yum.repos import Repository
+from yum import repoMDObject
+from yum import parser
+from yum import config
+from yum import misc
+
+class PrestoRepository(Repository):
+    """
+    This is an actual repository object
+
+    Configuration attributes are pulled in from config.RepoConf.
+    """
+
+    def __init__(self, repo, conduit):
+        Repository.__init__(self, repo.id)
+
+        # If there's a specific deltarpm url, use that
+        is_different = False
+        if conduit.confString(repo.id, 'deltaurl'):
+            self.baseurl = [conduit.confString(repo.id, 'deltaurl')]
+            is_different = True
+            conduit.info(5, 'Manual url set from presto.conf: %s' % self.baseurl)
+        elif repo.deltaurl != []:
+            self.baseurl = repo.deltaurl
+            is_different = True
+            conduit.info(5, 'Manual url set from repository conf file: %s' % self.baseurl)
+        else:
+            self.baseurl = repo.baseurl
+
+        # If there's a specific mirrorlist, use that
+        if conduit.confString(repo.id, 'deltamirrorlist'):
+            self.mirrorlist = conduit.confString(repo.id, 'deltamirrorlist')
+            self.baseurl = None
+            is_different = True
+            conduit.info(5, 'Manual mirrorlist set from presto.conf: %s' % self.mirrorlist)
+        elif repo.deltamirrorlist != None:
+            self.mirrorlist = repo.deltamirrorlist
+            self.baseurl = None
+            is_different = True
+            conduit.info(5, 'Manual mirrorlist set from repository conf file: %s' % self.mirrorlist)
+        else:
+            if self.baseurl == repo.baseurl:
+                self.mirrorlist = repo.mirrorlist
+            else:
+                self.mirrorlist = None
+
+        self.conduit = conduit
+        self.urls = []
+        self.is_different = is_different
+        if is_different:
+            self.repoMDFile = 'repodata/prestomd.xml'
+            self.metadata_cookie_fn = 'presto_cachecookie'
+        else:
+            self.repoMDFile = 'repodata/repomd.xml'
+            self.metadata_cookie_fn = 'cachecookie'
+        self.repoXML = None
+        self.cache = 0
+        self.mirrorlistparsed = 0
+        self.yumvar = {} # empty dict of yumvariables for $string replacement
+        self._proxy_dict = {}
+        self.http_headers = {}
+
+        # throw in some stubs for things that will be set by the config class
+        self.basecachedir = ""
+        self.cachedir = ""
+        self.pkgdir = ""
+        self.hdrdir = ""
+        self.enabled = True
+
+        # holder for stuff we've grabbed
+        self.retrieved = { 'deltas':0 }
+
+        # callbacks
+        self.keepalive = repo.keepalive
+        self.bandwidth = repo.bandwidth
+        self.retries = repo.retries
+        self.throttle = repo.throttle
+        self.proxy = repo.proxy
+        self.proxy_username = repo.proxy_username
+        self.proxy_password = repo.proxy_password
+        self.timeout = repo.timeout
+        self.http_caching = repo.http_caching
+        self.failovermethod = repo.failovermethod
+        self.metadata_expire = repo.metadata_expire
+        self.basecachedir = repo.basecachedir
+        self.callback = repo.callback
+        self.failure_obj = repo.failure_obj
+        self.mirror_failure_obj = repo.mirror_failure_obj
+        self.interrupt_callback = repo.interrupt_callback
+        self.drpm_list = {}
+        self.parent = repo
+        repo.p_repo = self
+
+
+    def __getProxyDict(self):
+        self.doProxyDict()
+        if self._proxy_dict:
+            return self._proxy_dict
+        return None
+
+    # consistent access to how proxy information should look (and ensuring
+    # that it's actually determined for the repo)
+    proxy_dict = property(__getProxyDict)
+
+    def ready(self):
+        """Returns true if this repository is setup and ready for use."""
+        return self.repoXML is not None
+
+    def __cmp__(self, other):
+        if self.id > other.id:
+            return 1
+        elif self.id < other.id:
+            return -1
+        else:
+            return 0
+
+    def __str__(self):
+        return self.id
+
+    def _checksum(self, sumtype, file, CHUNK=2**16):
+        """takes filename, hand back Checksum of it
+           sumtype = md5 or sha
+           filename = /path/to/file
+           CHUNK=65536 by default"""
+        try:
+            return misc.checksum(sumtype, file, CHUNK)
+        except (Errors.MiscError, EnvironmentError), e:
+            raise Errors.RepoError, 'Error opening file for checksum: %s' % e
+
+    def dump(self):
+        output = '[%s]\n' % self.id
+        vars = ['id', 'bandwidth', 'enabled',
+                'keepalive', 'proxy',
+                'proxy_password', 'proxy_username',
+                'retries', 'throttle', 'timeout', 'mirrorlist',
+                'cachedir' ]
+        vars.sort()
+        for attr in vars:
+            output = output + '%s = %s\n' % (attr, getattr(self, attr))
+        output = output + 'baseurl ='
+        for url in self.urls:
+            output = output + ' %s\n' % url
+
+        return output
+
+    def check(self):
+        """self-check the repo information - if we don't have enough to move
+           on then raise a repo error"""
+        if len(self.urls) < 1:
+            raise Errors.RepoError, \
+                  'Cannot find a valid deltaurl for repo: %s' % self.id
+
+    def doProxyDict(self):
+        if self._proxy_dict:
+            return
+
+        self._proxy_dict = {} # zap it
+        proxy_string = None
+        if self.proxy not in [None, '_none_']:
+            proxy_string = '%s' % self.proxy
+            if self.proxy_username is not None:
+                proxy_parsed = urlparse.urlsplit(self.proxy, allow_fragments=0)
+                proxy_proto = proxy_parsed[0]
+                proxy_host = proxy_parsed[1]
+                proxy_rest = proxy_parsed[2] + '?' + proxy_parsed[3]
+                proxy_string = '%s://%s@%s%s' % (proxy_proto,
+                        self.proxy_username, proxy_host, proxy_rest)
+
+                if self.proxy_password is not None:
+                    proxy_string = '%s://%s:%s@%s%s' % (proxy_proto,
+                              self.proxy_username, self.proxy_password,
+                              proxy_host, proxy_rest)
+
+        if proxy_string is not None:
+            self._proxy_dict['http'] = proxy_string
+            self._proxy_dict['https'] = proxy_string
+            self._proxy_dict['ftp'] = proxy_string
+
+    def __headersListFromDict(self):
+        """Convert our dict of headers to a list of 2-tuples for urlgrabber."""
+        headers = []
+
+        keys = self.http_headers.keys()
+        for key in keys:
+            headers.append((key, self.http_headers[key]))
+
+        return headers
+
+    def setupGrab(self):
+        """sets up the grabber functions with the already stocked in urls for
+           the mirror groups"""
+
+        if self.failovermethod == 'roundrobin':
+            mgclass = urlgrabber.mirror.MGRandomOrder
+        else:
+            mgclass = urlgrabber.mirror.MirrorGroup
+
+        headers = tuple(self.__headersListFromDict())
+
+        self.grabfunc = URLGrabber(keepalive=self.keepalive,
+                                   bandwidth=self.bandwidth,
+                                   retry=self.retries,
+                                   throttle=self.throttle,
+                                   progress_obj=self.callback,
+                                   proxies = self.proxy_dict,
+                                   failure_callback=self.failure_obj,
+                                   interrupt_callback=self.interrupt_callback,
+                                   timeout=self.timeout,
+                                   http_headers=headers,
+                                   reget='simple')
+
+        self.grab = mgclass(self.grabfunc, self.urls,
+                            failure_callback=self.mirror_failure_obj)
+
+    def dirSetup(self):
+        """make the necessary dirs, if possible, raise on failure"""
+
+        cachedir = os.path.join(self.parent.basecachedir, self.id)
+        deltasdir = os.path.join(cachedir, 'deltas')
+        self.parent.setAttribute('deltasdir', deltasdir)
+        self.setAttribute('cachedir', cachedir)
+
+        cookie = cachedir + '/' + self.metadata_cookie_fn
+        self.setAttribute('metadata_cookie', cookie)
+
+        for dir in [cachedir, self.parent.deltasdir]:
+            if self.cache == 0:
+                if os.path.exists(dir) and os.path.isdir(dir):
+                    continue
+                else:
+                    try:
+                        os.makedirs(dir, mode=0755)
+                    except OSError, e:
+                        raise Errors.RepoError, \
+                            "Error making cache directory: %s error was: %s" % (dir, e)
+            else:
+                if not os.path.exists(dir):
+                    raise Errors.RepoError, \
+                        "Cannot access repository dir %s" % dir
+
+    def baseurlSetup(self):
+        """go through the baseurls and mirrorlists and populate self.urls
+           with valid ones, run self.check() at the end to make sure it worked"""
+
+        goodurls = []
+        if self.mirrorlist and not self.mirrorlistparsed:
+            mirrorurls = getMirrorList(self.mirrorlist, self.proxy_dict)
+            self.mirrorlistparsed = 1
+            for url in mirrorurls:
+                url = parser.varReplace(url, self.yumvar)
+                self.baseurl.append(url)
+
+        for url in self.baseurl:
+            url = parser.varReplace(url, self.yumvar)
+            (s,b,p,q,f,o) = urlparse.urlparse(url)
+            if s not in ['http', 'ftp', 'file', 'https']:
+                print 'not using ftp, http[s], or file for repos, skipping - %s' % (url)
+                continue
+            else:
+                goodurls.append(url)
+
+        self.setAttribute('urls', goodurls)
+        self.check()
+        self.setupGrab() # update the grabber for the urls
+
+    def __get(self, url=None, relative=None, local=None, start=None, end=None,
+            copy_local=0, checkfunc=None, text=None, reget='simple', cache=True):
+        """retrieve file from the mirrorgroup for the repo
+           relative to local, optionally get range from
+           start to end, also optionally retrieve from a specific baseurl"""
+
+        # if local or relative is None: raise an exception b/c that shouldn't happen
+        # if url is not None - then do a grab from the complete url - not through
+        # the mirror, raise errors as need be
+        # if url is None do a grab via the mirror group/grab for the repo
+        # return the path to the local file
+
+        # Turn our dict into a list of 2-tuples
+        headers = self.__headersListFromDict()
+
+        # We will always prefer to send no-cache.
+        if not (cache or self.http_headers.has_key('Pragma')):
+            headers.append(('Pragma', 'no-cache'))
+
+        headers = tuple(headers)
+
+        if local is None or relative is None:
+            raise Errors.RepoError, \
+                  "get request for Repo %s, gave no source or dest" % self.id
+
+        if self.cache == 1:
+            if os.path.exists(local): # FIXME - we should figure out a way
+                return local          # to run the checkfunc from here
+
+            else: # ain't there - raise
+                raise Errors.RepoError, \
+                    "Caching enabled but no local cache of %s from %s" % (local,
+                           self)
+        if url is not None:
+            ug = URLGrabber(keepalive = self.keepalive,
+                            bandwidth = self.bandwidth,
+                            retry = self.retries,
+                            throttle = self.throttle,
+                            progress_obj = self.callback,
+                            copy_local = copy_local,
+                            reget = reget,
+                            proxies = self.proxy_dict,
+                            failure_callback = self.failure_obj,
+                            interrupt_callback=self.interrupt_callback,
+                            timeout=self.timeout,
+                            checkfunc=checkfunc,
+                            http_headers=headers,
+                            )
+
+            remote = url + '/' + relative
+
+            try:
+                result = ug.urlgrab(remote, local,
+                                    text=text,
+                                    range=(start, end),
+                                    )
+            except URLGrabError, e:
+                raise Errors.RepoError, \
+                    "failed to retrieve %s from %s\nerror was %s" % (relative, self.id, e)
+
+        else:
+            try:
+                result = self.grab.urlgrab(relative, local,
+                                           text = text,
+                                           range = (start, end),
+                                           copy_local=copy_local,
+                                           reget = reget,
+                                           checkfunc=checkfunc,
+                                           http_headers=headers,
+                                           )
+            except URLGrabError, e:
+                raise Errors.RepoError, "failure: %s from %s: %s" % (relative, self.id, e)
+
+        return result
+
+    def getPackage(self, package, checkfunc = None, text = None, cache = True):
+        remote = package.returnSimple('relativepath')
+        local = package.localPkg()
+        basepath = package.returnSimple('basepath')
+
+        return self.__get(url=basepath,
+                          relative=remote,
+                          local=local,
+                          checkfunc=checkfunc,
+                          text=text,
+                          cache=cache
+                          )
+
+    def metadataCurrent(self):
+        """Check if there is a metadata_cookie and check its age. If the
+           age of the cookie is less than metadata_expire time then return true
+           else return False"""
+
+        val = False
+        if os.path.exists(self.metadata_cookie):
+            cookie_info = os.stat(self.metadata_cookie)
+            if cookie_info[8] + self.metadata_expire > time.time():
+                val = True
+            # WE ARE FROM THE FUTURE!!!!
+            elif cookie_info[8] > time.time():
+                val = False
+        return val
+
+    def setMetadataCookie(self):
+        """if possible, set touch the metadata_cookie file"""
+
+        check = self.metadata_cookie
+        if not os.path.exists(self.metadata_cookie):
+            check = self.cachedir
+
+        if os.access(check, os.W_OK):
+            fo = open(self.metadata_cookie, 'w+')
+            fo.close()
+            del fo
+
+
+    def setup(self, cache):
+        try:
+            self.cache = cache
+            self.baseurlSetup()
+            self.dirSetup()
+        except Errors.RepoError, e:
+            raise
+
+        try:
+            self._loadRepoXML(text=self)
+        except Errors.RepoError, e:
+            raise Errors.RepoError, ('Cannot open/read %s file for repository: %s' % (self.repoMDFile, self))
+
+
+    def _loadRepoXML(self, text=None):
+        """retrieve/check/read in repomd.xml from the repository"""
+
+        remote = self.repoMDFile
+        if self.is_different:
+            local = self.cachedir + '/prestomd.xml'
+        else:
+            local = self.cachedir + '/repomd.xml'
+
+        if self.repoXML is not None:
+            return
+
+        if self.cache or self.metadataCurrent():
+            if not os.path.exists(local):
+                raise Errors.RepoError, 'Cannot find %s file for %s' % (self.repoMDFile, self)
+            else:
+                result = local
+        else:
+            checkfunc = (self._checkRepoXML, (), {})
+            try:
+                result = self.__get(relative=remote,
+                                    local=local,
+                                    copy_local=1,
+                                    text=text,
+                                    reget=None,
+                                    checkfunc=checkfunc,
+                                    cache=self.http_caching == 'all')
+
+
+            except URLGrabError, e:
+                raise Errors.RepoError, 'Error downloading file %s: %s' % (local, e)
+            # if we have a 'fresh' repomd.xml then update the cookie
+            self.setMetadataCookie()
+
+        try:
+            self.repoXML = repoMDObject.RepoMD(self.id, result)
+        except Errors.RepoMDError, e:
+            raise Errors.RepoError, 'Error importing %s from %s: %s' % (self.repoMDFile, self, e)
+
+    def _checkRepoXML(self, fo):
+        if type(fo) is types.InstanceType:
+            filepath = fo.filename
+        else:
+            filepath = fo
+
+        try:
+            repoMDObject.RepoMD(self.id, filepath)
+        except Errors.RepoMDError, e:
+            raise URLGrabError(-1, 'Error importing %s for %s: %s' % (self.repoMDFile, self, e))
+
+
+    def checkMD(self, fn, mdtype):
+        """check the metadata type against its checksum"""
+
+        thisdata = self.repoXML.getData(mdtype)
+
+        (r_ctype, r_csum) = thisdata.checksum # get the remote checksum
+
+        if type(fn) == types.InstanceType: # this is an urlgrabber check
+            file = fn.filename
+        else:
+            file = fn
+
+        try:
+            l_csum = self._checksum(r_ctype, file) # get the local checksum
+        except Errors.RepoError, e:
+            raise URLGrabError(-3, 'Error performing checksum')
+
+        if l_csum == r_csum:
+            return 1
+        else:
+            raise URLGrabError(-1, 'Metadata file does not match checksum')
+
+
+
+    def retrieveMD(self, mdtype):
+        """base function to retrieve metadata files from the remote url
+           returns the path to the local metadata file of a 'mdtype'
+           mdtype must be 'deltas'."""
+        try:
+            thisdata = self.repoXML.getData(mdtype)
+        except Errors.RepoMDError:
+            self.enabled = False
+            self.conduit.info(5, "No drpms available for %s" % self.id)
+            return
+
+        (r_base, remote) = thisdata.location
+        fname = os.path.basename(remote)
+        local = self.cachedir + '/' + fname
+
+        if self.retrieved.has_key(mdtype):
+            if self.retrieved[mdtype]: # got it, move along
+                return local
+
+        if self.cache == 1:
+            if os.path.exists(local):
+                try:
+                    self.checkMD(local, mdtype)
+                except URLGrabError, e:
+                    raise Errors.RepoError, \
+                        "Caching enabled and local cache: %s does not match checksum" % local
+                else:
+                    return local
+
+            else: # ain't there - raise
+                raise Errors.RepoError, \
+                    "Caching enabled but no local cache of %s from %s" % (local,
+                           self)
+
+        if os.path.exists(local):
+            try:
+                self.checkMD(local, mdtype)
+            except URLGrabError, e:
+                pass
+            else:
+                self.retrieved[mdtype] = 1
+                return local # it's the same return the local one
+
+        try:
+            checkfunc = (self.checkMD, (mdtype,), {})
+            local = self.__get(relative=remote, local=local, copy_local=1,
+                               checkfunc=checkfunc, reget=None,
+                               cache=self.http_caching == 'all')
+        except URLGrabError, e:
+            raise Errors.RepoError, \
+                "Could not retrieve %s matching remote checksum from %s" % (local, self)
+        else:
+            self.retrieved[mdtype] = 1
+            return local
+
+
+    def getPrestoXML(self):
+        """this gets you the path to the primary.xml file, retrieving it if we
+           need a new one"""
+
+        return self.retrieveMD('deltas')
+
+    def setCallback(self, callback):
+        self.callback = callback
+        self.setupGrab()
+
+    def setFailureObj(self, failure_obj):
+        self.failure_obj = failure_obj
+        self.setupGrab()
+
+    def setMirrorFailureObj(self, failure_obj):
+        self.mirror_failure_obj = failure_obj
+        self.setupGrab()
+
+    def setInterruptCallback(self, callback):
+        self.interrupt_callback = callback
+        self.setupGrab()
+
+def getMirrorList(mirrorlist, pdict = None):
+    """retrieve an up2date-style mirrorlist file from a url,
+       we also s/$ARCH/$BASEARCH/ and move along
+       returns a list of the urls from that file"""
+
+    returnlist = []
+    if hasattr(urlgrabber.grabber, 'urlopen'):
+        urlresolver = urlgrabber.grabber
+    else:
+        import urllib
+        urlresolver = urllib
+
+    scheme = urlparse.urlparse(mirrorlist)[0]
+    if scheme == '':
+        url = 'file://' + mirrorlist
+    else:
+        url = mirrorlist
+
+    try:
+        fo = urlresolver.urlopen(url, proxies=pdict)
+    except urlgrabber.grabber.URLGrabError, e:
+        print "Could not retrieve mirrorlist %s error was\n%s" % (url, e)
+        fo = None
+
+    if fo is not None:
+        content = fo.readlines()
+        for line in content:
+            if re.match('^\s*\#.*', line) or re.match('^\s*$', line):
+                continue
+            mirror = re.sub('\n$', '', line) # no more trailing \n's
+            (mirror, count) = re.subn('\$ARCH', '$BASEARCH', mirror)
+            returnlist.append(mirror)
+
+    return returnlist
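
For orientation, here is a minimal sketch (not part of this commit) of how a yum plugin hook might wire this class up to each enabled repository. The hook name and conduit calls follow yum's plugin API conventions, but whether yum-presto's client does exactly this is an assumption:

```python
# Hypothetical usage sketch (Python 2, matching the code above); the hook
# name and conduit methods are yum plugin API conventions, not from this diff.
from prestoRepo import PrestoRepository

def postreposetup_hook(conduit):
    for repo in conduit.getRepos().listEnabled():
        # The constructor registers the delta repository on its parent
        # as repo.p_repo (see __init__ above).
        p_repo = PrestoRepository(repo, conduit)
        # setup() resolves urls/mirrorlists, creates cache dirs, and
        # downloads/validates prestomd.xml (or falls back to repomd.xml).
        p_repo.setup(conduit.getConf().cache)
        # retrieveMD('deltas') then fetches the metadata listing the drpms.
        p_repo.getPrestoXML()
```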