Whee - rework repofs.py to allow for multiple repos without collision

author: Will Woods <wwoods@redhat.com> 2009-02-03 18:48:57 -0500
committer: Will Woods <wwoods@redhat.com> 2009-02-03 18:48:57 -0500
commit: f4ab050f507d284e88197249262f6271cb210bb4 (patch)
tree: 13f29d33ce8e528b0166f6fa3413cbf32aba90c2
parent: c886a23f260cde6c9f43f5209a8aa1e4ca594717 (diff)
download: debuginfofs-f4ab050f507d284e88197249262f6271cb210bb4.tar.gz
debuginfofs-f4ab050f507d284e88197249262f6271cb210bb4.tar.xz
debuginfofs-f4ab050f507d284e88197249262f6271cb210bb4.zip
1 files changed, 131 insertions, 89 deletions
diff --git a/repofs.py b/repofs.py
index 1656ab2..e9698be 100755
--- a/repofs.py
+++ b/repofs.py
@@ -4,6 +4,10 @@
 # Copyright 2009 Red Hat, Inc. GPLv2+ BOILERPLATE GOES HERE.
 #
 # Author: Will Woods <wwoods@redhat.com>
+#
+# Exports the contents of the repo(s) in a filesystem that looks like this:
+# $PACKAGE_UID/[package contents]
+# where $PACKAGE_UID is some unique package identifier (e.g. ENVRA in RPM)
 
 import os
 import glob
@@ -23,6 +27,9 @@ import bz2, gzip
 
 import yum.repoMDObject
 
+FILEMODE = S_IRUSR|S_IRGRP|S_IROTH
+DIRMODE = FILEMODE|S_IFDIR|S_IXUSR|S_IXGRP|S_IXOTH
+
 def bunzip(infile,outfile):
     (p,f) = os.path.split(outfile)
     if not os.path.isdir(p):
@@ -49,12 +56,20 @@ def fuseclonestat(stat):
             s.st_atime, s.st_mtime, s.st_ctime) = stat
     return s
 
+def leading_paths(dir):
+    dirlist = []
+    while dir:
+        dirlist.append(dir)
+        if dir == '/':
+            break
+        (dir, dummy) = os.path.split(dir)
+    return dirlist
+      
 class SimpleYumRepo(object):
     def __init__(self, path=None, cachedir=None):
         self.path = path
         self.cachedir = cachedir
-        self._attrcache = {}
-        self._linkcache = {}
+        self.pkgkey = {}
         if path:
             self.parse_repomd()
 
@@ -75,28 +90,45 @@ class SimpleYumRepo(object):
                 # TODO: elif .gz, else...
                 con = sqlite.connect(dbfile)
                 setattr(self,t,con)
+        for p in self.package_uids():
+            pass
+
+    # This is kind of clumsy - the pkgkey cache must be filled before
+    # files_for_package() can be used. Maybe this should populate the cache
+    # directly, and package_uids() could then just return pkgkey.keys().
+    def package_uids(self):
+        '''Generator for package UIDs in this repo'''
+        c = self.primary_db.cursor()
+        c.execute("SELECT epoch, name, version, release, arch, pkgKey FROM packages")
+        for (e,n,v,r,a,key) in c:
+            nevra = "%s-%s:%s-%s.%s" % (n,e,v,r,a)
+            self.pkgkey[nevra] = key
+            yield nevra
 
-    def packages_for_file(self, path):
-        '''Return a (possibly-empty) list of RPMs containing the file with the
-        given path.'''
-        keys = []
-        (dirname,filename) = os.path.split(path)
+    def files_for_package(self, packageuid):
+        '''Generator for files in the given packageuid.
+        Each item is a tuple of the form (abspath,type) where type is
+        'd' or 'f' (dir/file).'''
         c = self.filelists_db.cursor()
-        c.execute("SELECT pkgKey,filenames FROM filelist "
-                  "WHERE dirname=?",(dirname,))
-        keys = [k for (k, f) in c if filename in f.split('/')]
-        c.close()
-        rpms = []
-        for key in keys:
-            # FIXME do one query? WHERE pkgKey=? OR ...
-            # Nope, too long, but maybe we could do a join..
-            c = self.primary_db.cursor()
-            c.execute("SELECT location_base,location_href FROM packages "
-                      "WHERE pkgKey=?",(key,))
-            (base,href) = c.fetchone()
-            rpms.append(os.path.join(self.path,href))
-            c.close()
-        return rpms
+        key = self.pkgkey.get(packageuid)
+        if not key:
+            raise StopIteration # like returning an empty list
+        c.execute("SELECT dirname, filenames, filetypes FROM filelist "
+                  "WHERE pkgKey=?",(key,))
+        dirs = []
+        for (dir,names,types) in c:
+            if dir not in dirs:
+                for d in leading_paths(dir): 
+                    if d not in dirs:
+                        dirs.append(d)
+                        yield (d,'d')
+            for (n,t) in zip(names.split('/'),types):
+                f = os.path.join(dir,n)
+                if t == 'd' and f in dirs:
+                    continue
+                else:
+                    dirs.append(f)
+                yield (f,t)
 
     def unpack(self,rpm,targetdir=None):
         if targetdir:
@@ -175,83 +207,93 @@ class Repofs(FuseRO):
             self.repos.append(r)
             self.log(" cachedir=%s, repopath=%s" % (r.cachedir, r.path))
 
-    def _package_for_file(self, path):
-        for repo in self.repos:
-            packages = repo.packages_for_file(path)
-            if packages:
-                return (packages[0], repo)
+    def _splitpath(self, path):
+        '''Split an absolute path into (packageuid, path)'''
+        path = path.lstrip('/')
+        p = path.split('/',1)
+        if len(p) == 1:
+            p.append('')
+        p[1] = '/' + p[1]
+        return p
 
-    def _cachefile(self, path):
-        '''Find the given filename in the repo, extract it from whatever
-        package it's in, and save it into the cache.
-        Returns the actual on-disk path for the (now cached) file.'''
-        cachefile = os.path.join(self.cachedir, path.strip('/'))
-        if os.path.exists(cachefile):
-            return cachefile
-        # FIXME try/except
-        # FIXME multiple repos
-        # FIXME assumes that filenames are unique
-        (package, repo) = self._package_for_file(path)
-        if not package:
-            return None
-        repo.unpack(package, self.cachedir) 
-        self.log("unpacked %s" % package)
-        if os.path.exists(cachefile):
-            return cachefile
+    def readdir(self, path, offset):
+        self.log("readdir('%s', %s)" % (path, str(offset)))
+        for repo in self.repos:
+            if path == "/":
+                for uid in repo.package_uids():
+                    d = fuse.Direntry(str(uid))
+                    d.type = S_IFDIR
+                    yield d
+            else:
+                (packageuid, path) = self._splitpath(path)
+                for (f,t) in repo.files_for_package(packageuid):
+                    (dir, basename) = os.path.split(f)
+                    if dir == path and basename:
+                        d = fuse.Direntry(str(basename))
+                        if t == 'd':
+                            d.type = S_IFDIR
+                        else:
+                            d.type = S_IFREG
+                        yield d
 
     def getattr(self, path):
-        self.log("getattr(%s)" % path)
-        f = self._cachefile(path)
-        if not f:
-            return -errno.ENOENT
-        # Not os.lstat() - we're ignoring symlinks.
-        # This means we don't have to implement readlink().
-        attr = fuseclonestat(os.stat(f))
-        # No writeable files - turn off write bits
-        attr.st_mode &= ~(S_IWUSR|S_IWGRP|S_IWOTH)
-        return attr
-
-    # FIXME actually use the provided arg
-    def utime(self, path, times):
-        self.log("utime(%s, %s)" % path, str(times))
-        return os.utime(self._cachefile(path))
-    def access(self, path, mode):
-        self.log("access(%s, %s)" % path, mode)
-        if not os.access(self._cachefile(path)):
-            return -errno.EACCES
-
-    def readdir(self, path, offset):
-        '''Return an empty list. We don't let you list directory contents.'''
-        self.log("Huh? readdir(%s)" % path)
-        return []
+        self.log("getattr('%s')" % path)
+        attr = fuse.Stat()
+        attr.st_mode = None
+        attr.st_nlink = 2 # sure, why not
+        if (path == '/'):
+            attr.st_mode = DIRMODE
+            return attr
+        (packageuid, path) = self._splitpath(path)
+        for repo in self.repos:
+            for (f,t) in repo.files_for_package(packageuid):
+                if f == path:
+                    # found it! set mode to -r--r--r--
+                    attr.st_mode = FILEMODE
+                    if t == 'd': # change mode to dr-xr-xr-x
+                        attr.st_mode = DIRMODE
+                    # TODO: set some more attributes
+                    return attr
+        #raise OSError(errno.ENOENT, "No such file or directory")
+        # SourceForge FUSE Python reference says to use this instead:
+        return -errno.ENOENT
 
+#    # FIXME actually use the provided arg
+#    def utime(self, path, times):
+#        self.log("utime(%s, %s)" % path, str(times))
+#        return os.utime(self._cachefile(path))
+#    def access(self, path, mode):
+#        self.log("access(%s, %s)" % path, mode)
+#        if not os.access(self._cachefile(path)):
+#            return -errno.EACCES
+#
     def statfs(self):
         #self.log("statfs()")
         s = fuse.StatVFS()
         local_s = os.statvfs(self.cachedir)
         # FIXME modify s using info from local_s
         return local_s
-
-    # XXX explicitly declare other functions that return proper error codes?
-    
-    # FIXME: use file objects instead?
-
-    def open(self, path, flags):
-        self.log("open(%s,%s)" % (path,flags))
-        return open(self._cachefile(path),flag2mode(flags))
-
-    def read(self, path, length, offset, fh=None):
-        self.log("read(%s,%s,%s)" % (path,length,offset))
-        fh.seek(offset)
-        return fh.read(length)
-
-    def release(self, path, fh=None):
-        self.log("release(%s)" % path)
-        fh.close()
-
-    def fgetattr(self, path, fh=None):
-        self.log("fgetattr(%s)" % path)
-        return os.fstat(fh.fileno())
+#
+#    # XXX explicitly declare other functions that return proper error codes?
+#    
+#    # FIXME: use file objects instead?
+#
+#    def open(self, path, flags):
+#        self.log("open(%s,%s)" % (path,flags))
+#        return open(self._cachefile(path),flag2mode(flags))
+#
+#    def read(self, path, length, offset, fh=None):
+#        self.log("read(%s,%s,%s)" % (path,length,offset))
+#        fh.seek(offset)
+#        return fh.read(length)
+#
+#    def release(self, path, fh=None):
+#        self.log("release(%s)" % path)
+#        fh.close()
+#
+#    def fgetattr(self, path, fh=None):
+#        self.log("fgetattr(%s)" % path)
+#        return os.fstat(fh.fileno())
 
     def main(self, *a, **kw):
         return Fuse.main(self, *a, **kw)
author	Will Woods <wwoods@redhat.com>	2009-02-03 18:48:57 -0500
committer	Will Woods <wwoods@redhat.com>	2009-02-03 18:48:57 -0500
commit	f4ab050f507d284e88197249262f6271cb210bb4 (patch)
tree	13f29d33ce8e528b0166f6fa3413cbf32aba90c2
parent	c886a23f260cde6c9f43f5209a8aa1e4ca594717 (diff)
download	debuginfofs-f4ab050f507d284e88197249262f6271cb210bb4.tar.gz debuginfofs-f4ab050f507d284e88197249262f6271cb210bb4.tar.xz debuginfofs-f4ab050f507d284e88197249262f6271cb210bb4.zip