#!/usr/bin/python
# repofs.py - Export the contents of a package repo as a readonly filesystem.
#
# Copyright 2009 Red Hat, Inc. GPLv2+ BOILERPLATE GOES HERE.
#
# Author: Will Woods
#
# Exports the contents of the repo(s) in a filesystem that looks like this:
# $PACKAGE_UID/[package contents]
# where $PACKAGE_UID is some unique package identifier (e.g. ENVRA in RPM)

import os
import glob
from stat import *
import errno
import subprocess
import time

import fuse
fuse.fuse_python_api = (0, 2)
fuse.feature_assert('stateful_files', 'has_init')
from fuse import Fuse
from sqlite3 import dbapi2 as sqlite
import bz2, gzip
import yum.repoMDObject

# Permission bits only (r--r--r--); the file-type bits are OR'd in by callers.
FILEMODE = S_IRUSR|S_IRGRP|S_IROTH
# dr-xr-xr-x
DIRMODE = FILEMODE|S_IFDIR|S_IXUSR|S_IXGRP|S_IXOTH


def bunzip(infile, outfile):
    '''Decompress the bzip2 file `infile` into `outfile`.

    The directory that will contain `outfile` is created if it is missing.
    Data is copied in 4096-byte chunks. Both files are closed even if the
    copy fails part-way through.'''
    outdir = os.path.dirname(outfile)
    # outdir is '' when outfile is a bare filename; makedirs('') would fail.
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)
    inf = bz2.BZ2File(infile)
    try:
        # Decompressed data is binary (sqlite databases), so write bytes.
        outf = open(outfile, "wb")
        try:
            data = inf.read(4096)
            while data:
                outf.write(data)
                data = inf.read(4096)
        finally:
            outf.close()
    finally:
        inf.close()


def flag2mode(flags):
    '''Translate os.open()-style `flags` into a mode string for open().

    Returns 'r', 'w' or 'w+' according to the access mode, with the 'w'
    turned into 'a' when O_APPEND is set.'''
    md = {os.O_RDONLY: 'r', os.O_WRONLY: 'w', os.O_RDWR: 'w+'}
    accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
    m = md[flags & accmode]
    # Test the bit with '&'; '|' would be true for every value of flags.
    if flags & os.O_APPEND:
        m = m.replace('w', 'a', 1)
    return m


def fuseclonestat(stat):
    '''Return a fuse.Stat filled from the first ten fields of `stat`.

    `stat` must unpack into exactly ten values, in the order
    (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime).'''
    s = fuse.Stat()
    (s.st_mode, s.st_ino, s.st_dev, s.st_nlink, s.st_uid, s.st_gid,
     s.st_size, s.st_atime, s.st_mtime, s.st_ctime) = stat
    return s


def leading_paths(dir):
    '''Return `dir` followed by each of its parent directories.

    For example '/usr/share/doc' gives
    ['/usr/share/doc', '/usr/share', '/usr', '/'].
    An empty `dir` gives an empty list.'''
    dirlist = []
    while dir:
        dirlist.append(dir)
        (parent, dummy) = os.path.split(dir)
        # split() returns a root unchanged ('/' -> '/'); stop there rather
        # than looping forever.
        if parent == dir:
            break
        dir = parent
    return dirlist


class SimpleYumRepo(object):
    '''Minimal reader for the sqlite metadata of a local yum repository.

    Attributes:
      path      - directory of the repo (the one containing repodata/)
      cachedir  - directory where decompressed metadata is stored
      pkgkey    - dict mapping package UID -> pkgKey, filled as a side
                  effect of iterating package_uids()
      primary_db, filelists_db - sqlite connections, set by parse_repomd()
                  for each database type the repo provides'''

    def __init__(self, path=None, cachedir=None):
        self.path = path
        self.cachedir = cachedir
        self.pkgkey = {}
        if path:
            self.parse_repomd()

    def parse_repomd(self):
        '''Read repodata/repomd.xml, open the primary and filelists sqlite
        databases (decompressing .bz2 files into cachedir/.repodata first)
        and populate the pkgkey cache.'''
        repomd = os.path.join(self.path, "repodata/repomd.xml")
        repoXML = yum.repoMDObject.RepoMD(self.path, repomd)
        for t in ('primary_db', 'filelists_db'):
            if t in repoXML.fileTypes():
                d = repoXML.getData(t)
                (base, dbpath) = d.location
                dbfile = os.path.join(self.path, dbpath)
                # TODO check for existing db file
                if dbfile.endswith(".bz2"):
                    outfile = os.path.join(self.cachedir, ".repodata",
                                           os.path.basename(dbfile)[:-4])
                    bunzip(dbfile, outfile)
                    dbfile = outfile
                # TODO: elif .gz, else...
                con = sqlite.connect(dbfile)
                setattr(self, t, con)
        for p in self.package_uids():
            pass
        # This is kind of stupid - we need this cached to use files_for_package..
        # Maybe this should populate the cache and package_uids() can just return
        # pkgkey.keys()

    def package_uids(self):
        '''Generator for package UIDs in this repo.

        Each UID has the form "name-epoch:version-release.arch". As a side
        effect every yielded UID is recorded in self.pkgkey.'''
        c = self.primary_db.cursor()
        c.execute("SELECT epoch, name, version, release, arch, pkgKey FROM packages")
        for (e, n, v, r, a, key) in c:
            nevra = "%s-%s:%s-%s.%s" % (n, e, v, r, a)
            self.pkgkey[nevra] = key
            yield nevra

    def files_for_package(self, packageuid):
        '''Generator for files in the given packageuid.
        Each item is a tuple of the form (abspath,type) where type is 'd' or
        'f' (dir/file).

        Parent directories of every entry are yielded (as 'd') before the
        entry itself, and no directory is yielded twice. An unknown
        packageuid yields nothing.'''
        key = self.pkgkey.get(packageuid)
        if key is None:
            # A plain return ends the generator; raising StopIteration here
            # becomes a RuntimeError under PEP 479.
            return
        c = self.filelists_db.cursor()
        c.execute("SELECT dirname, filenames, filetypes FROM filelist "
                  "WHERE pkgKey=?", (key,))
        seen = set()  # every path yielded so far
        for (dir, names, types) in c:
            if dir not in seen:
                for d in leading_paths(dir):
                    if d not in seen:
                        seen.add(d)
                        yield (d, 'd')
            # filenames is a '/'-joined list; filetypes has one character
            # per filename.
            for (n, t) in zip(names.split('/'), types):
                f = os.path.join(dir, n)
                if t == 'd' and f in seen:
                    continue
                seen.add(f)
                yield (f, t)

    def unpack(self, rpm, targetdir=None):
        '''Extract the payload of the package file `rpm` into `targetdir`
        (default: self.cachedir) by piping it through "cpio -iumd --quiet".

        `targetdir` is created if it is given and missing.
        Returns the exit status of cpio.
        Raises ValueError if no gzip magic is found near the start of the
        file.'''
        if targetdir:
            if not os.path.isdir(targetdir):
                os.makedirs(targetdir)
        else:
            targetdir = self.cachedir
        inf = open(rpm, "rb")
        try:
            # Find RPM header and read compression algorithm
            # Skip forward to gzipped CPIO archive
            # FIXME: Awful. Just awful. At least use rpm2cpio.
            header = inf.read(409600)
            offset = header.index(b"\x1f\x8b")
            del header
            inf.seek(offset)
            gz = gzip.GzipFile(fileobj=inf, mode="rb")
            try:
                # Open a pipe to "cpio -iumd --quiet"
                cpio = subprocess.Popen(args=["cpio", "-iumd", "--quiet"],
                                        cwd=targetdir,
                                        stdin=subprocess.PIPE)
                try:
                    data = gz.read(4096)
                    while data:
                        cpio.stdin.write(data)
                        data = gz.read(4096)
                finally:
                    # Always close the pipe and reap the child, even when
                    # decompression fails, so cpio is not left hanging.
                    cpio.stdin.close()
                    status = cpio.wait()
            finally:
                gz.close()
        finally:
            inf.close()
        return status


class FuseRO(Fuse):
    '''A Fuse subclass for implementing readonly filesystems.'''
    # chmod chown create link mkdir mknod rename rmdir setxattr(?) symlink
    # truncate unlink write

    def __rofs(self, *args):
        '''Raises OSError(EROFS,"Read-only filesystem")'''
        raise OSError(errno.EROFS, "Read-only filesystem")

    chmod = __rofs
    chown = __rofs
    link = __rofs
    mkdir = __rofs
    rename = __rofs
    rmdir = __rofs
    setxattr = __rofs  # You might override this so you can store xattrs
    symlink = __rofs
    truncate = __rofs
    unlink = __rofs

    def write(self, *args):
        '''write() function that raises IOError(EBADF)
        You can't open files for writing; this is a readonly filesystem!'''
        raise IOError(errno.EBADF, "write() on readonly filesystem")

    def _check_open(self, flags):
        '''checks the open() flags, and returns False if write access was
        requested. Returns True otherwise.'''
        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
        return (flags & accmode) == os.O_RDONLY


class Repofs(FuseRO):
    '''Read-only filesystem exposing the file lists of one or more repos.

    The "repo" and "cachedir" mount options (see main()) are stored on the
    instance as self.repopath and self.cachedir. self.repopath may name
    several repos separated by ":".'''

    def __init__(self, *args, **kw):
        Fuse.__init__(self, *args, **kw)

    # FIXME: this logging is terrible
    def log(self, message):
        '''Append a timestamped line to the log file opened by fsinit().'''
        self.logfile.write("%s %s\n" % (time.asctime(), message))
        # Flush per message so the log is current while the fs is mounted.
        self.logfile.flush()

    def fsinit(self):
        '''Create the cache dir, open the log and load every repo named in
        self.repopath.'''
        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)
        # FIXME this logging is awful
        self.logfile = open(os.path.join(self.cachedir, ".log"), "a")
        self.log("fsinit(path=%s). Hang on.." % self.repopath)
        # TODO: figure out the repo type (Yum, etc) and use the right class.
        # That way we can support other distros. Yay!
        self.repos = []
        for rp in self.repopath.split(":"):
            r = SimpleYumRepo(path=rp, cachedir=self.cachedir)
            self.repos.append(r)
            self.log(" cachedir=%s, repopath=%s" % (r.cachedir, r.path))

    def _splitpath(self, path):
        '''Split an absolute path into (packageuid, path)

        The result is a two-item list; the second item always starts with
        '/', e.g. "/pkg/usr/bin" -> ['pkg', '/usr/bin'] and
        "/pkg" -> ['pkg', '/'].'''
        path = path.lstrip('/')
        p = path.split('/', 1)
        if len(p) == 1:
            p.append('')
        p[1] = '/' + p[1]
        return p

    def readdir(self, path, offset):
        '''Generator of fuse.Direntry objects for the directory `path`.

        The root lists one directory per package UID; any other path lists
        the entries directly inside that directory of the package. A name
        provided by more than one repo is listed once.'''
        self.log("readdir('%s', %s)" % (path, str(offset)))
        seen = set()  # entry names already yielded
        if path == "/":
            for repo in self.repos:
                for uid in repo.package_uids():
                    name = str(uid)
                    if name in seen:
                        continue
                    seen.add(name)
                    d = fuse.Direntry(name)
                    d.type = S_IFDIR
                    yield d
            return
        # Split once, outside the repo loop: re-splitting the already-split
        # path on later iterations would look up the wrong package.
        (packageuid, subpath) = self._splitpath(path)
        for repo in self.repos:
            for (f, t) in repo.files_for_package(packageuid):
                (parent, basename) = os.path.split(f)
                if parent != subpath or not basename:
                    continue
                name = str(basename)
                if name in seen:
                    continue
                seen.add(name)
                d = fuse.Direntry(name)
                if t == 'd':
                    d.type = S_IFDIR
                else:
                    d.type = S_IFREG
                yield d

    def getattr(self, path):
        '''Return a fuse.Stat for `path`, or -errno.ENOENT if no repo has it.

        Directories get mode dr-xr-xr-x, everything else -r--r--r--.'''
        self.log("getattr('%s')" % path)
        attr = fuse.Stat()
        attr.st_nlink = 2  # sure, why not
        if path == '/':
            attr.st_mode = DIRMODE
            return attr
        (packageuid, subpath) = self._splitpath(path)
        for repo in self.repos:
            for (f, t) in repo.files_for_package(packageuid):
                if f != subpath:
                    continue
                # found it!
                if t == 'd':
                    # dr-xr-xr-x
                    attr.st_mode = DIRMODE
                else:
                    # -r--r--r--; FILEMODE holds permission bits only, so
                    # the regular-file type bit must be added here.
                    attr.st_mode = S_IFREG | FILEMODE
                # TODO: set some more attributes
                return attr
        #raise OSError(errno.ENOENT, "No such file or directory")
        # SourceForge FUSE Python reference says to use this instead:
        return -errno.ENOENT

#    # FIXME actually use the provided arg
#    def utime(self, path, times):
#        self.log("utime(%s, %s)" % path, str(times))
#        return os.utime(self._cachefile(path))

#    def access(self, path, mode):
#        self.log("access(%s, %s)" % path, mode)
#        if not os.access(self._cachefile(path)):
#            return -errno.EACCES
#

    def statfs(self):
        '''Return the os.statvfs() result for the cache directory.'''
        #self.log("statfs()")
        # FIXME build a fuse.StatVFS from this instead of passing it through
        return os.statvfs(self.cachedir)

#
#    # XXX explicitly declare other functions that return proper error codes?
#
#    # FIXME: use file objects instead?
#
#    def open(self, path, flags):
#        self.log("open(%s,%s)" % (path,flags))
#        return open(self._cachefile(path),flag2mode(flags))
#
#    def read(self, path, length, offset, fh=None):
#        self.log("read(%s,%s,%s)" % (path,length,offset))
#        fh.seek(offset)
#        return fh.read(length)
#
#    def release(self, path, fh=None):
#        self.log("release(%s)" % path)
#        fh.close()
#
#    def fgetattr(self, path, fh=None):
#        self.log("fgetattr(%s)" % path)
#        return os.fstat(fh.fileno())

    def main(self, *a, **kw):
        '''Hand control to Fuse.main().'''
        return Fuse.main(self, *a, **kw)


def main():
    '''Parse the command line / mount options and run the filesystem.'''
    usage = 'Repofs: mount a package repo and export all the files in the packages.\n\n' + Fuse.fusage
    server = Repofs(version="%prog " + fuse.__version__,
                    usage=usage,
                    dash_s_do='setsingle')
    server.parser.add_option(mountopt="repo", metavar="PATH",
                             dest="repopath",
                             help="Package repo to mount")
    server.parser.add_option(mountopt="cachedir", metavar="PATH",
                             help="Cache dir for expanded packages")
    # values=server stores the parsed options as attributes on the server.
    server.parse(values=server, errex=1)
    server.main()


if __name__ == '__main__':
    main()