#!/usr/bin/python
# repofs.py - Export the contents of a package repo as a readonly filesystem.
# Copyright 2009 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Author: Will Woods
#
# Exports the contents of the repo(s) in a filesystem that looks like this:
#   $PACKAGE_UID/[package contents]
# where $PACKAGE_UID is some unique package identifier (e.g. ENVRA in RPM)
#
# Known bugs:
# - SimpleYumRepo needs a way to reload metadata
# - SimpleYumRepo.filecache never shrinks
# - Stuff goes all to heck if you run multithreaded
#   - Probably need some locking or something to keep threads from fighting
#     over the filecache
#
# TODO:
# - Actually frickin' implement open() and read()
# - Test mem/disk use with actual repos and actual users
# - Rewrite unpack() to use rpm2cpio
# - Rewrite stupid log() method to use logging module

import os
import glob
from stat import *
import errno
import subprocess
import time

import fuse
# The API version must be selected before anything else is pulled from fuse.
fuse.fuse_python_api = (0, 2)
fuse.feature_assert('stateful_files', 'has_init')
from fuse import Fuse

from sqlite3 import dbapi2 as sqlite
import bz2
import gzip
import yum.repoMDObject


def bunzip(infile, outfile):
    '''Decompress the bzip2 file `infile` into `outfile`.

    The directory that will hold `outfile` is created if it is missing.
    Data is copied in 4096-byte chunks, and both files are closed even if
    the copy fails part-way.'''
    (parent, _name) = os.path.split(outfile)
    # os.path.split() gives '' for a bare filename; makedirs('') would fail.
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)
    inf = bz2.BZ2File(infile)
    try:
        # Binary mode: the decompressed payload (an sqlite db) is not text.
        outf = open(outfile, "wb")
        try:
            data = inf.read(4096)
            while data:
                outf.write(data)
                data = inf.read(4096)
        finally:
            outf.close()
    finally:
        inf.close()


def flag2mode(flags):
    '''Translate os.open()-style `flags` into a mode string for open().

    O_RDONLY maps to 'r', O_WRONLY to 'w' and O_RDWR to 'w+'. When O_APPEND
    is set, the first 'w' in the mode is replaced by 'a'.'''
    md = {os.O_RDONLY: 'r', os.O_WRONLY: 'w', os.O_RDWR: 'w+'}
    m = md[flags & (os.O_RDONLY | os.O_WRONLY | os.O_RDWR)]
    # Test the bit with '&'; 'flags | os.O_APPEND' is always non-zero and
    # would turn every write mode into append.
    if flags & os.O_APPEND:
        m = m.replace('w', 'a', 1)
    return m


def fuseclonestat(stat):
    '''Return a fuse.Stat filled from `stat`.

    `stat` must unpack into exactly ten values, in this order: st_mode,
    st_ino, st_dev, st_nlink, st_uid, st_gid, st_size, st_atime, st_mtime,
    st_ctime.'''
    s = fuse.Stat()
    (s.st_mode, s.st_ino, s.st_dev, s.st_nlink, s.st_uid, s.st_gid,
     s.st_size, s.st_atime, s.st_mtime, s.st_ctime) = stat
    return s


def leading_paths(dir):
    '''Return `dir` followed by each of its ancestors, deepest first.

    The walk stops after '/' for absolute paths, or when the path has been
    reduced to the empty string for relative ones.'''
    dirlist = []
    while dir:
        dirlist.append(dir)
        if dir == '/':
            break
        (dir, dummy) = os.path.split(dir)
    return dirlist


class SimpleYumRepo(object):
    '''Read-only view of a yum repository backed by its sqlite metadata.

    parse_repomd() opens the 'primary_db' and 'filelists_db' databases named
    in repomd.xml and stores the connections as attributes of the same names.
    NOTE(review): the other methods assume both databases were found; a repo
    lacking them fails with AttributeError.'''

    def __init__(self, path=None, cachedir=None):
        '''Remember `path` and `cachedir`; load metadata if `path` is given.'''
        self.path = path
        self.cachedir = cachedir
        self.pkgkey = {}      # {packageuid: pkgKey in the sqlite dbs}
        self.filecache = {}   # {packageuid: result of files_for_package_from_db}
        if path:
            self.parse_repomd()

    def parse_repomd(self):
        '''Open the sqlite databases listed in repodata/repomd.xml.

        Databases ending in ".bz2" are first decompressed into
        <cachedir>/.repodata/. Afterwards self.pkgkey is rebuilt from the
        primary database.'''
        repomd = os.path.join(self.path, "repodata/repomd.xml")
        repoXML = yum.repoMDObject.RepoMD(self.path, repomd)
        for t in ('primary_db', 'filelists_db'):
            if t in repoXML.fileTypes():
                d = repoXML.getData(t)
                (_base, dbpath) = d.location
                dbfile = os.path.join(self.path, dbpath)
                # TODO check for existing db file
                if dbfile.endswith(".bz2"):
                    # Strip the ".bz2" suffix for the unpacked copy.
                    outfile = os.path.join(self.cachedir, ".repodata",
                                           os.path.basename(dbfile)[:-4])
                    bunzip(dbfile, outfile)
                    dbfile = outfile
                # TODO: elif .gz, else...
                con = sqlite.connect(dbfile)
                setattr(self, t, con)
        self.pkgkey = self.package_keys_from_db()

    # TODO: need a refresh method to check the repodata and reload it

    def package_uids(self):
        '''return a list of unique identifiers for every package in the repo'''
        return self.pkgkey.keys()

    def package_keys_from_db(self):
        '''Return a dict of {packageuid:dbkey,...}

        The uid is built as "name-epoch:version-release.arch" from each row
        of the primary database's packages table.'''
        c = self.primary_db.cursor()
        c.execute("SELECT epoch, name, version, release, arch, pkgKey "
                  "FROM packages")
        pkgkey = {}
        for (e, n, v, r, a, key) in c:
            nevra = "%s-%s:%s-%s.%s" % (n, e, v, r, a)
            pkgkey[nevra] = key
        return pkgkey

    # Cache filelist data pulled from the database.
    # XXX: Can we make FUSE cache this info instead?
    # XXX: Seriously this is going to expand forever and consume gobs of memory.
    # XXX: Then again, "gobs" might turn out to be, like, several dozen MB
    #      (i.e. No Big Deal). Need more testing here!
    def files_for_package(self, packageuid):
        '''Return the file list for `packageuid`, querying the db only once.'''
        if packageuid not in self.filecache:
            self.filecache[packageuid] = \
                self.files_for_package_from_db(packageuid)
        return self.filecache[packageuid]

    def files_for_package_from_db(self, packageuid):
        '''Return a list of info about files in the given packageuid.
        Each item is a tuple of the form (abspath,type) where type is taken
        from the database's filetypes column ('d' or 'f' for dir/file).
        Every leading directory of each dirname is also listed, with type
        'd'. An unknown packageuid yields an empty list.'''
        c = self.filelists_db.cursor()
        key = self.pkgkey.get(packageuid)
        filelist = []
        if key is None:
            return filelist
        c.execute("SELECT dirname, filenames, filetypes FROM filelist "
                  "WHERE pkgKey=?", (key,))
        # Paths already emitted. A set keeps the membership tests cheap;
        # filelist itself preserves the emission order.
        seen = set()
        for (dir, names, types) in c:
            if dir not in seen:
                for d in leading_paths(dir):
                    if d not in seen:
                        seen.add(d)
                        filelist.append((d, 'd'))
            # filenames is '/'-separated; filetypes has one character per name.
            for (n, t) in zip(names.split('/'), types):
                f = os.path.join(dir, n)
                if t == 'd' and f in seen:
                    # Directory was already listed as somebody's parent.
                    continue
                seen.add(f)
                filelist.append((f, t))
        return filelist

    def unpack(self, rpm, targetdir=None):
        '''Extract the payload of the package file `rpm` using cpio.

        Files are unpacked into `targetdir` (created if missing), or into
        self.cachedir when no targetdir is given. Raises ValueError if no
        gzip magic number is found in the first 409600 bytes of the file.
        The exit status of cpio is not checked.'''
        if targetdir:
            if not os.path.isdir(targetdir):
                os.makedirs(targetdir)
        else:
            targetdir = self.cachedir
        inf = open(rpm, "rb")
        try:
            # Find RPM header and read compression algorithm
            # Skip forward to gzipped CPIO archive
            # FIXME: Awful. Just awful. At least use rpm2cpio.
            header = inf.read(409600)
            offset = header.index(b"\x1f\x8b")
            del header
            inf.seek(offset)
            gz = gzip.GzipFile(fileobj=inf, mode="rb")
            try:
                # Open a pipe to "cpio -iumd --quiet"
                cpio = subprocess.Popen(args=["cpio", "-iumd", "--quiet"],
                                        cwd=targetdir,
                                        stdin=subprocess.PIPE)
                try:
                    data = gz.read(4096)
                    while data:
                        cpio.stdin.write(data)
                        data = gz.read(4096)
                finally:
                    # Always give cpio EOF and reap it, even on read errors.
                    cpio.stdin.close()
                    cpio.wait()
            finally:
                gz.close()
        finally:
            inf.close()


class FileStat(fuse.Stat):
    '''fuse.Stat preset for a regular file readable by everyone (nlink=1).'''

    def __init__(self, **kw):
        fuse.Stat.__init__(self, **kw)
        self.st_mode = S_IFREG | S_IRUSR | S_IRGRP | S_IROTH
        self.st_nlink = 1


class DirStat(fuse.Stat):
    '''fuse.Stat preset for a directory with r-x for everyone (nlink=2).'''

    def __init__(self, **kw):
        fuse.Stat.__init__(self, **kw)
        self.st_mode = (S_IFDIR | S_IRUSR | S_IRGRP | S_IROTH |
                        S_IXUSR | S_IXGRP | S_IXOTH)
        self.st_nlink = 2


class FuseRO(Fuse):
    '''A Fuse subclass for implementing readonly filesystems.'''

    def __rofs(self, *args):
        '''Raises OSError(EROFS,"Read-only filesystem")'''
        raise OSError(errno.EROFS, "Read-only filesystem")

    # Every operation that would modify the filesystem fails with EROFS.
    chmod = __rofs
    chown = __rofs
    ftruncate = __rofs
    link = __rofs
    mkdir = __rofs
    mknod = __rofs
    removexattr = __rofs
    rename = __rofs
    rmdir = __rofs
    setxattr = __rofs
    symlink = __rofs
    truncate = __rofs
    unlink = __rofs

    def write(self, *args):
        '''write() function that raises IOError(EBADF)
        You can't open files for writing; this is a readonly filesystem!'''
        raise IOError(errno.EBADF, "write() on readonly filesystem")

    def _check_open(self, flags):
        '''checks the open() flags, and returns False if write access was
        requested. Returns True otherwise.'''
        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
        return (flags & accmode) == os.O_RDONLY


class Repofs(FuseRO):
    '''Read-only filesystem exposing package contents as /<packageuid>/<path>.

    self.repopath (a ':'-separated list of repo paths) and self.cachedir are
    expected to be set before fsinit() runs; main() arranges for the option
    parser to store them on the instance.'''

    def __init__(self, *args, **kw):
        Fuse.__init__(self, *args, **kw)
        self.do_logging = False

    # FIXME: this logging is terrible
    def log(self, message):
        '''Append a timestamped `message` to the log file, if logging is on.'''
        if self.do_logging:
            self.logfile.write("%s %s\n" % (time.asctime(), message))

    def fsinit(self):
        '''Create the cache dir and load one SimpleYumRepo per repo path.'''
        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)
        if self.do_logging:
            self.logfile = open(os.path.join(self.cachedir, ".log"), "a", 0)
        self.log("fsinit(path=%s). Hang on.." % self.repopath)
        # TODO: figure out the repo type (Yum, etc) and use the right class.
        # That way we can support other distros. Yay!
        self.repos = []
        for rp in self.repopath.split(":"):
            r = SimpleYumRepo(path=rp, cachedir=self.cachedir)
            self.repos.append(r)
            self.log("  cachedir=%s, repopath=%s" % (r.cachedir, r.path))

    def _splitpath(self, path):
        '''Split an absolute path into (packageuid, path)

        The result is a two-item list. The second item always starts with
        '/', and is just '/' when `path` names only the package directory.'''
        path = path.lstrip('/')
        p = path.split('/', 1)
        if len(p) == 1:
            p.append('')
        p[1] = '/' + p[1]
        return p

    def readdir(self, path, offset):
        '''Yield a fuse.Direntry for each entry of the directory `path`.

        The root lists every package uid of every repo as a directory. Any
        other path lists the files whose parent directory is the in-package
        part of `path`, from every repo. `offset` is only logged.'''
        self.log("readdir('%s', %s)" % (path, str(offset)))
        if path == "/":
            for repo in self.repos:
                for uid in repo.package_uids():
                    entry = fuse.Direntry(str(uid))
                    entry.type = S_IFDIR
                    yield entry
            return
        # Split once, before the loop: re-splitting the already-split path on
        # each repo would make the second and later repos look up the wrong
        # package (or list the mount root).
        (packageuid, subpath) = self._splitpath(path)
        for repo in self.repos:
            for (f, t) in repo.files_for_package(packageuid):
                (parent, basename) = os.path.split(f)
                if parent == subpath and basename:
                    entry = fuse.Direntry(str(basename))
                    if t == 'd':
                        entry.type = S_IFDIR
                    else:
                        entry.type = S_IFREG
                    yield entry

    def getattr(self, path):
        '''Return a DirStat or FileStat for `path`, or -errno.ENOENT.

        Only entries of type 'f' and 'd' are recognised; the first repo with
        a matching entry wins.'''
        self.log("getattr('%s')" % path)
        if (path == '/'):
            return DirStat()
        (packageuid, path) = self._splitpath(path)
        for repo in self.repos:
            for (f, t) in repo.files_for_package(packageuid):
                if f == path:
                    if t == 'f':
                        return FileStat()
                    elif t == 'd':
                        return DirStat()
        #raise OSError(errno.ENOENT, "No such file or directory")
        # SourceForge FUSE Python reference says to use this instead:
        return -errno.ENOENT

    def statfs(self):
        '''Return os.statvfs() of the cache directory.'''
        #self.log("statfs()")
        local_s = os.statvfs(self.cachedir)
        #s = fuse.StatVFS()
        # FIXME modify s using info from local_s
        return local_s

    class RepofsFile(object):
        '''Placeholder for a per-open-file object; not implemented yet.'''

        def __init__(self, path, flags, *mode):
            '''Accept the open() arguments; nothing is stored yet.'''
            pass

    # TODO: fgetattr flush fsdestroy fsync fsyncdir
    #       getxattr listxattr lock read utime utimens
    # NOTE open, opendir, release, releasedir: unused/unneeded.
    # NOTE bmap, readlink: not implemented (doesn't make sense)

#    def access(self, path, mode):
#        self.log("access('%s',%s)" % (path, oct(mode)))
#        s = self.getattr(path) # Will raise an exception if ENOENT
#        if mode & os.W_OK:
#            self.__rofs() # Raises EROFS
#        if S_ISREG(s.st_mode) and mode & os.X_OK:
#            raise OSError(errno.EACCES, "Permission denied")
#
#    # FIXME: use file objects instead?
#
#    def open(self, path, flags):
#        self.log("open(%s,%s)" % (path,flags))
#        return open(self._cachefile(path),flag2mode(flags))
#
#    def read(self, path, length, offset, fh=None):
#        self.log("read(%s,%s,%s)" % (path,length,offset))
#        fh.seek(offset)
#        return fh.read(length)
#
#    def release(self, path, fh=None):
#        self.log("release(%s)" % path)
#        fh.close()
#
#    def fgetattr(self, path, fh=None):
#        self.log("fgetattr(%s)" % path)
#        return os.fstat(fh.fileno())

    def main(self, *a, **kw):
        '''Hand control to Fuse.main() with the given arguments.'''
        return Fuse.main(self, *a, **kw)


def main():
    '''Parse the mount options (repo=, cachedir=) and run the filesystem.'''
    usage = ('Repofs: mount a package repo and export all the files in '
             'the packages.\n\n' + Fuse.fusage)
    server = Repofs(version="%prog " + fuse.__version__,
                    usage=usage,
                    dash_s_do='setsingle')
    server.parser.add_option(mountopt="repo", metavar="PATH",
                             dest="repopath",
                             help="Package repo to mount")
    server.parser.add_option(mountopt="cachedir", metavar="PATH",
                             help="Cache dir for expanded packages")
    # values=server: parsed option values are stored on the server instance.
    server.parse(values=server, errex=1)
    server.main()


def will_test_setup():
    '''Return a Repofs initialised against /tmp/test-repofs, not mounted.'''
    r = Repofs()
    r.repopath = "/tmp/test-repofs/repo"
    r.cachedir = "/tmp/test-repofs/cache"
    r.fsinit()
    return r


if __name__ == '__main__':
    main()