diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/nfs | |
download | kernel-crypto-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz kernel-crypto-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.xz kernel-crypto-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/Makefile | 15 | ||||
-rw-r--r-- | fs/nfs/callback.c | 187 | ||||
-rw-r--r-- | fs/nfs/callback.h | 70 | ||||
-rw-r--r-- | fs/nfs/callback_proc.c | 85 | ||||
-rw-r--r-- | fs/nfs/callback_xdr.c | 481 | ||||
-rw-r--r-- | fs/nfs/delegation.c | 342 | ||||
-rw-r--r-- | fs/nfs/delegation.h | 57 | ||||
-rw-r--r-- | fs/nfs/dir.c | 1562 | ||||
-rw-r--r-- | fs/nfs/direct.c | 808 | ||||
-rw-r--r-- | fs/nfs/file.c | 484 | ||||
-rw-r--r-- | fs/nfs/idmap.c | 498 | ||||
-rw-r--r-- | fs/nfs/inode.c | 2003 | ||||
-rw-r--r-- | fs/nfs/mount_clnt.c | 183 | ||||
-rw-r--r-- | fs/nfs/nfs2xdr.c | 711 | ||||
-rw-r--r-- | fs/nfs/nfs3proc.c | 859 | ||||
-rw-r--r-- | fs/nfs/nfs3xdr.c | 1023 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 2786 | ||||
-rw-r--r-- | fs/nfs/nfs4renewd.c | 148 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 932 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 4034 | ||||
-rw-r--r-- | fs/nfs/nfsroot.c | 513 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 309 | ||||
-rw-r--r-- | fs/nfs/proc.c | 655 | ||||
-rw-r--r-- | fs/nfs/read.c | 618 | ||||
-rw-r--r-- | fs/nfs/symlink.c | 117 | ||||
-rw-r--r-- | fs/nfs/unlink.c | 227 | ||||
-rw-r--r-- | fs/nfs/write.c | 1431 |
27 files changed, 21138 insertions, 0 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
new file mode 100644
index 00000000000..b4baa031edf
--- /dev/null
+++ b/fs/nfs/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the Linux nfs filesystem routines.
+#
+
+obj-$(CONFIG_NFS_FS) += nfs.o
+
+nfs-y 			:= dir.o file.o inode.o nfs2xdr.o pagelist.o \
+			   proc.o read.o symlink.o unlink.o write.o
+nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o
+nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
+			   delegation.o idmap.o \
+			   callback.o callback_xdr.o callback_proc.o
+nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
+nfs-objs		:= $(nfs-y)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
new file mode 100644
index 00000000000..560d6175dd5
--- /dev/null
+++ b/fs/nfs/callback.c
@@ -0,0 +1,187 @@
+/*
+ * linux/fs/nfs/callback.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback handling
+ */
+
+#include <linux/config.h>
+#include <linux/completion.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+struct nfs_callback_data {
+	unsigned int users;		/* nfs_callback_up() calls not yet balanced by _down() */
+	struct svc_serv *serv;
+	pid_t pid;			/* pid of the callback thread, 0 when it is not running */
+	struct completion started;
+	struct completion stopped;
+};
+
+static struct nfs_callback_data nfs_callback_info;
+static DECLARE_MUTEX(nfs_callback_sema);
+static struct svc_program nfs4_callback_program;
+
+unsigned short nfs_callback_tcpport;
+
+/*
+ * This is the callback kernel thread.
+ */
+static void nfs_callback_svc(struct svc_rqst *rqstp)
+{
+	struct svc_serv *serv = rqstp->rq_server;
+	int err;
+
+	__module_get(THIS_MODULE);
+	lock_kernel();
+
+	nfs_callback_info.pid = current->pid;
+	daemonize("nfsv4-svc");
+	/* Process request with signals blocked, but allow SIGKILL.  */
+	allow_signal(SIGKILL);
+
+	complete(&nfs_callback_info.started);
+
+	while (nfs_callback_info.users != 0 || !signalled()) {
+		/*
+		 * Listen for a request on the socket
+		 */
+		err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT);
+		if (err == -EAGAIN || err == -EINTR)
+			continue;
+		if (err < 0) {
+			printk(KERN_WARNING
+					"%s: terminating on error %d\n",
+					__FUNCTION__, -err);
+			break;
+		}
+		dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
+				NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
+		svc_process(serv, rqstp);
+	}
+
+	nfs_callback_info.pid = 0;
+	complete(&nfs_callback_info.stopped);
+	unlock_kernel();
+	module_put_and_exit(0);
+}
+
+/*
+ * Bring up the server process if it is not already up.
+ */
+int nfs_callback_up(void)
+{
+	struct svc_serv *serv;
+	struct svc_sock *svsk;
+	int ret = 0;
+
+	lock_kernel();
+	down(&nfs_callback_sema);
+	if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
+		goto out;
+	init_completion(&nfs_callback_info.started);
+	init_completion(&nfs_callback_info.stopped);
+	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE);
+	ret = -ENOMEM;
+	if (!serv)
+		goto out_err;
+	/* FIXME: We don't want to register this socket with the portmapper */
+	ret = svc_makesock(serv, IPPROTO_TCP, 0);
+	if (ret < 0)
+		goto out_destroy;
+	if (!list_empty(&serv->sv_permsocks)) {
+		svsk = list_entry(serv->sv_permsocks.next,
+				struct svc_sock, sk_list);
+		nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
+		dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
+	} else
+		BUG();
+	ret = svc_create_thread(nfs_callback_svc, serv);
+	if (ret < 0)
+		goto out_destroy;
+	nfs_callback_info.serv = serv;
+	wait_for_completion(&nfs_callback_info.started);
+out:
+	up(&nfs_callback_sema);
+	unlock_kernel();
+	return ret;
+out_destroy:
+	svc_destroy(serv);
+out_err:
+	nfs_callback_info.users--;	/* undo the users++ taken above */
+	goto out;
+}
+
+/*
+ * Kill the server process once the last user is gone (no-op if it is not running).
+ */
+int nfs_callback_down(void)
+{
+	int ret = 0;
+
+	lock_kernel();
+	down(&nfs_callback_sema);
+	if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
+		goto out;
+	kill_proc(nfs_callback_info.pid, SIGKILL, 1);
+	wait_for_completion(&nfs_callback_info.stopped);
+out:
+	up(&nfs_callback_sema);
+	unlock_kernel();
+	return ret;
+}
+
+static int nfs_callback_authenticate(struct svc_rqst *rqstp)
+{
+	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
+	struct nfs4_client *clp;
+
+	/* Don't talk to strangers */
+	clp = nfs4_find_client(addr);
+	if (clp == NULL)
+		return SVC_DROP;
+	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->s_addr));
+	nfs4_put_client(clp);
+	switch (rqstp->rq_authop->flavour) {
+		case RPC_AUTH_NULL:
+			if (rqstp->rq_proc != CB_NULL)
+				return SVC_DENIED;
+			break;
+		case RPC_AUTH_UNIX:
+			break;
+		case RPC_AUTH_GSS:
+			/* FIXME: RPCSEC_GSS handling? */
+		default:
+			return SVC_DENIED;
+	}
+	return SVC_OK;
+}
+
+/*
+ * Define NFS4 callback program
+ */
+extern struct svc_version nfs4_callback_version1;
+
+static struct svc_version *nfs4_callback_version[] = {
+	[1] = &nfs4_callback_version1,
+};
+
+static struct svc_stat nfs4_callback_stats;
+
+static struct svc_program nfs4_callback_program = {
+	.pg_prog = NFS4_CALLBACK,			/* RPC service number */
+	.pg_nvers = ARRAY_SIZE(nfs4_callback_version),	/* Number of entries */
+	.pg_vers = nfs4_callback_version,		/* version table */
+	.pg_name = "NFSv4 callback",			/* service name */
+	.pg_class = "nfs",				/* authentication class */
+	.pg_stats = &nfs4_callback_stats,
+	.pg_authenticate = nfs_callback_authenticate,
+};
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
new file mode 100644
index 00000000000..a0db2d4f941
--- /dev/null
+++ b/fs/nfs/callback.h
@@ -0,0 +1,70 @@
+/*
+ * linux/fs/nfs/callback.h
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback definitions
+ */
+#ifndef __LINUX_FS_NFS_CALLBACK_H
+#define __LINUX_FS_NFS_CALLBACK_H
+
+#define NFS4_CALLBACK 0x40000000
+#define NFS4_CALLBACK_XDRSIZE 2048
+#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
+
+enum nfs4_callback_procnum {
+	CB_NULL = 0,
+	CB_COMPOUND = 1,
+};
+
+enum nfs4_callback_opnum {
+	OP_CB_GETATTR = 3,
+	OP_CB_RECALL  = 4,
+	OP_CB_ILLEGAL = 10044,
+};
+
+struct cb_compound_hdr_arg {
+	unsigned int taglen;	/* unsigned: filled in through decode_string()'s unsigned int * */
+	const char *tag;
+	unsigned int callback_ident;
+	unsigned nops;
+};
+
+struct cb_compound_hdr_res {
+	uint32_t *status;	/* slot in the reply buffer, written once all ops have run */
+	unsigned int taglen;	/* copied from the request header, passed to encode_string() */
+	const char *tag;
+	uint32_t *nops;		/* slot in the reply buffer for the op count */
+};
+
+struct cb_getattrargs {
+	struct sockaddr_in *addr;
+	struct nfs_fh fh;
+	uint32_t bitmap[2];
+};
+
+struct cb_getattrres {
+	uint32_t status;	/* network byte order (set with htonl()) */
+	uint32_t bitmap[2];
+	uint64_t size;
+	uint64_t change_attr;
+	struct timespec ctime;
+	struct timespec mtime;
+};
+
+struct cb_recallargs {
+	struct sockaddr_in *addr;
+	struct nfs_fh fh;
+	nfs4_stateid stateid;
+	uint32_t truncate;
+};
+
+extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+
+extern int nfs_callback_up(void);
+extern int nfs_callback_down(void);
+
+extern unsigned short nfs_callback_tcpport;
+
+#endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
new file mode 100644
index 00000000000..ece27e42b93
--- /dev/null
+++ b/fs/nfs/callback_proc.c
@@ -0,0 +1,85 @@
+/*
+ * linux/fs/nfs/callback_proc.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback procedures
+ */
+#include <linux/config.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+#include "delegation.h"
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+{
+	struct nfs4_client *clp;
+	struct nfs_delegation *delegation;
+	struct nfs_inode *nfsi;
+	struct inode *inode;
+
+	res->bitmap[0] = res->bitmap[1] = 0;
+	res->status = htonl(NFS4ERR_BADHANDLE);	/* default for every failure path below */
+	clp = nfs4_find_client(&args->addr->sin_addr);
+	if (clp == NULL)
+		goto out;
+	inode = nfs_delegation_find_inode(clp, &args->fh);
+	if (inode == NULL)
+		goto out_putclient;
+	nfsi = NFS_I(inode);
+	down_read(&nfsi->rwsem);
+	delegation = nfsi->delegation;
+	if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
+		goto out_iput;	/* only answered while we hold a write delegation */
+	res->size = i_size_read(inode);
+	res->change_attr = NFS_CHANGE_ATTR(inode);
+	res->ctime = inode->i_ctime;
+	res->mtime = inode->i_mtime;
+	res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
+		args->bitmap[0];
+	res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
+		args->bitmap[1];
+	res->status = 0;
+out_iput:
+	up_read(&nfsi->rwsem);
+	iput(inode);
+out_putclient:
+	nfs4_put_client(clp);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
+	return res->status;
+}
+
+unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+{
+	struct nfs4_client *clp;
+	struct inode *inode;
+	unsigned res;
+
+	res = htonl(NFS4ERR_BADHANDLE);
+	clp = nfs4_find_client(&args->addr->sin_addr);
+	if (clp == NULL)
+		goto out;
+	inode = nfs_delegation_find_inode(clp, &args->fh);
+	if (inode == NULL)
+		goto out_putclient;
+	/* Set up a helper thread to actually return the delegation */
+	switch(nfs_async_inode_return_delegation(inode, &args->stateid)) {
+		case 0:
+			res = 0;
+			break;
+		case -ENOENT:
+			res = htonl(NFS4ERR_BAD_STATEID);
+			break;
+		default:
+			res = htonl(NFS4ERR_RESOURCE);
+	}
+	iput(inode);
+out_putclient:
+	nfs4_put_client(clp);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
+	return res;
+}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
new file mode 100644
index 00000000000..d271df9df2b
--- /dev/null
+++ b/fs/nfs/callback_xdr.c
@@ -0,0 +1,481 @@
+/*
+ * linux/fs/nfs/callback_xdr.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback encode/decode procedures
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+
+#define CB_OP_TAGLEN_MAXSZ	(512)
+#define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
+#define CB_OP_GETATTR_BITMAP_MAXSZ	(4)
+#define CB_OP_GETATTR_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+				CB_OP_GETATTR_BITMAP_MAXSZ + \
+				2 + 2 + 3 + 3)
+#define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+typedef unsigned (*callback_process_op_t)(void *, void *);
+typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
+typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
+
+
+struct callback_op {
+	callback_process_op_t process_op;
+	callback_decode_arg_t decode_args;
+	callback_encode_res_t encode_res;
+	long res_maxsize;
+};
+
+static struct callback_op callback_ops[];
+
+static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return htonl(NFS4_OK);
+}
+
+static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	return xdr_argsize_check(rqstp, p);
+}
+
+static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	return xdr_ressize_check(rqstp, p);
+}
+
+static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
+{
+	uint32_t *p;
+
+	p = xdr_inline_decode(xdr, nbytes);
+	if (unlikely(p == NULL))
+		printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n");
+	return p;
+}
+
+static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*len = ntohl(*p);
+
+	if (*len != 0) {
+		p = read_buf(xdr, *len);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*str = (const char *)p;
+	} else
+		*str = NULL;
+
+	return 0;
+}
+
+static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	fh->size = ntohl(*p);
+	if (fh->size > NFS4_FHSIZE)
+		return htonl(NFS4ERR_BADHANDLE);
+	p = read_buf(xdr, fh->size);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	memcpy(&fh->data[0], p, fh->size);
+	memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size);
+	return 0;
+}
+
+static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+	uint32_t *p;
+	unsigned int attrlen;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	attrlen = ntohl(*p);
+	p = read_buf(xdr, attrlen << 2);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	if (likely(attrlen > 0))
+		bitmap[0] = ntohl(*p++);
+	if (attrlen > 1)
+		bitmap[1] = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 16);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	memcpy(stateid->data, p, 16);
+	return 0;
+}
+
+static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
+{
+	uint32_t *p;
+	unsigned int minor_version;
+	unsigned status;
+
+	status = decode_string(xdr, &hdr->taglen, &hdr->tag);
+	if (unlikely(status != 0))
+		return status;
+	/* We do not like overly long tags! */
+	if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) {
+		printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
+				__FUNCTION__, hdr->taglen);
+		return htonl(NFS4ERR_RESOURCE);
+	}
+	p = read_buf(xdr, 12);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	minor_version = ntohl(*p++);
+	/* Check minor version is zero. */
+	if (minor_version != 0) {
+		printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n",
+				__FUNCTION__, minor_version);
+		return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+	}
+	hdr->callback_ident = ntohl(*p++);
+	hdr->nops = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
+{
+	uint32_t *p;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*op = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+{
+	unsigned status;
+
+	status = decode_fh(xdr, &args->fh);
+	if (unlikely(status != 0))
+		goto out;
+	args->addr = &rqstp->rq_addr;
+	status = decode_bitmap(xdr, args->bitmap);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+{
+	uint32_t *p;
+	unsigned status;
+
+	args->addr = &rqstp->rq_addr;
+	status = decode_stateid(xdr, &args->stateid);
+	if (unlikely(status != 0))
+		goto out;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL)) {
+		status = htonl(NFS4ERR_RESOURCE);
+		goto out;
+	}
+	args->truncate = ntohl(*p);
+	status = decode_fh(xdr, &args->fh);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;	/* propagate decode errors so process_op() skips the recall */
+}
+
+static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 4 + len);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	xdr_encode_opaque(p, str, len);
+	return 0;
+}
+
+#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
+#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
+static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
+{
+	uint32_t bm[2];
+	uint32_t *p;
+
+	bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0);
+	bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1);
+	if (bm[1] != 0) {
+		p = xdr_reserve_space(xdr, 16);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(2);
+		*p++ = bm[0];
+		*p++ = bm[1];
+	} else if (bm[0] != 0) {
+		p = xdr_reserve_space(xdr, 12);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(1);
+		*p++ = bm[0];
+	} else {
+		p = xdr_reserve_space(xdr, 8);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(0);
+	}
+	*savep = p;	/* reserved word after the bitmap; encode_getattr_res() stores the attr length here */
+	return 0;
+}
+
+static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
+{
+	uint32_t *p;
+
+	if (!(bitmap[0] & FATTR4_WORD0_CHANGE))
+		return 0;
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, change);
+	return 0;
+}
+
+static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
+{
+	uint32_t *p;
+
+	if (!(bitmap[0] & FATTR4_WORD0_SIZE))
+		return 0;
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, size);
+	return 0;
+}
+
+static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 12);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, time->tv_sec);
+	*p = htonl(time->tv_nsec);
+	return 0;
+}
+
+static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+{
+	if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
+		return 0;
+	return encode_attr_time(xdr,time);
+}
+
+static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+{
+	if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
+		return 0;
+	return encode_attr_time(xdr,time);
+}
+
+static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
+{
+	unsigned status;
+
+	hdr->status = xdr_reserve_space(xdr, 4);
+	if (unlikely(hdr->status == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	status = encode_string(xdr, hdr->taglen, hdr->tag);
+	if (unlikely(status != 0))
+		return status;
+	hdr->nops = xdr_reserve_space(xdr, 4);
+	if (unlikely(hdr->nops == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	return 0;
+}
+
+static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*p++ = htonl(op);
+	*p = res;
+	return 0;
+}
+
+static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
+{
+	uint32_t *savep;
+	unsigned status = res->status;
+
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_bitmap(xdr, res->bitmap, &savep);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_change(xdr, res->bitmap, res->change_attr);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_size(xdr, res->bitmap, res->size);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
+	*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+static unsigned process_op(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr_in, void *argp,
+		struct xdr_stream *xdr_out, void *resp)
+{
+	struct callback_op *op;
+	unsigned int op_nr;
+	unsigned int status = 0;
+	long maxlen;
+	unsigned res;
+
+	dprintk("%s: start\n", __FUNCTION__);
+	status = decode_op_hdr(xdr_in, &op_nr);
+	if (unlikely(status != 0)) {
+		op_nr = OP_CB_ILLEGAL;
+		op = &callback_ops[0];
+	} else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
+		op_nr = OP_CB_ILLEGAL;
+		op = &callback_ops[0];
+		status = htonl(NFS4ERR_OP_ILLEGAL);
+	} else
+		op = &callback_ops[op_nr];
+
+	maxlen = xdr_out->end - xdr_out->p;
+	if (maxlen > 0 && maxlen < PAGE_SIZE) {
+		if (likely(status == 0 && op->decode_args != NULL))
+			status = op->decode_args(rqstp, xdr_in, argp);
+		if (likely(status == 0 && op->process_op != NULL))
+			status = op->process_op(argp, resp);
+	} else
+		status = htonl(NFS4ERR_RESOURCE);
+
+	res = encode_op_hdr(xdr_out, op_nr, status);
+	if (status == 0)
+		status = res;
+	if (op->encode_res != NULL && status == 0)
+		status = op->encode_res(rqstp, xdr_out, resp);
+	dprintk("%s: done, status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
+ * Decode, process and encode a COMPOUND
+ */
+static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	struct cb_compound_hdr_arg hdr_arg;
+	struct cb_compound_hdr_res hdr_res;
+	struct xdr_stream xdr_in, xdr_out;
+	uint32_t *p;
+	unsigned int status;
+	unsigned int nops = 1;
+
+	dprintk("%s: start\n", __FUNCTION__);
+
+	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+
+	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
+	rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
+	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+
+	decode_compound_hdr_arg(&xdr_in, &hdr_arg);	/* NOTE(review): result ignored; hdr_arg may be only partly filled on failure */
+	hdr_res.taglen = hdr_arg.taglen;
+	hdr_res.tag = hdr_arg.tag;
+	encode_compound_hdr_res(&xdr_out, &hdr_res);	/* NOTE(review): result ignored; status/nops slots may be NULL */
+
+	for (;;) {
+		status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp);
+		if (status != 0)
+			break;
+		if (nops == hdr_arg.nops)
+			break;
+		nops++;
+	}
+	*hdr_res.status = status;
+	*hdr_res.nops = htonl(nops);
+	dprintk("%s: done, status = %u\n", __FUNCTION__, status);
+	return rpc_success;
+}
+
+/*
+ * Define NFS4 callback COMPOUND ops.
+ */
+static struct callback_op callback_ops[] = {
+	[0] = {
+		.res_maxsize = CB_OP_HDR_RES_MAXSZ,
+	},
+	[OP_CB_GETATTR] = {
+		.process_op = (callback_process_op_t)nfs4_callback_getattr,
+		.decode_args = (callback_decode_arg_t)decode_getattr_args,
+		.encode_res = (callback_encode_res_t)encode_getattr_res,
+		.res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
+	},
+	[OP_CB_RECALL] = {
+		.process_op = (callback_process_op_t)nfs4_callback_recall,
+		.decode_args = (callback_decode_arg_t)decode_recall_args,
+		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+	}
+};
+
+/*
+ * Define NFS4 callback procedures
+ */
+static struct svc_procedure nfs4_callback_procedures1[] = {
+	[CB_NULL] = {
+		.pc_func = nfs4_callback_null,
+		.pc_decode = (kxdrproc_t)nfs4_decode_void,
+		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_xdrressize = 1,
+	},
+	[CB_COMPOUND] = {
+		.pc_func = nfs4_callback_compound,
+		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_argsize = 256,
+		.pc_ressize = 256,
+		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
+	}
+};
+
+struct svc_version nfs4_callback_version1 = {
+	.vs_vers = 1,
+	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
+	.vs_proc = nfs4_callback_procedures1,
+	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
+	.vs_dispatch = NULL,
+};
+
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
new file mode 100644
index 00000000000..5b9c60f9779
--- /dev/null
+++ b/fs/nfs/delegation.c
@@ -0,0 +1,342 @@
+/*
+ * linux/fs/nfs/delegation.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFS file delegation management
+ *
+ */
+#include <linux/config.h>
+#include <linux/completion.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "delegation.h"
+
+static struct nfs_delegation *nfs_alloc_delegation(void)
+{
+	return kcalloc(1, sizeof(struct nfs_delegation), GFP_KERNEL);	/* zeroed: ->flags is never set explicitly */
+}
+
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+	if (delegation->cred)
+		put_rpccred(delegation->cred);
+	kfree(delegation);
+}
+
+static void nfs_delegation_claim_opens(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state;
+	int err;
+
+again:
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(ctx, &nfsi->open_files, list) {
+		state = ctx->state;
+		if (state == NULL || !test_bit(NFS_DELEGATED_STATE, &state->flags))
+			continue;
+		get_nfs_open_context(ctx);
+		spin_unlock(&inode->i_lock);
+		err = nfs4_open_delegation_recall(ctx->dentry, state);
+		put_nfs_open_context(ctx);	/* drop our reference on the error path too */
+		if (err < 0)
+			return;
+		goto again;
+	}
+	spin_unlock(&inode->i_lock);
+}
+
+/* Refresh an existing delegation (stateid, type, credential) after state reclaim */
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+	struct rpc_cred *oldcred;
+
+	if (delegation == NULL)
+		return;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	oldcred = delegation->cred;
+	delegation->cred = get_rpccred(cred);
+	delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
+	NFS_I(inode)->delegation_state = delegation->type;
+	smp_wmb();
+	put_rpccred(oldcred);	/* release the credential we replaced, not the caller's */
+}
+
+/*
+ * Set up a delegation on an inode
+ */
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int status = 0;
+
+	delegation = nfs_alloc_delegation();
+	if (delegation == NULL)
+		return -ENOMEM;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	delegation->cred = get_rpccred(cred);
+	delegation->inode = inode;
+
+	spin_lock(&clp->cl_lock);
+	if (nfsi->delegation == NULL) {
+		list_add(&delegation->super_list, &clp->cl_delegations);
+		nfsi->delegation = delegation;
+		nfsi->delegation_state = delegation->type;
+		delegation = NULL;
+	} else {
+		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
+					sizeof(delegation->stateid)) != 0 ||
+				delegation->type != nfsi->delegation->type) {
+			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
+					__FUNCTION__, NIPQUAD(clp->cl_addr));
+			status = -EIO;
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	if (delegation != NULL)
+		nfs_free_delegation(delegation);	/* also drops the cred reference taken above */
+	return status;
+}
+
+static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
+{
+	int res = 0;
+
+	__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
+	nfs_free_delegation(delegation);
+	return res;
+}
+
+/* Sync all data to disk upon delegation return */
+static void nfs_msync_inode(struct inode *inode)
+{
+	filemap_fdatawrite(inode->i_mapping);
+	nfs_wb_all(inode);
+	filemap_fdatawait(inode->i_mapping);
+}
+
+/*
+ * Basic procedure for returning a delegation to the server
+ */
+int nfs_inode_return_delegation(struct inode *inode)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int res = 0;
+
+	nfs_msync_inode(inode);
+	down_read(&clp->cl_sem);
+	/* Guard against new delegated open calls */
+	down_write(&nfsi->rwsem);
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL) {
+		list_del_init(&delegation->super_list);
+		nfsi->delegation = NULL;
+		nfsi->delegation_state = 0;
+	}
+	spin_unlock(&clp->cl_lock);
+	nfs_delegation_claim_opens(inode);
+	up_write(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	nfs_msync_inode(inode);
+
+	if (delegation != NULL)
+		res = nfs_do_return_delegation(inode, delegation);
+	return res;
+}
+
+/*
+ * Return all delegations associated to a super block
+ */
+void nfs_return_all_delegations(struct super_block *sb)
+{
+	struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_delegation *delegation;
+	struct inode *inode;
+
+	if (clp == NULL)
+		return;
+restart:
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		if (delegation->inode->i_sb != sb)
+			continue;
+		inode = igrab(delegation->inode);
+		if (inode == NULL)
+			continue;
+		spin_unlock(&clp->cl_lock);
+		nfs_inode_return_delegation(inode);
+		iput(inode);
+		goto restart;	/* the list may have changed while cl_lock was dropped */
+	}
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
+ */
+void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation;
+	struct inode *inode;
+
+	if (clp == NULL)
+		return;
+restart:
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		inode = igrab(delegation->inode);
+		if (inode == NULL)
+			continue;
+		spin_unlock(&clp->cl_lock);
+		nfs_inode_return_delegation(inode);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&clp->cl_lock);
+}
+
+struct recall_threadargs {
+	struct inode *inode;
+	struct nfs4_client *clp;
+	const nfs4_stateid *stateid;
+
+	struct completion started;
+	int result;
+};
+
+static int recall_thread(void *data)
+{
+	struct recall_threadargs *args = (struct recall_threadargs *)data;
+	struct inode *inode = igrab(args->inode);
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+
+	daemonize("nfsv4-delegreturn");
+
+	nfs_msync_inode(inode);
+	down_read(&clp->cl_sem);
+	down_write(&nfsi->rwsem);
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL && memcmp(delegation->stateid.data,
+				args->stateid->data,
+				sizeof(delegation->stateid.data)) == 0) {
+		list_del_init(&delegation->super_list);
+		nfsi->delegation = NULL;
+		nfsi->delegation_state = 0;
+		args->result = 0;
+	} else {
+		delegation = NULL;
+		args->result = -ENOENT;
+	}
+	spin_unlock(&clp->cl_lock);
+	complete(&args->started);	/* args lives on the waiter's stack: not touched after this */
+	nfs_delegation_claim_opens(inode);
+	up_write(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	nfs_msync_inode(inode);
+
+	if (delegation != NULL)
+		nfs_do_return_delegation(inode, delegation);
+	iput(inode);
+	module_put_and_exit(0);
+}
+
+/*
+ * Asynchronous delegation recall!
+ */
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+{
+	struct recall_threadargs data = {
+		.inode = inode,
+		.stateid = stateid,
+	};
+	int status;
+
+	init_completion(&data.started);
+	__module_get(THIS_MODULE);
+	status = kernel_thread(recall_thread, &data, CLONE_KERNEL);
+	if (status < 0)
+		goto out_module_put;
+	wait_for_completion(&data.started);
+	return data.result;
+out_module_put:
+	module_put(THIS_MODULE);
+	return status;
+}
+
+/*
+ * Retrieve the inode associated with a delegation
+ */
+struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+{
+	struct nfs_delegation *delegation;
+	struct inode *res = NULL;
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
+			res = igrab(delegation->inode);
+			break;
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	return res;
+}
+
+/*
+ * Mark all delegations as needing to be reclaimed
+ */
+void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation;
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list)
+		delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Reap all unclaimed delegations after reboot recovery is done
+ */
+void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation, *n;
+	LIST_HEAD(head);
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+		if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
+			continue;
+		list_move(&delegation->super_list, &head);
+		NFS_I(delegation->inode)->delegation = NULL;
+		NFS_I(delegation->inode)->delegation_state = 0;
+	}
+	spin_unlock(&clp->cl_lock);
+	while(!list_empty(&head)) {
+		delegation = list_entry(head.next, struct nfs_delegation, super_list);
+		list_del(&delegation->super_list);
+		nfs_free_delegation(delegation);
+	}
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
new file mode 100644
index 00000000000..3f6c45a29d6
--- /dev/null
+++ b/fs/nfs/delegation.h
@@ -0,0 +1,57 @@
+/*
+ * linux/fs/nfs/delegation.h
+ *
+ * Copyright (c) Trond Myklebust
+ *
+ * Definitions pertaining to NFS delegated files
+ */
+#ifndef FS_NFS_DELEGATION_H
+#define FS_NFS_DELEGATION_H
+
+#if defined(CONFIG_NFS_V4)
+/*
+ * NFSv4 delegation
+ */
+struct nfs_delegation {
+	struct list_head super_list;
+	struct rpc_cred *cred;
+	struct inode *inode;
+	nfs4_stateid stateid;
+	int type;
+#define NFS_DELEGATION_NEED_RECLAIM 1
+	long flags;
+	loff_t maxsize;
+};
+
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+int nfs_inode_return_delegation(struct inode *inode);
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+
+struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+void nfs_return_all_delegations(struct super_block *sb);
+void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+
+void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+
+/* NFSv4 delegation-related procedures */
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
+int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+
+static inline int nfs_have_delegation(struct inode *inode, int flags) +{ + flags &= FMODE_READ|FMODE_WRITE; + smp_rmb(); + if ((NFS_I(inode)->delegation_state & flags) == flags) + return 1; + return 0; +} +#else +static inline int nfs_have_delegation(struct inode *inode, int flags) +{ + return 0; +} +#endif + +#endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c new file mode 100644 index 00000000000..73f96acd5d3 --- /dev/null +++ b/fs/nfs/dir.c @@ -0,0 +1,1562 @@ +/* + * linux/fs/nfs/dir.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs directory handling functions + * + * 10 Apr 1996 Added silly rename for unlink --okir + * 28 Sep 1996 Improved directory cache --okir + * 23 Aug 1997 Claus Heine claus@momo.math.rwth-aachen.de + * Re-implemented silly rename for unlink, newly implemented + * silly rename for nfs_rename() following the suggestions + * of Olaf Kirch (okir) found in this file. + * Following Linus comments on my original hack, this version + * depends only on the dcache stuff and doesn't touch the inode + * layer (iput() and friends). + * 6 Jun 1999 Cache readdir lookups in the page cache. 
-DaveM + */ + +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/pagemap.h> +#include <linux/smp_lock.h> +#include <linux/namei.h> + +#include "delegation.h" + +#define NFS_PARANOIA 1 +/* #define NFS_DEBUG_VERBOSE 1 */ + +static int nfs_opendir(struct inode *, struct file *); +static int nfs_readdir(struct file *, void *, filldir_t); +static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); +static int nfs_mkdir(struct inode *, struct dentry *, int); +static int nfs_rmdir(struct inode *, struct dentry *); +static int nfs_unlink(struct inode *, struct dentry *); +static int nfs_symlink(struct inode *, struct dentry *, const char *); +static int nfs_link(struct dentry *, struct inode *, struct dentry *); +static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); +static int nfs_rename(struct inode *, struct dentry *, + struct inode *, struct dentry *); +static int nfs_fsync_dir(struct file *, struct dentry *, int); + +struct file_operations nfs_dir_operations = { + .read = generic_read_dir, + .readdir = nfs_readdir, + .open = nfs_opendir, + .release = nfs_release, + .fsync = nfs_fsync_dir, +}; + +struct inode_operations nfs_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#ifdef CONFIG_NFS_V4 + +static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +struct inode_operations 
nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_atomic_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#endif /* CONFIG_NFS_V4 */ + +/* + * Open a directory: calls nfs_open() under the BKL so that credentials get cached + */ +static int +nfs_opendir(struct inode *inode, struct file *filp) +{ + int res = 0; + + lock_kernel(); + /* Call generic open code in order to cache credentials (res is always 0 here) */ + if (!res) + res = nfs_open(inode, filp); + unlock_kernel(); + return res; +} +/* Dirent decoder callback type, and the descriptor holding the state of one readdir pass */ +typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); +typedef struct { + struct file *file; + struct page *page; + unsigned long page_index; + u32 *ptr; + u64 target; + struct nfs_entry *entry; + decode_dirent_t decode; + int plus; + int error; +} nfs_readdir_descriptor_t; + +/* Now we cache directories properly, by stuffing the dirent + * data directly in the page cache. + * + * Inode invalidation due to refresh etc. takes care of + * _everything_, no sloppy entry flushing logic, no extraneous + * copying, network direct to page cache, the way it was meant + * to be. + * + * NOTE: Dirent information verification is done always by the + * page-in of the RPC reply, nowhere else, this simplifies + * things substantially. 
+ */ +static +int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) +{ + struct file *file = desc->file; + struct inode *inode = file->f_dentry->d_inode; + struct rpc_cred *cred = nfs_file_cred(file); + unsigned long timestamp; + int error; + + dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); + + again: + timestamp = jiffies; + error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page, + NFS_SERVER(inode)->dtsize, desc->plus); + if (error < 0) { + /* We requested READDIRPLUS, but the server doesn't grok it */ + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + desc->plus = 0; + goto again; + } + goto error; + } + SetPageUptodate(page); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + /* Ensure consistent page alignment of the data. + * Note: assumes we have exclusive access to this mapping either + * through inode->i_sem or some other mechanism. + */ + if (page->index == 0) { + invalidate_inode_pages(inode->i_mapping); + NFS_I(inode)->readdir_timestamp = timestamp; + } + unlock_page(page); + return 0; + error: + SetPageError(page); + unlock_page(page); + nfs_zap_caches(inode); + desc->error = error; + return -EIO; +} +/* Decode the entry at desc->ptr into desc->entry; on success advance desc->ptr and return 0, else return the decoder's error */ +static inline +int dir_decode(nfs_readdir_descriptor_t *desc) +{ + u32 *p = desc->ptr; + p = desc->decode(p, desc->entry, desc->plus); + if (IS_ERR(p)) + return PTR_ERR(p); + desc->ptr = p; + return 0; +} +/* Unmap and release desc->page, then clear desc->page and desc->ptr */ +static inline +void dir_page_release(nfs_readdir_descriptor_t *desc) +{ + kunmap(desc->page); + page_cache_release(desc->page); + desc->page = NULL; + desc->ptr = NULL; +} + +/* + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the next entry. + * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. 
+ */ +static inline +int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + while((status = dir_decode(desc)) == 0) { + dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie); + if (entry->prev_cookie == desc->target) + break; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dfprintk(VFS, "NFS: find_dirent() returns %d\n", status); + return status; +} + +/* + * Find the given page, and call find_dirent() in order to try to + * return the next entry. + */ +static inline +int find_dirent_page(nfs_readdir_descriptor_t *desc) +{ + struct inode *inode = desc->file->f_dentry->d_inode; + struct page *page; + int status; + + dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); + + page = read_cache_page(inode->i_mapping, desc->page_index, + (filler_t *)nfs_readdir_filler, desc); + if (IS_ERR(page)) { + status = PTR_ERR(page); + goto out; + } + if (!PageUptodate(page)) + goto read_error; + + /* NOTE: Someone else may have changed the READDIRPLUS flag */ + desc->page = page; + desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ + status = find_dirent(desc, page); + if (status < 0) + dir_page_release(desc); + out: + dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status); + return status; + read_error: + page_cache_release(page); + return -EIO; +} + +/* + * Recurse through the page cache pages, and return a + * filled nfs_entry structure of the next directory entry if possible. + * + * The target for the search is 'desc->target'. 
+ */ +static inline +int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) +{ + int loop_count = 0; + int res; + + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target); + for (;;) { + res = find_dirent_page(desc); + if (res != -EAGAIN) + break; + /* Align to beginning of next page */ + desc->page_index ++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res); + return res; +} + +static inline unsigned int dt_type(struct inode *inode) +{ + return (inode->i_mode >> 12) & 15; +} + +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc); + +/* + * Once we've found the start of the dirent within a page: fill 'er up... + */ +static +int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, + filldir_t filldir) +{ + struct file *file = desc->file; + struct nfs_entry *entry = desc->entry; + struct dentry *dentry = NULL; + unsigned long fileid; + int loop_count = 0, + res; + + dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); + + for(;;) { + unsigned d_type = DT_UNKNOWN; + /* Note: entry->prev_cookie contains the cookie for + * retrieving the current dirent on the server */ + fileid = nfs_fileid_to_ino_t(entry->ino); + + /* Get a dentry if we have one */ + if (dentry != NULL) + dput(dentry); + dentry = nfs_readdir_lookup(desc); + + /* Use readdirplus info */ + if (dentry != NULL && dentry->d_inode != NULL) { + d_type = dt_type(dentry->d_inode); + fileid = dentry->d_inode->i_ino; + } + + res = filldir(dirent, entry->name, entry->len, + entry->prev_cookie, fileid, d_type); + if (res < 0) + break; + file->f_pos = desc->target = entry->cookie; + if (dir_decode(desc) != 0) { + desc->page_index ++; + break; + } + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dir_page_release(desc); + if (dentry != NULL) + dput(dentry); + dfprintk(VFS, "NFS: 
nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); + return res; +} + +/* + * If we cannot find a cookie in our cache, we suspect that this is + * because it points to a deleted file, so we ask the server to return + * whatever it thinks is the next entry. We then feed this to filldir. + * If all goes well, we should then be able to find our way round the + * cache on the next call to readdir_search_pagecache(); + * + * NOTE: we cannot add the anonymous page to the pagecache because + * the data it contains might not be page aligned. Besides, + * we should already have a complete representation of the + * directory in the page cache by the time we get here. + */ +static inline +int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, + filldir_t filldir) +{ + struct file *file = desc->file; + struct inode *inode = file->f_dentry->d_inode; + struct rpc_cred *cred = nfs_file_cred(file); + struct page *page = NULL; + int status; + + dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target); + + page = alloc_page(GFP_HIGHUSER); + if (!page) { + status = -ENOMEM; + goto out; + } + desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target, + page, + NFS_SERVER(inode)->dtsize, + desc->plus); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + desc->page = page; + desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ + if (desc->error >= 0) { + if ((status = dir_decode(desc)) == 0) + desc->entry->prev_cookie = desc->target; + } else + status = -EIO; + if (status < 0) + goto out_release; + + status = nfs_do_filldir(desc, dirent, filldir); + + /* Reset read descriptor so it searches the page cache from + * the start upon the next call to readdir_search_pagecache() */ + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + out: + dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); + return status; + 
out_release: + dir_page_release(desc); + goto out; +} + +/* The file offset position is now represented as a true offset into the + * page cache as is the case in most of the other filesystems. + */ +static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + nfs_readdir_descriptor_t my_desc, + *desc = &my_desc; + struct nfs_entry my_entry; + struct nfs_fh fh; + struct nfs_fattr fattr; + long res; + + lock_kernel(); + + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (res < 0) { + unlock_kernel(); + return res; + } + + /* + * filp->f_pos points to the file offset in the page cache. + * but if the cache has meanwhile been zapped, we need to + * read from the last dirent to revalidate f_pos + * itself. + */ + memset(desc, 0, sizeof(*desc)); + + desc->file = filp; + desc->target = filp->f_pos; + desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); + + my_entry.cookie = my_entry.prev_cookie = 0; + my_entry.eof = 0; + my_entry.fh = &fh; + my_entry.fattr = &fattr; + desc->entry = &my_entry; + + while(!desc->entry->eof) { + res = readdir_search_pagecache(desc); + if (res == -EBADCOOKIE) { + /* This means either end of directory */ + if (desc->entry->cookie != desc->target) { + /* Or that the server has 'lost' a cookie */ + res = uncached_readdir(desc, dirent, filldir); + if (res >= 0) + continue; + } + res = 0; + break; + } + if (res == -ETOOSMALL && desc->plus) { + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + nfs_zap_caches(inode); + desc->plus = 0; + desc->entry->eof = 0; + continue; + } + if (res < 0) + break; + + res = nfs_do_filldir(desc, dirent, filldir); + if (res < 0) { + res = 0; + break; + } + } + unlock_kernel(); + if (desc->error < 0) + return desc->error; + if (res < 0) + return res; + return 0; +} + +/* + * All directory operations under NFS are synchronous, so fsync() + * is a dummy operation. 
+ */ +int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) +{ + return 0; +} + +/* + * A check for whether or not the parent directory has changed. + * In the case it has, we assume that the dentries are untrustworthy + * and may need to be looked up again. + */ +static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +{ + if (IS_ROOT(dentry)) + return 1; + if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 + || nfs_attribute_timeout(dir)) + return 0; + return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); +} + +static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) +{ + dentry->d_fsdata = (void *)verf; +} + +/* + * Whenever an NFS operation succeeds, we know that the dentry + * is valid, so we update the revalidation timestamp. + */ +static inline void nfs_renew_times(struct dentry * dentry) +{ + dentry->d_time = jiffies; +} + +static inline +int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) +{ + struct nfs_server *server = NFS_SERVER(inode); + + if (nd != NULL) { + int ndflags = nd->flags; + /* VFS wants an on-the-wire revalidation */ + if (ndflags & LOOKUP_REVAL) + goto out_force; + /* This is an open(2) */ + if ((ndflags & LOOKUP_OPEN) && + !(ndflags & LOOKUP_CONTINUE) && + !(server->flags & NFS_MOUNT_NOCTO)) + goto out_force; + } + return nfs_revalidate_inode(server, inode); +out_force: + return __nfs_revalidate_inode(server, inode); +} + +/* + * We judge how long we want to trust negative + * dentries by looking at the parent inode mtime. + * + * If parent mtime has changed, we revalidate, else we wait for a + * period corresponding to the parent's attribute cache timeout value. 
+ */ +static inline +int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + int ndflags = 0; + + if (nd) + ndflags = nd->flags; + /* Don't revalidate a negative dentry if we're creating a new file */ + if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE)) + return 0; + return !nfs_check_verifier(dir, dentry); +} + +/* + * This is called every time the dcache has a lookup hit, + * and we should check whether we can really trust that + * lookup. + * + * NOTE! The hit can be a negative hit too, don't assume + * we have an inode! + * + * If the parent directory is seen to have changed, we throw out the + * cached dentry and do a new lookup. + */ +static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct inode *dir; + struct inode *inode; + struct dentry *parent; + int error; + struct nfs_fh fhandle; + struct nfs_fattr fattr; + unsigned long verifier; + + parent = dget_parent(dentry); + lock_kernel(); + dir = parent->d_inode; + inode = dentry->d_inode; + + if (!inode) { + if (nfs_neg_need_reval(dir, dentry, nd)) + goto out_bad; + goto out_valid; + } + + if (is_bad_inode(inode)) { + dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + goto out_bad; + } + + /* Revalidate parent directory attribute cache */ + if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + goto out_zap_parent; + + /* Force a full look up iff the parent directory has changed */ + if (nfs_check_verifier(dir, dentry)) { + if (nfs_lookup_verify_inode(inode, nd)) + goto out_zap_parent; + goto out_valid; + } + + if (NFS_STALE(inode)) + goto out_bad; + + verifier = nfs_save_change_attribute(dir); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); + if (error) + goto out_bad; + if (nfs_compare_fh(NFS_FH(inode), &fhandle)) + goto out_bad; + if ((error = nfs_refresh_inode(inode, &fattr)) != 0) + goto out_bad; + + nfs_renew_times(dentry); + 
nfs_set_verifier(dentry, verifier); + out_valid: + unlock_kernel(); + dput(parent); + return 1; +out_zap_parent: + nfs_zap_caches(dir); + out_bad: + NFS_CACHEINV(dir); + if (inode && S_ISDIR(inode->i_mode)) { + /* Purge readdir caches. */ + nfs_zap_caches(inode); + /* If we have submounts, don't unhash ! */ + if (have_submounts(dentry)) + goto out_valid; + shrink_dcache_parent(dentry); + } + d_drop(dentry); + unlock_kernel(); + dput(parent); + return 0; +} + +/* + * This is called from dput() when d_count is going to 0. + */ +static int nfs_dentry_delete(struct dentry *dentry) +{ + dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + dentry->d_flags); + + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + /* Unhash it, so that ->d_iput() would be called */ + return 1; + } + if (!(dentry->d_sb->s_flags & MS_ACTIVE)) { + /* Unhash it, so that ancestors of killed async unlink + * files will be cleaned up during umount */ + return 1; + } + return 0; + +} + +/* + * Called when the dentry loses inode. + * We use it to clean up silly-renamed files. 
+ */ +static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) +{ + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + lock_kernel(); + inode->i_nlink--; + nfs_complete_unlink(dentry); + unlock_kernel(); + } + /* When creating a negative dentry, we want to renew d_time */ + nfs_renew_times(dentry); + iput(inode); +} + +struct dentry_operations nfs_dentry_operations = { + .d_revalidate = nfs_lookup_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +static inline +int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +{ + if (NFS_PROTO(dir)->version == 2) + return 0; + if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) + return 0; + return (nd->intent.open.flags & O_EXCL) != 0; +} + +static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +{ + struct dentry *res; + struct inode *inode = NULL; + int error; + struct nfs_fh fhandle; + struct nfs_fattr fattr; + + dfprintk(VFS, "NFS: lookup(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + + res = ERR_PTR(-ENAMETOOLONG); + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + goto out; + + res = ERR_PTR(-ENOMEM); + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + lock_kernel(); + /* Revalidate parent directory attribute cache */ + error = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (error < 0) { + res = ERR_PTR(error); + goto out_unlock; + } + + /* If we're doing an exclusive create, optimize away the lookup */ + if (nfs_is_exclusive_create(dir, nd)) + goto no_entry; + + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); + if (error == -ENOENT) + goto no_entry; + if (error < 0) { + res = ERR_PTR(error); + goto out_unlock; + } + res = ERR_PTR(-EACCES); + inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); + if (!inode) + goto out_unlock; +no_entry: + res = d_add_unique(dentry, inode); + if (res != NULL) + dentry = res; + nfs_renew_times(dentry); + 
nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); +out_unlock: + unlock_kernel(); +out: + return res; +} + +#ifdef CONFIG_NFS_V4 +static int nfs_open_revalidate(struct dentry *, struct nameidata *); + +struct dentry_operations nfs4_dentry_operations = { + .d_revalidate = nfs_open_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +/* + * Return 1 if the lookup described by nd can be served by an atomic NFSv4 + * open, 0 if the caller must fall back to a plain lookup. + */ +static int is_atomic_open(struct inode *dir, struct nameidata *nd) +{ + if (!nd) + return 0; + /* Check that we are indeed trying to open this file */ + if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN)) + return 0; + /* NFS does not (yet) have a stateful open for directories */ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ + if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; +} + +/* + * NFSv4 ->lookup(): when is_atomic_open() accepts the intent, open the file + * on the server with nfs4_atomic_open() and hash the dentry with the result + * (-ENOENT yields a negative dentry; O_EXCL skips the server call and leaves + * the create to the VFS). Otherwise, or on -ELOOP without O_NOFOLLOW, fall + * back to nfs_lookup(). + * + * Returns NULL, the alias returned by d_add_unique(), or an ERR_PTR(). + * The BKL taken here is released on every exit path. + */ +static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *res = NULL; + struct inode *inode = NULL; + int error; + + /* Check that we are indeed trying to open this file */ + if (!is_atomic_open(dir, nd)) + goto no_open; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { + res = ERR_PTR(-ENAMETOOLONG); + goto out; + } + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ + if (nd->intent.open.flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ + lock_kernel(); + /* Revalidate parent directory attribute cache */ + error = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (error < 0) { + res = ERR_PTR(error); + /* 'out' does not unlock, so drop the BKL before bailing out */ + unlock_kernel(); + goto out; + } + + if (nd->intent.open.flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); + } else + inode = nfs4_atomic_open(dir, dentry, nd); + unlock_kernel(); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + switch (error) { + /* Make a negative dentry */ + case -ENOENT: + inode = NULL; + break; + /* 
This turned out not to be a regular file */ + case -ELOOP: + if (!(nd->intent.open.flags & O_NOFOLLOW)) + goto no_open; + /* fall through: O_NOFOLLOW was requested, so report the error */ + /* case -EISDIR: */ + /* case -EINVAL: */ + default: + res = ERR_PTR(error); + goto out; + } + } +no_entry: + res = d_add_unique(dentry, inode); + if (res != NULL) + dentry = res; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); +out: + return res; +no_open: + return nfs_lookup(dir, dentry, nd); +} + +static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; + struct inode *dir; + unsigned long verifier; + int openflags, ret = 0; + + parent = dget_parent(dentry); + dir = parent->d_inode; + if (!is_atomic_open(dir, nd)) + goto no_open; + /* We can't create new files in nfs_open_revalidate(), so we + * optimize away revalidation of negative dentries. + */ + if (inode == NULL) + goto out; + /* NFS only supports OPEN on regular files */ + if (!S_ISREG(inode->i_mode)) + goto no_open; + openflags = nd->intent.open.flags; + /* We cannot do exclusive creation on a positive dentry */ + if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + goto no_open; + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_TRUNC); + + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. 
+ */ + lock_kernel(); + verifier = nfs_save_change_attribute(dir); + ret = nfs4_open_revalidate(dir, dentry, openflags); + if (!ret) + nfs_set_verifier(dentry, verifier); + unlock_kernel(); +out: + dput(parent); + if (!ret) + d_drop(dentry); + return ret; +no_open: + dput(parent); + if (inode != NULL && nfs_have_delegation(inode, FMODE_READ)) + return 1; + return nfs_lookup_revalidate(dentry, nd); +} +#endif /* CONFIG_NFS_V4 */ +/* Return a referenced dentry for the current readdir entry: the parent for "..", the directory itself for ".", a dcache hit, or (only with readdirplus and valid attributes) a newly hashed dentry; NULL otherwise */ +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) +{ + struct dentry *parent = desc->file->f_dentry; + struct inode *dir = parent->d_inode; + struct nfs_entry *entry = desc->entry; + struct dentry *dentry, *alias; + struct qstr name = { + .name = entry->name, + .len = entry->len, + }; + struct inode *inode; + + switch (name.len) { + case 2: + if (name.name[0] == '.' && name.name[1] == '.') + return dget_parent(parent); + break; + case 1: + if (name.name[0] == '.') + return dget(parent); + } + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry != NULL) + return dentry; + if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) + return NULL; + /* Note: caller is already holding the dir->i_sem! */ + dentry = d_alloc(parent, &name); + if (dentry == NULL) + return NULL; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); + if (!inode) { + dput(dentry); + return NULL; + } + alias = d_add_unique(dentry, inode); + if (alias != NULL) { + dput(dentry); + dentry = alias; + } + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + return dentry; +} + +/* + * Code common to create, mkdir, and mknod. 
+ */ +int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct inode *inode; + int error = -EACCES; + + /* We may have been initialized further down */ + if (dentry->d_inode) + return 0; + if (fhandle->size == 0) { + struct inode *dir = dentry->d_parent->d_inode; + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); + if (error) + goto out_err; + } + if (!(fattr->valid & NFS_ATTR_FATTR)) { + struct nfs_server *server = NFS_SB(dentry->d_sb); + error = server->rpc_ops->getattr(server, fhandle, fattr); + if (error < 0) + goto out_err; + } + error = -ENOMEM; + inode = nfs_fhget(dentry->d_sb, fhandle, fattr); + if (inode == NULL) + goto out_err; + d_instantiate(dentry, inode); + return 0; +out_err: + d_drop(dentry); + return error; +} + +/* + * Following a failed create operation, we drop the dentry rather + * than retain a negative dentry. This avoids a problem in the event + * that the operation succeeded on the server, but an error in the + * reply path made it appear to have failed. + */ +static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct iattr attr; + int error; + int open_flags = 0; + + dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name); + + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; + + if (nd && (nd->flags & LOOKUP_CREATE)) + open_flags = nd->intent.open.flags; + + lock_kernel(); + nfs_begin_data_update(dir); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + nfs_end_data_update(dir); + if (error != 0) + goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + unlock_kernel(); + return 0; +out_err: + unlock_kernel(); + d_drop(dentry); + return error; +} + +/* + * See comments for nfs_proc_create regarding failed operations. 
+ */ +static int +nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) +{ + struct iattr attr; + int status; + + dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name); + + if (!new_valid_dev(rdev)) + return -EINVAL; + + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; + + lock_kernel(); + nfs_begin_data_update(dir); + status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); + nfs_end_data_update(dir); + if (status != 0) + goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + unlock_kernel(); + return 0; +out_err: + unlock_kernel(); + d_drop(dentry); + return status; +} + +/* + * See comments for nfs_proc_create regarding failed operations. + */ +static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct iattr attr; + int error; + + dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name); + + attr.ia_valid = ATTR_MODE; + attr.ia_mode = mode | S_IFDIR; + + lock_kernel(); + nfs_begin_data_update(dir); + error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); + nfs_end_data_update(dir); + if (error != 0) + goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + unlock_kernel(); + return 0; +out_err: + d_drop(dentry); + unlock_kernel(); + return error; +} + +static int nfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + int error; + + dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name); + + lock_kernel(); + nfs_begin_data_update(dir); + error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); + /* Ensure the VFS deletes this inode */ + if (error == 0 && dentry->d_inode != NULL) + dentry->d_inode->i_nlink = 0; + nfs_end_data_update(dir); + unlock_kernel(); + + return error; +} + +static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) +{ + static unsigned int sillycounter; + const int i_inosize = 
sizeof(dir->i_ino)*2; + const int countersize = sizeof(sillycounter)*2; + const int slen = sizeof(".nfs") + i_inosize + countersize - 1; + char silly[slen+1]; + struct qstr qsilly; + struct dentry *sdentry; + int error = -EIO; + + dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + atomic_read(&dentry->d_count)); + +#ifdef NFS_PARANOIA +if (!dentry->d_inode) +printk("NFS: silly-renaming %s/%s, negative dentry??\n", +dentry->d_parent->d_name.name, dentry->d_name.name); +#endif + /* + * We don't allow a dentry to be silly-renamed twice. + */ + error = -EBUSY; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + goto out; + + sprintf(silly, ".nfs%*.*lx", + i_inosize, i_inosize, dentry->d_inode->i_ino); + + sdentry = NULL; + do { + char *suffix = silly + slen - countersize; + + dput(sdentry); + sillycounter++; + sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); + + dfprintk(VFS, "trying to rename %s to %s\n", + dentry->d_name.name, silly); + + sdentry = lookup_one_len(silly, dentry->d_parent, slen); + /* + * N.B. Better to return EBUSY here ... it could be + * dangerous to delete the file while it's in use. 
+ */ + if (IS_ERR(sdentry)) + goto out; + } while(sdentry->d_inode != NULL); /* need negative lookup */ + + qsilly.name = silly; + qsilly.len = strlen(silly); + nfs_begin_data_update(dir); + if (dentry->d_inode) { + nfs_begin_data_update(dentry->d_inode); + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dentry->d_inode); + } else + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dir); + if (!error) { + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + d_move(dentry, sdentry); + error = nfs_async_unlink(dentry); + /* If we return 0 we don't unlink */ + } + dput(sdentry); +out: + return error; +} + +/* + * Remove a file after making sure there are no pending writes, + * and after checking that the file has only one user. + * + * We invalidate the attribute cache and free the inode prior to the operation + * to avoid possible races if the server reuses the inode. + */ +static int nfs_safe_remove(struct dentry *dentry) +{ + struct inode *dir = dentry->d_parent->d_inode; + struct inode *inode = dentry->d_inode; + int error = -EBUSY; + + dfprintk(VFS, "NFS: safe_remove(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + + /* If the dentry was sillyrenamed, we simply call d_delete() */ + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + error = 0; + goto out; + } + + nfs_begin_data_update(dir); + if (inode != NULL) { + nfs_begin_data_update(inode); + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + /* The VFS may want to delete this inode */ + if (error == 0) + inode->i_nlink--; + nfs_end_data_update(inode); + } else + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + nfs_end_data_update(dir); +out: + return error; +} + +/* We do silly rename. In case sillyrename() returns -EBUSY, the inode + * belongs to an active ".nfs..." file and we return -EBUSY. + * + * If sillyrename() returns 0, we do nothing, otherwise we unlink. 
+ */ +static int nfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int error; + int need_rehash = 0; + + dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name); + + lock_kernel(); + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + if (atomic_read(&dentry->d_count) > 1) { + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + error = nfs_sillyrename(dir, dentry); + unlock_kernel(); + return error; + } + if (!d_unhashed(dentry)) { + __d_drop(dentry); + need_rehash = 1; + } + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + error = nfs_safe_remove(dentry); + if (!error) { + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } else if (need_rehash) + d_rehash(dentry); + unlock_kernel(); + return error; +} + +static int +nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +{ + struct iattr attr; + struct nfs_fattr sym_attr; + struct nfs_fh sym_fh; + struct qstr qsymname; + int error; + + dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name, symname); + +#ifdef NFS_PARANOIA +if (dentry->d_inode) +printk("nfs_proc_symlink: %s/%s not negative!\n", +dentry->d_parent->d_name.name, dentry->d_name.name); +#endif + /* + * Fill in the sattr for the call. + * Note: SunOS 4.1.2 crashes if the mode isn't initialized! 
+ */ + attr.ia_valid = ATTR_MODE; + attr.ia_mode = S_IFLNK | S_IRWXUGO; + + qsymname.name = symname; + qsymname.len = strlen(symname); + + lock_kernel(); + nfs_begin_data_update(dir); + error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, + &attr, &sym_fh, &sym_attr); + nfs_end_data_update(dir); + if (!error) { + error = nfs_instantiate(dentry, &sym_fh, &sym_attr); + } else { + if (error == -EEXIST) + printk("nfs_proc_symlink: %s/%s already exists??\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + d_drop(dentry); + } + unlock_kernel(); + return error; +} + +static int +nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + int error; + + dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n", + old_dentry->d_parent->d_name.name, old_dentry->d_name.name, + dentry->d_parent->d_name.name, dentry->d_name.name); + + /* + * Drop the dentry in advance to force a new lookup. + * Since nfs_proc_link doesn't return a file handle, + * we can't use the existing dentry. + */ + lock_kernel(); + d_drop(dentry); + + nfs_begin_data_update(dir); + nfs_begin_data_update(inode); + error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); + nfs_end_data_update(inode); + nfs_end_data_update(dir); + unlock_kernel(); + return error; +} + +/* + * RENAME + * FIXME: Some nfsds, like the Linux user space nfsd, may generate a + * different file handle for the same inode after a rename (e.g. when + * moving to a different directory). A fail-safe method to do so would + * be to look up old_dir/old_name, create a link to new_dir/new_name and + * rename the old file using the sillyrename stuff. This way, the original + * file in old_dir will go away when the last process iput()s the inode. + * + * FIXED. + * + * It actually works quite well. One needs to have the possibility for + * at least one ".nfs..." 
file in each directory the file ever gets + * moved or linked to which happens automagically with the new + * implementation that only depends on the dcache stuff instead of + * using the inode layer + * + * Unfortunately, things are a little more complicated than indicated + * above. For a cross-directory move, we want to make sure we can get + * rid of the old inode after the operation. This means there must be + * no pending writes (if it's a file), and the use count must be 1. + * If these conditions are met, we can drop the dentries before doing + * the rename. + */ +static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct dentry *dentry = NULL, *rehash = NULL; + int error = -EBUSY; + + /* + * To prevent any new references to the target during the rename, + * we unhash the dentry and free the inode in advance. + */ + lock_kernel(); + if (!d_unhashed(new_dentry)) { + d_drop(new_dentry); + rehash = new_dentry; + } + + dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", + old_dentry->d_parent->d_name.name, old_dentry->d_name.name, + new_dentry->d_parent->d_name.name, new_dentry->d_name.name, + atomic_read(&new_dentry->d_count)); + + /* + * First check whether the target is busy ... we can't + * safely do _any_ rename if the target is in use. + * + * For files, make a copy of the dentry and then do a + * silly-rename. If the silly-rename succeeds, the + * copied dentry is hashed and becomes the new target. + */ + if (!new_inode) + goto go_ahead; + if (S_ISDIR(new_inode->i_mode)) + goto out; + else if (atomic_read(&new_dentry->d_count) > 2) { + int err; + /* copy the target dentry's name */ + dentry = d_alloc(new_dentry->d_parent, + &new_dentry->d_name); + if (!dentry) + goto out; + + /* silly-rename the existing target ... 
*/ + err = nfs_sillyrename(new_dir, new_dentry); + if (!err) { + new_dentry = rehash = dentry; + new_inode = NULL; + /* instantiate the replacement target */ + d_instantiate(new_dentry, NULL); + } else if (atomic_read(&new_dentry->d_count) > 1) { + /* dentry still busy? */ +#ifdef NFS_PARANOIA + printk("nfs_rename: target %s/%s busy, d_count=%d\n", + new_dentry->d_parent->d_name.name, + new_dentry->d_name.name, + atomic_read(&new_dentry->d_count)); +#endif + goto out; + } + } + +go_ahead: + /* + * ... prune child dentries and writebacks if needed. + */ + if (atomic_read(&old_dentry->d_count) > 1) { + nfs_wb_all(old_inode); + shrink_dcache_parent(old_dentry); + } + + if (new_inode) + d_delete(new_dentry); + + nfs_begin_data_update(old_dir); + nfs_begin_data_update(new_dir); + nfs_begin_data_update(old_inode); + error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, + new_dir, &new_dentry->d_name); + nfs_end_data_update(old_inode); + nfs_end_data_update(new_dir); + nfs_end_data_update(old_dir); +out: + if (rehash) + d_rehash(rehash); + if (!error) { + if (!S_ISDIR(old_inode->i_mode)) + d_move(old_dentry, new_dentry); + nfs_renew_times(new_dentry); + nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); + } + + /* new dentry created? 
*/ + if (dentry) + dput(dentry); + unlock_kernel(); + return error; +} + +int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + + if (cache->cred != cred + || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) + || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS)) + return -ENOENT; + memcpy(res, cache, sizeof(*res)); + return 0; +} + +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +{ + struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + + if (cache->cred != set->cred) { + if (cache->cred) + put_rpccred(cache->cred); + cache->cred = get_rpccred(set->cred); + } + NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS; + cache->jiffies = set->jiffies; + cache->mask = set->mask; +} + +static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) +{ + struct nfs_access_entry cache; + int status; + + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = NFS_PROTO(inode)->access(inode, &cache); + if (status != 0) + return status; + nfs_access_add_cache(inode, &cache); +out: + if ((cache.mask & mask) == mask) + return 0; + return -EACCES; +} + +int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + struct rpc_cred *cred; + int res = 0; + + if (mask == 0) + goto out; + /* Is this sys_access() ? */ + if (nd != NULL && (nd->flags & LOOKUP_ACCESS)) + goto force_lookup; + + switch (inode->i_mode & S_IFMT) { + case S_IFLNK: + goto out; + case S_IFREG: + /* NFSv4 has atomic_open... 
*/ + if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) + && nd != NULL + && (nd->flags & LOOKUP_OPEN)) + goto out; + break; + case S_IFDIR: + /* + * Optimize away all write operations, since the server + * will check permissions when we perform the op. + */ + if ((mask & MAY_WRITE) && !(mask & MAY_READ)) + goto out; + } + +force_lookup: + lock_kernel(); + + if (!NFS_PROTO(inode)->access) + goto out_notsup; + + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + if (!IS_ERR(cred)) { + res = nfs_do_access(inode, cred, mask); + put_rpccred(cred); + } else + res = PTR_ERR(cred); + unlock_kernel(); +out: + return res; +out_notsup: + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (res == 0) + res = generic_permission(inode, mask, NULL); + unlock_kernel(); + return res; +} + +/* + * Local variables: + * version-control: t + * kept-new-versions: 5 + * End: + */ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c new file mode 100644 index 00000000000..68df803f27c --- /dev/null +++ b/fs/nfs/direct.c @@ -0,0 +1,808 @@ +/* + * linux/fs/nfs/direct.c + * + * Copyright (C) 2003 by Chuck Lever <cel@netapp.com> + * + * High-performance uncached I/O for the Linux NFS client + * + * There are important applications whose performance or correctness + * depends on uncached access to file data. Database clusters + * (multiple copies of the same instance running on separate hosts) + * implement their own cache coherency protocol that subsumes file + * system cache protocols. Applications that process datasets + * considerably larger than the client's memory do not always benefit + * from a local cache. A streaming video server, for instance, has no + * need to cache the contents of a file. + * + * When an application requests uncached I/O, all read and write requests + * are made directly to the server; data stored or fetched via these + * requests is not cached in the Linux page cache. The client does not + * correct unaligned requests from applications. 
All requested bytes are + * held on permanent storage before a direct write system call returns to + * an application. + * + * Solaris implements an uncached I/O facility called directio() that + * is used for backups and sequential I/O to very large files. Solaris + * also supports uncaching whole NFS partitions with "-o forcedirectio," + * an undocumented mount option. + * + * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with + * help from Andrew Morton. + * + * 18 Dec 2001 Initial implementation for 2.4 --cel + * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy + * 08 Jun 2003 Port to 2.5 APIs --cel + * 31 Mar 2004 Handle direct I/O without VFS support --cel + * 15 Sep 2004 Parallel async reads --cel + * + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/smp_lock.h> +#include <linux/file.h> +#include <linux/pagemap.h> +#include <linux/kref.h> + +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/sunrpc/clnt.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/atomic.h> + +#define NFSDBG_FACILITY NFSDBG_VFS +#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) + +static kmem_cache_t *nfs_direct_cachep; + +/* + * This represents a set of asynchronous requests that we're waiting on + */ +struct nfs_direct_req { + struct kref kref; /* release manager */ + struct list_head list; /* nfs_read_data structs */ + wait_queue_head_t wait; /* wait for i/o completion */ + struct page ** pages; /* pages in our buffer */ + unsigned int npages; /* count of pages */ + atomic_t complete, /* i/os we're waiting for */ + count, /* bytes actually processed */ + error; /* any reported error */ +}; + + +/** + * nfs_get_user_pages - find and set up pages underlying user's buffer + * rw: direction (read or write) + * user_addr: starting address of this segment of user's buffer + * count: size of this segment + * @pages: returned array of page struct 
pointers underlying user's buffer + */ +static inline int +nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, + struct page ***pages) +{ + int result = -ENOMEM; + unsigned long page_count; + size_t array_size; + + /* set an arbitrary limit to prevent type overflow */ + /* XXX: this can probably be as large as INT_MAX */ + if (size > MAX_DIRECTIO_SIZE) { + *pages = NULL; + return -EFBIG; + } + + page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; + page_count -= user_addr >> PAGE_SHIFT; + + array_size = (page_count * sizeof(struct page *)); + *pages = kmalloc(array_size, GFP_KERNEL); + if (*pages) { + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + page_count, (rw == READ), 0, + *pages, NULL); + up_read(¤t->mm->mmap_sem); + } + return result; +} + +/** + * nfs_free_user_pages - tear down page struct array + * @pages: array of page struct pointers underlying target buffer + * @npages: number of pages in the array + * @do_dirty: dirty the pages as we release them + */ +static void +nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +{ + int i; + for (i = 0; i < npages; i++) { + if (do_dirty) + set_page_dirty_lock(pages[i]); + page_cache_release(pages[i]); + } + kfree(pages); +} + +/** + * nfs_direct_req_release - release nfs_direct_req structure for direct read + * @kref: kref object embedded in an nfs_direct_req structure + * + */ +static void nfs_direct_req_release(struct kref *kref) +{ + struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + kmem_cache_free(nfs_direct_cachep, dreq); +} + +/** + * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read + * @count: count of bytes for the read request + * @rsize: local rsize setting + * + * Note we also set the number of requests we have in the dreq when we are + * done. This prevents races with I/O completion so we will always wait + * until all requests have been dispatched and completed. 
+ */ +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize) +{ + struct list_head *list; + struct nfs_direct_req *dreq; + unsigned int reads = 0; + + dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + if (!dreq) + return NULL; + + kref_init(&dreq->kref); + init_waitqueue_head(&dreq->wait); + INIT_LIST_HEAD(&dreq->list); + atomic_set(&dreq->count, 0); + atomic_set(&dreq->error, 0); + + list = &dreq->list; + for(;;) { + struct nfs_read_data *data = nfs_readdata_alloc(); + + if (unlikely(!data)) { + while (!list_empty(list)) { + data = list_entry(list->next, + struct nfs_read_data, pages); + list_del(&data->pages); + nfs_readdata_free(data); + } + kref_put(&dreq->kref, nfs_direct_req_release); + return NULL; + } + + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, list); + + data->req = (struct nfs_page *) dreq; + reads++; + if (nbytes <= rsize) + break; + nbytes -= rsize; + } + kref_get(&dreq->kref); + atomic_set(&dreq->complete, reads); + return dreq; +} + +/** + * nfs_direct_read_result - handle a read reply for a direct read request + * @data: address of NFS READ operation control block + * @status: status of this NFS READ operation + * + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). 
+ */ +static void nfs_direct_read_result(struct nfs_read_data *data, int status) +{ + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + + if (likely(status >= 0)) + atomic_add(data->res.count, &dreq->count); + else + atomic_set(&dreq->error, status); + + if (unlikely(atomic_dec_and_test(&dreq->complete))) { + nfs_free_user_pages(dreq->pages, dreq->npages, 1); + wake_up(&dreq->wait); + kref_put(&dreq->kref, nfs_direct_req_release); + } +} + +/** + * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read + * @dreq: address of nfs_direct_req struct for this request + * @inode: target inode + * @ctx: target file open context + * @user_addr: starting address of this segment of user's buffer + * @count: size of this segment + * @file_offset: offset in file to begin the operation + * + * For each nfs_read_data struct that was allocated on the list, dispatch + * an NFS READ operation + */ +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, + struct inode *inode, struct nfs_open_context *ctx, + unsigned long user_addr, size_t count, loff_t file_offset) +{ + struct list_head *list = &dreq->list; + struct page **pages = dreq->pages; + unsigned int curpage, pgbase; + unsigned int rsize = NFS_SERVER(inode)->rsize; + + curpage = 0; + pgbase = user_addr & ~PAGE_MASK; + do { + struct nfs_read_data *data; + unsigned int bytes; + + bytes = rsize; + if (count < rsize) + bytes = count; + + data = list_entry(list->next, struct nfs_read_data, pages); + list_del_init(&data->pages); + + data->inode = inode; + data->cred = ctx->cred; + data->args.fh = NFS_FH(inode); + data->args.context = ctx; + data->args.offset = file_offset; + data->args.pgbase = pgbase; + data->args.pages = &pages[curpage]; + data->args.count = bytes; + data->res.fattr = &data->fattr; + data->res.eof = 0; + data->res.count = bytes; + + NFS_PROTO(inode)->read_setup(data); + + data->task.tk_cookie = (unsigned long) inode; + data->task.tk_calldata = data; + 
data->task.tk_release = nfs_readdata_release; + data->complete = nfs_direct_read_result; + + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); + + dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + bytes, + (unsigned long long)data->args.offset); + + file_offset += bytes; + pgbase += bytes; + curpage += pgbase >> PAGE_SHIFT; + pgbase &= ~PAGE_MASK; + + count -= bytes; + } while (count != 0); +} + +/** + * nfs_direct_read_wait - wait for I/O completion for direct reads + * @dreq: request on which we are to wait + * @intr: whether or not this wait can be interrupted + * + * Collects and returns the final error value/byte-count. + */ +static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) +{ + int result = 0; + + if (intr) { + result = wait_event_interruptible(dreq->wait, + (atomic_read(&dreq->complete) == 0)); + } else { + wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); + } + + if (!result) + result = atomic_read(&dreq->error); + if (!result) + result = atomic_read(&dreq->count); + + kref_put(&dreq->kref, nfs_direct_req_release); + return (ssize_t) result; +} + +/** + * nfs_direct_read_seg - Read in one iov segment. Generate separate + * read RPCs for each "rsize" bytes. 
+ * @inode: target inode + * @ctx: target file open context + * @user_addr: starting address of this segment of user's buffer + * @count: size of this segment + * @file_offset: offset in file to begin the operation + * @pages: array of addresses of page structs defining user's buffer + * @nr_pages: number of pages in the array + * + */ +static ssize_t nfs_direct_read_seg(struct inode *inode, + struct nfs_open_context *ctx, unsigned long user_addr, + size_t count, loff_t file_offset, struct page **pages, + unsigned int nr_pages) +{ + ssize_t result; + sigset_t oldset; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_direct_req *dreq; + + dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize); + if (!dreq) + return -ENOMEM; + + dreq->pages = pages; + dreq->npages = nr_pages; + + rpc_clnt_sigmask(clnt, &oldset); + nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, + file_offset); + result = nfs_direct_read_wait(dreq, clnt->cl_intr); + rpc_clnt_sigunmask(clnt, &oldset); + + return result; +} + +/** + * nfs_direct_read - For each iov segment, map the user's buffer + * then generate read RPCs. + * @inode: target inode + * @ctx: target file open context + * @iov: array of vectors that define I/O buffer + * file_offset: offset in file to begin the operation + * nr_segs: size of iovec array + * + * We've already pushed out any non-direct writes so that this read + * will see them when we read from the server. 
+ */ +static ssize_t +nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, + const struct iovec *iov, loff_t file_offset, + unsigned long nr_segs) +{ + ssize_t tot_bytes = 0; + unsigned long seg = 0; + + while ((seg < nr_segs) && (tot_bytes >= 0)) { + ssize_t result; + int page_count; + struct page **pages; + const struct iovec *vec = &iov[seg++]; + unsigned long user_addr = (unsigned long) vec->iov_base; + size_t size = vec->iov_len; + + page_count = nfs_get_user_pages(READ, user_addr, size, &pages); + if (page_count < 0) { + nfs_free_user_pages(pages, 0, 0); + if (tot_bytes > 0) + break; + return page_count; + } + + result = nfs_direct_read_seg(inode, ctx, user_addr, size, + file_offset, pages, page_count); + + if (result <= 0) { + if (tot_bytes > 0) + break; + return result; + } + tot_bytes += result; + file_offset += result; + if (result < size) + break; + } + + return tot_bytes; +} + +/** + * nfs_direct_write_seg - Write out one iov segment. Generate separate + * write RPCs for each "wsize" bytes, then commit. 
+ * @inode: target inode + * @ctx: target file open context + * user_addr: starting address of this segment of user's buffer + * count: size of this segment + * file_offset: offset in file to begin the operation + * @pages: array of addresses of page structs defining user's buffer + * nr_pages: size of pages array + */ +static ssize_t nfs_direct_write_seg(struct inode *inode, + struct nfs_open_context *ctx, unsigned long user_addr, + size_t count, loff_t file_offset, struct page **pages, + int nr_pages) +{ + const unsigned int wsize = NFS_SERVER(inode)->wsize; + size_t request; + int curpage, need_commit; + ssize_t result, tot_bytes; + struct nfs_writeverf first_verf; + struct nfs_write_data *wdata; + + wdata = nfs_writedata_alloc(); + if (!wdata) + return -ENOMEM; + + wdata->inode = inode; + wdata->cred = ctx->cred; + wdata->args.fh = NFS_FH(inode); + wdata->args.context = ctx; + wdata->args.stable = NFS_UNSTABLE; + if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) + wdata->args.stable = NFS_FILE_SYNC; + wdata->res.fattr = &wdata->fattr; + wdata->res.verf = &wdata->verf; + + nfs_begin_data_update(inode); +retry: + need_commit = 0; + tot_bytes = 0; + curpage = 0; + request = count; + wdata->args.pgbase = user_addr & ~PAGE_MASK; + wdata->args.offset = file_offset; + do { + wdata->args.count = request; + if (wdata->args.count > wsize) + wdata->args.count = wsize; + wdata->args.pages = &pages[curpage]; + + dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", + wdata->args.count, (long long) wdata->args.offset, + user_addr + tot_bytes, wdata->args.pgbase, curpage); + + lock_kernel(); + result = NFS_PROTO(inode)->write(wdata); + unlock_kernel(); + + if (result <= 0) { + if (tot_bytes > 0) + break; + goto out; + } + + if (tot_bytes == 0) + memcpy(&first_verf.verifier, &wdata->verf.verifier, + sizeof(first_verf.verifier)); + if (wdata->verf.committed != NFS_FILE_SYNC) { + need_commit = 1; + if (memcmp(&first_verf.verifier, 
&wdata->verf.verifier, + sizeof(first_verf.verifier))); + goto sync_retry; + } + + tot_bytes += result; + + /* in case of a short write: stop now, let the app recover */ + if (result < wdata->args.count) + break; + + wdata->args.offset += result; + wdata->args.pgbase += result; + curpage += wdata->args.pgbase >> PAGE_SHIFT; + wdata->args.pgbase &= ~PAGE_MASK; + request -= result; + } while (request != 0); + + /* + * Commit data written so far, even in the event of an error + */ + if (need_commit) { + wdata->args.count = tot_bytes; + wdata->args.offset = file_offset; + + lock_kernel(); + result = NFS_PROTO(inode)->commit(wdata); + unlock_kernel(); + + if (result < 0 || memcmp(&first_verf.verifier, + &wdata->verf.verifier, + sizeof(first_verf.verifier)) != 0) + goto sync_retry; + } + result = tot_bytes; + +out: + nfs_end_data_update_defer(inode); + nfs_writedata_free(wdata); + return result; + +sync_retry: + wdata->args.stable = NFS_FILE_SYNC; + goto retry; +} + +/** + * nfs_direct_write - For each iov segment, map the user's buffer + * then generate write and commit RPCs. + * @inode: target inode + * @ctx: target file open context + * @iov: array of vectors that define I/O buffer + * file_offset: offset in file to begin the operation + * nr_segs: size of iovec array + * + * Upon return, generic_file_direct_IO invalidates any cached pages + * that non-direct readers might access, so they will pick up these + * writes immediately. 
+ */ +static ssize_t nfs_direct_write(struct inode *inode, + struct nfs_open_context *ctx, const struct iovec *iov, + loff_t file_offset, unsigned long nr_segs) +{ + ssize_t tot_bytes = 0; + unsigned long seg = 0; + + while ((seg < nr_segs) && (tot_bytes >= 0)) { + ssize_t result; + int page_count; + struct page **pages; + const struct iovec *vec = &iov[seg++]; + unsigned long user_addr = (unsigned long) vec->iov_base; + size_t size = vec->iov_len; + + page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages); + if (page_count < 0) { + nfs_free_user_pages(pages, 0, 0); + if (tot_bytes > 0) + break; + return page_count; + } + + result = nfs_direct_write_seg(inode, ctx, user_addr, size, + file_offset, pages, page_count); + nfs_free_user_pages(pages, page_count, 0); + + if (result <= 0) { + if (tot_bytes > 0) + break; + return result; + } + tot_bytes += result; + file_offset += result; + if (result < size) + break; + } + return tot_bytes; +} + +/** + * nfs_direct_IO - NFS address space operation for direct I/O + * rw: direction (read or write) + * @iocb: target I/O control block + * @iov: array of vectors that define I/O buffer + * file_offset: offset in file to begin the operation + * nr_segs: size of iovec array + * + */ +ssize_t +nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t file_offset, unsigned long nr_segs) +{ + ssize_t result = -EINVAL; + struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx; + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + + /* + * No support for async yet + */ + if (!is_sync_kiocb(iocb)) + return result; + + ctx = (struct nfs_open_context *)file->private_data; + switch (rw) { + case READ: + dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", + dentry->d_name.name, file_offset, nr_segs); + + result = nfs_direct_read(inode, ctx, iov, + file_offset, nr_segs); + break; + case WRITE: + dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n", + 
dentry->d_name.name, file_offset, nr_segs); + + result = nfs_direct_write(inode, ctx, iov, + file_offset, nr_segs); + break; + default: + break; + } + return result; +} + +/** + * nfs_file_direct_read - file direct read operation for NFS files + * @iocb: target I/O control block + * @buf: user's buffer into which to read data + * count: number of bytes to read + * pos: byte offset in file where reading starts + * + * We use this function for direct reads instead of calling + * generic_file_aio_read() in order to avoid gfar's check to see if + * the request starts before the end of the file. For that check + * to work, we must generate a GETATTR before each direct read, and + * even then there is a window between the GETATTR and the subsequent + * READ where the file size could change. So our preference is simply + * to do all reads the application wants, and the server will take + * care of managing the end of file boundary. + * + * This function also eliminates unnecessarily updating the file's + * atime locally, as the NFS server sets the file's atime, and this + * client must read the updated atime from the server back into its + * cache. 
+ */ +ssize_t +nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +{ + ssize_t retval = -EINVAL; + loff_t *ppos = &iocb->ki_pos; + struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx = + (struct nfs_open_context *) file->private_data; + struct dentry *dentry = file->f_dentry; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct iovec iov = { + .iov_base = buf, + .iov_len = count, + }; + + dprintk("nfs: direct read(%s/%s, %lu@%lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long) pos); + + if (!is_sync_kiocb(iocb)) + goto out; + if (count < 0) + goto out; + retval = -EFAULT; + if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len)) + goto out; + retval = 0; + if (!count) + goto out; + + if (mapping->nrpages) { + retval = filemap_fdatawrite(mapping); + if (retval == 0) + retval = nfs_wb_all(inode); + if (retval == 0) + retval = filemap_fdatawait(mapping); + if (retval) + goto out; + } + + retval = nfs_direct_read(inode, ctx, &iov, pos, 1); + if (retval > 0) + *ppos = pos + retval; + +out: + return retval; +} + +/** + * nfs_file_direct_write - file direct write operation for NFS files + * @iocb: target I/O control block + * @buf: user's buffer from which to write data + * count: number of bytes to write + * pos: byte offset in file where writing starts + * + * We use this function for direct writes instead of calling + * generic_file_aio_write() in order to avoid taking the inode + * semaphore and updating the i_size. The NFS server will set + * the new i_size and this client must read the updated size + * back into its cache. We let the server do generic write + * parameter checking and report problems. + * + * We also avoid an unnecessary invocation of generic_osync_inode(), + * as it is fairly meaningless to sync the metadata of an NFS file. + * + * We eliminate local atime updates, see direct read above. 
+ * + * We avoid unnecessary page cache invalidations for normal cached + * readers of this file. + * + * Note that O_APPEND is not supported for NFS direct writes, as there + * is no atomic O_APPEND write facility in the NFS protocol. + */ +ssize_t +nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +{ + ssize_t retval = -EINVAL; + loff_t *ppos = &iocb->ki_pos; + unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx = + (struct nfs_open_context *) file->private_data; + struct dentry *dentry = file->f_dentry; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct iovec iov = { + .iov_base = (char __user *)buf, + .iov_len = count, + }; + + dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + inode->i_ino, (unsigned long) count, (unsigned long) pos); + + if (!is_sync_kiocb(iocb)) + goto out; + if (count < 0) + goto out; + if (pos < 0) + goto out; + retval = -EFAULT; + if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + goto out; + if (file->f_error) { + retval = file->f_error; + file->f_error = 0; + goto out; + } + retval = -EFBIG; + if (limit != RLIM_INFINITY) { + if (pos >= limit) { + send_sig(SIGXFSZ, current, 0); + goto out; + } + if (count > limit - (unsigned long) pos) + count = limit - (unsigned long) pos; + } + retval = 0; + if (!count) + goto out; + + if (mapping->nrpages) { + retval = filemap_fdatawrite(mapping); + if (retval == 0) + retval = nfs_wb_all(inode); + if (retval == 0) + retval = filemap_fdatawait(mapping); + if (retval) + goto out; + } + + retval = nfs_direct_write(inode, ctx, &iov, pos, 1); + if (mapping->nrpages) + invalidate_inode_pages2(mapping); + if (retval > 0) + *ppos = pos + retval; + +out: + return retval; +} + +int nfs_init_directcache(void) +{ + nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", + 
sizeof(struct nfs_direct_req), + 0, SLAB_RECLAIM_ACCOUNT, + NULL, NULL); + if (nfs_direct_cachep == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_directcache(void) +{ + if (kmem_cache_destroy(nfs_direct_cachep)) + printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n"); +} diff --git a/fs/nfs/file.c b/fs/nfs/file.c new file mode 100644 index 00000000000..f06eee6dcff --- /dev/null +++ b/fs/nfs/file.c @@ -0,0 +1,484 @@ +/* + * linux/fs/nfs/file.c + * + * Copyright (C) 1992 Rick Sladkey + * + * Changes Copyright (C) 1994 by Florian La Roche + * - Do not copy data too often around in the kernel. + * - In nfs_file_read the return value of kmalloc wasn't checked. + * - Put in a better version of read look-ahead buffering. Original idea + * and implementation by Wai S Kok elekokws@ee.nus.sg. + * + * Expire cache on write to a file by Wai S Kok (Oct 1994). + * + * Total rewrite of read side for new NFS buffer cache.. Linus. + * + * nfs regular file handling functions + */ + +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/smp_lock.h> + +#include <asm/uaccess.h> +#include <asm/system.h> + +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_FILE + +static int nfs_file_open(struct inode *, struct file *); +static int nfs_file_release(struct inode *, struct file *); +static int nfs_file_mmap(struct file *, struct vm_area_struct *); +static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); +static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); +static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t); +static int nfs_file_flush(struct file *); +static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); +static int 
nfs_check_flags(int flags); +static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); +static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); + +struct file_operations nfs_file_operations = { + .llseek = remote_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = nfs_file_read, + .aio_write = nfs_file_write, + .mmap = nfs_file_mmap, + .open = nfs_file_open, + .flush = nfs_file_flush, + .release = nfs_file_release, + .fsync = nfs_fsync, + .lock = nfs_lock, + .flock = nfs_flock, + .sendfile = nfs_file_sendfile, + .check_flags = nfs_check_flags, +}; + +struct inode_operations nfs_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +/* Hack for future NFS swap support */ +#ifndef IS_SWAPFILE +# define IS_SWAPFILE(inode) (0) +#endif + +static int nfs_check_flags(int flags) +{ + if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT)) + return -EINVAL; + + return 0; +} + +/* + * Open file + */ +static int +nfs_file_open(struct inode *inode, struct file *filp) +{ + struct nfs_server *server = NFS_SERVER(inode); + int (*open)(struct inode *, struct file *); + int res; + + res = nfs_check_flags(filp->f_flags); + if (res) + return res; + + lock_kernel(); + /* Do NFSv4 open() call */ + if ((open = server->rpc_ops->file_open) != NULL) + res = open(inode, filp); + unlock_kernel(); + return res; +} + +static int +nfs_file_release(struct inode *inode, struct file *filp) +{ + /* Ensure that dirty pages are flushed out with the right creds */ + if (filp->f_mode & FMODE_WRITE) + filemap_fdatawrite(filp->f_mapping); + return NFS_PROTO(inode)->file_release(inode, filp); +} + +/* + * Flush all dirty pages, and check for write errors. 
+ * + */ +static int +nfs_file_flush(struct file *file) +{ + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct inode *inode = file->f_dentry->d_inode; + int status; + + dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; + lock_kernel(); + /* Ensure that data+attribute caches are up to date after close() */ + status = nfs_wb_all(inode); + if (!status) { + status = ctx->error; + ctx->error = 0; + if (!status && !nfs_have_delegation(inode, FMODE_READ)) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + } + unlock_kernel(); + return status; +} + +static ssize_t +nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) +{ + struct dentry * dentry = iocb->ki_filp->f_dentry; + struct inode * inode = dentry->d_inode; + ssize_t result; + +#ifdef CONFIG_NFS_DIRECTIO + if (iocb->ki_filp->f_flags & O_DIRECT) + return nfs_file_direct_read(iocb, buf, count, pos); +#endif + + dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long) pos); + + result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!result) + result = generic_file_aio_read(iocb, buf, count, pos); + return result; +} + +static ssize_t +nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, + read_actor_t actor, void *target) +{ + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + ssize_t res; + + dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long long) *ppos); + + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!res) + res = generic_file_sendfile(filp, ppos, count, actor, target); + return res; +} + +static int +nfs_file_mmap(struct file * file, struct vm_area_struct * vma) +{ + struct dentry *dentry = file->f_dentry; + struct inode *inode = 
dentry->d_inode; + int status; + + dfprintk(VFS, "nfs: mmap(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + + status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!status) + status = generic_file_mmap(file, vma); + return status; +} + +/* + * Flush any dirty pages for this process, and check for write errors. + * The return status from this call provides a reliable indication of + * whether any write errors occurred for this process. + */ +static int +nfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct inode *inode = dentry->d_inode; + int status; + + dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + + lock_kernel(); + status = nfs_wb_all(inode); + if (!status) { + status = ctx->error; + ctx->error = 0; + } + unlock_kernel(); + return status; +} + +/* + * This does the "real" work of the write. The generic routine has + * allocated the page, locked it, done all the page alignment stuff + * calculations etc. Now we should just copy the data from user + * space and write it back to the real medium.. + * + * If the writer ends up delaying the write, the writer needs to + * increment the page use counts until he is done with the page. 
+ */ +static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + return nfs_flush_incompatible(file, page); +} + +static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + long status; + + lock_kernel(); + status = nfs_updatepage(file, page, offset, to-offset); + unlock_kernel(); + return status; +} + +struct address_space_operations nfs_file_aops = { + .readpage = nfs_readpage, + .readpages = nfs_readpages, + .set_page_dirty = __set_page_dirty_nobuffers, + .writepage = nfs_writepage, + .writepages = nfs_writepages, + .prepare_write = nfs_prepare_write, + .commit_write = nfs_commit_write, +#ifdef CONFIG_NFS_DIRECTIO + .direct_IO = nfs_direct_IO, +#endif +}; + +/* + * Write to a file (through the page cache). + */ +static ssize_t +nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +{ + struct dentry * dentry = iocb->ki_filp->f_dentry; + struct inode * inode = dentry->d_inode; + ssize_t result; + +#ifdef CONFIG_NFS_DIRECTIO + if (iocb->ki_filp->f_flags & O_DIRECT) + return nfs_file_direct_write(iocb, buf, count, pos); +#endif + + dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + inode->i_ino, (unsigned long) count, (unsigned long) pos); + + result = -EBUSY; + if (IS_SWAPFILE(inode)) + goto out_swapfile; + result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (result) + goto out; + + result = count; + if (!count) + goto out; + + result = generic_file_aio_write(iocb, buf, count, pos); +out: + return result; + +out_swapfile: + printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); + goto out; +} + +static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status = 0; + + lock_kernel(); + /* Use local locking if mounted with "-onolock" */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + status = 
NFS_PROTO(inode)->lock(filp, cmd, fl); + else { + struct file_lock *cfl = posix_test_lock(filp, fl); + + fl->fl_type = F_UNLCK; + if (cfl != NULL) + memcpy(fl, cfl, sizeof(*fl)); + } + unlock_kernel(); + return status; +} + +static int do_vfs_lock(struct file *file, struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_file_wait(file, fl); + break; + case FL_FLOCK: + res = flock_lock_file_wait(file, fl); + break; + default: + BUG(); + } + if (res < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", + __FUNCTION__); + return res; +} + +static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + sigset_t oldset; + int status; + + rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); + /* + * Flush all pending writes before doing anything + * with locks.. + */ + filemap_fdatawrite(filp->f_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); + up(&inode->i_sem); + filemap_fdatawait(filp->f_mapping); + + /* NOTE: special case + * If we're signalled while cleaning up locks on process exit, we + * still need to complete the unlock. + */ + lock_kernel(); + /* Use local locking if mounted with "-onolock" */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + else + status = do_vfs_lock(filp, fl); + unlock_kernel(); + rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); + return status; +} + +static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + sigset_t oldset; + int status; + + rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); + /* + * Flush all pending writes before doing anything + * with locks.. 
+ */ + status = filemap_fdatawrite(filp->f_mapping); + if (status == 0) { + down(&inode->i_sem); + status = nfs_wb_all(inode); + up(&inode->i_sem); + if (status == 0) + status = filemap_fdatawait(filp->f_mapping); + } + if (status < 0) + goto out; + + lock_kernel(); + /* Use local locking if mounted with "-onolock" */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) { + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + /* If we were signalled we still need to ensure that + * we clean up any state on the server. We therefore + * record the lock call as having succeeded in order to + * ensure that locks_remove_posix() cleans it out when + * the process exits. + */ + if (status == -EINTR || status == -ERESTARTSYS) + do_vfs_lock(filp, fl); + } else + status = do_vfs_lock(filp, fl); + unlock_kernel(); + if (status < 0) + goto out; + /* + * Make sure we clear the cache whenever we try to get the lock. + * This makes locking act as a cache coherency point. + */ + filemap_fdatawrite(filp->f_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); /* we may have slept */ + up(&inode->i_sem); + filemap_fdatawait(filp->f_mapping); + nfs_zap_caches(inode); +out: + rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); + return status; +} + +/* + * Lock a (portion of) a file + */ +static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode * inode = filp->f_mapping->host; + + dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", + inode->i_sb->s_id, inode->i_ino, + fl->fl_type, fl->fl_flags, + (long long)fl->fl_start, (long long)fl->fl_end); + + if (!inode) + return -EINVAL; + + /* No mandatory locks over NFS */ + if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + return -ENOLCK; + + if (IS_GETLK(cmd)) + return do_getlk(filp, cmd, fl); + if (fl->fl_type == F_UNLCK) + return do_unlk(filp, cmd, fl); + return do_setlk(filp, cmd, fl); +} + +/* + * Lock a (portion of) a file + */ +static int nfs_flock(struct file *filp, int cmd, struct file_lock 
*fl) +{ + struct inode * inode = filp->f_mapping->host; + + dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", + inode->i_sb->s_id, inode->i_ino, + fl->fl_type, fl->fl_flags); + + if (!inode) + return -EINVAL; + + /* + * No BSD flocks over NFS allowed. + * Note: we could try to fake a POSIX lock request here by + * using ((u32) filp | 0x80000000) or some such as the pid. + * Not sure whether that would be unique, though, or whether + * that would break in other places. + */ + if (!(fl->fl_flags & FL_FLOCK)) + return -ENOLCK; + + /* We're simulating flock() locks using posix locks on the server */ + fl->fl_owner = (fl_owner_t)filp; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + + if (fl->fl_type == F_UNLCK) + return do_unlk(filp, cmd, fl); + return do_setlk(filp, cmd, fl); +} diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c new file mode 100644 index 00000000000..b74c4e3a64e --- /dev/null +++ b/fs/nfs/idmap.c @@ -0,0 +1,498 @@ +/* + * fs/nfs/idmap.c + * + * UID and GID to name mapping for clients. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen <marius@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/sched.h> + +#include <linux/sunrpc/clnt.h> +#include <linux/workqueue.h> +#include <linux/sunrpc/rpc_pipe_fs.h> + +#include <linux/nfs_fs_sb.h> +#include <linux/nfs_fs.h> + +#include <linux/nfs_idmap.h> + +#define IDMAP_HASH_SZ 128 + +struct idmap_hashent { + __u32 ih_id; + int ih_namelen; + char ih_name[IDMAP_NAMESZ]; +}; + +struct idmap_hashtable { + __u8 h_type; + struct idmap_hashent h_entries[IDMAP_HASH_SZ]; +}; + +struct idmap { + char idmap_path[48]; + struct dentry *idmap_dentry; + wait_queue_head_t idmap_wq; + struct idmap_msg idmap_im; + struct semaphore idmap_lock; /* Serializes upcalls */ + struct semaphore idmap_im_lock; /* Protects the hashtable */ + struct idmap_hashtable idmap_user_hash; + struct idmap_hashtable idmap_group_hash; +}; + +static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); +static ssize_t idmap_pipe_downcall(struct file *, const char __user *, + size_t); +void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); + +static unsigned int fnvhash32(const void *, size_t); + +static 
struct rpc_pipe_ops idmap_upcall_ops = { + .upcall = idmap_pipe_upcall, + .downcall = idmap_pipe_downcall, + .destroy_msg = idmap_pipe_destroy_msg, +}; + +void +nfs_idmap_new(struct nfs4_client *clp) +{ + struct idmap *idmap; + + if (clp->cl_idmap != NULL) + return; + if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) + return; + + memset(idmap, 0, sizeof(*idmap)); + + snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), + "%s/idmap", clp->cl_rpcclient->cl_pathname); + + idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, + idmap, &idmap_upcall_ops, 0); + if (IS_ERR(idmap->idmap_dentry)) { + kfree(idmap); + return; + } + + init_MUTEX(&idmap->idmap_lock); + init_MUTEX(&idmap->idmap_im_lock); + init_waitqueue_head(&idmap->idmap_wq); + idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; + idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; + + clp->cl_idmap = idmap; +} + +void +nfs_idmap_delete(struct nfs4_client *clp) +{ + struct idmap *idmap = clp->cl_idmap; + + if (!idmap) + return; + rpc_unlink(idmap->idmap_path); + clp->cl_idmap = NULL; + kfree(idmap); +} + +/* + * Helper routines for manipulating the hashtable + */ +static inline struct idmap_hashent * +idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) +{ + return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) +{ + struct idmap_hashent *he = idmap_name_hash(h, name, len); + + if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) + return NULL; + return he; +} + +static inline struct idmap_hashent * +idmap_id_hash(struct idmap_hashtable* h, __u32 id) +{ + return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_id(struct idmap_hashtable *h, __u32 id) +{ + struct idmap_hashent *he = idmap_id_hash(h, id); + if (he->ih_id != id || he->ih_namelen == 0) + return NULL; + return he; +} + +/* + * Routines 
for allocating new entries in the hashtable. + * For now, we just have 1 entry per bucket, so it's all + * pretty trivial. + */ +static inline struct idmap_hashent * +idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len) +{ + return idmap_name_hash(h, name, len); +} + +static inline struct idmap_hashent * +idmap_alloc_id(struct idmap_hashtable *h, __u32 id) +{ + return idmap_id_hash(h, id); +} + +static void +idmap_update_entry(struct idmap_hashent *he, const char *name, + size_t namelen, __u32 id) +{ + he->ih_id = id; + memcpy(he->ih_name, name, namelen); + he->ih_name[namelen] = '\0'; + he->ih_namelen = namelen; +} + +/* + * Name -> ID + */ +static int +nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, + const char *name, size_t namelen, __u32 *id) +{ + struct rpc_pipe_msg msg; + struct idmap_msg *im; + struct idmap_hashent *he; + DECLARE_WAITQUEUE(wq, current); + int ret = -EIO; + + im = &idmap->idmap_im; + + /* + * String sanity checks + * Note that the userland daemon expects NUL terminated strings + */ + for (;;) { + if (namelen == 0) + return -EINVAL; + if (name[namelen-1] != '\0') + break; + namelen--; + } + if (namelen >= IDMAP_NAMESZ) + return -EINVAL; + + down(&idmap->idmap_lock); + down(&idmap->idmap_im_lock); + + he = idmap_lookup_name(h, name, namelen); + if (he != NULL) { + *id = he->ih_id; + ret = 0; + goto out; + } + + memset(im, 0, sizeof(*im)); + memcpy(im->im_name, name, namelen); + + im->im_type = h->h_type; + im->im_conv = IDMAP_CONV_NAMETOID; + + memset(&msg, 0, sizeof(msg)); + msg.data = im; + msg.len = sizeof(*im); + + add_wait_queue(&idmap->idmap_wq, &wq); + if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { + remove_wait_queue(&idmap->idmap_wq, &wq); + goto out; + } + + set_current_state(TASK_UNINTERRUPTIBLE); + up(&idmap->idmap_im_lock); + schedule(); + current->state = TASK_RUNNING; + remove_wait_queue(&idmap->idmap_wq, &wq); + down(&idmap->idmap_im_lock); + + if (im->im_status & 
IDMAP_STATUS_SUCCESS) { + *id = im->im_id; + ret = 0; + } + + out: + memset(im, 0, sizeof(*im)); + up(&idmap->idmap_im_lock); + up(&idmap->idmap_lock); + return (ret); +} + +/* + * ID -> Name + */ +static int +nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, + __u32 id, char *name) +{ + struct rpc_pipe_msg msg; + struct idmap_msg *im; + struct idmap_hashent *he; + DECLARE_WAITQUEUE(wq, current); + int ret = -EIO; + unsigned int len; + + im = &idmap->idmap_im; + + down(&idmap->idmap_lock); + down(&idmap->idmap_im_lock); + + he = idmap_lookup_id(h, id); + if (he != 0) { + memcpy(name, he->ih_name, he->ih_namelen); + ret = he->ih_namelen; + goto out; + } + + memset(im, 0, sizeof(*im)); + im->im_type = h->h_type; + im->im_conv = IDMAP_CONV_IDTONAME; + im->im_id = id; + + memset(&msg, 0, sizeof(msg)); + msg.data = im; + msg.len = sizeof(*im); + + add_wait_queue(&idmap->idmap_wq, &wq); + + if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { + remove_wait_queue(&idmap->idmap_wq, &wq); + goto out; + } + + set_current_state(TASK_UNINTERRUPTIBLE); + up(&idmap->idmap_im_lock); + schedule(); + current->state = TASK_RUNNING; + remove_wait_queue(&idmap->idmap_wq, &wq); + down(&idmap->idmap_im_lock); + + if (im->im_status & IDMAP_STATUS_SUCCESS) { + if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) + goto out; + memcpy(name, im->im_name, len); + ret = len; + } + + out: + memset(im, 0, sizeof(*im)); + up(&idmap->idmap_im_lock); + up(&idmap->idmap_lock); + return ret; +} + +/* RPC pipefs upcall/downcall routines */ +static ssize_t +idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char __user *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + ssize_t mlen = msg->len - msg->copied; + ssize_t left; + + if (mlen > buflen) + mlen = buflen; + + left = copy_to_user(dst, data, mlen); + if (left < 0) { + msg->errno = left; + return left; + } + mlen -= left; + msg->copied += mlen; + msg->errno = 0; + return mlen; +} + 
+static ssize_t +idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) +{ + struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_msg im_in, *im = &idmap->idmap_im; + struct idmap_hashtable *h; + struct idmap_hashent *he = NULL; + int namelen_in; + int ret; + + if (mlen != sizeof(im_in)) + return (-ENOSPC); + + if (copy_from_user(&im_in, src, mlen) != 0) + return (-EFAULT); + + down(&idmap->idmap_im_lock); + + ret = mlen; + im->im_status = im_in.im_status; + /* If we got an error, terminate now, and wake up pending upcalls */ + if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { + wake_up(&idmap->idmap_wq); + goto out; + } + + /* Sanity checking of strings */ + ret = -EINVAL; + namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) + goto out; + + switch (im_in.im_type) { + case IDMAP_TYPE_USER: + h = &idmap->idmap_user_hash; + break; + case IDMAP_TYPE_GROUP: + h = &idmap->idmap_group_hash; + break; + default: + goto out; + } + + switch (im_in.im_conv) { + case IDMAP_CONV_IDTONAME: + /* Did we match the current upcall? */ + if (im->im_conv == IDMAP_CONV_IDTONAME + && im->im_type == im_in.im_type + && im->im_id == im_in.im_id) { + /* Yes: copy string, including the terminating '\0' */ + memcpy(im->im_name, im_in.im_name, namelen_in); + im->im_name[namelen_in] = '\0'; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_id(h, im_in.im_id); + break; + case IDMAP_CONV_NAMETOID: + /* Did we match the current upcall? 
*/ + if (im->im_conv == IDMAP_CONV_NAMETOID + && im->im_type == im_in.im_type + && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in + && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { + im->im_id = im_in.im_id; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_name(h, im_in.im_name, namelen_in); + break; + default: + goto out; + } + + /* If the entry is valid, also copy it to the cache */ + if (he != NULL) + idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); + ret = mlen; +out: + up(&idmap->idmap_im_lock); + return ret; +} + +void +idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + struct idmap_msg *im = msg->data; + struct idmap *idmap = container_of(im, struct idmap, idmap_im); + + if (msg->errno >= 0) + return; + down(&idmap->idmap_im_lock); + im->im_status = IDMAP_STATUS_LOOKUPFAIL; + wake_up(&idmap->idmap_wq); + up(&idmap->idmap_im_lock); +} + +/* + * Fowler/Noll/Vo hash + * http://www.isthe.com/chongo/tech/comp/fnv/ + */ + +#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ +#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ + +static unsigned int fnvhash32(const void *buf, size_t buflen) +{ + const unsigned char *p, *end = (const unsigned char *)buf + buflen; + unsigned int hash = FNV_1_32; + + for (p = buf; p < end; p++) { + hash *= FNV_P_32; + hash ^= (unsigned int)*p; + } + + return (hash); +} + +int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +{ + struct idmap *idmap = clp->cl_idmap; + + return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); +} + +int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +{ + struct idmap *idmap = clp->cl_idmap; + + return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); +} + +int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) +{ + struct idmap *idmap = clp->cl_idmap; + + return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); +} 
+int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) +{ + struct idmap *idmap = clp->cl_idmap; + + return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); +} + diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c new file mode 100644 index 00000000000..6345f26e87e --- /dev/null +++ b/fs/nfs/inode.c @@ -0,0 +1,2003 @@ +/* + * linux/fs/nfs/inode.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs inode and superblock handling functions + * + * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some + * experimental NFS changes. Modularisation taken straight from SYS5 fs. + * + * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. + * J.S.Peatfield@damtp.cam.ac.uk + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/stats.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include <linux/lockd/bind.h> +#include <linux/smp_lock.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/nfs_idmap.h> +#include <linux/vfs.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_VFS +#define NFS_PARANOIA 1 + +/* Maximum number of readahead requests + * FIXME: this should really be a sysctl so that users may tune it to suit + * their needs. People that do NFS over a slow network, might for + * instance want to reduce it to something closer to 1 for improved + * interactive response. 
+ */ +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) + +static void nfs_invalidate_inode(struct inode *); +static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long); + +static struct inode *nfs_alloc_inode(struct super_block *sb); +static void nfs_destroy_inode(struct inode *); +static int nfs_write_inode(struct inode *,int); +static void nfs_delete_inode(struct inode *); +static void nfs_clear_inode(struct inode *); +static void nfs_umount_begin(struct super_block *); +static int nfs_statfs(struct super_block *, struct kstatfs *); +static int nfs_show_options(struct seq_file *, struct vfsmount *); + +static struct rpc_program nfs_program; + +static struct super_operations nfs_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .delete_inode = nfs_delete_inode, + .statfs = nfs_statfs, + .clear_inode = nfs_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, +}; + +/* + * RPC cruft for NFS + */ +static struct rpc_stat nfs_rpcstat = { + .program = &nfs_program +}; +static struct rpc_version * nfs_version[] = { + NULL, + NULL, + &nfs_version2, +#if defined(CONFIG_NFS_V3) + &nfs_version3, +#elif defined(CONFIG_NFS_V4) + NULL, +#endif +#if defined(CONFIG_NFS_V4) + &nfs_version4, +#endif +}; + +static struct rpc_program nfs_program = { + .name = "nfs", + .number = NFS_PROGRAM, + .nrvers = sizeof(nfs_version) / sizeof(nfs_version[0]), + .version = nfs_version, + .stats = &nfs_rpcstat, + .pipe_dir_name = "/nfs", +}; + +static inline unsigned long +nfs_fattr_to_ino_t(struct nfs_fattr *fattr) +{ + return nfs_fileid_to_ino_t(fattr->fileid); +} + +static int +nfs_write_inode(struct inode *inode, int sync) +{ + int flags = sync ? 
FLUSH_WAIT : 0; + int ret; + + ret = nfs_commit_inode(inode, 0, 0, flags); + if (ret < 0) + return ret; + return 0; +} + +static void +nfs_delete_inode(struct inode * inode) +{ + dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + + nfs_wb_all(inode); + /* + * The following should never happen... + */ + if (nfs_have_writebacks(inode)) { + printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); + } + + clear_inode(inode); +} + +/* + * For the moment, the only task for the NFS clear_inode method is to + * release the mmap credential + */ +static void +nfs_clear_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct rpc_cred *cred; + + nfs_wb_all(inode); + BUG_ON (!list_empty(&nfsi->open_files)); + cred = nfsi->cache_access.cred; + if (cred) + put_rpccred(cred); + BUG_ON(atomic_read(&nfsi->data_updates) != 0); +} + +void +nfs_umount_begin(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + struct rpc_clnt *rpc; + + /* -EIO all pending I/O */ + if ((rpc = server->client) != NULL) + rpc_killall_tasks(rpc); +} + + +static inline unsigned long +nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) +{ + /* make sure blocksize is a power of two */ + if ((bsize & (bsize - 1)) || nrbitsp) { + unsigned char nrbits; + + for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + ; + bsize = 1 << nrbits; + if (nrbitsp) + *nrbitsp = nrbits; + } + + return bsize; +} + +/* + * Calculate the number of 512byte blocks used. + */ +static inline unsigned long +nfs_calc_block_size(u64 tsize) +{ + loff_t used = (tsize + 511) >> 9; + return (used > ULONG_MAX) ? 
ULONG_MAX : used; +} + +/* + * Compute and set NFS server blocksize + */ +static inline unsigned long +nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) +{ + if (bsize < 1024) + bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) + bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + + return nfs_block_bits(bsize, nrbitsp); +} + +/* + * Obtain the root inode of the file system. + */ +static struct inode * +nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) +{ + struct nfs_server *server = NFS_SB(sb); + struct inode *rooti; + int error; + + error = server->rpc_ops->getroot(server, rootfh, fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); + if (!rooti) + return ERR_PTR(-ENOMEM); + return rooti; +} + +/* + * Do NFS version-independent mount processing, and sanity checking + */ +static int +nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) +{ + struct nfs_server *server; + struct inode *root_inode; + struct nfs_fattr fattr; + struct nfs_fsinfo fsinfo = { + .fattr = &fattr, + }; + struct nfs_pathconf pathinfo = { + .fattr = &fattr, + }; + int no_root_error = 0; + unsigned long max_rpc_payload; + + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); + + server = NFS_SB(sb); + + sb->s_magic = NFS_SUPER_MAGIC; + + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); + /* Did getting the root inode fail? 
*/ + if (IS_ERR(root_inode)) { + no_root_error = PTR_ERR(root_inode); + goto out_no_root; + } + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) { + no_root_error = -ENOMEM; + goto out_no_root; + } + sb->s_root->d_op = server->rpc_ops->dentry_ops; + + /* Get some general file system info */ + if (server->namelen == 0 && + server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; + /* Work out a lot of parameters */ + if (server->rsize == 0) + server->rsize = nfs_block_size(fsinfo.rtpref, NULL); + if (server->wsize == 0) + server->wsize = nfs_block_size(fsinfo.wtpref, NULL); + + if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) + server->rsize = nfs_block_size(fsinfo.rtmax, NULL); + if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) + server->wsize = nfs_block_size(fsinfo.wtmax, NULL); + + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (server->rpages > NFS_READ_MAXIOV) { + server->rpages = NFS_READ_MAXIOV; + server->rsize = server->rpages << PAGE_CACHE_SHIFT; + } + + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (server->wpages > NFS_WRITE_MAXIOV) { + server->wpages = NFS_WRITE_MAXIOV; + server->wsize = server->wpages << PAGE_CACHE_SHIFT; + } + + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); + server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); + + server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + if (server->flags & NFS_MOUNT_NOAC) { + server->acregmin = server->acregmax = 0; + server->acdirmin = 
server->acdirmax = 0; + sb->s_flags |= MS_SYNCHRONOUS; + } + server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + + sb->s_maxbytes = fsinfo.maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE) + sb->s_maxbytes = MAX_LFS_FILESIZE; + + server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; + server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; + + /* We're airborne Set socket buffersize */ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); + return 0; + /* Yargs. It didn't work out. */ +out_no_root: + dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); + if (!IS_ERR(root_inode)) + iput(root_inode); + return no_root_error; +} + +/* + * Create an RPC client handle. + */ +static struct rpc_clnt * +nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) +{ + struct rpc_timeout timeparms; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + int tcp = (data->flags & NFS_MOUNT_TCP); + + /* Initialize timeout values */ + timeparms.to_initval = data->timeo * HZ / 10; + timeparms.to_retries = data->retrans; + timeparms.to_maxval = tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT; + timeparms.to_exponential = 1; + + if (!timeparms.to_initval) + timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10; + if (!timeparms.to_retries) + timeparms.to_retries = 5; + + /* create transport and client */ + xprt = xprt_create_proto(tcp ? 
IPPROTO_TCP : IPPROTO_UDP, + &server->addr, &timeparms); + if (IS_ERR(xprt)) { + printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + return (struct rpc_clnt *)xprt; + } + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, data->pseudoflavor); + if (IS_ERR(clnt)) { + printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + goto out_fail; + } + + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clnt->cl_chatty = 1; + + return clnt; + +out_fail: + xprt_destroy(xprt); + return clnt; +} + +/* + * The way this works is that the mount process passes a structure + * in the data argument which contains the server's IP address + * and the root file handle obtained from the server's mount + * daemon. We stash these away in the private superblock fields. + */ +static int +nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) +{ + struct nfs_server *server; + rpc_authflavor_t authflavor; + + server = NFS_SB(sb); + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + if (data->bsize) + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + + server->acregmin = data->acregmin*HZ; + server->acregmax = data->acregmax*HZ; + server->acdirmin = data->acdirmin*HZ; + server->acdirmax = data->acdirmax*HZ; + + /* Start lockd here, before we might error out */ + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + + server->namelen = data->namlen; + server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); + if (!server->hostname) + return -ENOMEM; + strcpy(server->hostname, data->hostname); + + /* Check NFS protocol revision and initialize RPC op vector + * and file handle pool. 
*/ + if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3 + server->rpc_ops = &nfs_v3_clientops; + server->caps |= NFS_CAP_READDIRPLUS; + if (data->version < 4) { + printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); + return -EIO; + } +#else + printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); + return -EIO; +#endif + } else { + server->rpc_ops = &nfs_v2_clientops; + } + + /* Fill in pseudoflavor for mount version < 5 */ + if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) + data->pseudoflavor = RPC_AUTH_UNIX; + authflavor = data->pseudoflavor; /* save for sb_init() */ + /* XXX maybe we want to add a server->pseudoflavor field */ + + /* Create RPC client handles */ + server->client = nfs_create_client(server, data); + if (IS_ERR(server->client)) + return PTR_ERR(server->client); + /* RFC 2623, sec 2.3.2 */ + if (authflavor != RPC_AUTH_UNIX) { + server->client_sys = rpc_clone_client(server->client); + if (IS_ERR(server->client_sys)) + return PTR_ERR(server->client_sys); + if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) + return -ENOMEM; + } else { + atomic_inc(&server->client->cl_count); + server->client_sys = server->client; + } + + if (server->flags & NFS_MOUNT_VER3) { + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + sb->s_time_gran = 1; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + + sb->s_op = &nfs_sops; + return nfs_sb_init(sb, authflavor); +} + +static int +nfs_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct nfs_server *server = NFS_SB(sb); + unsigned char blockbits; + unsigned long blockres; + struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); + struct nfs_fattr fattr; + struct nfs_fsstat res = { + .fattr = &fattr, + }; + int error; + + lock_kernel(); + + error = server->rpc_ops->statfs(server, rootfh, &res); + buf->f_type = NFS_SUPER_MAGIC; + if (error < 0) + goto out_err; + + /* + * Current 
versions of glibc do not correctly handle the + * case where f_frsize != f_bsize. Eventually we want to + * report the value of wtmult in this field. + */ + buf->f_frsize = sb->s_blocksize; + + /* + * On most *nix systems, f_blocks, f_bfree, and f_bavail + * are reported in units of f_frsize. Linux hasn't had + * an f_frsize field in its statfs struct until recently, + * thus historically Linux's sys_statfs reports these + * fields in units of f_bsize. + */ + buf->f_bsize = sb->s_blocksize; + blockbits = sb->s_blocksize_bits; + blockres = (1 << blockbits) - 1; + buf->f_blocks = (res.tbytes + blockres) >> blockbits; + buf->f_bfree = (res.fbytes + blockres) >> blockbits; + buf->f_bavail = (res.abytes + blockres) >> blockbits; + + buf->f_files = res.tfiles; + buf->f_ffree = res.afiles; + + buf->f_namelen = server->namelen; + out: + unlock_kernel(); + + return 0; + + out_err: + printk(KERN_WARNING "nfs_statfs: statfs error = %d\n", -error); + buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; + goto out; + +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + static struct proc_nfs_info { + int flag; + char *str; + char *nostr; + } nfs_info[] = { + { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", "" }, + { NFS_MOUNT_POSIX, ",posix", "" }, + { NFS_MOUNT_TCP, ",tcp", ",udp" }, + { NFS_MOUNT_NOCTO, ",nocto", "" }, + { NFS_MOUNT_NOAC, ",noac", "" }, + { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { 0, NULL, NULL } + }; + struct proc_nfs_info *nfs_infop; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",rsize=%d", nfss->rsize); + seq_printf(m, ",wsize=%d", nfss->wsize); + if (nfss->acregmin != 3*HZ) + seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); + if (nfss->acregmax != 60*HZ) + seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); + if (nfss->acdirmin != 30*HZ) + seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); + if (nfss->acdirmax != 60*HZ) + 
seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { + if (nfss->flags & nfs_infop->flag) + seq_puts(m, nfs_infop->str); + else + seq_puts(m, nfs_infop->nostr); + } + seq_puts(m, ",addr="); + seq_escape(m, nfss->hostname, " \t\n\\"); + return 0; +} + +/* + * Invalidate the local caches + */ +void +nfs_zap_caches(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int mode = inode->i_mode; + + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + else + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; +} + +/* + * Invalidate, but do not unhash, the inode + */ +static void +nfs_invalidate_inode(struct inode *inode) +{ + umode_t save_mode = inode->i_mode; + + make_bad_inode(inode); + inode->i_mode = save_mode; + nfs_zap_caches(inode); +} + +struct nfs_find_desc { + struct nfs_fh *fh; + struct nfs_fattr *fattr; +}; + +/* + * In NFSv3 we can have 64bit inode numbers. In order to support + * this, and re-exported directories (also seen in NFSv2) + * we are forced to allow 2 different inodes to have the same + * i_ino. 
+ */ +static int +nfs_find_actor(struct inode *inode, void *opaque) +{ + struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_fh *fh = desc->fh; + struct nfs_fattr *fattr = desc->fattr; + + if (NFS_FILEID(inode) != fattr->fileid) + return 0; + if (nfs_compare_fh(NFS_FH(inode), fh)) + return 0; + if (is_bad_inode(inode) || NFS_STALE(inode)) + return 0; + return 1; +} + +static int +nfs_init_locked(struct inode *inode, void *opaque) +{ + struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_fattr *fattr = desc->fattr; + + NFS_FILEID(inode) = fattr->fileid; + nfs_copy_fh(NFS_FH(inode), desc->fh); + return 0; +} + +/* Don't use READDIRPLUS on directories that we believe are too large */ +#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) + +/* + * This is our front-end to iget that looks up inodes by file handle + * instead of inode number. + */ +struct inode * +nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct nfs_find_desc desc = { + .fh = fh, + .fattr = fattr + }; + struct inode *inode = NULL; + unsigned long hash; + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + goto out_no_inode; + + if (!fattr->nlink) { + printk("NFS: Buggy server - nlink == 0!\n"); + goto out_no_inode; + } + + hash = nfs_fattr_to_ino_t(fattr); + + if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) + goto out_no_inode; + + if (inode->i_state & I_NEW) { + struct nfs_inode *nfsi = NFS_I(inode); + + /* We set i_ino for the few things that still rely on it, + * such as stat(2) */ + inode->i_ino = hash; + + /* We can't support update_atime(), since the server will reset it */ + inode->i_flags |= S_NOATIME|S_NOCMTIME; + inode->i_mode = fattr->mode; + /* Why so? Because we want revalidate for devices/FIFOs, and + * that's precisely what we have in nfs_file_inode_operations. 
+ */ + inode->i_op = &nfs_file_inode_operations; + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &nfs_file_operations; + inode->i_data.a_ops = &nfs_file_aops; + inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; + inode->i_fop = &nfs_dir_operations; + if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) + && fattr->size <= NFS_LIMIT_READDIRPLUS) + NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + } else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else + init_special_inode(inode, inode->i_mode, fattr->rdev); + + nfsi->read_cache_jiffies = fattr->timestamp; + inode->i_atime = fattr->atime; + inode->i_mtime = fattr->mtime; + inode->i_ctime = fattr->ctime; + if (fattr->valid & NFS_ATTR_FATTR_V4) + nfsi->change_attr = fattr->change_attr; + inode->i_size = nfs_size_to_loff_t(fattr->size); + inode->i_nlink = fattr->nlink; + inode->i_uid = fattr->uid; + inode->i_gid = fattr->gid; + if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { + /* + * report the blocks in 512byte units + */ + inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); + inode->i_blksize = inode->i_sb->s_blocksize; + } else { + inode->i_blocks = fattr->du.nfs2.blocks; + inode->i_blksize = fattr->du.nfs2.blocksize; + } + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + nfsi->cache_access.cred = NULL; + + unlock_new_inode(inode); + } else + nfs_refresh_inode(inode, fattr); + dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + atomic_read(&inode->i_count)); + +out: + return inode; + +out_no_inode: + printk("nfs_fhget: iget failed\n"); + goto out; +} + +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET) + +int +nfs_setattr(struct dentry *dentry, struct iattr *attr) +{ 
+ struct inode *inode = dentry->d_inode; + struct nfs_fattr fattr; + int error; + + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) + attr->ia_valid &= ~ATTR_SIZE; + } + + /* Optimization: if the end result is no change, don't RPC */ + attr->ia_valid &= NFS_VALID_ATTRS; + if (attr->ia_valid == 0) + return 0; + + lock_kernel(); + nfs_begin_data_update(inode); + /* Write all dirty data if we're changing file permissions or size */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); + } + error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); + if (error == 0) { + nfs_refresh_inode(inode, &fattr); + if ((attr->ia_valid & ATTR_MODE) != 0) { + int mode; + mode = inode->i_mode & ~S_IALLUGO; + mode |= attr->ia_mode & S_IALLUGO; + inode->i_mode = mode; + } + if ((attr->ia_valid & ATTR_UID) != 0) + inode->i_uid = attr->ia_uid; + if ((attr->ia_valid & ATTR_GID) != 0) + inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_SIZE) != 0) { + inode->i_size = attr->ia_size; + vmtruncate(inode, attr->ia_size); + } + } + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + nfs_end_data_update(inode); + unlock_kernel(); + return error; +} + +/* + * Wait for the inode to get unlocked. + * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING). 
+ */ +static int +nfs_wait_on_inode(struct inode *inode, int flag) +{ + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_inode *nfsi = NFS_I(inode); + + int error; + if (!(NFS_FLAGS(inode) & flag)) + return 0; + atomic_inc(&inode->i_count); + error = nfs_wait_event(clnt, nfsi->nfs_i_wait, + !(NFS_FLAGS(inode) & flag)); + iput(inode); + return error; +} + +int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; + int err; + + if (__IS_FLG(inode, MS_NOATIME)) + need_atime = 0; + else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + need_atime = 0; + /* We may force a getattr if the user cares about atime */ + if (need_atime) + err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!err) + generic_fillattr(inode, stat); + return err; +} + +struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) +{ + struct nfs_open_context *ctx; + + ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx != NULL) { + atomic_set(&ctx->count, 1); + ctx->dentry = dget(dentry); + ctx->cred = get_rpccred(cred); + ctx->state = NULL; + ctx->lockowner = current->files; + ctx->error = 0; + init_waitqueue_head(&ctx->waitq); + } + return ctx; +} + +struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) +{ + if (ctx != NULL) + atomic_inc(&ctx->count); + return ctx; +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + if (atomic_dec_and_test(&ctx->count)) { + if (!list_empty(&ctx->list)) { + struct inode *inode = ctx->dentry->d_inode; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + } + if (ctx->state != NULL) + nfs4_close_state(ctx->state, ctx->mode); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); + 
dput(ctx->dentry); + kfree(ctx); + } +} + +/* + * Ensure that mmap has a recent RPC credential for use when writing out + * shared pages + */ +void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + filp->private_data = get_nfs_open_context(ctx); + spin_lock(&inode->i_lock); + list_add(&ctx->list, &nfsi->open_files); + spin_unlock(&inode->i_lock); +} + +struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *pos, *ctx = NULL; + + spin_lock(&inode->i_lock); + list_for_each_entry(pos, &nfsi->open_files, list) { + if ((pos->mode & mode) == mode) { + ctx = get_nfs_open_context(pos); + break; + } + } + spin_unlock(&inode->i_lock); + return ctx; +} + +void nfs_file_clear_open_context(struct file *filp) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; + + if (ctx) { + filp->private_data = NULL; + spin_lock(&inode->i_lock); + list_move_tail(&ctx->list, &NFS_I(inode)->open_files); + spin_unlock(&inode->i_lock); + put_nfs_open_context(ctx); + } +} + +/* + * These allocate and release file read/write context information. 
+ */ +int nfs_open(struct inode *inode, struct file *filp) +{ + struct nfs_open_context *ctx; + struct rpc_cred *cred; + + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + ctx = alloc_nfs_open_context(filp->f_dentry, cred); + put_rpccred(cred); + if (ctx == NULL) + return -ENOMEM; + ctx->mode = filp->f_mode; + nfs_file_set_open_context(filp, ctx); + put_nfs_open_context(ctx); + if ((filp->f_mode & FMODE_WRITE) != 0) + nfs_begin_data_update(inode); + return 0; +} + +int nfs_release(struct inode *inode, struct file *filp) +{ + if ((filp->f_mode & FMODE_WRITE) != 0) + nfs_end_data_update(inode); + nfs_file_clear_open_context(filp); + return 0; +} + +/* + * This function is called whenever some part of NFS notices that + * the cached attributes have to be refreshed. + */ +int +__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +{ + int status = -ESTALE; + struct nfs_fattr fattr; + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long verifier; + unsigned int flags; + + dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", + inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + + lock_kernel(); + if (!inode || is_bad_inode(inode)) + goto out_nowait; + if (NFS_STALE(inode)) + goto out_nowait; + + while (NFS_REVALIDATING(inode)) { + status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); + if (status < 0) + goto out_nowait; + if (NFS_ATTRTIMEO(inode) == 0) + continue; + if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) + continue; + status = NFS_STALE(inode) ? 
-ESTALE : 0; + goto out_nowait; + } + NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; + + /* Protect against RPC races by saving the change attribute */ + verifier = nfs_save_change_attribute(inode); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); + if (status != 0) { + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), status); + if (status == -ESTALE) { + nfs_zap_caches(inode); + if (!S_ISDIR(inode->i_mode)) + NFS_FLAGS(inode) |= NFS_INO_STALE; + } + goto out; + } + + status = nfs_update_inode(inode, &fattr, verifier); + if (status) { + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), status); + goto out; + } + flags = nfsi->flags; + /* + * We may need to keep the attributes marked as invalid if + * we raced with nfs_end_attr_update(). + */ + if (verifier == nfsi->cache_change_attribute) + nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + /* Do the page cache invalidation */ + if (flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); + } + nfsi->flags &= ~NFS_INO_INVALID_DATA; + invalidate_inode_pages2(inode->i_mapping); + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + /* This ensures we revalidate dentries */ + nfsi->cache_change_attribute++; + } + dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + +out: + NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; + wake_up(&nfsi->nfs_i_wait); + out_nowait: + unlock_kernel(); + return status; +} + +int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if 
(nfs_have_delegation(inode, FMODE_READ)) + return 0; + return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); +} + +/** + * nfs_revalidate_inode - Revalidate the inode attributes + * @server - pointer to nfs_server struct + * @inode - pointer to inode struct + * + * Updates inode attribute information by retrieving the data from the server. + */ +int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + && !nfs_attribute_timeout(inode)) + return NFS_STALE(inode) ? -ESTALE : 0; + return __nfs_revalidate_inode(server, inode); +} + +/** + * nfs_begin_data_update + * @inode - pointer to inode + * Declare that a set of operations will update file data on the server + */ +void nfs_begin_data_update(struct inode *inode) +{ + atomic_inc(&NFS_I(inode)->data_updates); +} + +/** + * nfs_end_data_update + * @inode - pointer to inode + * Declare end of the operations that will update file data + * This will mark the inode as immediately needing revalidation + * of its attribute cache. + */ +void nfs_end_data_update(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (!nfs_have_delegation(inode, FMODE_READ)) { + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + } + nfsi->cache_change_attribute ++; + atomic_dec(&nfsi->data_updates); +} + +/** + * nfs_end_data_update_defer + * @inode - pointer to inode + * Declare end of the operations that will update file data + * This will defer marking the inode as needing revalidation + * unless there are no other pending updates. 
+ */ +void nfs_end_data_update_defer(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (atomic_dec_and_test(&nfsi->data_updates)) { + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + nfsi->cache_change_attribute ++; + } +} + +/** + * nfs_refresh_inode - verify consistency of the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * Verifies the attribute cache. If we have just changed the attributes, + * so that fattr carries weak cache consistency data, then it may + * also update the ctime/mtime/change_attribute. + */ +int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + loff_t cur_size, new_isize; + int data_unstable; + + /* Do we hold a delegation? */ + if (nfs_have_delegation(inode, FMODE_READ)) + return 0; + + /* Are we in the process of updating data on the server? */ + data_unstable = nfs_caches_unstable(inode); + + if (fattr->valid & NFS_ATTR_FATTR_V4) { + if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 + && nfsi->change_attr == fattr->pre_change_attr) + nfsi->change_attr = fattr->change_attr; + if (!data_unstable && nfsi->change_attr != fattr->change_attr) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + + /* Has the inode gone and changed behind our back? 
*/ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + return -EIO; + + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_WCC) != 0) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + } + + /* Verify a few of the more important attributes */ + if (!data_unstable) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime) + || cur_size != new_isize) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + /* Have any file permissions changed? */ + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) + || inode->i_uid != fattr->uid + || inode->i_gid != fattr->gid) + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + + /* Has the link count changed? */ + if (inode->i_nlink != fattr->nlink) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + if (!timespec_equal(&inode->i_atime, &fattr->atime)) + nfsi->flags |= NFS_INO_INVALID_ATIME; + + nfsi->read_cache_jiffies = fattr->timestamp; + return 0; +} + +/* + * Many nfs protocol calls return the new file attributes after + * an operation. Here we update the inode to reflect the state + * of the server's inode. + * + * This is a bit tricky because we have to make sure all dirty pages + * have been sent off to the server before calling invalidate_inode_pages. + * To make sure no other process adds more write requests while we try + * our best to flush them, we make them sleep during the attribute refresh. + * + * A very similar scenario holds for the dir cache. 
+ */ +static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) +{ + struct nfs_inode *nfsi = NFS_I(inode); + __u64 new_size; + loff_t new_isize; + unsigned int invalid = 0; + loff_t cur_isize; + int data_unstable; + + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + __FUNCTION__, inode->i_sb->s_id, inode->i_ino, + atomic_read(&inode->i_count), fattr->valid); + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + + if (nfsi->fileid != fattr->fileid) { + printk(KERN_ERR "%s: inode number mismatch\n" + "expected (%s/0x%Lx), got (%s/0x%Lx)\n", + __FUNCTION__, + inode->i_sb->s_id, (long long)nfsi->fileid, + inode->i_sb->s_id, (long long)fattr->fileid); + goto out_err; + } + + /* + * Make sure the inode's type hasn't changed. + */ + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + goto out_changed; + + /* + * Update the read time so we don't revalidate too often. + */ + nfsi->read_cache_jiffies = fattr->timestamp; + + /* Are we racing with known updates of the metadata on the server? */ + data_unstable = ! nfs_verify_change_attribute(inode, verifier); + + /* Check if the file size agrees */ + new_size = fattr->size; + new_isize = nfs_size_to_loff_t(fattr->size); + cur_isize = i_size_read(inode); + if (cur_isize != new_size) { +#ifdef NFS_DEBUG_VERBOSE + printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); +#endif + /* + * If we have pending writebacks, things can get + * messy. + */ + if (S_ISREG(inode->i_mode) && data_unstable) { + if (new_isize > cur_isize) { + inode->i_size = new_isize; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + } else { + inode->i_size = new_isize; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + } + + /* + * Note: we don't check inode->i_mtime since pipes etc. + * can change this value in VFS without requiring a + * cache revalidation. 
+ */ + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); +#ifdef NFS_DEBUG_VERBOSE + printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); +#endif + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + + if ((fattr->valid & NFS_ATTR_FATTR_V4) + && nfsi->change_attr != fattr->change_attr) { +#ifdef NFS_DEBUG_VERBOSE + printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); +#endif + nfsi->change_attr = fattr->change_attr; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + } + + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || + inode->i_uid != fattr->uid || + inode->i_gid != fattr->gid) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + + inode->i_mode = fattr->mode; + inode->i_nlink = fattr->nlink; + inode->i_uid = fattr->uid; + inode->i_gid = fattr->gid; + + if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { + /* + * report the blocks in 512byte units + */ + inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); + inode->i_blksize = inode->i_sb->s_blocksize; + } else { + inode->i_blocks = fattr->du.nfs2.blocks; + inode->i_blksize = fattr->du.nfs2.blocksize; + } + + /* Update attrtimeo value if we're out of the unstable period */ + if (invalid & NFS_INO_INVALID_ATTR) { + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; + } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { + if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; + } + /* Don't invalidate the data if we were to blame */ + if (!(S_ISREG(inode->i_mode) || 
S_ISDIR(inode->i_mode) + || S_ISLNK(inode->i_mode))) + invalid &= ~NFS_INO_INVALID_DATA; + if (!nfs_have_delegation(inode, FMODE_READ)) + nfsi->flags |= invalid; + + return 0; + out_changed: + /* + * Big trouble! The inode has become a different object. + */ +#ifdef NFS_PARANOIA + printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); +#endif + /* + * No need to worry about unhashing the dentry, as the + * lookup validation will know that the inode is bad. + * (But we fall through to invalidate the caches.) + */ + nfs_invalidate_inode(inode); + out_err: + NFS_FLAGS(inode) |= NFS_INO_STALE; + return -ESTALE; +} + +/* + * File system information + */ + +static int nfs_set_super(struct super_block *s, void *data) +{ + s->s_fs_info = data; + return set_anon_super(s, data); +} + +static int nfs_compare_super(struct super_block *sb, void *data) +{ + struct nfs_server *server = data; + struct nfs_server *old = NFS_SB(sb); + + if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) + return 0; + if (old->addr.sin_port != server->addr.sin_port) + return 0; + return !nfs_compare_fh(&old->fh, &server->fh); +} + +static struct super_block *nfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + int error; + struct nfs_server *server; + struct super_block *s; + struct nfs_fh *root; + struct nfs_mount_data *data = raw_data; + + if (!data) { + printk("nfs_read_super: missing data argument\n"); + return ERR_PTR(-EINVAL); + } + + server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return ERR_PTR(-ENOMEM); + memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); + + if (data->version != NFS_MOUNT_VERSION) { + printk("nfs warning: mount version %s than kernel\n", + data->version < NFS_MOUNT_VERSION ? 
"older" : "newer"); + if (data->version < 2) + data->namlen = 0; + if (data->version < 3) + data->bsize = 0; + if (data->version < 4) { + data->flags &= ~NFS_MOUNT_VER3; + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + } + if (data->version < 5) + data->flags &= ~NFS_MOUNT_SECFLAVOUR; + } + + root = &server->fh; + if (data->flags & NFS_MOUNT_VER3) + root->size = data->root.size; + else + root->size = NFS2_FHSIZE; + if (root->size > sizeof(root->data)) { + printk("nfs_get_sb: invalid root filehandle\n"); + kfree(server); + return ERR_PTR(-EINVAL); + } + memcpy(root->data, data->root.data, root->size); + + /* We now require that the mount process passes the remote address */ + memcpy(&server->addr, &data->addr, sizeof(server->addr)); + if (server->addr.sin_addr.s_addr == INADDR_ANY) { + printk("NFS: mount program didn't pass remote address!\n"); + kfree(server); + return ERR_PTR(-EINVAL); + } + + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + + if (IS_ERR(s) || s->s_root) { + kfree(server); + return s; + } + + s->s_flags = flags; + + /* Fire up rpciod if not yet running */ + if (rpciod_up() != 0) { + printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); + kfree(server); + return ERR_PTR(-EIO); + } + + error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + return ERR_PTR(error); + } + s->s_flags |= MS_ACTIVE; + return s; +} + +static void nfs_kill_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + kill_anon_super(s); + + if (server->client != NULL && !IS_ERR(server->client)) + rpc_shutdown_client(server->client); + if (server->client_sys != NULL && !IS_ERR(server->client_sys)) + rpc_shutdown_client(server->client_sys); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ + + rpciod_down(); /* release rpciod */ + + if (server->hostname != NULL) + kfree(server->hostname); + kfree(server); +} + +static struct file_system_type nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_get_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +#ifdef CONFIG_NFS_V4 + +static void nfs4_clear_inode(struct inode *); + + +static struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .delete_inode = nfs_delete_inode, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, +}; + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +static void nfs4_clear_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + /* If we are holding a delegation, return it! 
*/ + if (nfsi->delegation != NULL) + nfs_inode_return_delegation(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); + /* Now clear out any remaining state */ + while (!list_empty(&nfsi->open_states)) { + struct nfs4_state *state; + + state = list_entry(nfsi->open_states.next, + struct nfs4_state, + inode_states); + dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", + __FUNCTION__, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + state); + BUG_ON(atomic_read(&state->count) != 1); + nfs4_close_state(state, state->state); + } +} + + +static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) +{ + struct nfs_server *server; + struct nfs4_client *clp = NULL; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + struct rpc_timeout timeparms; + rpc_authflavor_t authflavour; + int proto, err = -EIO; + + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + server = NFS_SB(sb); + if (data->rsize != 0) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize != 0) + server->wsize = nfs_block_size(data->wsize, NULL); + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->caps = NFS_CAP_ATOMIC_OPEN; + + server->acregmin = data->acregmin*HZ; + server->acregmax = data->acregmax*HZ; + server->acdirmin = data->acdirmin*HZ; + server->acdirmax = data->acdirmax*HZ; + + server->rpc_ops = &nfs_v4_clientops; + /* Initialize timeout values */ + + timeparms.to_initval = data->timeo * HZ / 10; + timeparms.to_retries = data->retrans; + timeparms.to_exponential = 1; + if (!timeparms.to_retries) + timeparms.to_retries = 5; + + proto = data->proto; + /* Which IP protocol do we use? 
*/ + switch (proto) { + case IPPROTO_TCP: + timeparms.to_maxval = RPC_MAX_TCP_TIMEOUT; + if (!timeparms.to_initval) + timeparms.to_initval = 600 * HZ / 10; + break; + case IPPROTO_UDP: + timeparms.to_maxval = RPC_MAX_UDP_TIMEOUT; + if (!timeparms.to_initval) + timeparms.to_initval = 11 * HZ / 10; + break; + default: + return -EINVAL; + } + + clp = nfs4_get_client(&server->addr.sin_addr); + if (!clp) { + printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); + return -EIO; + } + + /* Now create transport and client */ + authflavour = RPC_AUTH_UNIX; + if (data->auth_flavourlen != 0) { + if (data->auth_flavourlen > 1) + printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n"); + if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { + err = -EFAULT; + goto out_fail; + } + } + + down_write(&clp->cl_sem); + if (clp->cl_rpcclient == NULL) { + xprt = xprt_create_proto(proto, &server->addr, &timeparms); + if (IS_ERR(xprt)) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + err = PTR_ERR(xprt); + goto out_fail; + } + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, authflavour); + if (IS_ERR(clnt)) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + xprt_destroy(xprt); + err = PTR_ERR(clnt); + goto out_fail; + } + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clnt->cl_chatty = 1; + clp->cl_rpcclient = clnt; + clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); + if (IS_ERR(clp->cl_cred)) { + up_write(&clp->cl_sem); + err = PTR_ERR(clp->cl_cred); + clp->cl_cred = NULL; + goto out_fail; + } + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); + nfs_idmap_new(clp); + } + if (list_empty(&clp->cl_superblocks)) { + err = nfs4_init_client(clp); + if (err != 0) { + up_write(&clp->cl_sem); + goto out_fail; + } + } + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + clnt = 
rpc_clone_client(clp->cl_rpcclient); + if (!IS_ERR(clnt)) + server->nfs4_state = clp; + up_write(&clp->cl_sem); + clp = NULL; + + if (IS_ERR(clnt)) { + printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + return PTR_ERR(clnt); + } + + server->client = clnt; + + if (server->nfs4_state->cl_idmap == NULL) { + printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + return -ENOMEM; + } + + if (clnt->cl_auth->au_flavor != authflavour) { + if (rpcauth_create(authflavour, clnt) == NULL) { + printk(KERN_WARNING "NFS: couldn't create credcache!\n"); + return -ENOMEM; + } + } + + sb->s_time_gran = 1; + + sb->s_op = &nfs4_sops; + err = nfs_sb_init(sb, authflavour); + if (err == 0) + return 0; +out_fail: + if (clp) + nfs4_put_client(clp); + return err; +} + +static int nfs4_compare_super(struct super_block *sb, void *data) +{ + struct nfs_server *server = data; + struct nfs_server *old = NFS_SB(sb); + + if (strcmp(server->hostname, old->hostname) != 0) + return 0; + if (strcmp(server->mnt_path, old->mnt_path) != 0) + return 0; + return 1; +} + +static void * +nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +{ + void *p = NULL; + + if (!src->len) + return ERR_PTR(-EINVAL); + if (src->len < maxlen) + maxlen = src->len; + if (dst == NULL) { + p = dst = kmalloc(maxlen + 1, GFP_KERNEL); + if (p == NULL) + return ERR_PTR(-ENOMEM); + } + if (copy_from_user(dst, src->data, maxlen)) { + if (p != NULL) + kfree(p); + return ERR_PTR(-EFAULT); + } + dst[maxlen] = '\0'; + return dst; +} + +static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + int error; + struct nfs_server *server; + struct super_block *s; + struct nfs4_mount_data *data = raw_data; + void *p; + + if (!data) { + printk("nfs_read_super: missing data argument\n"); + return ERR_PTR(-EINVAL); + } + + server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return ERR_PTR(-ENOMEM); + memset(server, 0, 
sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); + + if (data->version != NFS4_MOUNT_VERSION) { + printk("nfs warning: mount version %s than kernel\n", + data->version < NFS4_MOUNT_VERSION ? "older" : "newer"); + } + + p = nfs_copy_user_string(NULL, &data->hostname, 256); + if (IS_ERR(p)) + goto out_err; + server->hostname = p; + + p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); + if (IS_ERR(p)) + goto out_err; + server->mnt_path = p; + + p = nfs_copy_user_string(server->ip_addr, &data->client_addr, + sizeof(server->ip_addr) - 1); + if (IS_ERR(p)) + goto out_err; + + /* We now require that the mount process passes the remote address */ + if (data->host_addrlen != sizeof(server->addr)) { + s = ERR_PTR(-EINVAL); + goto out_free; + } + if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { + s = ERR_PTR(-EFAULT); + goto out_free; + } + if (server->addr.sin_family != AF_INET || + server->addr.sin_addr.s_addr == INADDR_ANY) { + printk("NFS: mount program didn't pass remote IP address!\n"); + s = ERR_PTR(-EINVAL); + goto out_free; + } + + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); + + if (IS_ERR(s) || s->s_root) + goto out_free; + + s->s_flags = flags; + + /* Fire up rpciod if not yet running */ + if (rpciod_up() != 0) { + printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); + s = ERR_PTR(-EIO); + goto out_free; + } + + error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + return ERR_PTR(error); + } + s->s_flags |= MS_ACTIVE; + return s; +out_err: + s = (struct super_block *)p; +out_free: + if (server->mnt_path) + kfree(server->mnt_path); + if (server->hostname) + kfree(server->hostname); + kfree(server); + return s; +} + +static void nfs4_kill_super(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + nfs_return_all_delegations(sb); + kill_anon_super(sb); + + nfs4_renewd_prepare_shutdown(server); + + if (server->client != NULL && !IS_ERR(server->client)) + rpc_shutdown_client(server->client); + rpciod_down(); /* release rpciod */ + + destroy_nfsv4_state(server); + + if (server->hostname != NULL) + kfree(server->hostname); + kfree(server); +} + +static struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +#define nfs4_init_once(nfsi) \ + do { \ + INIT_LIST_HEAD(&(nfsi)->open_states); \ + nfsi->delegation = NULL; \ + nfsi->delegation_state = 0; \ + init_rwsem(&nfsi->rwsem); \ + } while(0) +#define register_nfs4fs() register_filesystem(&nfs4_fs_type) +#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) +#else +#define nfs4_init_once(nfsi) \ + do { } while (0) +#define register_nfs4fs() (0) +#define unregister_nfs4fs() +#endif + +extern int nfs_init_nfspagecache(void); +extern void nfs_destroy_nfspagecache(void); +extern int nfs_init_readpagecache(void); +extern void nfs_destroy_readpagecache(void); +extern int nfs_init_writepagecache(void); +extern void nfs_destroy_writepagecache(void); +#ifdef CONFIG_NFS_DIRECTIO +extern int nfs_init_directcache(void); +extern void nfs_destroy_directcache(void); +#endif + +static kmem_cache_t * nfs_inode_cachep; + +static struct inode *nfs_alloc_inode(struct super_block *sb) +{ + struct nfs_inode *nfsi; + nfsi = (struct nfs_inode 
*)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); + if (!nfsi) + return NULL; + nfsi->flags = 0; + return &nfsi->vfs_inode; +} + +static void nfs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); +} + +static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +{ + struct nfs_inode *nfsi = (struct nfs_inode *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + inode_init_once(&nfsi->vfs_inode); + spin_lock_init(&nfsi->req_lock); + INIT_LIST_HEAD(&nfsi->dirty); + INIT_LIST_HEAD(&nfsi->commit); + INIT_LIST_HEAD(&nfsi->open_files); + INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); + nfsi->ndirty = 0; + nfsi->ncommit = 0; + nfsi->npages = 0; + init_waitqueue_head(&nfsi->nfs_i_wait); + nfs4_init_once(nfsi); + } +} + +int nfs_init_inodecache(void) +{ + nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", + sizeof(struct nfs_inode), + 0, SLAB_RECLAIM_ACCOUNT, + init_once, NULL); + if (nfs_inode_cachep == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_inodecache(void) +{ + if (kmem_cache_destroy(nfs_inode_cachep)) + printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n"); +} + +/* + * Initialize NFS + */ +static int __init init_nfs_fs(void) +{ + int err; + + err = nfs_init_nfspagecache(); + if (err) + goto out4; + + err = nfs_init_inodecache(); + if (err) + goto out3; + + err = nfs_init_readpagecache(); + if (err) + goto out2; + + err = nfs_init_writepagecache(); + if (err) + goto out1; + +#ifdef CONFIG_NFS_DIRECTIO + err = nfs_init_directcache(); + if (err) + goto out0; +#endif + +#ifdef CONFIG_PROC_FS + rpc_proc_register(&nfs_rpcstat); +#endif + err = register_filesystem(&nfs_fs_type); + if (err) + goto out; + if ((err = register_nfs4fs()) != 0) + goto out; + return 0; +out: +#ifdef CONFIG_PROC_FS + rpc_proc_unregister("nfs"); +#endif + nfs_destroy_writepagecache(); +#ifdef CONFIG_NFS_DIRECTIO 
+out0: + nfs_destroy_directcache(); +#endif +out1: + nfs_destroy_readpagecache(); +out2: + nfs_destroy_inodecache(); +out3: + nfs_destroy_nfspagecache(); +out4: + return err; +} + +static void __exit exit_nfs_fs(void) +{ +#ifdef CONFIG_NFS_DIRECTIO + nfs_destroy_directcache(); +#endif + nfs_destroy_writepagecache(); + nfs_destroy_readpagecache(); + nfs_destroy_inodecache(); + nfs_destroy_nfspagecache(); +#ifdef CONFIG_PROC_FS + rpc_proc_unregister("nfs"); +#endif + unregister_filesystem(&nfs_fs_type); + unregister_nfs4fs(); +} + +/* Not quite true; I just maintain it */ +MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +MODULE_LICENSE("GPL"); + +module_init(init_nfs_fs) +module_exit(exit_nfs_fs) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c new file mode 100644 index 00000000000..9d3ddad96d9 --- /dev/null +++ b/fs/nfs/mount_clnt.c @@ -0,0 +1,183 @@ +/* + * linux/fs/nfs/mount_clnt.c + * + * MOUNT client to support NFSroot. + * + * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/uio.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/sched.h> +#include <linux/nfs_fs.h> + +#ifdef RPC_DEBUG +# define NFSDBG_FACILITY NFSDBG_ROOT +#endif + +/* +#define MOUNT_PROGRAM 100005 +#define MOUNT_VERSION 1 +#define MOUNT_MNT 1 +#define MOUNT_UMNT 3 + */ + +static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *, + int, int); +static struct rpc_program mnt_program; + +struct mnt_fhstatus { + unsigned int status; + struct nfs_fh * fh; +}; + +/* + * Obtain an NFS file handle for the given host and path + */ +int +nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, + int version, int protocol) +{ + struct rpc_clnt *mnt_clnt; + struct mnt_fhstatus result = { + .fh = fh + }; + char hostname[32]; + int status; + int call; + + 
dprintk("NFS: nfs_mount(%08x:%s)\n", + (unsigned)ntohl(addr->sin_addr.s_addr), path); + + sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); + mnt_clnt = mnt_create(hostname, addr, version, protocol); + if (IS_ERR(mnt_clnt)) + return PTR_ERR(mnt_clnt); + + call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT; + status = rpc_call(mnt_clnt, call, path, &result, 0); + return status < 0? status : (result.status? -EACCES : 0); +} + +static struct rpc_clnt * +mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, + int protocol) +{ + struct rpc_xprt *xprt; + struct rpc_clnt *clnt; + + xprt = xprt_create_proto(protocol, srvaddr, NULL); + if (IS_ERR(xprt)) + return (struct rpc_clnt *)xprt; + + clnt = rpc_create_client(xprt, hostname, + &mnt_program, version, + RPC_AUTH_UNIX); + if (IS_ERR(clnt)) { + xprt_destroy(xprt); + } else { + clnt->cl_softrtry = 1; + clnt->cl_chatty = 1; + clnt->cl_oneshot = 1; + clnt->cl_intr = 1; + } + return clnt; +} + +/* + * XDR encode/decode functions for MOUNT + */ +static int +xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path) +{ + p = xdr_encode_string(p, path); + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int +xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res) +{ + struct nfs_fh *fh = res->fh; + + if ((res->status = ntohl(*p++)) == 0) { + fh->size = NFS2_FHSIZE; + memcpy(fh->data, p, NFS2_FHSIZE); + } + return 0; +} + +static int +xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res) +{ + struct nfs_fh *fh = res->fh; + + if ((res->status = ntohl(*p++)) == 0) { + int size = ntohl(*p++); + if (size <= NFS3_FHSIZE) { + fh->size = size; + memcpy(fh->data, p, size); + } else + res->status = -EBADHANDLE; + } + return 0; +} + +#define MNT_dirpath_sz (1 + 256) +#define MNT_fhstatus_sz (1 + 8) + +static struct rpc_procinfo mnt_procedures[] = { +[MNTPROC_MNT] = { + .p_proc = MNTPROC_MNT, + .p_encode = 
(kxdrproc_t) xdr_encode_dirpath, + .p_decode = (kxdrproc_t) xdr_decode_fhstatus, + .p_bufsiz = MNT_dirpath_sz << 2, + }, +}; + +static struct rpc_procinfo mnt3_procedures[] = { +[MOUNTPROC3_MNT] = { + .p_proc = MOUNTPROC3_MNT, + .p_encode = (kxdrproc_t) xdr_encode_dirpath, + .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, + .p_bufsiz = MNT_dirpath_sz << 2, + }, +}; + + +static struct rpc_version mnt_version1 = { + .number = 1, + .nrprocs = 2, + .procs = mnt_procedures +}; + +static struct rpc_version mnt_version3 = { + .number = 3, + .nrprocs = 2, + .procs = mnt3_procedures +}; + +static struct rpc_version * mnt_version[] = { + NULL, + &mnt_version1, + NULL, + &mnt_version3, +}; + +static struct rpc_stat mnt_stats; + +static struct rpc_program mnt_program = { + .name = "mount", + .number = NFS_MNT_PROGRAM, + .nrvers = sizeof(mnt_version)/sizeof(mnt_version[0]), + .version = mnt_version, + .stats = &mnt_stats, +}; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c new file mode 100644 index 00000000000..d91b69044a4 --- /dev/null +++ b/fs/nfs/nfs2xdr.c @@ -0,0 +1,711 @@ +/* + * linux/fs/nfs/nfs2xdr.c + * + * XDR functions to encode/decode NFS RPC arguments and results. + * + * Copyright (C) 1992, 1993, 1994 Rick Sladkey + * Copyright (C) 1996 Olaf Kirch + * 04 Aug 1998 Ion Badulescu <ionut@cs.columbia.edu> + * FIFO's need special handling in NFSv2 + */ + +#include <linux/param.h> +#include <linux/time.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/proc_fs.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs_fs.h> + +#define NFSDBG_FACILITY NFSDBG_XDR +/* #define NFS_PARANOIA 1 */ + +extern int nfs_stat_to_errno(int stat); + +/* Mapping from NFS error code to "errno" error code. 
*/ +#define errno_NFSERR_IO EIO + +/* + * Declare the space requirements for NFS arguments and replies as + * number of 32bit-words + */ +#define NFS_fhandle_sz (8) +#define NFS_sattr_sz (8) +#define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2)) +#define NFS_path_sz (1+(NFS2_MAXPATHLEN>>2)) +#define NFS_fattr_sz (17) +#define NFS_info_sz (5) +#define NFS_entry_sz (NFS_filename_sz+3) + +#define NFS_diropargs_sz (NFS_fhandle_sz+NFS_filename_sz) +#define NFS_sattrargs_sz (NFS_fhandle_sz+NFS_sattr_sz) +#define NFS_readlinkargs_sz (NFS_fhandle_sz) +#define NFS_readargs_sz (NFS_fhandle_sz+3) +#define NFS_writeargs_sz (NFS_fhandle_sz+4) +#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz) +#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz) +#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz) +#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz) +#define NFS_readdirargs_sz (NFS_fhandle_sz+2) + +#define NFS_attrstat_sz (1+NFS_fattr_sz) +#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz) +#define NFS_readlinkres_sz (2) +#define NFS_readres_sz (1+NFS_fattr_sz+1) +#define NFS_writeres_sz (NFS_attrstat_sz) +#define NFS_stat_sz (1) +#define NFS_readdirres_sz (1) +#define NFS_statfsres_sz (1+NFS_info_sz) + +/* + * Common NFS XDR functions as inlines + */ +static inline u32 * +xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle) +{ + memcpy(p, fhandle->data, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); +} + +static inline u32 * +xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle) +{ + /* NFSv2 handles have a fixed length */ + fhandle->size = NFS2_FHSIZE; + memcpy(fhandle->data, p, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); +} + +static inline u32* +xdr_encode_time(u32 *p, struct timespec *timep) +{ + *p++ = htonl(timep->tv_sec); + /* Convert nanoseconds into microseconds */ + *p++ = htonl(timep->tv_nsec ? 
timep->tv_nsec / 1000 : 0); + return p; +} + +static inline u32* +xdr_encode_current_server_time(u32 *p, struct timespec *timep) +{ + /* + * Passing the invalid value useconds=1000000 is a + * Sun convention for "set to current server time". + * It's needed to make permissions checks for the + * "touch" program across v2 mounts to Solaris and + * Irix boxes work correctly. See description of + * sattr in section 6.1 of "NFS Illustrated" by + * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5 + */ + *p++ = htonl(timep->tv_sec); + *p++ = htonl(1000000); + return p; +} + +static inline u32* +xdr_decode_time(u32 *p, struct timespec *timep) +{ + timep->tv_sec = ntohl(*p++); + /* Convert microseconds into nanoseconds */ + timep->tv_nsec = ntohl(*p++) * 1000; + return p; +} + +static u32 * +xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) +{ + u32 rdev; + fattr->type = (enum nfs_ftype) ntohl(*p++); + fattr->mode = ntohl(*p++); + fattr->nlink = ntohl(*p++); + fattr->uid = ntohl(*p++); + fattr->gid = ntohl(*p++); + fattr->size = ntohl(*p++); + fattr->du.nfs2.blocksize = ntohl(*p++); + rdev = ntohl(*p++); + fattr->du.nfs2.blocks = ntohl(*p++); + fattr->fsid_u.nfs3 = ntohl(*p++); + fattr->fileid = ntohl(*p++); + p = xdr_decode_time(p, &fattr->atime); + p = xdr_decode_time(p, &fattr->mtime); + p = xdr_decode_time(p, &fattr->ctime); + fattr->valid |= NFS_ATTR_FATTR; + fattr->rdev = new_decode_dev(rdev); + if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) { + fattr->type = NFFIFO; + fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; + fattr->rdev = 0; + } + fattr->timestamp = jiffies; + return p; +} + +#define SATTR(p, attr, flag, field) \ + *p++ = (attr->ia_valid & flag) ? 
htonl(attr->field) : ~(u32) 0 +static inline u32 * +xdr_encode_sattr(u32 *p, struct iattr *attr) +{ + SATTR(p, attr, ATTR_MODE, ia_mode); + SATTR(p, attr, ATTR_UID, ia_uid); + SATTR(p, attr, ATTR_GID, ia_gid); + SATTR(p, attr, ATTR_SIZE, ia_size); + + if (attr->ia_valid & ATTR_ATIME_SET) { + p = xdr_encode_time(p, &attr->ia_atime); + } else if (attr->ia_valid & ATTR_ATIME) { + p = xdr_encode_current_server_time(p, &attr->ia_atime); + } else { + *p++ = ~(u32) 0; + *p++ = ~(u32) 0; + } + + if (attr->ia_valid & ATTR_MTIME_SET) { + p = xdr_encode_time(p, &attr->ia_mtime); + } else if (attr->ia_valid & ATTR_MTIME) { + p = xdr_encode_current_server_time(p, &attr->ia_mtime); + } else { + *p++ = ~(u32) 0; + *p++ = ~(u32) 0; + } + return p; +} +#undef SATTR + +/* + * NFS encode functions + */ +/* + * Encode file handle argument + * GETATTR, READLINK, STATFS + */ +static int +nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh) +{ + p = xdr_encode_fhandle(p, fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SETATTR arguments + */ +static int +nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode directory ops argument + * LOOKUP, REMOVE, RMDIR + */ +static int +nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Arguments to a READ call. Since we read data directly into the page + * cache, we also set up the reply iovec here so that iov[1] points + * exactly to the page we want to fetch. 
+ */ +static int +nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + u32 offset = (u32)args->offset; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(offset); + *p++ = htonl(count); + *p++ = htonl(count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, count); + return 0; +} + +/* + * Decode READ reply + */ +static int +nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) +{ + struct kvec *iov = req->rq_rcv_buf.head; + int status, count, recvd, hdrlen; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + p = xdr_decode_fattr(p, res->fattr); + + count = ntohl(*p++); + res->eof = 0; + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + printk(KERN_WARNING "NFS: READ reply header overflowed:" + "length %d > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READ header is short. iovec will be shifted.\n"); + xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); + } + + recvd = req->rq_rcv_buf.len - hdrlen; + if (count > recvd) { + printk(KERN_WARNING "NFS: server cheating in read reply: " + "count %d > recvd %d\n", count, recvd); + count = recvd; + } + + dprintk("RPC: readres OK count %d\n", count); + if (count < res->count) + res->count = count; + + return count; +} + + +/* + * Write arguments. Splice the buffer to be written into the iovec. 
+ */ +static int +nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) +{ + struct xdr_buf *sndbuf = &req->rq_snd_buf; + u32 offset = (u32)args->offset; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(offset); + *p++ = htonl(offset); + *p++ = htonl(count); + *p++ = htonl(count); + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + + /* Copy the page array */ + xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); + return 0; +} + +/* + * Encode create arguments + * CREATE, MKDIR + */ +static int +nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode RENAME arguments + */ +static int +nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode LINK arguments + */ +static int +nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SYMLINK arguments + */ +static int +nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_array(p, args->topath, args->tolen); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode arguments to 
readdir call + */ +static int +nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args) +{ + struct rpc_task *task = req->rq_task; + struct rpc_auth *auth = task->tk_auth; + unsigned int replen; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->cookie); + *p++ = htonl(count); /* see above */ + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); + return 0; +} + +/* + * Decode the result of a readdir call. + * We're not really decoding anymore, we just leave the buffer untouched + * and only check that it is syntactically correct. + * The real decoding happens in nfs_decode_dirent below, called directly + * from nfs_readdir for each entry. + * + * Returns the number of entries that passed the check, or a negative + * errno when the status word is non-zero, the header overflows the head + * iovec, or an entry claims a name longer than NFS2_MAXNAMLEN. + */ +static int +nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + struct page **page; + int hdrlen, recvd; + int status, nr; + unsigned int len, pglen; + u32 *end, *entry, *kaddr; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + printk(KERN_WARNING "NFS: READDIR reply header overflowed:" + "length %d > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READDIR header is short. 
iovec will be shifted.\n"); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); + } + + /* Never look at more page data than was actually received */ + pglen = rcvbuf->page_len; + recvd = rcvbuf->len - hdrlen; + if (pglen > recvd) + pglen = recvd; + page = rcvbuf->pages; + kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0); + end = (u32 *)((char *)p + pglen); + entry = p; + for (nr = 0; *p++; nr++) { + if (p + 2 > end) + goto short_pkt; + p++; /* fileid */ + len = ntohl(*p++); + /* Check the server-supplied length before it is used to advance p, + * so p is never moved by an unvalidated amount. */ + if (len > NFS2_MAXNAMLEN) { + printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n", + len); + goto err_unmap; + } + p += XDR_QUADLEN(len) + 1; /* name plus cookie */ + if (p + 2 > end) + goto short_pkt; + entry = p; + } + if (!nr && (entry[0] != 0 || entry[1] == 0)) + goto short_pkt; + out: + kunmap_atomic(kaddr, KM_USER0); + return nr; + short_pkt: + /* Overwrite the last incomplete entry with an end marker */ + entry[0] = entry[1] = 0; + /* truncate listing ? */ + if (!nr) { + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + goto out; +err_unmap: + nr = -errno_NFSERR_IO; + goto out; +} + +/* + * Decode one directory entry at p into *entry. + * Returns the position following the cookie, ERR_PTR(-EAGAIN) when both + * the entry marker and the word after it are zero, or + * ERR_PTR(-EBADCOOKIE) with entry->eof set when the marker is zero and + * the following word is not. The plus argument is not used here. + */ +u32 * +nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) +{ + if (!*p++) { + if (!*p) + return ERR_PTR(-EAGAIN); + entry->eof = 1; + return ERR_PTR(-EBADCOOKIE); + } + + entry->ino = ntohl(*p++); + entry->len = ntohl(*p++); + entry->name = (const char *) p; + p += XDR_QUADLEN(entry->len); + entry->prev_cookie = entry->cookie; + entry->cookie = ntohl(*p++); + entry->eof = !p[0] && p[1]; + + return p; +} + +/* + * NFS XDR decode functions + */ +/* + * Decode simple status reply + */ +static int +nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy) +{ + int status; + + if ((status = ntohl(*p++)) != 0) + status = -nfs_stat_to_errno(status); + return status; +} + +/* + * Decode attrstat reply + * GETATTR, SETATTR, WRITE + */ +static int +nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + xdr_decode_fattr(p, fattr); + return 0; +} + +/* + * Decode diropres reply + * LOOKUP, 
CREATE, MKDIR + */ +static int +nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res) +{ + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + p = xdr_decode_fhandle(p, res->fh); /* handle first, attributes follow */ + xdr_decode_fattr(p, res->fattr); + return 0; +} + +/* + * Encode READLINK args + * After the file handle is encoded, args->pages (starting at args->pgbase, + * args->pglen bytes) is handed to xdr_inline_pages() for the receive + * buffer. Always returns 0. + */ +static int +nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); + return 0; +} + +/* + * Decode READLINK reply + * Checks the returned length against the page area and against the + * number of bytes received, then writes a terminating NUL into the + * first page at offset len + page_base. + * Returns 0 on success or a negative errno. + */ +static int +nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + int hdrlen, len, recvd; + char *kaddr; + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + /* Convert length of symlink */ + len = ntohl(*p++); + /* len must leave room for the NUL written below, and must be positive */ + if (len >= rcvbuf->page_len || len <= 0) { + dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); + return -ENAMETOOLONG; + } + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + printk(KERN_WARNING "NFS: READLINK reply header overflowed:" + "length %d > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READLINK header is short. 
iovec will be shifted.\n"); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); + } + recvd = req->rq_rcv_buf.len - hdrlen; + if (recvd < len) { + printk(KERN_WARNING "NFS: server cheating in readlink reply: " + "count %u > recvd %u\n", len, recvd); + return -EIO; + } + + /* NULL terminate the string we got */ + kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); + kaddr[len+rcvbuf->page_base] = '\0'; + kunmap_atomic(kaddr, KM_USER0); + return 0; +} + +/* + * Decode WRITE reply + * The verifier is marked NFS_FILE_SYNC unconditionally before the rest of + * the reply is decoded as an attrstat. + */ +static int +nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) +{ + res->verf->committed = NFS_FILE_SYNC; + return nfs_xdr_attrstat(req, p, res->fattr); +} + +/* + * Decode STATFS reply + * Reads five consecutive 32-bit words after the status word. + * Returns 0 on success or a negative errno. + */ +static int +nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res) +{ + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + + res->tsize = ntohl(*p++); + res->bsize = ntohl(*p++); + res->blocks = ntohl(*p++); + res->bfree = ntohl(*p++); + res->bavail = ntohl(*p++); + return 0; +} + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. 
+ */ +static struct { + int stat; /* NFS status code looked up by nfs_stat_to_errno() */ + int errno; /* positive local errno returned for that status */ +} nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, EPERM }, + { NFSERR_NOENT, ENOENT }, + { NFSERR_IO, errno_NFSERR_IO }, + { NFSERR_NXIO, ENXIO }, +/* { NFSERR_EAGAIN, EAGAIN }, */ + { NFSERR_ACCES, EACCES }, + { NFSERR_EXIST, EEXIST }, + { NFSERR_XDEV, EXDEV }, + { NFSERR_NODEV, ENODEV }, + { NFSERR_NOTDIR, ENOTDIR }, + { NFSERR_ISDIR, EISDIR }, + { NFSERR_INVAL, EINVAL }, + { NFSERR_FBIG, EFBIG }, + { NFSERR_NOSPC, ENOSPC }, + { NFSERR_ROFS, EROFS }, + { NFSERR_MLINK, EMLINK }, + { NFSERR_NAMETOOLONG, ENAMETOOLONG }, + { NFSERR_NOTEMPTY, ENOTEMPTY }, + { NFSERR_DQUOT, EDQUOT }, + { NFSERR_STALE, ESTALE }, + { NFSERR_REMOTE, EREMOTE }, +#ifdef EWFLUSH + { NFSERR_WFLUSH, EWFLUSH }, +#endif + { NFSERR_BADHANDLE, EBADHANDLE }, + { NFSERR_NOT_SYNC, ENOTSYNC }, + { NFSERR_BAD_COOKIE, EBADCOOKIE }, + { NFSERR_NOTSUPP, ENOTSUPP }, + { NFSERR_TOOSMALL, ETOOSMALL }, + { NFSERR_SERVERFAULT, ESERVERFAULT }, + { NFSERR_BADTYPE, EBADTYPE }, + { NFSERR_JUKEBOX, EJUKEBOX }, + /* sentinel: ends the lookup loop and supplies the fallback errno */ + { -1, EIO } +}; + +/* + * Convert an NFS error code to a local one. + * This one is used jointly by NFSv2 and NFSv3. + * + * Returns the positive errno from the matching table entry. A status + * that is not in the table is logged and mapped to the errno of the + * sentinel entry (EIO). + */ +int +nfs_stat_to_errno(int stat) +{ + int i; + + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + /* i now indexes the sentinel entry */ + printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + return nfs_errtbl[i].errno; +} + +#ifndef MAX +# define MAX(a, b) (((a) > (b))? 
(a) : (b)) +#endif + +#define PROC(proc, argtype, restype, timer) \ +[NFSPROC_##proc] = { \ + .p_proc = NFSPROC_##proc, \ + .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ + .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ + .p_timer = timer \ + } +struct rpc_procinfo nfs_procedures[] = { + PROC(GETATTR, fhandle, attrstat, 1), + PROC(SETATTR, sattrargs, attrstat, 0), + PROC(LOOKUP, diropargs, diropres, 2), + PROC(READLINK, readlinkargs, readlinkres, 3), + PROC(READ, readargs, readres, 3), + PROC(WRITE, writeargs, writeres, 4), + PROC(CREATE, createargs, diropres, 0), + PROC(REMOVE, diropargs, stat, 0), + PROC(RENAME, renameargs, stat, 0), + PROC(LINK, linkargs, stat, 0), + PROC(SYMLINK, symlinkargs, stat, 0), + PROC(MKDIR, createargs, diropres, 0), + PROC(RMDIR, diropargs, stat, 0), + PROC(READDIR, readdirargs, readdirres, 3), + PROC(STATFS, fhandle, statfsres, 0), +}; + +struct rpc_version nfs_version2 = { + .number = 2, + .nrprocs = sizeof(nfs_procedures)/sizeof(nfs_procedures[0]), + .procs = nfs_procedures +}; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c new file mode 100644 index 00000000000..3878494dfc2 --- /dev/null +++ b/fs/nfs/nfs3proc.c @@ -0,0 +1,859 @@ +/* + * linux/fs/nfs/nfs3proc.c + * + * Client-side NFSv3 procedures stubs. 
+ * + * Copyright (C) 1997, Olaf Kirch + */ + +#include <linux/mm.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/lockd/bind.h> +#include <linux/smp_lock.h> + +#define NFSDBG_FACILITY NFSDBG_PROC + +extern struct rpc_procinfo nfs3_procedures[]; + +/* A wrapper to handle the EJUKEBOX error message: the call is repeated, sleeping NFS_JUKEBOX_RETRY_TIME between attempts, for as long as it returns -EJUKEBOX. If signalled() becomes true after a sleep the loop ends and -ERESTARTSYS is returned. Any other result is returned as is. */ +static int +nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +{ + sigset_t oldset; + int res; + rpc_clnt_sigmask(clnt, &oldset); + do { + /* defined before the rpc_call_sync macro below, so this is the real rpc_call_sync() */ + res = rpc_call_sync(clnt, msg, flags); + if (res != -EJUKEBOX) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(NFS_JUKEBOX_RETRY_TIME); + res = -ERESTARTSYS; /* returned only if the loop exits because of a signal */ + } while (!signalled()); + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +/* Builds an rpc_message for entry proc of nfs3_procedures[] and sends it through nfs3_rpc_wrapper(). */ +static inline int +nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) +{ + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[proc], + .rpc_argp = argp, + .rpc_resp = resp, + }; + return nfs3_rpc_wrapper(clnt, &msg, flags); +} + /* From here on, rpc_call() and rpc_call_sync() in this file expand to the retrying wrappers above. */ +#define rpc_call(clnt, proc, argp, resp, flags) \ + nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags) +#define rpc_call_sync(clnt, msg, flags) \ + nfs3_rpc_wrapper(clnt, msg, flags) + /* Asynchronous counterpart: returns 1 after clearing tk_status and re-arming the task with a NFS_JUKEBOX_RETRY_TIME delay when it finished with -EJUKEBOX, 0 otherwise. */ +static int +nfs3_async_handle_jukebox(struct rpc_task *task) +{ + if (task->tk_status != -EJUKEBOX) + return 0; + task->tk_status = 0; + rpc_restart_call(task); + rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); + return 1; +} + +/* + * Bare-bones access to getattr: this is for nfs_read_super. 
+ */ +static int +nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + dprintk("%s: call fsinfo\n", __FUNCTION__); + info->fattr->valid = 0; + status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); + if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); + } + return status; +} + +/* + * One function for each procedure in the NFS protocol. + */ +static int +nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + + dprintk("NFS call getattr\n"); + fattr->valid = 0; + status = rpc_call(server->client, NFS3PROC_GETATTR, + fhandle, fattr, 0); + dprintk("NFS reply getattr: %d\n", status); + return status; +} + +static int +nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) +{ + struct inode *inode = dentry->d_inode; + struct nfs3_sattrargs arg = { + .fh = NFS_FH(inode), + .sattr = sattr, + }; + int status; + + dprintk("NFS call setattr\n"); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); + dprintk("NFS reply setattr: %d\n", status); + return status; +} + +static int +nfs3_proc_lookup(struct inode *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs_fattr dir_attr; + struct nfs3_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + struct nfs3_diropres res = { + .dir_attr = &dir_attr, + .fh = fhandle, + .fattr = fattr + }; + int status; + + dprintk("NFS call lookup %s\n", name->name); + dir_attr.valid = 0; + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0); + if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) + status = rpc_call(NFS_CLIENT(dir), 
NFS3PROC_GETATTR, + fhandle, fattr, 0); + dprintk("NFS reply lookup: %d\n", status); + if (status >= 0) + status = nfs_refresh_inode(dir, &dir_attr); + return status; +} + +static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) +{ + struct nfs_fattr fattr; + struct nfs3_accessargs arg = { + .fh = NFS_FH(inode), + }; + struct nfs3_accessres res = { + .fattr = &fattr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = entry->cred + }; + int mode = entry->mask; + int status; + + dprintk("NFS call access\n"); + fattr.valid = 0; + + if (mode & MAY_READ) + arg.access |= NFS3_ACCESS_READ; + if (S_ISDIR(inode->i_mode)) { + if (mode & MAY_WRITE) + arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE; + if (mode & MAY_EXEC) + arg.access |= NFS3_ACCESS_LOOKUP; + } else { + if (mode & MAY_WRITE) + arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND; + if (mode & MAY_EXEC) + arg.access |= NFS3_ACCESS_EXECUTE; + } + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_refresh_inode(inode, &fattr); + if (status == 0) { + entry->mask = 0; + if (res.access & NFS3_ACCESS_READ) + entry->mask |= MAY_READ; + if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; + } + dprintk("NFS reply access: %d\n", status); + return status; +} + +static int nfs3_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) +{ + struct nfs_fattr fattr; + struct nfs3_readlinkargs args = { + .fh = NFS_FH(inode), + .pgbase = pgbase, + .pglen = pglen, + .pages = &page + }; + int status; + + dprintk("NFS call readlink\n"); + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, + &args, &fattr, 0); + nfs_refresh_inode(inode, &fattr); + dprintk("NFS reply readlink: %d\n", 
status); + return status; +} + +static int nfs3_proc_read(struct nfs_read_data *rdata) +{ + int flags = rdata->flags; + struct inode * inode = rdata->inode; + struct nfs_fattr * fattr = rdata->res.fattr; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READ], + .rpc_argp = &rdata->args, + .rpc_resp = &rdata->res, + .rpc_cred = rdata->cred, + }; + int status; + + dprintk("NFS call read %d @ %Ld\n", rdata->args.count, + (long long) rdata->args.offset); + fattr->valid = 0; + status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); + if (status >= 0) + nfs_refresh_inode(inode, fattr); + dprintk("NFS reply read: %d\n", status); + return status; +} + +static int nfs3_proc_write(struct nfs_write_data *wdata) +{ + int rpcflags = wdata->flags; + struct inode * inode = wdata->inode; + struct nfs_fattr * fattr = wdata->res.fattr; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], + .rpc_argp = &wdata->args, + .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, + }; + int status; + + dprintk("NFS call write %d @ %Ld\n", wdata->args.count, + (long long) wdata->args.offset); + fattr->valid = 0; + status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); + if (status >= 0) + nfs_refresh_inode(inode, fattr); + dprintk("NFS reply write: %d\n", status); + return status < 0? 
status : wdata->res.count; +} + +static int nfs3_proc_commit(struct nfs_write_data *cdata) +{ + struct inode * inode = cdata->inode; + struct nfs_fattr * fattr = cdata->res.fattr; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], + .rpc_argp = &cdata->args, + .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, + }; + int status; + + dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, + (long long) cdata->args.offset); + fattr->valid = 0; + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (status >= 0) + nfs_refresh_inode(inode, fattr); + dprintk("NFS reply commit: %d\n", status); + return status; +} + +/* + * Create a regular file. + * O_EXCL in flags selects NFS3_CREATE_EXCLUSIVE with a verifier built from + * jiffies and the current pid; otherwise NFS3_CREATE_UNCHECKED is used. + * When the call fails with -ENOTSUPP the create mode is stepped down + * (exclusive -> guarded -> unchecked) and the call is repeated. + * After a successful exclusive create the attributes in sattr are applied + * with a separate SETATTR. + * Returns 0 on success or a negative errno. + */ +static int +nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + int flags) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs_fattr dir_attr; + struct nfs3_createargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr, + }; + struct nfs3_diropres res = { + .dir_attr = &dir_attr, + .fh = &fhandle, + .fattr = &fattr + }; + int status; + + dprintk("NFS call create %s\n", dentry->d_name.name); + arg.createmode = NFS3_CREATE_UNCHECKED; + if (flags & O_EXCL) { + arg.createmode = NFS3_CREATE_EXCLUSIVE; + arg.verifier[0] = jiffies; + arg.verifier[1] = current->pid; + } + +again: + dir_attr.valid = 0; + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); + nfs_refresh_inode(dir, &dir_attr); + + /* If the server doesn't support the exclusive creation semantics, + * try again with simple 'guarded' mode. 
+ * The RPC layer hands back negative errno values, so the test is + * against -ENOTSUPP and not the wire status NFSERR_NOTSUPP. */ + if (status == -ENOTSUPP) { + switch (arg.createmode) { + case NFS3_CREATE_EXCLUSIVE: + arg.createmode = NFS3_CREATE_GUARDED; + break; + + case NFS3_CREATE_GUARDED: + arg.createmode = NFS3_CREATE_UNCHECKED; + break; + + case NFS3_CREATE_UNCHECKED: + goto out; + } + goto again; + } + + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + + /* When we created the file with exclusive semantics, make + * sure we set the attributes afterwards. */ + if (arg.createmode == NFS3_CREATE_EXCLUSIVE) { + dprintk("NFS call setattr (post-create)\n"); + + if (!(sattr->ia_valid & ATTR_ATIME_SET)) + sattr->ia_valid |= ATTR_ATIME; + if (!(sattr->ia_valid & ATTR_MTIME_SET)) + sattr->ia_valid |= ATTR_MTIME; + + /* Note: we could use a guarded setattr here, but I'm + * not sure this buys us anything (and I'd have + * to revamp the NFSv3 XDR code) */ + status = nfs3_proc_setattr(dentry, &fattr, sattr); + nfs_refresh_inode(dentry->d_inode, &fattr); + dprintk("NFS reply setattr (post-create): %d\n", status); + } +out: + dprintk("NFS reply create: %d\n", status); + return status; +} + +static int +nfs3_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs_fattr dir_attr; + struct nfs3_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE], + .rpc_argp = &arg, + .rpc_resp = &dir_attr, + }; + int status; + + dprintk("NFS call remove %s\n", name->name); + dir_attr.valid = 0; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_refresh_inode(dir, &dir_attr); + dprintk("NFS reply remove: %d\n", status); + return status; +} + +static int +nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name) +{ + struct unlinkxdr { + struct nfs3_diropargs arg; + struct nfs_fattr res; + } *ptr; + + ptr = (struct unlinkxdr *)kmalloc(sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + ptr->arg.fh 
= NFS_FH(dir->d_inode); + ptr->arg.name = name->name; + ptr->arg.len = name->len; + ptr->res.valid = 0; + msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; + msg->rpc_argp = &ptr->arg; + msg->rpc_resp = &ptr->res; + return 0; +} + +static int +nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task) +{ + struct rpc_message *msg = &task->tk_msg; + struct nfs_fattr *dir_attr; + + if (nfs3_async_handle_jukebox(task)) + return 1; + if (msg->rpc_argp) { + dir_attr = (struct nfs_fattr*)msg->rpc_resp; + nfs_refresh_inode(dir->d_inode, dir_attr); + kfree(msg->rpc_argp); + } + return 0; +} + +static int +nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs_fattr old_dir_attr, new_dir_attr; + struct nfs3_renameargs arg = { + .fromfh = NFS_FH(old_dir), + .fromname = old_name->name, + .fromlen = old_name->len, + .tofh = NFS_FH(new_dir), + .toname = new_name->name, + .tolen = new_name->len + }; + struct nfs3_renameres res = { + .fromattr = &old_dir_attr, + .toattr = &new_dir_attr + }; + int status; + + dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); + old_dir_attr.valid = 0; + new_dir_attr.valid = 0; + status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); + nfs_refresh_inode(old_dir, &old_dir_attr); + nfs_refresh_inode(new_dir, &new_dir_attr); + dprintk("NFS reply rename: %d\n", status); + return status; +} + +static int +nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs_fattr dir_attr, fattr; + struct nfs3_linkargs arg = { + .fromfh = NFS_FH(inode), + .tofh = NFS_FH(dir), + .toname = name->name, + .tolen = name->len + }; + struct nfs3_linkres res = { + .dir_attr = &dir_attr, + .fattr = &fattr + }; + int status; + + dprintk("NFS call link %s\n", name->name); + dir_attr.valid = 0; + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); + nfs_refresh_inode(dir, &dir_attr); + 
nfs_refresh_inode(inode, &fattr); + dprintk("NFS reply link: %d\n", status); + return status; +} + +static int +nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, + struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs_fattr dir_attr; + struct nfs3_symlinkargs arg = { + .fromfh = NFS_FH(dir), + .fromname = name->name, + .fromlen = name->len, + .topath = path->name, + .tolen = path->len, + .sattr = sattr + }; + struct nfs3_diropres res = { + .dir_attr = &dir_attr, + .fh = fhandle, + .fattr = fattr + }; + int status; + + if (path->len > NFS3_MAXPATHLEN) + return -ENAMETOOLONG; + dprintk("NFS call symlink %s -> %s\n", name->name, path->name); + dir_attr.valid = 0; + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); + nfs_refresh_inode(dir, &dir_attr); + dprintk("NFS reply symlink: %d\n", status); + return status; +} + +static int +nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr, dir_attr; + struct nfs3_mkdirargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr + }; + struct nfs3_diropres res = { + .dir_attr = &dir_attr, + .fh = &fhandle, + .fattr = &fattr + }; + int status; + + dprintk("NFS call mkdir %s\n", dentry->d_name.name); + dir_attr.valid = 0; + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); + nfs_refresh_inode(dir, &dir_attr); + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); + dprintk("NFS reply mkdir: %d\n", status); + return status; +} + +static int +nfs3_proc_rmdir(struct inode *dir, struct qstr *name) +{ + struct nfs_fattr dir_attr; + struct nfs3_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + int status; + + dprintk("NFS call rmdir %s\n", name->name); + dir_attr.valid = 0; + status = rpc_call(NFS_CLIENT(dir), 
NFS3PROC_RMDIR, &arg, &dir_attr, 0); + nfs_refresh_inode(dir, &dir_attr); + dprintk("NFS reply rmdir: %d\n", status); + return status; +} + +/* + * The READDIR implementation is somewhat hackish - we pass the user buffer + * to the encode function, which installs it in the receive iovec. + * The decode function itself doesn't perform any decoding, it just makes + * sure the reply is syntactically correct. + * + * Also note that this implementation handles both plain readdir and + * readdirplus. + */ +static int +nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) +{ + struct inode *dir = dentry->d_inode; + struct nfs_fattr dir_attr; + u32 *verf = NFS_COOKIEVERF(dir); + struct nfs3_readdirargs arg = { + .fh = NFS_FH(dir), + .cookie = cookie, + .verf = {verf[0], verf[1]}, + .plus = plus, + .count = count, + .pages = &page + }; + struct nfs3_readdirres res = { + .dir_attr = &dir_attr, + .verf = verf, + .plus = plus + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = cred + }; + int status; + + lock_kernel(); + + if (plus) + msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS]; + + dprintk("NFS call readdir%s %d\n", + plus? 
"plus" : "", (unsigned int) cookie); + + dir_attr.valid = 0; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_refresh_inode(dir, &dir_attr); + dprintk("NFS reply readdir: %d\n", status); + unlock_kernel(); + return status; +} + +static int +nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + dev_t rdev) +{ + struct nfs_fh fh; + struct nfs_fattr fattr, dir_attr; + struct nfs3_mknodargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr, + .rdev = rdev + }; + struct nfs3_diropres res = { + .dir_attr = &dir_attr, + .fh = &fh, + .fattr = &fattr + }; + int status; + + switch (sattr->ia_mode & S_IFMT) { + case S_IFBLK: arg.type = NF3BLK; break; + case S_IFCHR: arg.type = NF3CHR; break; + case S_IFIFO: arg.type = NF3FIFO; break; + case S_IFSOCK: arg.type = NF3SOCK; break; + default: return -EINVAL; + } + + dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, + MAJOR(rdev), MINOR(rdev)); + dir_attr.valid = 0; + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); + nfs_refresh_inode(dir, &dir_attr); + if (status == 0) + status = nfs_instantiate(dentry, &fh, &fattr); + dprintk("NFS reply mknod: %d\n", status); + return status; +} + +static int +nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsstat *stat) +{ + int status; + + dprintk("NFS call fsstat\n"); + stat->fattr->valid = 0; + status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); + dprintk("NFS reply statfs: %d\n", status); + return status; +} + +static int +nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + dprintk("NFS call fsinfo\n"); + info->fattr->valid = 0; + status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + dprintk("NFS reply fsinfo: %d\n", status); + return status; +} + +static int +nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh 
*fhandle, + struct nfs_pathconf *info) +{ + int status; + + dprintk("NFS call pathconf\n"); + info->fattr->valid = 0; + status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); + dprintk("NFS reply pathconf: %d\n", status); + return status; +} + +extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); + +/* + * Completion callback for READ calls set up by nfs3_proc_read_setup(). + * A task that finished with -EJUKEBOX is re-armed and nothing else is + * done; otherwise the inode is refreshed from the reply attributes when + * the call succeeded, and the common readpage completion is run. + */ +static void +nfs3_read_done(struct rpc_task *task) +{ + /* Read tasks carry a struct nfs_read_data (see nfs3_proc_read_setup), + * not a struct nfs_write_data - TODO confirm where tk_calldata is set */ + struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + + if (nfs3_async_handle_jukebox(task)) + return; + /* Call back common NFS readpage processing */ + if (task->tk_status >= 0) + nfs_refresh_inode(data->inode, data->res.fattr); + nfs_readpage_result(task); +} + +static void +nfs3_proc_read_setup(struct nfs_read_data *data) +{ + struct rpc_task *task = &data->task; + struct inode *inode = data->inode; + int flags; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READ], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + + /* N.B. Do we need to test? Never called for swapfile inode */ + flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + + /* Finalize the task. 
*/ + rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags); + rpc_call_setup(task, &msg, 0); +} + +static void +nfs3_write_done(struct rpc_task *task) +{ + struct nfs_write_data *data; + + if (nfs3_async_handle_jukebox(task)) + return; + data = (struct nfs_write_data *)task->tk_calldata; + if (task->tk_status >= 0) + nfs_refresh_inode(data->inode, data->res.fattr); + nfs_writeback_done(task); +} + +static void +nfs3_proc_write_setup(struct nfs_write_data *data, int how) +{ + struct rpc_task *task = &data->task; + struct inode *inode = data->inode; + int stable; + int flags; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + + if (how & FLUSH_STABLE) { + if (!NFS_I(inode)->ncommit) + stable = NFS_FILE_SYNC; + else + stable = NFS_DATA_SYNC; + } else + stable = NFS_UNSTABLE; + data->args.stable = stable; + + /* Set the initial flags for the task. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + + /* Finalize the task. */ + rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags); + rpc_call_setup(task, &msg, 0); +} + +static void +nfs3_commit_done(struct rpc_task *task) +{ + struct nfs_write_data *data; + + if (nfs3_async_handle_jukebox(task)) + return; + data = (struct nfs_write_data *)task->tk_calldata; + if (task->tk_status >= 0) + nfs_refresh_inode(data->inode, data->res.fattr); + nfs_commit_done(task); +} + +static void +nfs3_proc_commit_setup(struct nfs_write_data *data, int how) +{ + struct rpc_task *task = &data->task; + struct inode *inode = data->inode; + int flags; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + + /* Set the initial flags for the task. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + + /* Finalize the task. 
*/ + rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags); + rpc_call_setup(task, &msg, 0); +} + +static int +nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + +struct nfs_rpc_ops nfs_v3_clientops = { + .version = 3, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, + .getroot = nfs3_proc_get_root, + .getattr = nfs3_proc_getattr, + .setattr = nfs3_proc_setattr, + .lookup = nfs3_proc_lookup, + .access = nfs3_proc_access, + .readlink = nfs3_proc_readlink, + .read = nfs3_proc_read, + .write = nfs3_proc_write, + .commit = nfs3_proc_commit, + .create = nfs3_proc_create, + .remove = nfs3_proc_remove, + .unlink_setup = nfs3_proc_unlink_setup, + .unlink_done = nfs3_proc_unlink_done, + .rename = nfs3_proc_rename, + .link = nfs3_proc_link, + .symlink = nfs3_proc_symlink, + .mkdir = nfs3_proc_mkdir, + .rmdir = nfs3_proc_rmdir, + .readdir = nfs3_proc_readdir, + .mknod = nfs3_proc_mknod, + .statfs = nfs3_proc_statfs, + .fsinfo = nfs3_proc_fsinfo, + .pathconf = nfs3_proc_pathconf, + .decode_dirent = nfs3_decode_dirent, + .read_setup = nfs3_proc_read_setup, + .write_setup = nfs3_proc_write_setup, + .commit_setup = nfs3_proc_commit_setup, + .file_open = nfs_open, + .file_release = nfs_release, + .lock = nfs3_proc_lock, +}; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c new file mode 100644 index 00000000000..a3593d47e5a --- /dev/null +++ b/fs/nfs/nfs3xdr.c @@ -0,0 +1,1023 @@ +/* + * linux/fs/nfs/nfs3xdr.c + * + * XDR functions to encode/decode NFSv3 RPC arguments and results. 
+ * + * Copyright (C) 1996, 1997 Olaf Kirch + */ + +#include <linux/param.h> +#include <linux/time.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/proc_fs.h> +#include <linux/kdev_t.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> + +#define NFSDBG_FACILITY NFSDBG_XDR + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +extern int nfs_stat_to_errno(int); + +/* + * Declare the space requirements for NFS arguments and replies as + * number of 32bit-words + */ +#define NFS3_fhandle_sz (1+16) +#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */ +#define NFS3_sattr_sz (15) +#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2)) +#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2)) +#define NFS3_fattr_sz (21) +#define NFS3_wcc_attr_sz (6) +#define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz) +#define NFS3_post_op_attr_sz (1+NFS3_fattr_sz) +#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz) +#define NFS3_fsstat_sz +#define NFS3_fsinfo_sz +#define NFS3_pathconf_sz +#define NFS3_entry_sz (NFS3_filename_sz+3) + +#define NFS3_sattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3) +#define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz) +#define NFS3_accessargs_sz (NFS3_fh_sz+1) +#define NFS3_readlinkargs_sz (NFS3_fh_sz) +#define NFS3_readargs_sz (NFS3_fh_sz+3) +#define NFS3_writeargs_sz (NFS3_fh_sz+5) +#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) +#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) +#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz) +#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) +#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) +#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) +#define NFS3_readdirargs_sz (NFS3_fh_sz+2) +#define 
NFS3_commitargs_sz (NFS3_fh_sz+3) + +#define NFS3_attrstat_sz (1+NFS3_fattr_sz) +#define NFS3_wccstat_sz (1+NFS3_wcc_data_sz) +#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) +#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1) +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3) +#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) +#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) +#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2) +#define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13) +#define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12) +#define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) +#define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) + +/* + * Map file type to S_IFMT bits + */ +static struct { + unsigned int mode; + unsigned int nfs2type; +} nfs_type2fmt[] = { + { 0, NFNON }, + { S_IFREG, NFREG }, + { S_IFDIR, NFDIR }, + { S_IFBLK, NFBLK }, + { S_IFCHR, NFCHR }, + { S_IFLNK, NFLNK }, + { S_IFSOCK, NFSOCK }, + { S_IFIFO, NFFIFO }, + { 0, NFBAD } +}; + +/* + * Common NFS XDR functions as inlines + */ +static inline u32 * +xdr_encode_fhandle(u32 *p, struct nfs_fh *fh) +{ + return xdr_encode_array(p, fh->data, fh->size); +} + +static inline u32 * +xdr_decode_fhandle(u32 *p, struct nfs_fh *fh) +{ + if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) { + memcpy(fh->data, p, fh->size); + return p + XDR_QUADLEN(fh->size); + } + return NULL; +} + +/* + * Encode/decode time. 
+ */ +static inline u32 * +xdr_encode_time3(u32 *p, struct timespec *timep) +{ + *p++ = htonl(timep->tv_sec); + *p++ = htonl(timep->tv_nsec); + return p; +} + +static inline u32 * +xdr_decode_time3(u32 *p, struct timespec *timep) +{ + timep->tv_sec = ntohl(*p++); + timep->tv_nsec = ntohl(*p++); + return p; +} + +static u32 * +xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) +{ + unsigned int type, major, minor; + int fmode; + + type = ntohl(*p++); + if (type >= NF3BAD) + type = NF3BAD; + fmode = nfs_type2fmt[type].mode; + fattr->type = nfs_type2fmt[type].nfs2type; + fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; + fattr->nlink = ntohl(*p++); + fattr->uid = ntohl(*p++); + fattr->gid = ntohl(*p++); + p = xdr_decode_hyper(p, &fattr->size); + p = xdr_decode_hyper(p, &fattr->du.nfs3.used); + + /* Turn remote device info into Linux-specific dev_t */ + major = ntohl(*p++); + minor = ntohl(*p++); + fattr->rdev = MKDEV(major, minor); + if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) + fattr->rdev = 0; + + p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3); + p = xdr_decode_hyper(p, &fattr->fileid); + p = xdr_decode_time3(p, &fattr->atime); + p = xdr_decode_time3(p, &fattr->mtime); + p = xdr_decode_time3(p, &fattr->ctime); + + /* Update the mode bits */ + fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); + fattr->timestamp = jiffies; + return p; +} + +static inline u32 * +xdr_encode_sattr(u32 *p, struct iattr *attr) +{ + if (attr->ia_valid & ATTR_MODE) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_mode); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_UID) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_uid); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_GID) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_gid); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_SIZE) { + *p++ = xdr_one; + p = xdr_encode_hyper(p, (__u64) attr->ia_size); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_ATIME_SET) { + *p++ = xdr_two; + 
p = xdr_encode_time3(p, &attr->ia_atime); + } else if (attr->ia_valid & ATTR_ATIME) { + *p++ = xdr_one; + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_MTIME_SET) { + *p++ = xdr_two; + p = xdr_encode_time3(p, &attr->ia_mtime); + } else if (attr->ia_valid & ATTR_MTIME) { + *p++ = xdr_one; + } else { + *p++ = xdr_zero; + } + return p; +} + +static inline u32 * +xdr_decode_wcc_attr(u32 *p, struct nfs_fattr *fattr) +{ + p = xdr_decode_hyper(p, &fattr->pre_size); + p = xdr_decode_time3(p, &fattr->pre_mtime); + p = xdr_decode_time3(p, &fattr->pre_ctime); + fattr->valid |= NFS_ATTR_WCC; + return p; +} + +static inline u32 * +xdr_decode_post_op_attr(u32 *p, struct nfs_fattr *fattr) +{ + if (*p++) + p = xdr_decode_fattr(p, fattr); + return p; +} + +static inline u32 * +xdr_decode_pre_op_attr(u32 *p, struct nfs_fattr *fattr) +{ + if (*p++) + return xdr_decode_wcc_attr(p, fattr); + return p; +} + + +static inline u32 * +xdr_decode_wcc_data(u32 *p, struct nfs_fattr *fattr) +{ + p = xdr_decode_pre_op_attr(p, fattr); + return xdr_decode_post_op_attr(p, fattr); +} + +/* + * NFS encode functions + */ + +/* + * Encode file handle argument + */ +static int +nfs3_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh) +{ + p = xdr_encode_fhandle(p, fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SETATTR arguments + */ +static int +nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_sattr(p, args->sattr); + *p++ = htonl(args->guard); + if (args->guard) + p = xdr_encode_time3(p, &args->guardtime); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode directory ops argument + */ +static int +nfs3_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs3_diropargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + 
return 0; +} + +/* + * Encode access() argument + */ +static int +nfs3_xdr_accessargs(struct rpc_rqst *req, u32 *p, struct nfs3_accessargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->access); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Arguments to a READ call. Since we read data directly into the page + * cache, we also set up the reply iovec here so that iov[1] points + * exactly to the page we want to fetch. + */ +static int +nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->offset); + *p++ = htonl(count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, count); + return 0; +} + +/* + * Write arguments. Splice the buffer to be written into the iovec. 
+ */ +static int +nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) +{ + struct xdr_buf *sndbuf = &req->rq_snd_buf; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->offset); + *p++ = htonl(count); + *p++ = htonl(args->stable); + *p++ = htonl(count); + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + + /* Copy the page array */ + xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); + return 0; +} + +/* + * Encode CREATE arguments + */ +static int +nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + + *p++ = htonl(args->createmode); + if (args->createmode == NFS3_CREATE_EXCLUSIVE) { + *p++ = args->verifier[0]; + *p++ = args->verifier[1]; + } else + p = xdr_encode_sattr(p, args->sattr); + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode MKDIR arguments + */ +static int +nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SYMLINK arguments + */ +static int +nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_array(p, args->topath, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode MKNOD arguments + */ +static int +nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + *p++ = htonl(args->type); + p = xdr_encode_sattr(p, args->sattr); + if 
(args->type == NF3CHR || args->type == NF3BLK) { + *p++ = htonl(MAJOR(args->rdev)); + *p++ = htonl(MINOR(args->rdev)); + } + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode RENAME arguments + */ +static int +nfs3_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs3_renameargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode LINK arguments + */ +static int +nfs3_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs3_linkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode arguments to readdir call + */ +static int +nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->cookie); + *p++ = args->verf[0]; + *p++ = args->verf[1]; + if (args->plus) { + /* readdirplus: need dircount + buffer size. + * We just make sure we make dircount big enough */ + *p++ = htonl(count >> 3); + } + *p++ = htonl(count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); + return 0; +} + +/* + * Decode the result of a readdir call. + * We just check for syntactical correctness. 
+ */ +static int +nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + struct page **page; + int hdrlen, recvd; + int status, nr; + unsigned int len, pglen; + u32 *entry, *end, *kaddr; + + status = ntohl(*p++); + /* Decode post_op_attrs */ + p = xdr_decode_post_op_attr(p, res->dir_attr); + if (status) + return -nfs_stat_to_errno(status); + /* Decode verifier cookie */ + if (res->verf) { + res->verf[0] = *p++; + res->verf[1] = *p++; + } else { + p += 2; + } + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + printk(KERN_WARNING "NFS: READDIR reply header overflowed:" + "length %d > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); + } + + pglen = rcvbuf->page_len; + recvd = rcvbuf->len - hdrlen; + if (pglen > recvd) + pglen = recvd; + page = rcvbuf->pages; + kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0); + end = (u32 *)((char *)p + pglen); + entry = p; + for (nr = 0; *p++; nr++) { + if (p + 3 > end) + goto short_pkt; + p += 2; /* inode # */ + len = ntohl(*p++); /* string length */ + p += XDR_QUADLEN(len) + 2; /* name + cookie */ + if (len > NFS3_MAXNAMLEN) { + printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n", + len); + goto err_unmap; + } + + if (res->plus) { + /* post_op_attr */ + if (p + 2 > end) + goto short_pkt; + if (*p++) { + p += 21; + if (p + 1 > end) + goto short_pkt; + } + /* post_op_fh3 */ + if (*p++) { + if (p + 1 > end) + goto short_pkt; + len = ntohl(*p++); + if (len > NFS3_FHSIZE) { + printk(KERN_WARNING "NFS: giant filehandle in " + "readdir (len %x)!\n", len); + goto err_unmap; + } + p += XDR_QUADLEN(len); + } + } + + if (p + 2 > end) + goto short_pkt; + entry = p; + } + if (!nr && (entry[0] != 0 || entry[1] == 0)) + goto short_pkt; + 
out: + kunmap_atomic(kaddr, KM_USER0); + return nr; + short_pkt: + entry[0] = entry[1] = 0; + /* truncate listing ? */ + if (!nr) { + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + goto out; +err_unmap: + nr = -errno_NFSERR_IO; + goto out; +} + +u32 * +nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) +{ + struct nfs_entry old = *entry; + + if (!*p++) { + if (!*p) + return ERR_PTR(-EAGAIN); + entry->eof = 1; + return ERR_PTR(-EBADCOOKIE); + } + + p = xdr_decode_hyper(p, &entry->ino); + entry->len = ntohl(*p++); + entry->name = (const char *) p; + p += XDR_QUADLEN(entry->len); + entry->prev_cookie = entry->cookie; + p = xdr_decode_hyper(p, &entry->cookie); + + if (plus) { + entry->fattr->valid = 0; + p = xdr_decode_post_op_attr(p, entry->fattr); + /* In fact, a post_op_fh3: */ + if (*p++) { + p = xdr_decode_fhandle(p, entry->fh); + /* Ugh -- server reply was truncated */ + if (p == NULL) { + dprintk("NFS: FH truncated\n"); + *entry = old; + return ERR_PTR(-EAGAIN); + } + } else + memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); + } + + entry->eof = !p[0] && p[1]; + return p; +} + +/* + * Encode COMMIT arguments + */ +static int +nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->offset); + *p++ = htonl(args->count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * NFS XDR decode functions + */ + +/* + * Decode attrstat reply. 
+ */ +static int +nfs3_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status; + + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + xdr_decode_fattr(p, fattr); + return 0; +} + +/* + * Decode status+wcc_data reply + * SATTR, REMOVE, RMDIR + */ +static int +nfs3_xdr_wccstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status; + + if ((status = ntohl(*p++))) + status = -nfs_stat_to_errno(status); + xdr_decode_wcc_data(p, fattr); + return status; +} + +/* + * Decode LOOKUP reply + */ +static int +nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res) +{ + int status; + + if ((status = ntohl(*p++))) { + status = -nfs_stat_to_errno(status); + } else { + if (!(p = xdr_decode_fhandle(p, res->fh))) + return -errno_NFSERR_IO; + p = xdr_decode_post_op_attr(p, res->fattr); + } + xdr_decode_post_op_attr(p, res->dir_attr); + return status; +} + +/* + * Decode ACCESS reply + */ +static int +nfs3_xdr_accessres(struct rpc_rqst *req, u32 *p, struct nfs3_accessres *res) +{ + int status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status) + return -nfs_stat_to_errno(status); + res->access = ntohl(*p++); + return 0; +} + +static int +nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); + return 0; +} + +/* + * Decode READLINK reply + */ +static int +nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + int hdrlen, len, recvd; + char *kaddr; + int status; + + status = ntohl(*p++); + p = 
xdr_decode_post_op_attr(p, fattr); + + if (status != 0) + return -nfs_stat_to_errno(status); + + /* Convert length of symlink */ + len = ntohl(*p++); + if (len >= rcvbuf->page_len || len <= 0) { + dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); + return -ENAMETOOLONG; + } + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + printk(KERN_WARNING "NFS: READLINK reply header overflowed:" + "length %d > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); + } + recvd = req->rq_rcv_buf.len - hdrlen; + if (recvd < len) { + printk(KERN_WARNING "NFS: server cheating in readlink reply: " + "count %u > recvd %u\n", len, recvd); + return -EIO; + } + + /* NULL terminate the string we got */ + kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0); + kaddr[len+rcvbuf->page_base] = '\0'; + kunmap_atomic(kaddr, KM_USER0); + return 0; +} + +/* + * Decode READ reply + */ +static int +nfs3_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) +{ + struct kvec *iov = req->rq_rcv_buf.head; + int status, count, ocount, recvd, hdrlen; + + status = ntohl(*p++); + p = xdr_decode_post_op_attr(p, res->fattr); + + if (status != 0) + return -nfs_stat_to_errno(status); + + /* Decode reply could and EOF flag. NFSv3 is somewhat redundant + * in that it puts the count both in the res struct and in the + * opaque data count. 
*/ + count = ntohl(*p++); + res->eof = ntohl(*p++); + ocount = ntohl(*p++); + + if (ocount != count) { + printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n"); + return -errno_NFSERR_IO; + } + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + printk(KERN_WARNING "NFS: READ reply header overflowed:" + "length %d > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READ header is short. iovec will be shifted.\n"); + xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); + } + + recvd = req->rq_rcv_buf.len - hdrlen; + if (count > recvd) { + printk(KERN_WARNING "NFS: server cheating in read reply: " + "count %d > recvd %d\n", count, recvd); + count = recvd; + res->eof = 0; + } + + if (count < res->count) + res->count = count; + + return count; +} + +/* + * Decode WRITE response + */ +static int +nfs3_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) +{ + int status; + + status = ntohl(*p++); + p = xdr_decode_wcc_data(p, res->fattr); + + if (status != 0) + return -nfs_stat_to_errno(status); + + res->count = ntohl(*p++); + res->verf->committed = (enum nfs3_stable_how)ntohl(*p++); + res->verf->verifier[0] = *p++; + res->verf->verifier[1] = *p++; + + return res->count; +} + +/* + * Decode a CREATE response + */ +static int +nfs3_xdr_createres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res) +{ + int status; + + status = ntohl(*p++); + if (status == 0) { + if (*p++) { + if (!(p = xdr_decode_fhandle(p, res->fh))) + return -errno_NFSERR_IO; + p = xdr_decode_post_op_attr(p, res->fattr); + } else { + memset(res->fh, 0, sizeof(*res->fh)); + /* Do decode post_op_attr but set it to NULL */ + p = xdr_decode_post_op_attr(p, res->fattr); + res->fattr->valid = 0; + } + } else { + status = -nfs_stat_to_errno(status); + } + p = xdr_decode_wcc_data(p, res->dir_attr); + return status; +} + +/* + * Decode RENAME reply + */ +static int 
+nfs3_xdr_renameres(struct rpc_rqst *req, u32 *p, struct nfs3_renameres *res) +{ + int status; + + if ((status = ntohl(*p++)) != 0) + status = -nfs_stat_to_errno(status); + p = xdr_decode_wcc_data(p, res->fromattr); + p = xdr_decode_wcc_data(p, res->toattr); + return status; +} + +/* + * Decode LINK reply + */ +static int +nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res) +{ + int status; + + if ((status = ntohl(*p++)) != 0) + status = -nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + p = xdr_decode_wcc_data(p, res->dir_attr); + return status; +} + +/* + * Decode FSSTAT reply + */ +static int +nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res) +{ + int status; + + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status != 0) + return -nfs_stat_to_errno(status); + + p = xdr_decode_hyper(p, &res->tbytes); + p = xdr_decode_hyper(p, &res->fbytes); + p = xdr_decode_hyper(p, &res->abytes); + p = xdr_decode_hyper(p, &res->tfiles); + p = xdr_decode_hyper(p, &res->ffiles); + p = xdr_decode_hyper(p, &res->afiles); + + /* ignore invarsec */ + return 0; +} + +/* + * Decode FSINFO reply + */ +static int +nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +{ + int status; + + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status != 0) + return -nfs_stat_to_errno(status); + + res->rtmax = ntohl(*p++); + res->rtpref = ntohl(*p++); + res->rtmult = ntohl(*p++); + res->wtmax = ntohl(*p++); + res->wtpref = ntohl(*p++); + res->wtmult = ntohl(*p++); + res->dtpref = ntohl(*p++); + p = xdr_decode_hyper(p, &res->maxfilesize); + + /* ignore time_delta and properties */ + res->lease_time = 0; + return 0; +} + +/* + * Decode PATHCONF reply + */ +static int +nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res) +{ + int status; + + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status != 0) + 
return -nfs_stat_to_errno(status); + res->max_link = ntohl(*p++); + res->max_namelen = ntohl(*p++); + + /* ignore remaining fields */ + return 0; +} + +/* + * Decode COMMIT reply + */ +static int +nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) +{ + int status; + + status = ntohl(*p++); + p = xdr_decode_wcc_data(p, res->fattr); + if (status != 0) + return -nfs_stat_to_errno(status); + + res->verf->verifier[0] = *p++; + res->verf->verifier[1] = *p++; + return 0; +} + +#ifndef MAX +# define MAX(a, b) (((a) > (b))? (a) : (b)) +#endif + +#define PROC(proc, argtype, restype, timer) \ +[NFS3PROC_##proc] = { \ + .p_proc = NFS3PROC_##proc, \ + .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ + .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ + .p_timer = timer \ + } + +struct rpc_procinfo nfs3_procedures[] = { + PROC(GETATTR, fhandle, attrstat, 1), + PROC(SETATTR, sattrargs, wccstat, 0), + PROC(LOOKUP, diropargs, lookupres, 2), + PROC(ACCESS, accessargs, accessres, 1), + PROC(READLINK, readlinkargs, readlinkres, 3), + PROC(READ, readargs, readres, 3), + PROC(WRITE, writeargs, writeres, 4), + PROC(CREATE, createargs, createres, 0), + PROC(MKDIR, mkdirargs, createres, 0), + PROC(SYMLINK, symlinkargs, createres, 0), + PROC(MKNOD, mknodargs, createres, 0), + PROC(REMOVE, diropargs, wccstat, 0), + PROC(RMDIR, diropargs, wccstat, 0), + PROC(RENAME, renameargs, renameres, 0), + PROC(LINK, linkargs, linkres, 0), + PROC(READDIR, readdirargs, readdirres, 3), + PROC(READDIRPLUS, readdirargs, readdirres, 3), + PROC(FSSTAT, fhandle, fsstatres, 0), + PROC(FSINFO, fhandle, fsinfores, 0), + PROC(PATHCONF, fhandle, pathconfres, 0), + PROC(COMMIT, commitargs, commitres, 5), +}; + +struct rpc_version nfs_version3 = { + .number = 3, + .nrprocs = sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]), + .procs = nfs3_procedures +}; + diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c new file mode 100644 
index 00000000000..1d5cb3e80c3 --- /dev/null +++ b/fs/nfs/nfs4proc.c @@ -0,0 +1,2786 @@ +/* + * fs/nfs/nfs4proc.c + * + * Client-side procedure declarations for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/mm.h> +#include <linux/utsname.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/smp_lock.h> +#include <linux/namei.h> + +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_PROC + +#define NFS4_POLL_RETRY_MIN (1*HZ) +#define NFS4_POLL_RETRY_MAX (15*HZ) + +static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); +static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); +extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +extern nfs4_stateid zero_stateid; + +/* Prevent leaks of NFSv4 errors into userland */ +int nfs4_map_errors(int err) +{ + if (err < -1000) { + dprintk("%s could not handle NFSv4 error %d\n", + __FUNCTION__, -err); + return -EIO; + } + return err; +} + +/* + * This is our standard bitmap for GETATTR requests. 
+ */ +const u32 nfs4_fattr_bitmap[2] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY +}; + +const u32 nfs4_statfs_bitmap[2] = { + FATTR4_WORD0_FILES_AVAIL + | FATTR4_WORD0_FILES_FREE + | FATTR4_WORD0_FILES_TOTAL, + FATTR4_WORD1_SPACE_AVAIL + | FATTR4_WORD1_SPACE_FREE + | FATTR4_WORD1_SPACE_TOTAL +}; + +u32 nfs4_pathconf_bitmap[2] = { + FATTR4_WORD0_MAXLINK + | FATTR4_WORD0_MAXNAME, + 0 +}; + +const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE + | FATTR4_WORD0_MAXREAD + | FATTR4_WORD0_MAXWRITE + | FATTR4_WORD0_LEASE_TIME, + 0 +}; + +static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, + struct nfs4_readdir_arg *readdir) +{ + u32 *start, *p; + + BUG_ON(readdir->count < 80); + if (cookie > 2) { + readdir->cookie = (cookie > 2) ? cookie : 0; + memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); + return; + } + + readdir->cookie = 0; + memset(&readdir->verifier, 0, sizeof(readdir->verifier)); + if (cookie == 2) + return; + + /* + * NFSv4 servers do not return entries for '.' and '..' + * Therefore, we fake these entries here. We let '.' + * have cookie 0 and '..' have cookie 1. Note that + * when talking to the server, we always send cookie 0 + * instead of 1 or 2. 
+ */ + start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0); + + if (cookie == 0) { + *p++ = xdr_one; /* next */ + *p++ = xdr_zero; /* cookie, first word */ + *p++ = xdr_one; /* cookie, second word */ + *p++ = xdr_one; /* entry len */ + memcpy(p, ".\0\0\0", 4); /* entry */ + p++; + *p++ = xdr_one; /* bitmap length */ + *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ + *p++ = htonl(8); /* attribute buffer length */ + p = xdr_encode_hyper(p, dentry->d_inode->i_ino); + } + + *p++ = xdr_one; /* next */ + *p++ = xdr_zero; /* cookie, first word */ + *p++ = xdr_two; /* cookie, second word */ + *p++ = xdr_two; /* entry len */ + memcpy(p, "..\0\0", 4); /* entry */ + p++; + *p++ = xdr_one; /* bitmap length */ + *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ + *p++ = htonl(8); /* attribute buffer length */ + p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino); + + readdir->pgbase = (char *)p - (char *)start; + readdir->count -= readdir->pgbase; + kunmap_atomic(start, KM_USER0); +} + +static void +renew_lease(struct nfs_server *server, unsigned long timestamp) +{ + struct nfs4_client *clp = server->nfs4_state; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); +} + +static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinfo) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (cinfo->before == nfsi->change_attr && cinfo->atomic) + nfsi->change_attr = cinfo->after; +} + +static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +{ + struct inode *inode = state->inode; + + open_flags &= (FMODE_READ|FMODE_WRITE); + /* Protect against nfs4_find_state() */ + spin_lock(&inode->i_lock); + state->state |= open_flags; + /* NB! List reordering - see the reclaim code for why. 
*/ + if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++) + list_move(&state->open_states, &state->owner->so_states); + if (open_flags & FMODE_READ) + state->nreaders++; + memcpy(&state->stateid, stateid, sizeof(state->stateid)); + spin_unlock(&inode->i_lock); +} + +/* + * OPEN_RECLAIM: + * reclaim state on the server after a reboot. + * Assumes caller is holding the sp->so_sem + */ +static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct nfs_openargs o_arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .id = sp->so_id, + .open_flags = state->state, + .clientid = server->nfs4_state->cl_clientid, + .claim = NFS4_OPEN_CLAIM_PREVIOUS, + .bitmask = server->attr_bitmask, + }; + struct nfs_openres o_res = { + .server = server, /* Grrr */ + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], + .rpc_argp = &o_arg, + .rpc_resp = &o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + if (delegation != NULL) { + if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { + memcpy(&state->stateid, &delegation->stateid, + sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + return 0; + } + o_arg.u.delegation_type = delegation->type; + } + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status == 0) { + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + if (o_res.delegation_type != 0) { + nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + /* Did the server issue an immediate delegation recall? 
*/ + if (o_res.do_recall) + nfs_async_inode_return_delegation(inode, &o_res.stateid); + } + } + clear_bit(NFS_DELEGATED_STATE, &state->flags); + /* Ensure we update the inode attributes */ + NFS_CACHEINV(inode); + return status; +} + +static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_open_reclaim(sp, state); + switch (err) { + case 0: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + return err; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_state_owner *sp = state->owner; + struct inode *inode = dentry->d_inode; + struct nfs_server *server = NFS_SERVER(inode); + struct dentry *parent = dget_parent(dentry); + struct nfs_openargs arg = { + .fh = NFS_FH(parent->d_inode), + .clientid = server->nfs4_state->cl_clientid, + .name = &dentry->d_name, + .id = sp->so_id, + .server = server, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_DELEGATE_CUR, + }; + struct nfs_openres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status = 0; + + down(&sp->so_sema); + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; + if (state->state == 0) + goto out; + arg.seqid = sp->so_seqid; + arg.open_flags = state->state; + memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status >= 0) { + memcpy(state->stateid.data, res.stateid.data, + sizeof(state->stateid.data)); + clear_bit(NFS_DELEGATED_STATE, 
&state->flags); + } +out: + up(&sp->so_sema); + dput(parent); + return status; +} + +int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + int err; + do { + err = _nfs4_open_delegation_recall(dentry, state); + switch (err) { + case 0: + return err; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + /* Don't recall a delegation if it was lost */ + nfs4_schedule_state_recovery(server->nfs4_state); + return err; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +{ + struct nfs_open_confirmargs arg = { + .fh = fh, + .seqid = sp->so_seqid, + .stateid = *stateid, + }; + struct nfs_open_confirmres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status >= 0) + memcpy(stateid, &res.stateid, sizeof(*stateid)); + return status; +} + +static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, struct nfs_openargs *o_arg, struct nfs_openres *o_res) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], + .rpc_argp = o_arg, + .rpc_resp = o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + /* Update sequence id. The caller must serialize! 
*/ + o_arg->seqid = sp->so_seqid; + o_arg->id = sp->so_id; + o_arg->clientid = sp->so_client->cl_clientid; + + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status != 0) + goto out; + update_changeattr(dir, &o_res->cinfo); + if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(server->client, &o_res->fh, + sp, &o_res->stateid); + if (status != 0) + goto out; + } + if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) + status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); +out: + return status; +} + +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) +{ + struct nfs_access_entry cache; + int mask = 0; + int status; + + if (openflags & FMODE_READ) + mask |= MAY_READ; + if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = _nfs4_proc_access(inode, &cache); + if (status != 0) + return status; + nfs_access_add_cache(inode, &cache); +out: + if ((cache.mask & mask) == mask) + return 0; + return -EACCES; +} + +/* + * OPEN_EXPIRED: + * reclaim state on the server after a network partition. 
+ * Assumes caller holds the appropriate lock + */ +static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +{ + struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct nfs_fattr f_attr = { + .valid = 0, + }; + struct nfs_openargs o_arg = { + .fh = NFS_FH(dir), + .open_flags = state->state, + .name = &dentry->d_name, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, + }; + struct nfs_openres o_res = { + .f_attr = &f_attr, + .server = server, + }; + int status = 0; + + if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { + status = _nfs4_do_access(inode, sp->so_cred, state->state); + if (status < 0) + goto out; + memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + goto out; + } + status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); + if (status != 0) + goto out_nodeleg; + /* Check if files differ */ + if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT)) + goto out_stale; + /* Has the file handle changed? 
*/ + if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) { + /* Verify if the change attributes are the same */ + if (f_attr.change_attr != NFS_I(inode)->change_attr) + goto out_stale; + if (nfs_size_to_loff_t(f_attr.size) != inode->i_size) + goto out_stale; + /* Lets just pretend that this is the same file */ + nfs_copy_fh(NFS_FH(inode), &o_res.fh); + NFS_I(inode)->fileid = f_attr.fileid; + } + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + if (o_res.delegation_type != 0) { + if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) + nfs_inode_set_delegation(inode, sp->so_cred, &o_res); + else + nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + } +out_nodeleg: + clear_bit(NFS_DELEGATED_STATE, &state->flags); +out: + dput(parent); + return status; +out_stale: + status = -ESTALE; + /* Invalidate the state owner so we don't ever use it again */ + nfs4_drop_state_owner(sp); + d_drop(dentry); + /* Should we be trying to close that stateid? */ + goto out_nodeleg; +} + +static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_open_context *ctx; + int status; + + spin_lock(&state->inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + get_nfs_open_context(ctx); + spin_unlock(&state->inode->i_lock); + status = _nfs4_open_expired(sp, state, ctx->dentry); + put_nfs_open_context(ctx); + return status; + } + spin_unlock(&state->inode->i_lock); + return -ENOENT; +} + +/* + * Returns an nfs4_state + an extra reference to the inode + */ +static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) +{ + struct nfs_delegation *delegation; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_state_owner *sp = NULL; + struct nfs4_state *state = NULL; + int 
open_flags = flags & (FMODE_READ|FMODE_WRITE); + int err; + + /* Protect against reboot recovery - NOTE ORDER! */ + down_read(&clp->cl_sem); + /* Protect against delegation recall */ + down_read(&nfsi->rwsem); + delegation = NFS_I(inode)->delegation; + err = -ENOENT; + if (delegation == NULL || (delegation->type & open_flags) != open_flags) + goto out_err; + err = -ENOMEM; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); + goto out_err; + } + down(&sp->so_sema); + state = nfs4_get_open_state(inode, sp); + if (state == NULL) + goto out_err; + + err = -ENOENT; + if ((state->state & open_flags) == open_flags) { + spin_lock(&inode->i_lock); + if (open_flags & FMODE_READ) + state->nreaders++; + if (open_flags & FMODE_WRITE) + state->nwriters++; + spin_unlock(&inode->i_lock); + goto out_ok; + } else if (state->state != 0) + goto out_err; + + lock_kernel(); + err = _nfs4_do_access(inode, cred, open_flags); + unlock_kernel(); + if (err != 0) + goto out_err; + set_bit(NFS_DELEGATED_STATE, &state->flags); + update_open_stateid(state, &delegation->stateid, open_flags); +out_ok: + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); + igrab(inode); + *res = state; + return 0; +out_err: + if (sp != NULL) { + if (state != NULL) + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + } + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); + return err; +} + +static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) +{ + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int err; + + do { + err = _nfs4_open_delegated(inode, flags, cred, &res); + if (err == 0) + break; + res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode), + err, &exception)); + } while (exception.retry); + return res; +} + +/* + * Returns an nfs4_state + an referenced inode + */ +static int _nfs4_do_open(struct inode *dir, 
struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +{ + struct nfs4_state_owner *sp; + struct nfs4_state *state = NULL; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_client *clp = server->nfs4_state; + struct inode *inode = NULL; + int status; + struct nfs_fattr f_attr = { + .valid = 0, + }; + struct nfs_openargs o_arg = { + .fh = NFS_FH(dir), + .open_flags = flags, + .name = &dentry->d_name, + .server = server, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, + }; + struct nfs_openres o_res = { + .f_attr = &f_attr, + .server = server, + }; + + /* Protect against reboot recovery conflicts */ + down_read(&clp->cl_sem); + status = -ENOMEM; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); + goto out_err; + } + if (flags & O_EXCL) { + u32 *p = (u32 *) o_arg.u.verifier.data; + p[0] = jiffies; + p[1] = current->pid; + } else + o_arg.u.attrs = sattr; + /* Serialization for the sequence id */ + down(&sp->so_sema); + + status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); + if (status != 0) + goto out_err; + + status = -ENOMEM; + inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr); + if (!inode) + goto out_err; + state = nfs4_get_open_state(inode, sp); + if (!state) + goto out_err; + update_open_stateid(state, &o_res.stateid, flags); + if (o_res.delegation_type != 0) + nfs_inode_set_delegation(inode, cred, &o_res); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&clp->cl_sem); + *res = state; + return 0; +out_err: + if (sp != NULL) { + if (state != NULL) + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + } + /* Note: clp->cl_sem must be released before nfs4_put_open_state()! 
*/ + up_read(&clp->cl_sem); + if (inode != NULL) + iput(inode); + *res = NULL; + return status; +} + + +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) +{ + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int status; + + do { + status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); + if (status == 0) + break; + /* NOTE: BAD_SEQID means the server and client disagree about the + * book-keeping w.r.t. state-changing operations + * (OPEN/CLOSE/LOCK/LOCKU...) + * It is actually a sign of a bug on the client or on the server. + * + * If we receive a BAD_SEQID error in the particular case of + * doing an OPEN, we assume that nfs4_increment_seqid() will + * have unhashed the old state_owner for us, and that we can + * therefore safely retry using a new one. We should still warn + * the user though... + */ + if (status == -NFS4ERR_BAD_SEQID) { + printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + exception.retry = 1; + continue; + } + res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + status, &exception)); + } while (exception.retry); + return res; +} + +static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, + struct nfs_fh *fhandle, struct iattr *sattr, + struct nfs4_state *state) +{ + struct nfs_setattrargs arg = { + .fh = fhandle, + .iap = sattr, + .server = server, + .bitmask = server->attr_bitmask, + }; + struct nfs_setattrres res = { + .fattr = fattr, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + + fattr->valid = 0; + + if (state != NULL) + msg.rpc_cred = state->owner->so_cred; + if (sattr->ia_valid & ATTR_SIZE) + nfs4_copy_stateid(&arg.stateid, state, NULL); + else + memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); + + return rpc_call_sync(server->client, &msg, 0); +} + +static int 
nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, + struct nfs_fh *fhandle, struct iattr *sattr, + struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_do_setattr(server, fattr, fhandle, sattr, + state), + &exception); + } while (exception.retry); + return err; +} + +struct nfs4_closedata { + struct inode *inode; + struct nfs4_state *state; + struct nfs_closeargs arg; + struct nfs_closeres res; +}; + +static void nfs4_close_done(struct rpc_task *task) +{ + struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_state *state = calldata->state; + struct nfs4_state_owner *sp = state->owner; + struct nfs_server *server = NFS_SERVER(calldata->inode); + + /* hmm. we are done with the inode, and in the process of freeing + * the state_owner. we keep this around to process errors + */ + nfs4_increment_seqid(task->tk_status, sp); + switch (task->tk_status) { + case 0: + memcpy(&state->stateid, &calldata->res.stateid, + sizeof(state->stateid)); + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + state->state = calldata->arg.open_flags; + nfs4_schedule_state_recovery(server->nfs4_state); + break; + default: + if (nfs4_async_handle_error(task, server) == -EAGAIN) { + rpc_restart_call(task); + return; + } + } + state->state = calldata->arg.open_flags; + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&server->nfs4_state->cl_sem); + kfree(calldata); +} + +static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], + .rpc_argp = &calldata->arg, + .rpc_resp = &calldata->res, + .rpc_cred = calldata->state->owner->so_cred, + }; + if (calldata->arg.open_flags != 0) + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; + return rpc_call_async(clnt, &msg, 0, nfs4_close_done, 
calldata); +} + +/* + * It is possible for data to be read/written from a mem-mapped file + * after the sys_close call (which hits the vfs layer as a flush). + * This means that we can't safely call nfsv4 close on a file until + * the inode is cleared. This in turn means that we are not good + * NFSv4 citizens - we do not indicate to the server to update the file's + * share state even when we are done with one of the three share + * stateid's in the inode. + * + * NOTE: Caller must be holding the sp->so_owner semaphore! + */ +int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) +{ + struct nfs4_closedata *calldata; + int status; + + /* Tell caller we're done */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + state->state = mode; + return 0; + } + calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + return -ENOMEM; + calldata->inode = inode; + calldata->state = state; + calldata->arg.fh = NFS_FH(inode); + /* Serialization for the sequence id */ + calldata->arg.seqid = state->owner->so_seqid; + calldata->arg.open_flags = mode; + memcpy(&calldata->arg.stateid, &state->stateid, + sizeof(calldata->arg.stateid)); + status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); + /* + * Return -EINPROGRESS on success in order to indicate to the + * caller that an asynchronous RPC call has been launched, and + * that it will release the semaphores on completion. + */ + return (status == 0) ? 
-EINPROGRESS : status; +} + +struct inode * +nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; + BUG_ON(nd->intent.open.flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + if (IS_ERR(cred)) + return (struct inode *)cred; + state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; + return state->inode; +} + +int +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +{ + struct rpc_cred *cred; + struct nfs4_state *state; + struct inode *inode; + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + state = nfs4_open_delegated(dentry->d_inode, openflags, cred); + if (IS_ERR(state)) + state = nfs4_do_open(dir, dentry, openflags, NULL, cred); + put_rpccred(cred); + if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) + return 1; + if (IS_ERR(state)) + return 0; + inode = state->inode; + if (inode == dentry->d_inode) { + iput(inode); + return 1; + } + d_drop(dentry); + nfs4_close_state(state, openflags); + iput(inode); + return 0; +} + + +static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +{ + struct nfs4_server_caps_res res = {}; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS], + .rpc_argp = fhandle, + .rpc_resp = &res, + }; + int status; + + status = rpc_call_sync(server->client, &msg, 0); + if (status == 0) { + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) + server->caps |= NFS_CAP_ACLS; + if 
(res.has_links != 0) + server->caps |= NFS_CAP_HARDLINKS; + if (res.has_symlinks != 0) + server->caps |= NFS_CAP_SYMLINKS; + server->acl_bitmask = res.acl_bitmask; + } + return status; +} + +static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs_fattr * fattr = info->fattr; + struct nfs4_lookup_root_arg args = { + .bitmask = nfs4_fattr_bitmap, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP_ROOT], + .rpc_argp = &args, + .rpc_resp = &res, + }; + fattr->valid = 0; + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_lookup_root(server, fhandle, info), + &exception); + } while (exception.retry); + return err; +} + +static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs_fattr * fattr = info->fattr; + unsigned char * p; + struct qstr q; + struct nfs4_lookup_arg args = { + .dir_fh = fhandle, + .name = &q, + .bitmask = nfs4_fattr_bitmap, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + /* + * Now we do a separate LOOKUP for each component of the mount path. 
+ * The LOOKUPs are done separately so that we can conveniently + * catch an ERR_WRONGSEC if it occurs along the way... + */ + status = nfs4_lookup_root(server, fhandle, info); + if (status) + goto out; + + p = server->mnt_path; + for (;;) { + struct nfs4_exception exception = { }; + + while (*p == '/') + p++; + if (!*p) + break; + q.name = p; + while (*p && (*p != '/')) + p++; + q.len = p - q.name; + + do { + fattr->valid = 0; + status = nfs4_handle_exception(server, + rpc_call_sync(server->client, &msg, 0), + &exception); + } while (exception.retry); + if (status == 0) + continue; + if (status == -ENOENT) { + printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path); + printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n"); + } + break; + } + if (status == 0) + status = nfs4_server_capabilities(server, fhandle); + if (status == 0) + status = nfs4_do_fsinfo(server, fhandle, info); +out: + return status; +} + +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_getattr_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct nfs4_getattr_res res = { + .fattr = fattr, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + fattr->valid = 0; + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_getattr(server, fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +/* + * The file is not closed if it is opened due to the a request to change + * the size of the file. The open call will not be needed once the + * VFS layer lookup-intents are implemented. + * + * Close is called when the inode is destroyed. 
+ * If we haven't opened the file for O_WRONLY, we + * need to in the size_change case to obtain a stateid. + * + * Got race? + * Because OPEN is always done by name in nfsv4, it is + * possible that we opened a different file by the same + * name. We can recognize this race condition, but we + * can't do anything about it besides returning an error. + * + * This will be fixed with VFS changes (lookup-intent). + */ +static int +nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) +{ + struct inode * inode = dentry->d_inode; + int size_change = sattr->ia_valid & ATTR_SIZE; + struct nfs4_state *state = NULL; + int need_iput = 0; + int status; + + fattr->valid = 0; + + if (size_change) { + struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + state = nfs4_find_state(inode, cred, FMODE_WRITE); + if (state == NULL) { + state = nfs4_open_delegated(dentry->d_inode, + FMODE_WRITE, cred); + if (IS_ERR(state)) + state = nfs4_do_open(dentry->d_parent->d_inode, + dentry, FMODE_WRITE, + NULL, cred); + need_iput = 1; + } + put_rpccred(cred); + if (IS_ERR(state)) + return PTR_ERR(state); + + if (state->inode != inode) { + printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); + status = -EIO; + goto out; + } + } + status = nfs4_do_setattr(NFS_SERVER(inode), fattr, + NFS_FH(inode), sattr, state); +out: + if (state) { + inode = state->inode; + nfs4_close_state(state, FMODE_WRITE); + if (need_iput) + iput(inode); + } + return status; +} + +static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + int status; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_lookup_arg args = { + .bitmask = server->attr_bitmask, + .dir_fh = NFS_FH(dir), + .name = name, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct 
rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + fattr->valid = 0; + + dprintk("NFS call lookup %s\n", name->name); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + dprintk("NFS reply lookup: %d\n", status); + return status; +} + +static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_lookup(dir, name, fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +{ + struct nfs4_accessargs args = { + .fh = NFS_FH(inode), + }; + struct nfs4_accessres res = { 0 }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = entry->cred, + }; + int mode = entry->mask; + int status; + + /* + * Determine which access bits we want to ask for... 
+ */ + if (mode & MAY_READ) + args.access |= NFS4_ACCESS_READ; + if (S_ISDIR(inode->i_mode)) { + if (mode & MAY_WRITE) + args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE; + if (mode & MAY_EXEC) + args.access |= NFS4_ACCESS_LOOKUP; + } else { + if (mode & MAY_WRITE) + args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND; + if (mode & MAY_EXEC) + args.access |= NFS4_ACCESS_EXECUTE; + } + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (!status) { + entry->mask = 0; + if (res.access & NFS4_ACCESS_READ) + entry->mask |= MAY_READ; + if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; + } + return status; +} + +static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_access(inode, entry), + &exception); + } while (exception.retry); + return err; +} + +/* + * TODO: For the time being, we don't try to get any attributes + * along with any of the zero-copy operations READ, READDIR, + * READLINK, WRITE. + * + * In the case of the first three, we want to put the GETATTR + * after the read-type operation -- this is because it is hard + * to predict the length of a GETATTR response in v4, and thus + * align the READ data correctly. This means that the GETATTR + * may end up partially falling into the page cache, and we should + * shift it into the 'tail' of the xdr_buf before processing. + * To do this efficiently, we need to know the total length + * of data received, which doesn't seem to be available outside + * of the RPC layer. + * + * In the case of WRITE, we also want to put the GETATTR after + * the operation -- in this case because we want to make sure + * we get the post-operation mtime and size. 
This means that + * we can't use xdr_encode_pages() as written: we need a variant + * of it which would leave room in the 'tail' iovec. + * + * Both of these changes to the XDR layer would in fact be quite + * minor, but I decided to leave them for a subsequent patch. + */ +static int _nfs4_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) +{ + struct nfs4_readlink args = { + .fh = NFS_FH(inode), + .pgbase = pgbase, + .pglen = pglen, + .pages = &page, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK], + .rpc_argp = &args, + .rpc_resp = NULL, + }; + + return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); +} + +static int nfs4_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_readlink(inode, page, pgbase, pglen), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_read(struct nfs_read_data *rdata) +{ + int flags = rdata->flags; + struct inode *inode = rdata->inode; + struct nfs_fattr *fattr = rdata->res.fattr; + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], + .rpc_argp = &rdata->args, + .rpc_resp = &rdata->res, + .rpc_cred = rdata->cred, + }; + unsigned long timestamp = jiffies; + int status; + + dprintk("NFS call read %d @ %Ld\n", rdata->args.count, + (long long) rdata->args.offset); + + fattr->valid = 0; + status = rpc_call_sync(server->client, &msg, flags); + if (!status) + renew_lease(server, timestamp); + dprintk("NFS reply read: %d\n", status); + return status; +} + +static int nfs4_proc_read(struct nfs_read_data *rdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(rdata->inode), + _nfs4_proc_read(rdata), + &exception); + } while 
(exception.retry); + return err; +} + +static int _nfs4_proc_write(struct nfs_write_data *wdata) +{ + int rpcflags = wdata->flags; + struct inode *inode = wdata->inode; + struct nfs_fattr *fattr = wdata->res.fattr; + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], + .rpc_argp = &wdata->args, + .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, + }; + int status; + + dprintk("NFS call write %d @ %Ld\n", wdata->args.count, + (long long) wdata->args.offset); + + fattr->valid = 0; + status = rpc_call_sync(server->client, &msg, rpcflags); + dprintk("NFS reply write: %d\n", status); + return status; +} + +static int nfs4_proc_write(struct nfs_write_data *wdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(wdata->inode), + _nfs4_proc_write(wdata), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_commit(struct nfs_write_data *cdata) +{ + struct inode *inode = cdata->inode; + struct nfs_fattr *fattr = cdata->res.fattr; + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], + .rpc_argp = &cdata->args, + .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, + }; + int status; + + dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, + (long long) cdata->args.offset); + + fattr->valid = 0; + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply commit: %d\n", status); + return status; +} + +static int nfs4_proc_commit(struct nfs_write_data *cdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(cdata->inode), + _nfs4_proc_commit(cdata), + &exception); + } while (exception.retry); + return err; +} + +/* + * Got race? + * We will need to arrange for the VFS layer to provide an atomic open. 
+ * Until then, this create/open method is prone to inefficiency and race + * conditions due to the lookup, create, and open VFS calls from sys_open() + * placed on the wire. + * + * Given the above sorry state of affairs, I'm simply sending an OPEN. + * The file will be opened again in the subsequent VFS open call + * (nfs4_proc_file_open). + * + * The open for read will just hang around to be used by any process that + * opens the file O_RDONLY. This will all be resolved with the VFS changes. + */ + +static int +nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + int flags) +{ + struct nfs4_state *state; + struct rpc_cred *cred; + int status = 0; + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + if (IS_ERR(cred)) { + status = PTR_ERR(cred); + goto out; + } + state = nfs4_do_open(dir, dentry, flags, sattr, cred); + put_rpccred(cred); + if (IS_ERR(state)) { + status = PTR_ERR(state); + goto out; + } + d_instantiate(dentry, state->inode); + if (flags & O_EXCL) { + struct nfs_fattr fattr; + status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, + NFS_FH(state->inode), sattr, state); + if (status == 0) + goto out; + } else if (flags != 0) + goto out; + nfs4_close_state(state, flags); +out: + return status; +} + +static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs4_remove_arg args = { + .fh = NFS_FH(dir), + .name = name, + }; + struct nfs4_change_info res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status == 0) + update_changeattr(dir, &res); + return status; +} + +static int nfs4_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_remove(dir, name), + &exception); + } while (exception.retry); + return err; +} + +struct 
unlink_desc { + struct nfs4_remove_arg args; + struct nfs4_change_info res; +}; + +static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, + struct qstr *name) +{ + struct unlink_desc *up; + + up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL); + if (!up) + return -ENOMEM; + + up->args.fh = NFS_FH(dir->d_inode); + up->args.name = name; + + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; + msg->rpc_argp = &up->args; + msg->rpc_resp = &up->res; + return 0; +} + +static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task) +{ + struct rpc_message *msg = &task->tk_msg; + struct unlink_desc *up; + + if (msg->rpc_resp != NULL) { + up = container_of(msg->rpc_resp, struct unlink_desc, res); + update_changeattr(dir->d_inode, &up->res); + kfree(up); + msg->rpc_resp = NULL; + msg->rpc_argp = NULL; + } + return 0; +} + +static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs4_rename_arg arg = { + .old_dir = NFS_FH(old_dir), + .new_dir = NFS_FH(new_dir), + .old_name = old_name, + .new_name = new_name, + }; + struct nfs4_rename_res res = { }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); + + if (!status) { + update_changeattr(old_dir, &res.old_cinfo); + update_changeattr(new_dir, &res.new_cinfo); + } + return status; +} + +static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(old_dir), + _nfs4_proc_rename(old_dir, old_name, + new_dir, new_name), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs4_link_arg arg = 
{ + .fh = NFS_FH(inode), + .dir_fh = NFS_FH(dir), + .name = name, + }; + struct nfs4_change_info cinfo = { }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], + .rpc_argp = &arg, + .rpc_resp = &cinfo, + }; + int status; + + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (!status) + update_changeattr(dir, &cinfo); + + return status; +} + +static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_link(inode, dir, name), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, + struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_create_arg arg = { + .dir_fh = NFS_FH(dir), + .server = server, + .name = name, + .attrs = sattr, + .ftype = NF4LNK, + .bitmask = server->attr_bitmask, + }; + struct nfs4_create_res res = { + .server = server, + .fh = fhandle, + .fattr = fattr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + if (path->len > NFS4_MAXPATHLEN) + return -ENAMETOOLONG; + arg.u.symlink = path; + fattr->valid = 0; + + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (!status) + update_changeattr(dir, &res.dir_cinfo); + return status; +} + +static int nfs4_proc_symlink(struct inode *dir, struct qstr *name, + struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_symlink(dir, name, path, sattr, + fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_mkdir(struct inode *dir, struct dentry 
*dentry, + struct iattr *sattr) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs4_create_arg arg = { + .dir_fh = NFS_FH(dir), + .server = server, + .name = &dentry->d_name, + .attrs = sattr, + .ftype = NF4DIR, + .bitmask = server->attr_bitmask, + }; + struct nfs4_create_res res = { + .server = server, + .fh = &fhandle, + .fattr = &fattr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + fattr.valid = 0; + + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (!status) { + update_changeattr(dir, &res.dir_cinfo); + status = nfs_instantiate(dentry, &fhandle, &fattr); + } + return status; +} + +static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_mkdir(dir, dentry, sattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) +{ + struct inode *dir = dentry->d_inode; + struct nfs4_readdir_arg args = { + .fh = NFS_FH(dir), + .pages = &page, + .pgbase = 0, + .count = count, + .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, + }; + struct nfs4_readdir_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + int status; + + lock_kernel(); + nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + res.pgbase = args.pgbase; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status == 0) + memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + unlock_kernel(); + return status; +} + +static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 
cookie, struct page *page, unsigned int count, int plus) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), + _nfs4_proc_readdir(dentry, cred, cookie, + page, count, plus), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, dev_t rdev) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_fh fh; + struct nfs_fattr fattr; + struct nfs4_create_arg arg = { + .dir_fh = NFS_FH(dir), + .server = server, + .name = &dentry->d_name, + .attrs = sattr, + .bitmask = server->attr_bitmask, + }; + struct nfs4_create_res res = { + .server = server, + .fh = &fh, + .fattr = &fattr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + int mode = sattr->ia_mode; + + fattr.valid = 0; + + BUG_ON(!(sattr->ia_valid & ATTR_MODE)); + BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); + if (S_ISFIFO(mode)) + arg.ftype = NF4FIFO; + else if (S_ISBLK(mode)) { + arg.ftype = NF4BLK; + arg.u.device.specdata1 = MAJOR(rdev); + arg.u.device.specdata2 = MINOR(rdev); + } + else if (S_ISCHR(mode)) { + arg.ftype = NF4CHR; + arg.u.device.specdata1 = MAJOR(rdev); + arg.u.device.specdata2 = MINOR(rdev); + } + else + arg.ftype = NF4SOCK; + + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status == 0) { + update_changeattr(dir, &res.dir_cinfo); + status = nfs_instantiate(dentry, &fh, &fattr); + } + return status; +} + +static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, dev_t rdev) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_mknod(dir, dentry, sattr, rdev), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_statfs(struct nfs_server *server, 
struct nfs_fh *fhandle, + struct nfs_fsstat *fsstat) +{ + struct nfs4_statfs_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS], + .rpc_argp = &args, + .rpc_resp = fsstat, + }; + + fsstat->fattr->valid = 0; + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_statfs(server, fhandle, fsstat), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *fsinfo) +{ + struct nfs4_fsinfo_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], + .rpc_argp = &args, + .rpc_resp = fsinfo, + }; + + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_do_fsinfo(server, fhandle, fsinfo), + &exception); + } while (exception.retry); + return err; +} + +static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) +{ + fsinfo->fattr->valid = 0; + return nfs4_do_fsinfo(server, fhandle, fsinfo); +} + +static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *pathconf) +{ + struct nfs4_pathconf_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF], + .rpc_argp = &args, + .rpc_resp = pathconf, + }; + + /* None of the pathconf attributes are mandatory to implement */ + if 
((args.bitmask[0] & nfs4_pathconf_bitmap[0]) == 0) { + memset(pathconf, 0, sizeof(*pathconf)); + return 0; + } + + pathconf->fattr->valid = 0; + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *pathconf) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_proc_pathconf(server, fhandle, pathconf), + &exception); + } while (exception.retry); + return err; +} + +static void +nfs4_read_done(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + struct inode *inode = data->inode; + + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + rpc_restart_call(task); + return; + } + if (task->tk_status > 0) + renew_lease(NFS_SERVER(inode), data->timestamp); + /* Call back common NFS readpage processing */ + nfs_readpage_result(task); +} + +static void +nfs4_proc_read_setup(struct nfs_read_data *data) +{ + struct rpc_task *task = &data->task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + struct inode *inode = data->inode; + int flags; + + data->timestamp = jiffies; + + /* N.B. Do we need to test? Never called for swapfile inode */ + flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + + /* Finalize the task. 
*/ + rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags); + rpc_call_setup(task, &msg, 0); +} + +static void +nfs4_write_done(struct rpc_task *task) +{ + struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct inode *inode = data->inode; + + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + rpc_restart_call(task); + return; + } + if (task->tk_status >= 0) + renew_lease(NFS_SERVER(inode), data->timestamp); + /* Call back common NFS writeback processing */ + nfs_writeback_done(task); +} + +static void +nfs4_proc_write_setup(struct nfs_write_data *data, int how) +{ + struct rpc_task *task = &data->task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + struct inode *inode = data->inode; + int stable; + int flags; + + if (how & FLUSH_STABLE) { + if (!NFS_I(inode)->ncommit) + stable = NFS_FILE_SYNC; + else + stable = NFS_DATA_SYNC; + } else + stable = NFS_UNSTABLE; + data->args.stable = stable; + + data->timestamp = jiffies; + + /* Set the initial flags for the task. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + + /* Finalize the task. 
*/ + rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags); + rpc_call_setup(task, &msg, 0); +} + +static void +nfs4_commit_done(struct rpc_task *task) +{ + struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct inode *inode = data->inode; + + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + rpc_restart_call(task); + return; + } + /* Call back common NFS writeback processing */ + nfs_commit_done(task); +} + +static void +nfs4_proc_commit_setup(struct nfs_write_data *data, int how) +{ + struct rpc_task *task = &data->task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + struct inode *inode = data->inode; + int flags; + + /* Set the initial flags for the task. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + + /* Finalize the task. */ + rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags); + rpc_call_setup(task, &msg, 0); +} + +/* + * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special + * standalone procedure for queueing an asynchronous RENEW. 
+ */ +static void +renew_done(struct rpc_task *task) +{ + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + unsigned long timestamp = (unsigned long)task->tk_calldata; + + if (task->tk_status < 0) { + switch (task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_CB_PATH_DOWN: + nfs4_schedule_state_recovery(clp); + } + return; + } + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); +} + +int +nfs4_proc_async_renew(struct nfs4_client *clp) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, + }; + + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + renew_done, (void *)jiffies); +} + +int +nfs4_proc_renew(struct nfs4_client *clp) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, + }; + unsigned long now = jiffies; + int status; + + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status < 0) + return status; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,now)) + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + return 0; +} + +/* + * We will need to arrange for the VFS layer to provide an atomic open. + * Until then, this open method is prone to inefficiency and race conditions + * due to the lookup, potential create, and open VFS calls from sys_open() + * placed on the wire. 
+ */ +static int +nfs4_proc_file_open(struct inode *inode, struct file *filp) +{ + struct dentry *dentry = filp->f_dentry; + struct nfs_open_context *ctx; + struct nfs4_state *state = NULL; + struct rpc_cred *cred; + int status = -ENOMEM; + + dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", + (int)dentry->d_parent->d_name.len, + dentry->d_parent->d_name.name, + (int)dentry->d_name.len, dentry->d_name.name); + + + /* Find our open stateid */ + cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + ctx = alloc_nfs_open_context(dentry, cred); + put_rpccred(cred); + if (unlikely(ctx == NULL)) + return -ENOMEM; + status = -EIO; /* ERACE actually */ + state = nfs4_find_state(inode, cred, filp->f_mode); + if (unlikely(state == NULL)) + goto no_state; + ctx->state = state; + nfs4_close_state(state, filp->f_mode); + ctx->mode = filp->f_mode; + nfs_file_set_open_context(filp, ctx); + put_nfs_open_context(ctx); + if (filp->f_mode & FMODE_WRITE) + nfs_begin_data_update(inode); + return 0; +no_state: + printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); + put_nfs_open_context(ctx); + return status; +} + +/* + * Release our state + */ +static int +nfs4_proc_file_release(struct inode *inode, struct file *filp) +{ + if (filp->f_mode & FMODE_WRITE) + nfs_end_data_update(inode); + nfs_file_clear_open_context(filp); + return 0; +} + +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +{ + struct nfs4_client *clp = server->nfs4_state; + + if (!clp || task->tk_status >= 0) + return 0; + switch(task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_OK, &clp->cl_state)) + rpc_wake_up_task(task); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + rpc_delay(task, 
NFS4_POLL_RETRY_MAX); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_OLD_STATEID: + task->tk_status = 0; + return -EAGAIN; + } + task->tk_status = nfs4_map_errors(task->tk_status); + return 0; +} + +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +{ + DEFINE_WAIT(wait); + sigset_t oldset; + int interruptible, res = 0; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + interruptible = TASK_UNINTERRUPTIBLE; + if (clnt->cl_intr) + interruptible = TASK_INTERRUPTIBLE; + prepare_to_wait(&clp->cl_waitq, &wait, interruptible); + nfs4_schedule_state_recovery(clp); + if (clnt->cl_intr && signalled()) + res = -ERESTARTSYS; + else if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) + schedule(); + finish_wait(&clp->cl_waitq, &wait); + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) +{ + sigset_t oldset; + int res = 0; + + might_sleep(); + + if (*timeout <= 0) + *timeout = NFS4_POLL_RETRY_MIN; + if (*timeout > NFS4_POLL_RETRY_MAX) + *timeout = NFS4_POLL_RETRY_MAX; + rpc_clnt_sigmask(clnt, &oldset); + if (clnt->cl_intr) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(*timeout); + if (signalled()) + res = -ERESTARTSYS; + } else { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(*timeout); + } + rpc_clnt_sigunmask(clnt, &oldset); + *timeout <<= 1; + return res; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. 
+ */ +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +{ + struct nfs4_client *clp = server->nfs4_state; + int ret = errorcode; + + exception->retry = 0; + switch(errorcode) { + case 0: + return 0; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + ret = nfs4_wait_clnt_recover(server->client, clp); + if (ret == 0) + exception->retry = 1; + break; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + ret = nfs4_delay(server->client, &exception->timeout); + if (ret == 0) + exception->retry = 1; + break; + case -NFS4ERR_OLD_STATEID: + if (ret == 0) + exception->retry = 1; + } + /* We failed to handle the error */ + return nfs4_map_errors(ret); +} + +int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port) +{ + nfs4_verifier sc_verifier; + struct nfs4_setclientid setclientid = { + .sc_verifier = &sc_verifier, + .sc_prog = program, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], + .rpc_argp = &setclientid, + .rpc_resp = clp, + .rpc_cred = clp->cl_cred, + }; + u32 *p; + int loop = 0; + int status; + + p = (u32*)sc_verifier.data; + *p++ = htonl((u32)clp->cl_boot_time.tv_sec); + *p = htonl((u32)clp->cl_boot_time.tv_nsec); + + for(;;) { + setclientid.sc_name_len = scnprintf(setclientid.sc_name, + sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", + clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), + clp->cl_cred->cr_ops->cr_name, + clp->cl_id_uniquifier); + setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, + sizeof(setclientid.sc_netid), "tcp"); + setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, + sizeof(setclientid.sc_uaddr), "%s.%d.%d", + clp->cl_ipaddr, port >> 8, port & 255); + + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status != -NFS4ERR_CLID_INUSE) + break; + if (signalled()) + break; + if (loop++ & 1) + ssleep(clp->cl_lease_time + 1); + else + if (++clp->cl_id_uniquifier == 0) 
+ break; + } + return status; +} + +int +nfs4_proc_setclientid_confirm(struct nfs4_client *clp) +{ + struct nfs_fsinfo fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], + .rpc_argp = clp, + .rpc_resp = &fsinfo, + .rpc_cred = clp->cl_cred, + }; + unsigned long now; + int status; + + now = jiffies; + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status == 0) { + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + } + return status; +} + +static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +{ + struct nfs4_delegreturnargs args = { + .fhandle = NFS_FH(inode), + .stateid = stateid, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], + .rpc_argp = &args, + .rpc_cred = cred, + }; + + return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); +} + +int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_proc_delegreturn(inode, cred, stateid); + switch (err) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(server->nfs4_state); + case 0: + return 0; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +/* + * sleep, with exponential backoff, and retry the LOCK operation. 
+ */ +static unsigned long +nfs4_set_lock_task_retry(unsigned long timeout) +{ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(timeout); + timeout <<= 1; + if (timeout > NFS4_LOCK_MAXTIMEOUT) + return NFS4_LOCK_MAXTIMEOUT; + return timeout; +} + +static inline int +nfs4_lck_type(int cmd, struct file_lock *request) +{ + /* set lock type */ + switch (request->fl_type) { + case F_RDLCK: + return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT; + case F_WRLCK: + return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT; + case F_UNLCK: + return NFS4_WRITE_LT; + } + BUG(); + return 0; +} + +static inline uint64_t +nfs4_lck_length(struct file_lock *request) +{ + if (request->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return request->fl_end - request->fl_start + 1; +} + +static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lowner nlo; + struct nfs4_lock_state *lsp; + int status; + + down_read(&clp->cl_sem); + nlo.clientid = clp->cl_clientid; + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp) + nlo.id = lsp->ls_id; + else { + spin_lock(&clp->cl_lock); + nlo.id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + } + arg.u.lockt = &nlo; + status = rpc_call_sync(server->client, &msg, 0); + if (!status) { + request->fl_type = F_UNLCK; + } else if (status == -NFS4ERR_DENIED) { + int64_t len, start, end; + start = res.u.denied.offset; + len = 
res.u.denied.length; + end = start + len - 1; + if (end < 0 || len == 0) + request->fl_end = OFFSET_MAX; + else + request->fl_end = (loff_t)end; + request->fl_start = (loff_t)start; + request->fl_type = F_WRLCK; + if (res.u.denied.type & 1) + request->fl_type = F_RDLCK; + request->fl_pid = 0; + status = 0; + } + if (lsp) + nfs4_put_lock_state(lsp); + up(&state->lock_sema); + up_read(&clp->cl_sem); + return status; +} + +static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(NFS_SERVER(state->inode), + _nfs4_proc_getlk(state, cmd, request), + &exception); + } while (exception.retry); + return err; +} + +static int do_vfs_lock(struct file *file, struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_file_wait(file, fl); + break; + case FL_FLOCK: + res = flock_lock_file_wait(file, fl); + break; + default: + BUG(); + } + if (res < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", + __FUNCTION__); + return res; +} + +static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs4_lock_state *lsp; + struct nfs_locku_opargs luargs; + int status = 0; + + down_read(&clp->cl_sem); + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + /* We might have lost 
the locks! */ + if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_lock_seqid(status, lsp); + } + + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, + sizeof(lsp->ls_stateid)); + nfs4_notify_unlck(state, request, lsp); + } + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + if (status == 0) + do_vfs_lock(request->fl_file, request); + up_read(&clp->cl_sem); + return status; +} + +static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(NFS_SERVER(state->inode), + _nfs4_proc_unlck(state, cmd, request), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_lock_state *lsp; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lock_opargs largs = { + .reclaim = reclaim, + .new_lock_owner = 0, + }; + int status; + + lsp = nfs4_get_lock_state(state, request->fl_owner); + if (lsp == NULL) + return -ENOMEM; + if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { + struct nfs4_state_owner *owner = state->owner; + struct nfs_open_to_lock otl = { + .lock_owner = { + .clientid = server->nfs4_state->cl_clientid, + }, + }; + + otl.lock_seqid = lsp->ls_seqid; + otl.lock_owner.id = 
lsp->ls_id; + memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); + largs.u.open_lock = &otl; + largs.new_lock_owner = 1; + arg.u.lock = &largs; + down(&owner->so_sema); + otl.open_seqid = owner->so_seqid; + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment open_owner seqid on success, and + * seqid mutating errors */ + nfs4_increment_seqid(status, owner); + up(&owner->so_sema); + } else { + struct nfs_exist_lock el = { + .seqid = lsp->ls_seqid, + }; + memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); + largs.u.exist_lock = ⪙ + largs.new_lock_owner = 0; + arg.u.lock = &largs; + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + } + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); + /* save the returned stateid. */ + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + if (!reclaim) + nfs4_notify_setlk(state, request, lsp); + } else if (status == -NFS4ERR_DENIED) + status = -EAGAIN; + nfs4_put_lock_state(lsp); + return status; +} + +static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) +{ + return _nfs4_do_setlk(state, F_SETLK, request, 1); +} + +static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + return _nfs4_do_setlk(state, F_SETLK, request, 0); +} + +static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_client *clp = state->owner->so_client; + int status; + + down_read(&clp->cl_sem); + down(&state->lock_sema); + status = _nfs4_do_setlk(state, cmd, request, 0); + up(&state->lock_sema); + if (status == 0) { + /* Note: we always want to sleep here! 
*/ + request->fl_flags |= FL_SLEEP; + if (do_vfs_lock(request->fl_file, request) < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); + } + up_read(&clp->cl_sem); + return status; +} + +static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(NFS_SERVER(state->inode), + _nfs4_proc_setlk(state, cmd, request), + &exception); + } while (exception.retry); + return err; +} + +static int +nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) +{ + struct nfs_open_context *ctx; + struct nfs4_state *state; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + int status; + + /* verify open state */ + ctx = (struct nfs_open_context *)filp->private_data; + state = ctx->state; + + if (request->fl_start < 0 || request->fl_end < 0) + return -EINVAL; + + if (IS_GETLK(cmd)) + return nfs4_proc_getlk(state, F_GETLK, request); + + if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) + return -EINVAL; + + if (request->fl_type == F_UNLCK) + return nfs4_proc_unlck(state, cmd, request); + + do { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + timeout = nfs4_set_lock_task_retry(timeout); + status = -ERESTARTSYS; + if (signalled()) + break; + } while(status < 0); + + return status; +} + +struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { + .recover_open = nfs4_open_reclaim, + .recover_lock = nfs4_lock_reclaim, +}; + +struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { + .recover_open = nfs4_open_expired, + .recover_lock = nfs4_lock_expired, +}; + +struct nfs_rpc_ops nfs_v4_clientops = { + .version = 4, /* protocol version */ + .dentry_ops = &nfs4_dentry_operations, + .dir_inode_ops = &nfs4_dir_inode_operations, + .getroot = nfs4_proc_get_root, + .getattr = nfs4_proc_getattr, + .setattr = nfs4_proc_setattr, + .lookup = nfs4_proc_lookup, + .access = 
nfs4_proc_access, + .readlink = nfs4_proc_readlink, + .read = nfs4_proc_read, + .write = nfs4_proc_write, + .commit = nfs4_proc_commit, + .create = nfs4_proc_create, + .remove = nfs4_proc_remove, + .unlink_setup = nfs4_proc_unlink_setup, + .unlink_done = nfs4_proc_unlink_done, + .rename = nfs4_proc_rename, + .link = nfs4_proc_link, + .symlink = nfs4_proc_symlink, + .mkdir = nfs4_proc_mkdir, + .rmdir = nfs4_proc_remove, + .readdir = nfs4_proc_readdir, + .mknod = nfs4_proc_mknod, + .statfs = nfs4_proc_statfs, + .fsinfo = nfs4_proc_fsinfo, + .pathconf = nfs4_proc_pathconf, + .decode_dirent = nfs4_decode_dirent, + .read_setup = nfs4_proc_read_setup, + .write_setup = nfs4_proc_write_setup, + .commit_setup = nfs4_proc_commit_setup, + .file_open = nfs4_proc_file_open, + .file_release = nfs4_proc_file_release, + .lock = nfs4_proc_lock, +}; + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c new file mode 100644 index 00000000000..667e06f1c64 --- /dev/null +++ b/fs/nfs/nfs4renewd.c @@ -0,0 +1,148 @@ +/* + * fs/nfs/nfs4renewd.c + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Implementation of the NFSv4 "renew daemon", which wakes up periodically to + * send a RENEW, to keep state alive on the server. The daemon is implemented + * as an rpc_task, not a real kernel thread, so it always runs in rpciod's + * context. There is one renewd per nfs_server. + * + * TODO: If the send queue gets backlogged (e.g., if the server goes down), + * we will keep filling the queue with periodic RENEW requests. We need a + * mechanism for ensuring that if renewd successfully sends off a request, + * then it only wakes up when the request is finished. Maybe use the + * child task framework of the RPC layer? + */ + +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/clnt.h> + +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> + +#define NFSDBG_FACILITY NFSDBG_PROC + +void +nfs4_renew_state(void *data) +{ + struct nfs4_client *clp = (struct nfs4_client *)data; + long lease, timeout; + unsigned long last, now; + + down_read(&clp->cl_sem); + dprintk("%s: start\n", __FUNCTION__); + /* Are there any active superblocks? 
*/ + if (list_empty(&clp->cl_superblocks)) + goto out; + spin_lock(&clp->cl_lock); + lease = clp->cl_lease_time; + last = clp->cl_last_renewal; + now = jiffies; + timeout = (2 * lease) / 3 + (long)last - (long)now; + /* Are we close to a lease timeout? */ + if (time_after(now, last + lease/3)) { + spin_unlock(&clp->cl_lock); + /* Queue an asynchronous RENEW. */ + nfs4_proc_async_renew(clp); + timeout = (2 * lease) / 3; + spin_lock(&clp->cl_lock); + } else + dprintk("%s: failed to call renewd. Reason: lease not expired \n", + __FUNCTION__); + if (timeout < 5 * HZ) /* safeguard */ + timeout = 5 * HZ; + dprintk("%s: requeueing work. Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); +out: + up_read(&clp->cl_sem); + dprintk("%s: done\n", __FUNCTION__); +} + +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_schedule_state_renewal(struct nfs4_client *clp) +{ + long timeout; + + spin_lock(&clp->cl_lock); + timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal + - (long)jiffies; + if (timeout < 5 * HZ) + timeout = 5 * HZ; + dprintk("%s: requeueing work. 
Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); +} + +void +nfs4_renewd_prepare_shutdown(struct nfs_server *server) +{ + struct nfs4_client *clp = server->nfs4_state; + + if (!clp) + return; + flush_scheduled_work(); + down_write(&clp->cl_sem); + if (!list_empty(&server->nfs4_siblings)) + list_del_init(&server->nfs4_siblings); + up_write(&clp->cl_sem); +} + +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_kill_renewd(struct nfs4_client *clp) +{ + down_read(&clp->cl_sem); + if (!list_empty(&clp->cl_superblocks)) { + up_read(&clp->cl_sem); + return; + } + cancel_delayed_work(&clp->cl_renewd); + up_read(&clp->cl_sem); + flush_scheduled_work(); +} + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c new file mode 100644 index 00000000000..231cebce3c8 --- /dev/null +++ b/fs/nfs/nfs4state.c @@ -0,0 +1,932 @@ +/* + * fs/nfs/nfs4state.c + * + * Client-side XDR for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Implementation of the NFSv4 state model. For the time being, + * this is minimal, but will be made much more complex in a + * subsequent patch. + */ + +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/smp_lock.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_idmap.h> +#include <linux/workqueue.h> +#include <linux/bitops.h> + +#include "callback.h" +#include "delegation.h" + +#define OPENOWNER_POOL_SIZE 8 + +static DEFINE_SPINLOCK(state_spinlock); + +nfs4_stateid zero_stateid; + +#if 0 +nfs4_stateid one_stateid = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +#endif + +static LIST_HEAD(nfs4_clientid_list); + +static void nfs4_recover_state(void *); +extern void nfs4_renew_state(void *); + +void +init_nfsv4_state(struct nfs_server *server) +{ + server->nfs4_state = NULL; + INIT_LIST_HEAD(&server->nfs4_siblings); +} + +void +destroy_nfsv4_state(struct nfs_server *server) +{ + if (server->mnt_path) { + kfree(server->mnt_path); + server->mnt_path = NULL; + } + if (server->nfs4_state) { + nfs4_put_client(server->nfs4_state); + server->nfs4_state = NULL; + } +} + +/* + * nfs4_get_client(): returns an empty client structure + * nfs4_put_client(): 
drops reference to client structure + * + * Since these are allocated/deallocated very rarely, we don't + * bother putting them in a slab cache... + */ +static struct nfs4_client * +nfs4_alloc_client(struct in_addr *addr) +{ + struct nfs4_client *clp; + + if (nfs_callback_up() < 0) + return NULL; + if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { + nfs_callback_down(); + return NULL; + } + memset(clp, 0, sizeof(*clp)); + memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); + init_rwsem(&clp->cl_sem); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_state_owners); + INIT_LIST_HEAD(&clp->cl_unused); + spin_lock_init(&clp->cl_lock); + atomic_set(&clp->cl_count, 1); + INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_LIST_HEAD(&clp->cl_superblocks); + init_waitqueue_head(&clp->cl_waitq); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_boot_time = CURRENT_TIME; + clp->cl_state = 1 << NFS4CLNT_OK; + return clp; +} + +static void +nfs4_free_client(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + + while (!list_empty(&clp->cl_unused)) { + sp = list_entry(clp->cl_unused.next, + struct nfs4_state_owner, + so_list); + list_del(&sp->so_list); + kfree(sp); + } + BUG_ON(!list_empty(&clp->cl_state_owners)); + if (clp->cl_cred) + put_rpccred(clp->cl_cred); + nfs_idmap_delete(clp); + if (clp->cl_rpcclient) + rpc_shutdown_client(clp->cl_rpcclient); + kfree(clp); + nfs_callback_down(); +} + +static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) +{ + struct nfs4_client *clp; + list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { + if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { + atomic_inc(&clp->cl_count); + return clp; + } + } + return NULL; +} + +struct nfs4_client *nfs4_find_client(struct in_addr *addr) +{ + struct nfs4_client *clp; + spin_lock(&state_spinlock); + clp = __nfs4_find_client(addr); + spin_unlock(&state_spinlock); + 
return clp; +} + +struct nfs4_client * +nfs4_get_client(struct in_addr *addr) +{ + struct nfs4_client *clp, *new = NULL; + + spin_lock(&state_spinlock); + for (;;) { + clp = __nfs4_find_client(addr); + if (clp != NULL) + break; + clp = new; + if (clp != NULL) { + list_add(&clp->cl_servers, &nfs4_clientid_list); + new = NULL; + break; + } + spin_unlock(&state_spinlock); + new = nfs4_alloc_client(addr); + spin_lock(&state_spinlock); + if (new == NULL) + break; + } + spin_unlock(&state_spinlock); + if (new) + nfs4_free_client(new); + return clp; +} + +void +nfs4_put_client(struct nfs4_client *clp) +{ + if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock)) + return; + list_del(&clp->cl_servers); + spin_unlock(&state_spinlock); + BUG_ON(!list_empty(&clp->cl_superblocks)); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); + nfs4_kill_renewd(clp); + nfs4_free_client(clp); +} + +static int __nfs4_init_client(struct nfs4_client *clp) +{ + int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport); + if (status == 0) + status = nfs4_proc_setclientid_confirm(clp); + if (status == 0) + nfs4_schedule_state_renewal(clp); + return status; +} + +int nfs4_init_client(struct nfs4_client *clp) +{ + return nfs4_map_errors(__nfs4_init_client(clp)); +} + +u32 +nfs4_alloc_lockowner_id(struct nfs4_client *clp) +{ + return clp->cl_lockowner_id ++; +} + +static struct nfs4_state_owner * +nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) +{ + struct nfs4_state_owner *sp = NULL; + + if (!list_empty(&clp->cl_unused)) { + sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list); + atomic_inc(&sp->so_count); + sp->so_cred = cred; + list_move(&sp->so_list, &clp->cl_state_owners); + clp->cl_nunused--; + } + return sp; +} + +static struct nfs4_state_owner * +nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +{ + struct nfs4_state_owner *sp, *res = NULL; + + list_for_each_entry(sp, &clp->cl_state_owners, 
so_list) { + if (sp->so_cred != cred) + continue; + atomic_inc(&sp->so_count); + /* Move to the head of the list */ + list_move(&sp->so_list, &clp->cl_state_owners); + res = sp; + break; + } + return res; +} + +/* + * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to + * create a new state_owner. + * + */ +static struct nfs4_state_owner * +nfs4_alloc_state_owner(void) +{ + struct nfs4_state_owner *sp; + + sp = kmalloc(sizeof(*sp),GFP_KERNEL); + if (!sp) + return NULL; + init_MUTEX(&sp->so_sema); + sp->so_seqid = 0; /* arbitrary */ + INIT_LIST_HEAD(&sp->so_states); + INIT_LIST_HEAD(&sp->so_delegations); + atomic_set(&sp->so_count, 1); + return sp; +} + +void +nfs4_drop_state_owner(struct nfs4_state_owner *sp) +{ + struct nfs4_client *clp = sp->so_client; + spin_lock(&clp->cl_lock); + list_del_init(&sp->so_list); + spin_unlock(&clp->cl_lock); +} + +/* + * Note: must be called with clp->cl_sem held in order to prevent races + * with reboot recovery! + */ +struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) +{ + struct nfs4_client *clp = server->nfs4_state; + struct nfs4_state_owner *sp, *new; + + get_rpccred(cred); + new = nfs4_alloc_state_owner(); + spin_lock(&clp->cl_lock); + sp = nfs4_find_state_owner(clp, cred); + if (sp == NULL) + sp = nfs4_client_grab_unused(clp, cred); + if (sp == NULL && new != NULL) { + list_add(&new->so_list, &clp->cl_state_owners); + new->so_client = clp; + new->so_id = nfs4_alloc_lockowner_id(clp); + new->so_cred = cred; + sp = new; + new = NULL; + } + spin_unlock(&clp->cl_lock); + if (new) + kfree(new); + if (sp != NULL) + return sp; + put_rpccred(cred); + return NULL; +} + +/* + * Must be called with clp->cl_sem held in order to avoid races + * with state recovery... 
+ */ +void nfs4_put_state_owner(struct nfs4_state_owner *sp) +{ + struct nfs4_client *clp = sp->so_client; + struct rpc_cred *cred = sp->so_cred; + + if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) + return; + if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) + goto out_free; + if (list_empty(&sp->so_list)) + goto out_free; + list_move(&sp->so_list, &clp->cl_unused); + clp->cl_nunused++; + spin_unlock(&clp->cl_lock); + put_rpccred(cred); + cred = NULL; + return; +out_free: + list_del(&sp->so_list); + spin_unlock(&clp->cl_lock); + put_rpccred(cred); + kfree(sp); +} + +static struct nfs4_state * +nfs4_alloc_open_state(void) +{ + struct nfs4_state *state; + + state = kmalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + state->state = 0; + state->nreaders = 0; + state->nwriters = 0; + state->flags = 0; + memset(state->stateid.data, 0, sizeof(state->stateid.data)); + atomic_set(&state->count, 1); + INIT_LIST_HEAD(&state->lock_states); + init_MUTEX(&state->lock_sema); + rwlock_init(&state->state_lock); + return state; +} + +static struct nfs4_state * +__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_state *state; + + mode &= (FMODE_READ|FMODE_WRITE); + list_for_each_entry(state, &nfsi->open_states, inode_states) { + if (state->owner->so_cred != cred) + continue; + if ((mode & FMODE_READ) != 0 && state->nreaders == 0) + continue; + if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0) + continue; + if ((state->state & mode) != mode) + continue; + atomic_inc(&state->count); + if (mode & FMODE_READ) + state->nreaders++; + if (mode & FMODE_WRITE) + state->nwriters++; + return state; + } + return NULL; +} + +static struct nfs4_state * +__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_state *state; + + list_for_each_entry(state, &nfsi->open_states, inode_states) { + /* Is this in the 
process of being freed? */ + if (state->nreaders == 0 && state->nwriters == 0) + continue; + if (state->owner == owner) { + atomic_inc(&state->count); + return state; + } + } + return NULL; +} + +struct nfs4_state * +nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) +{ + struct nfs4_state *state; + + spin_lock(&inode->i_lock); + state = __nfs4_find_state(inode, cred, mode); + spin_unlock(&inode->i_lock); + return state; +} + +static void +nfs4_free_open_state(struct nfs4_state *state) +{ + kfree(state); +} + +struct nfs4_state * +nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) +{ + struct nfs4_state *state, *new; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + state = __nfs4_find_state_byowner(inode, owner); + spin_unlock(&inode->i_lock); + if (state) + goto out; + new = nfs4_alloc_open_state(); + spin_lock(&inode->i_lock); + state = __nfs4_find_state_byowner(inode, owner); + if (state == NULL && new != NULL) { + state = new; + /* Caller *must* be holding owner->so_sem */ + /* Note: The reclaim code dictates that we add stateless + * and read-only stateids to the end of the list */ + list_add_tail(&state->open_states, &owner->so_states); + state->owner = owner; + atomic_inc(&owner->so_count); + list_add(&state->inode_states, &nfsi->open_states); + state->inode = igrab(inode); + spin_unlock(&inode->i_lock); + } else { + spin_unlock(&inode->i_lock); + if (new) + nfs4_free_open_state(new); + } +out: + return state; +} + +/* + * Beware! Caller must be holding exactly one + * reference to clp->cl_sem and owner->so_sema! 
+ */ +void nfs4_put_open_state(struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs4_state_owner *owner = state->owner; + + if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) + return; + if (!list_empty(&state->inode_states)) + list_del(&state->inode_states); + spin_unlock(&inode->i_lock); + list_del(&state->open_states); + iput(inode); + BUG_ON (state->state != 0); + nfs4_free_open_state(state); + nfs4_put_state_owner(owner); +} + +/* + * Beware! Caller must be holding no references to clp->cl_sem! + * of owner->so_sema! + */ +void nfs4_close_state(struct nfs4_state *state, mode_t mode) +{ + struct inode *inode = state->inode; + struct nfs4_state_owner *owner = state->owner; + struct nfs4_client *clp = owner->so_client; + int newstate; + + atomic_inc(&owner->so_count); + down_read(&clp->cl_sem); + down(&owner->so_sema); + /* Protect against nfs4_find_state() */ + spin_lock(&inode->i_lock); + if (mode & FMODE_READ) + state->nreaders--; + if (mode & FMODE_WRITE) + state->nwriters--; + if (state->nwriters == 0) { + if (state->nreaders == 0) + list_del_init(&state->inode_states); + /* See reclaim code */ + list_move_tail(&state->open_states, &owner->so_states); + } + spin_unlock(&inode->i_lock); + newstate = 0; + if (state->state != 0) { + if (state->nreaders) + newstate |= FMODE_READ; + if (state->nwriters) + newstate |= FMODE_WRITE; + if (state->state == newstate) + goto out; + if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) + return; + } +out: + nfs4_put_open_state(state); + up(&owner->so_sema); + nfs4_put_state_owner(owner); + up_read(&clp->cl_sem); +} + +/* + * Search the state->lock_states for an existing lock_owner + * that is compatible with current->files + */ +static struct nfs4_lock_state * +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *pos; + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (pos->ls_owner != fl_owner) + continue; + 
atomic_inc(&pos->ls_count); + return pos; + } + return NULL; +} + +struct nfs4_lock_state * +nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + read_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + read_unlock(&state->state_lock); + return lsp; +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. + * + * The caller must be holding state->lock_sema + */ +static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + struct nfs4_client *clp = state->owner->so_client; + + lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + if (lsp == NULL) + return NULL; + lsp->ls_flags = 0; + lsp->ls_seqid = 0; /* arbitrary */ + lsp->ls_id = -1; + memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + atomic_set(&lsp->ls_count, 1); + lsp->ls_owner = fl_owner; + INIT_LIST_HEAD(&lsp->ls_locks); + spin_lock(&clp->cl_lock); + lsp->ls_id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + return lsp; +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. + * + * The caller must be holding state->lock_sema and clp->cl_sem + */ +struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) +{ + struct nfs4_lock_state * lsp; + + lsp = nfs4_find_lock_state(state, owner); + if (lsp == NULL) + lsp = nfs4_alloc_lock_state(state, owner); + return lsp; +} + +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. 
+ */ +void +nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +{ + if (test_bit(LK_STATE_IN_USE, &state->flags)) { + struct nfs4_lock_state *lsp; + + lsp = nfs4_find_lock_state(state, fl_owner); + if (lsp) { + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + nfs4_put_lock_state(lsp); + return; + } + } + memcpy(dst, &state->stateid, sizeof(*dst)); +} + +/* +* Called with state->lock_sema and clp->cl_sem held. +*/ +void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +{ + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; +} + +/* +* Check to see if the request lock (type FL_UNLK) effects the fl lock. +* +* fl and request must have the same posix owner +* +* return: +* 0 -> fl not effected by request +* 1 -> fl consumed by request +*/ + +static int +nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +{ + if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) + return 1; + return 0; +} + +/* + * Post an initialized lock_state on the state->lock_states list. + */ +void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + if (!list_empty(&lsp->ls_locks)) + return; + atomic_inc(&lsp->ls_count); + write_lock(&state->state_lock); + list_add(&lsp->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * to decide to 'reap' lock state: + * 1) search i_flock for file_locks with fl.lock_state = to ls. + * 2) determine if unlock will consume found lock. + * if so, reap + * + * else, don't reap. 
+ * + */ +void +nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct inode *inode = state->inode; + struct file_lock *fl; + + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (fl->fl_owner != lsp->ls_owner) + continue; + /* Exit if we find at least one lock which is not consumed */ + if (nfs4_check_unlock(fl,request) == 0) + return; + } + + write_lock(&state->state_lock); + list_del_init(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); + nfs4_put_lock_state(lsp); +} + +/* + * Release reference to lock_state, and free it if we see that + * it is no longer in use + */ +void +nfs4_put_lock_state(struct nfs4_lock_state *lsp) +{ + if (!atomic_dec_and_test(&lsp->ls_count)) + return; + BUG_ON (!list_empty(&lsp->ls_locks)); + kfree(lsp); +} + +/* +* Called with sp->so_sema and clp->cl_sem held. 
+* +* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or +* failed with a seqid incrementing error - +* see comments nfs_fs.h:seqid_mutating_error() +*/ +void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) +{ + if (status == NFS_OK || seqid_mutating_err(-status)) + sp->so_seqid++; + /* If the server returns BAD_SEQID, unhash state_owner here */ + if (status == -NFS4ERR_BAD_SEQID) + nfs4_drop_state_owner(sp); +} + +static int reclaimer(void *); +struct reclaimer_args { + struct nfs4_client *clp; + struct completion complete; +}; + +/* + * State recovery routine + */ +void +nfs4_recover_state(void *data) +{ + struct nfs4_client *clp = (struct nfs4_client *)data; + struct reclaimer_args args = { + .clp = clp, + }; + might_sleep(); + + init_completion(&args.complete); + + if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) + goto out_failed_clear; + wait_for_completion(&args.complete); + return; +out_failed_clear: + set_bit(NFS4CLNT_OK, &clp->cl_state); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); +} + +/* + * Schedule a state recovery attempt + */ +void +nfs4_schedule_state_recovery(struct nfs4_client *clp) +{ + if (!clp) + return; + if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state)) + schedule_work(&clp->cl_recoverd); +} + +static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct file_lock *fl; + int status = 0; + + for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) + continue; + status = ops->recover_lock(state, fl); + if (status >= 0) + continue; + switch (status) { + default: + printk(KERN_ERR "%s: unhandled error %d. 
Zeroing state\n", + __FUNCTION__, status); + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* kill_proc(fl->fl_owner, SIGLOST, 1); */ + break; + case -NFS4ERR_STALE_CLIENTID: + goto out_err; + } + } + return 0; +out_err: + return status; +} + +static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp) +{ + struct nfs4_state *state; + struct nfs4_lock_state *lock; + int status = 0; + + /* Note: we rely on the sp->so_states list being ordered + * so that we always reclaim open(O_RDWR) and/or open(O_WRITE) + * states first. + * This is needed to ensure that the server won't give us any + * read delegations that we have to return if, say, we are + * recovering after a network partition or a reboot from a + * server that doesn't support a grace period. + */ + list_for_each_entry(state, &sp->so_states, open_states) { + if (state->state == 0) + continue; + status = ops->recover_open(sp, state); + list_for_each_entry(lock, &state->lock_states, ls_locks) + lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + if (status >= 0) { + status = nfs4_reclaim_locks(ops, state); + if (status < 0) + goto out_err; + list_for_each_entry(lock, &state->lock_states, ls_locks) { + if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) + printk("%s: Lock reclaim failed!\n", + __FUNCTION__); + } + continue; + } + switch (status) { + default: + printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", + __FUNCTION__, status); + case -ENOENT: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* + * Open state on this file cannot be recovered + * All we can do is revert to using the zero stateid. 
+ */ + memset(state->stateid.data, 0, + sizeof(state->stateid.data)); + /* Mark the file as being 'closed' */ + state->state = 0; + break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_STALE_CLIENTID: + goto out_err; + } + } + return 0; +out_err: + return status; +} + +static int reclaimer(void *ptr) +{ + struct reclaimer_args *args = (struct reclaimer_args *)ptr; + struct nfs4_client *clp = args->clp; + struct nfs4_state_owner *sp; + struct nfs4_state_recovery_ops *ops; + int status = 0; + + daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); + allow_signal(SIGKILL); + + atomic_inc(&clp->cl_count); + complete(&args->complete); + + /* Ensure exclusive access to NFSv4 state */ + lock_kernel(); + down_write(&clp->cl_sem); + /* Are there any NFS mounts out there? */ + if (list_empty(&clp->cl_superblocks)) + goto out; +restart_loop: + status = nfs4_proc_renew(clp); + switch (status) { + case 0: + case -NFS4ERR_CB_PATH_DOWN: + goto out; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + ops = &nfs4_reboot_recovery_ops; + break; + default: + ops = &nfs4_network_partition_recovery_ops; + }; + status = __nfs4_init_client(clp); + if (status) + goto out_error; + /* Mark all delegations for reclaim */ + nfs_delegation_mark_reclaim(clp); + /* Note: list is protected by exclusive lock on cl->cl_sem */ + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + status = nfs4_reclaim_open_state(ops, sp); + if (status < 0) { + if (status == -NFS4ERR_NO_GRACE) { + ops = &nfs4_network_partition_recovery_ops; + status = nfs4_reclaim_open_state(ops, sp); + } + if (status == -NFS4ERR_STALE_CLIENTID) + goto restart_loop; + if (status == -NFS4ERR_EXPIRED) + goto restart_loop; + } + } + nfs_delegation_reap_unclaimed(clp); +out: + set_bit(NFS4CLNT_OK, &clp->cl_state); + up_write(&clp->cl_sem); + unlock_kernel(); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); + if (status == -NFS4ERR_CB_PATH_DOWN) + nfs_handle_cb_pathdown(clp); + 
nfs4_put_client(clp); + return 0; +out_error: + printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", + NIPQUAD(clp->cl_addr.s_addr), -status); + goto out; +} + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c new file mode 100644 index 00000000000..5f4de05763c --- /dev/null +++ b/fs/nfs/nfs4xdr.c @@ -0,0 +1,4034 @@ +/* + * fs/nfs/nfs4xdr.c + * + * Client-side XDR for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/param.h> +#include <linux/time.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/proc_fs.h> +#include <linux/kdev_t.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_idmap.h> + +#define NFSDBG_FACILITY NFSDBG_XDR + +/* Mapping from NFS error code to "errno" error code. 
*/ +#define errno_NFSERR_IO EIO + +static int nfs_stat_to_errno(int); + +/* NFSv4 COMPOUND tags are only wanted for debugging purposes */ +#ifdef DEBUG +#define NFS4_MAXTAGLEN 20 +#else +#define NFS4_MAXTAGLEN 0 +#endif + +/* lock,open owner id: + * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) + */ +#define owner_id_maxsz (1 + 1) +#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) +#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) +#define op_encode_hdr_maxsz (1) +#define op_decode_hdr_maxsz (2) +#define encode_putfh_maxsz (op_encode_hdr_maxsz + 1 + \ + (NFS4_FHSIZE >> 2)) +#define decode_putfh_maxsz (op_decode_hdr_maxsz) +#define encode_putrootfh_maxsz (op_encode_hdr_maxsz) +#define decode_putrootfh_maxsz (op_decode_hdr_maxsz) +#define encode_getfh_maxsz (op_encode_hdr_maxsz) +#define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ + ((3+NFS4_FHSIZE) >> 2)) +#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) +#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) +#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ + nfs4_fattr_bitmap_maxsz) +#define encode_savefh_maxsz (op_encode_hdr_maxsz) +#define decode_savefh_maxsz (op_decode_hdr_maxsz) +#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) +#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) +#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) +#define decode_renew_maxsz (op_decode_hdr_maxsz) +#define encode_setclientid_maxsz \ + (op_encode_hdr_maxsz + \ + 4 /*server->ip_addr*/ + \ + 1 /*Netid*/ + \ + 6 /*uaddr*/ + \ + 6 + (NFS4_VERIFIER_SIZE >> 2)) +#define decode_setclientid_maxsz \ + (op_decode_hdr_maxsz + \ + 2 + \ + 1024) /* large value for CLID_INUSE */ +#define encode_setclientid_confirm_maxsz \ + (op_encode_hdr_maxsz + \ + 3 + (NFS4_VERIFIER_SIZE >> 2)) +#define decode_setclientid_confirm_maxsz \ + (op_decode_hdr_maxsz) 
+#define encode_lookup_maxsz (op_encode_hdr_maxsz + \ + 1 + ((3 + NFS4_FHSIZE) >> 2)) +#define encode_remove_maxsz (op_encode_hdr_maxsz + \ + nfs4_name_maxsz) +#define encode_rename_maxsz (op_encode_hdr_maxsz + \ + 2 * nfs4_name_maxsz) +#define decode_rename_maxsz (op_decode_hdr_maxsz + 5 + 5) +#define encode_link_maxsz (op_encode_hdr_maxsz + \ + nfs4_name_maxsz) +#define decode_link_maxsz (op_decode_hdr_maxsz + 5) +#define encode_symlink_maxsz (op_encode_hdr_maxsz + \ + 1 + nfs4_name_maxsz + \ + nfs4_path_maxsz + \ + nfs4_fattr_bitmap_maxsz) +#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) +#define encode_create_maxsz (op_encode_hdr_maxsz + \ + 2 + nfs4_name_maxsz + \ + nfs4_fattr_bitmap_maxsz) +#define decode_create_maxsz (op_decode_hdr_maxsz + 8) +#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) +#define decode_delegreturn_maxsz (op_decode_hdr_maxsz) +#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ +#define NFS4_dec_compound_sz (1024) /* XXX: large enough? 
*/ +#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 7) +#define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 2) +#define NFS4_enc_readlink_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz) +#define NFS4_dec_readlink_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz) +#define NFS4_enc_readdir_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 9) +#define NFS4_dec_readdir_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 2) +#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 8) +#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 4) +#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 3) +#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 2) +#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + \ + 13 + 3 + 2 + 64 + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) +#define NFS4_enc_open_confirm_sz \ + (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 5) +#define NFS4_dec_open_confirm_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 4) +#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + \ + 11) +#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + 4 + 5 + 2 + 3) +#define NFS4_enc_open_downgrade_sz \ + 
(compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 7) +#define NFS4_dec_open_downgrade_sz \ + (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 4) +#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 5) +#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 4) +#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 3) +#define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_fsinfo_maxsz) +#define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_fsinfo_maxsz) +#define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \ + encode_renew_maxsz) +#define NFS4_dec_renew_sz (compound_decode_hdr_maxsz + \ + decode_renew_maxsz) +#define NFS4_enc_setclientid_sz (compound_encode_hdr_maxsz + \ + encode_setclientid_maxsz) +#define NFS4_dec_setclientid_sz (compound_decode_hdr_maxsz + \ + decode_setclientid_maxsz) +#define NFS4_enc_setclientid_confirm_sz \ + (compound_encode_hdr_maxsz + \ + encode_setclientid_confirm_maxsz + \ + encode_putrootfh_maxsz + \ + encode_fsinfo_maxsz) +#define NFS4_dec_setclientid_confirm_sz \ + (compound_decode_hdr_maxsz + \ + decode_setclientid_confirm_maxsz + \ + decode_putrootfh_maxsz + \ + decode_fsinfo_maxsz) +#define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 2 + 2 + \ + 1 + 4 + 1 + 2 + \ + owner_id_maxsz) +#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + \ + 2 + 2 + 1 + 2 + \ + owner_id_maxsz) +#define NFS4_enc_lockt_sz 
(compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 2 + 2 + 2 + \ + owner_id_maxsz) +#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz) +#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 4 + 2 + 2) +#define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + 4) +#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 1) +#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 2) +#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_lookup_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) +#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ + encode_putrootfh_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \ + decode_putrootfh_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) +#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_remove_maxsz) +#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 5) +#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_rename_maxsz) +#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + 
decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_rename_maxsz) +#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_link_maxsz) +#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_link_maxsz) +#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_symlink_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_symlink_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) +#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_create_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_create_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) +#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_pathconf_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_statfs_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_statfs_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 12) +#define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_delegreturn_maxsz) +#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ + decode_delegreturn_maxsz) + +static struct { + unsigned int mode; + unsigned int nfs2type; +} nfs_type2fmt[] = { + { 0, NFNON }, + { S_IFREG, NFREG }, + { S_IFDIR, NFDIR }, + { 
S_IFBLK, NFBLK }, + { S_IFCHR, NFCHR }, + { S_IFLNK, NFLNK }, + { S_IFSOCK, NFSOCK }, + { S_IFIFO, NFFIFO }, + { 0, NFNON }, + { 0, NFNON }, +}; + +struct compound_hdr { + int32_t status; + uint32_t nops; + uint32_t taglen; + char * tag; +}; + +/* + * START OF "GENERIC" ENCODE ROUTINES. + * These may look a little ugly since they are imported from a "generic" + * set of XDR encode/decode routines which are intended to be shared by + * all of our NFSv4 implementations (OpenBSD, MacOS X...). + * + * If the pain of reading these is too great, it should be a straightforward + * task to translate them into Linux-specific versions which are more + * consistent with the style used in NFSv2/v3... + */ +#define WRITE32(n) *p++ = htonl(n) +#define WRITE64(n) do { \ + *p++ = htonl((uint32_t)((n) >> 32)); \ + *p++ = htonl((uint32_t)(n)); \ +} while (0) +#define WRITEMEM(ptr,nbytes) do { \ + p = xdr_encode_opaque_fixed(p, ptr, nbytes); \ +} while (0) + +#define RESERVE_SPACE(nbytes) do { \ + p = xdr_reserve_space(xdr, nbytes); \ + if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ + BUG_ON(!p); \ +} while (0) + +static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 4 + len); + BUG_ON(p == NULL); + xdr_encode_opaque(p, str, len); +} + +static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ + uint32_t *p; + + dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); + BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); + RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); + WRITE32(hdr->taglen); + WRITEMEM(hdr->tag, hdr->taglen); + WRITE32(NFS4_MINOR_VERSION); + WRITE32(hdr->nops); + return 0; +} + +static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + BUG_ON(p == NULL); + xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); +} + 
+static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) +{ + char owner_name[IDMAP_NAMESZ]; + char owner_group[IDMAP_NAMESZ]; + int owner_namelen = 0; + int owner_grouplen = 0; + uint32_t *p; + uint32_t *q; + int len; + uint32_t bmval0 = 0; + uint32_t bmval1 = 0; + int status; + + /* + * We reserve enough space to write the entire attribute buffer at once. + * In the worst-case, this would be + * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) + * = 36 bytes, plus any contribution from variable-length fields + * such as owner/group/acl's. + */ + len = 16; + + /* Sigh */ + if (iap->ia_valid & ATTR_SIZE) + len += 8; + if (iap->ia_valid & ATTR_MODE) + len += 4; + if (iap->ia_valid & ATTR_UID) { + owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); + if (owner_namelen < 0) { + printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", + iap->ia_uid); + /* XXX */ + strcpy(owner_name, "nobody"); + owner_namelen = sizeof("nobody") - 1; + /* goto out; */ + } + len += 4 + (XDR_QUADLEN(owner_namelen) << 2); + } + if (iap->ia_valid & ATTR_GID) { + owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); + if (owner_grouplen < 0) { + printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", + iap->ia_gid); + strcpy(owner_group, "nobody"); + owner_grouplen = sizeof("nobody") - 1; + /* goto out; */ + } + len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); + } + if (iap->ia_valid & ATTR_ATIME_SET) + len += 16; + else if (iap->ia_valid & ATTR_ATIME) + len += 4; + if (iap->ia_valid & ATTR_MTIME_SET) + len += 16; + else if (iap->ia_valid & ATTR_MTIME) + len += 4; + RESERVE_SPACE(len); + + /* + * We write the bitmap length now, but leave the bitmap and the attribute + * buffer length to be backfilled at the end of this routine. 
+ */ + WRITE32(2); + q = p; + p += 3; + + if (iap->ia_valid & ATTR_SIZE) { + bmval0 |= FATTR4_WORD0_SIZE; + WRITE64(iap->ia_size); + } + if (iap->ia_valid & ATTR_MODE) { + bmval1 |= FATTR4_WORD1_MODE; + WRITE32(iap->ia_mode); + } + if (iap->ia_valid & ATTR_UID) { + bmval1 |= FATTR4_WORD1_OWNER; + WRITE32(owner_namelen); + WRITEMEM(owner_name, owner_namelen); + } + if (iap->ia_valid & ATTR_GID) { + bmval1 |= FATTR4_WORD1_OWNER_GROUP; + WRITE32(owner_grouplen); + WRITEMEM(owner_group, owner_grouplen); + } + if (iap->ia_valid & ATTR_ATIME_SET) { + bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; + WRITE32(NFS4_SET_TO_CLIENT_TIME); + WRITE32(0); + WRITE32(iap->ia_mtime.tv_sec); + WRITE32(iap->ia_mtime.tv_nsec); + } + else if (iap->ia_valid & ATTR_ATIME) { + bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; + WRITE32(NFS4_SET_TO_SERVER_TIME); + } + if (iap->ia_valid & ATTR_MTIME_SET) { + bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; + WRITE32(NFS4_SET_TO_CLIENT_TIME); + WRITE32(0); + WRITE32(iap->ia_mtime.tv_sec); + WRITE32(iap->ia_mtime.tv_nsec); + } + else if (iap->ia_valid & ATTR_MTIME) { + bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; + WRITE32(NFS4_SET_TO_SERVER_TIME); + } + + /* + * Now we backfill the bitmap and the attribute buffer length. + */ + if (len != ((char *)p - (char *)q) + 4) { + printk ("encode_attr: Attr length calculation error! 
%u != %Zu\n", + len, ((char *)p - (char *)q) + 4); + BUG(); + } + len = (char *)p - (char *)q - 12; + *q++ = htonl(bmval0); + *q++ = htonl(bmval1); + *q++ = htonl(len); + + status = 0; +/* out: */ + return status; +} + +static int encode_access(struct xdr_stream *xdr, u32 access) +{ + uint32_t *p; + + RESERVE_SPACE(8); + WRITE32(OP_ACCESS); + WRITE32(access); + + return 0; +} + +static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(8+sizeof(arg->stateid.data)); + WRITE32(OP_CLOSE); + WRITE32(arg->seqid); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + + return 0; +} + +static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args) +{ + uint32_t *p; + + RESERVE_SPACE(16); + WRITE32(OP_COMMIT); + WRITE64(args->offset); + WRITE32(args->count); + + return 0; +} + +static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create) +{ + uint32_t *p; + + RESERVE_SPACE(8); + WRITE32(OP_CREATE); + WRITE32(create->ftype); + + switch (create->ftype) { + case NF4LNK: + RESERVE_SPACE(4 + create->u.symlink->len); + WRITE32(create->u.symlink->len); + WRITEMEM(create->u.symlink->name, create->u.symlink->len); + break; + + case NF4BLK: case NF4CHR: + RESERVE_SPACE(8); + WRITE32(create->u.device.specdata1); + WRITE32(create->u.device.specdata2); + break; + + default: + break; + } + + RESERVE_SPACE(4 + create->name->len); + WRITE32(create->name->len); + WRITEMEM(create->name->name, create->name->len); + + return encode_attrs(xdr, create->attrs, create->server); +} + +static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap) +{ + uint32_t *p; + + RESERVE_SPACE(12); + WRITE32(OP_GETATTR); + WRITE32(1); + WRITE32(bitmap); + return 0; +} + +static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1) +{ + uint32_t *p; + + RESERVE_SPACE(16); + WRITE32(OP_GETATTR); + WRITE32(2); + WRITE32(bm0); + WRITE32(bm1); + return 0; +} + +static int 
encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) +{ + extern u32 nfs4_fattr_bitmap[]; + + return encode_getattr_two(xdr, + bitmask[0] & nfs4_fattr_bitmap[0], + bitmask[1] & nfs4_fattr_bitmap[1]); +} + +static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) +{ + extern u32 nfs4_fsinfo_bitmap[]; + + return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], + bitmask[1] & nfs4_fsinfo_bitmap[1]); +} + +static int encode_getfh(struct xdr_stream *xdr) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(OP_GETFH); + + return 0; +} + +static int encode_link(struct xdr_stream *xdr, const struct qstr *name) +{ + uint32_t *p; + + RESERVE_SPACE(8 + name->len); + WRITE32(OP_LINK); + WRITE32(name->len); + WRITEMEM(name->name, name->len); + + return 0; +} + +/* + * opcode,type,reclaim,offset,length,new_lock_owner = 32 + * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 + */ +static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lock_opargs *opargs = arg->u.lock; + + RESERVE_SPACE(32); + WRITE32(OP_LOCK); + WRITE32(arg->type); + WRITE32(opargs->reclaim); + WRITE64(arg->offset); + WRITE64(arg->length); + WRITE32(opargs->new_lock_owner); + if (opargs->new_lock_owner){ + struct nfs_open_to_lock *ol = opargs->u.open_lock; + + RESERVE_SPACE(40); + WRITE32(ol->open_seqid); + WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); + WRITE32(ol->lock_seqid); + WRITE64(ol->lock_owner.clientid); + WRITE32(4); + WRITE32(ol->lock_owner.id); + } + else { + struct nfs_exist_lock *el = opargs->u.exist_lock; + + RESERVE_SPACE(20); + WRITEMEM(&el->stateid, sizeof(el->stateid)); + WRITE32(el->seqid); + } + + return 0; +} + +static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lowner *opargs = arg->u.lockt; + + RESERVE_SPACE(40); + WRITE32(OP_LOCKT); + WRITE32(arg->type); + WRITE64(arg->offset); + WRITE64(arg->length); + 
WRITE64(opargs->clientid); + WRITE32(4); + WRITE32(opargs->id); + + return 0; +} + +static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_locku_opargs *opargs = arg->u.locku; + + RESERVE_SPACE(44); + WRITE32(OP_LOCKU); + WRITE32(arg->type); + WRITE32(opargs->seqid); + WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); + WRITE64(arg->offset); + WRITE64(arg->length); + + return 0; +} + +static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name) +{ + int len = name->len; + uint32_t *p; + + RESERVE_SPACE(8 + len); + WRITE32(OP_LOOKUP); + WRITE32(len); + WRITEMEM(name->name, len); + + return 0; +} + +static void encode_share_access(struct xdr_stream *xdr, int open_flags) +{ + uint32_t *p; + + RESERVE_SPACE(8); + switch (open_flags & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ: + WRITE32(NFS4_SHARE_ACCESS_READ); + break; + case FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_WRITE); + break; + case FMODE_READ|FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_BOTH); + break; + default: + BUG(); + } + WRITE32(0); /* for linux, share_deny = 0 always */ +} + +static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + uint32_t *p; + /* + * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, + * owner 4 = 32 + */ + RESERVE_SPACE(8); + WRITE32(OP_OPEN); + WRITE32(arg->seqid); + encode_share_access(xdr, arg->open_flags); + RESERVE_SPACE(16); + WRITE64(arg->clientid); + WRITE32(4); + WRITE32(arg->id); +} + +static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4); + switch(arg->open_flags & O_EXCL) { + case 0: + WRITE32(NFS4_CREATE_UNCHECKED); + encode_attrs(xdr, arg->u.attrs, arg->server); + break; + default: + WRITE32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); + } +} + +static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + 
uint32_t *p; + + RESERVE_SPACE(4); + switch (arg->open_flags & O_CREAT) { + case 0: + WRITE32(NFS4_OPEN_NOCREATE); + break; + default: + BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); + WRITE32(NFS4_OPEN_CREATE); + encode_createmode(xdr, arg); + } +} + +static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type) +{ + uint32_t *p; + + RESERVE_SPACE(4); + switch (delegation_type) { + case 0: + WRITE32(NFS4_OPEN_DELEGATE_NONE); + break; + case FMODE_READ: + WRITE32(NFS4_OPEN_DELEGATE_READ); + break; + case FMODE_WRITE|FMODE_READ: + WRITE32(NFS4_OPEN_DELEGATE_WRITE); + break; + default: + BUG(); + } +} + +static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *name) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(NFS4_OPEN_CLAIM_NULL); + encode_string(xdr, name->len, name->name); +} + +static inline void encode_claim_previous(struct xdr_stream *xdr, int type) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); + encode_delegation_type(xdr, type); +} + +static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struct qstr *name, const nfs4_stateid *stateid) +{ + uint32_t *p; + + RESERVE_SPACE(4+sizeof(stateid->data)); + WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); + WRITEMEM(stateid->data, sizeof(stateid->data)); + encode_string(xdr, name->len, name->name); +} + +static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + encode_openhdr(xdr, arg); + encode_opentype(xdr, arg); + switch (arg->claim) { + case NFS4_OPEN_CLAIM_NULL: + encode_claim_null(xdr, arg->name); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + encode_claim_previous(xdr, arg->u.delegation_type); + break; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); + break; + default: + BUG(); + } + return 0; +} + +static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg) +{ + uint32_t *p; + + 
RESERVE_SPACE(8+sizeof(arg->stateid.data)); + WRITE32(OP_OPEN_CONFIRM); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITE32(arg->seqid); + + return 0; +} + +static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(8+sizeof(arg->stateid.data)); + WRITE32(OP_OPEN_DOWNGRADE); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITE32(arg->seqid); + encode_share_access(xdr, arg->open_flags); + return 0; +} + +static int +encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh) +{ + int len = fh->size; + uint32_t *p; + + RESERVE_SPACE(8 + len); + WRITE32(OP_PUTFH); + WRITE32(len); + WRITEMEM(fh->data, len); + + return 0; +} + +static int encode_putrootfh(struct xdr_stream *xdr) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(OP_PUTROOTFH); + + return 0; +} + +static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) +{ + extern nfs4_stateid zero_stateid; + nfs4_stateid stateid; + uint32_t *p; + + RESERVE_SPACE(16); + if (ctx->state != NULL) { + nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); + WRITEMEM(stateid.data, sizeof(stateid.data)); + } else + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); +} + +static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(OP_READ); + + encode_stateid(xdr, args->context); + + RESERVE_SPACE(12); + WRITE64(args->offset); + WRITE32(args->count); + + return 0; +} + +static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen; + uint32_t *p; + + RESERVE_SPACE(32+sizeof(nfs4_verifier)); + WRITE32(OP_READDIR); + WRITE64(readdir->cookie); + WRITEMEM(readdir->verifier.data, sizeof(readdir->verifier.data)); + WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ + WRITE32(readdir->count); + WRITE32(2); + 
if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) { + WRITE32(0); + WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID); + } else { + WRITE32(FATTR4_WORD0_FILEID); + WRITE32(0); + } + + /* set up reply kvec + * toplevel_status + taglen + rescount + OP_PUTFH + status + * + OP_READDIR + status + verifer(2) = 9 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages, + readdir->pgbase, readdir->count); + + return 0; +} + +static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(OP_READLINK); + + /* set up reply kvec + * toplevel_status + taglen + rescount + OP_PUTFH + status + * + OP_READLINK + status + string length = 8 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + 8) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->pages, + readlink->pgbase, readlink->pglen); + + return 0; +} + +static int encode_remove(struct xdr_stream *xdr, const struct qstr *name) +{ + uint32_t *p; + + RESERVE_SPACE(8 + name->len); + WRITE32(OP_REMOVE); + WRITE32(name->len); + WRITEMEM(name->name, name->len); + + return 0; +} + +static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname) +{ + uint32_t *p; + + RESERVE_SPACE(8 + oldname->len); + WRITE32(OP_RENAME); + WRITE32(oldname->len); + WRITEMEM(oldname->name, oldname->len); + + RESERVE_SPACE(4 + newname->len); + WRITE32(newname->len); + WRITEMEM(newname->name, newname->len); + + return 0; +} + +static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid) +{ + uint32_t *p; + + RESERVE_SPACE(12); + WRITE32(OP_RENEW); + WRITE64(client_stateid->cl_clientid); + + return 0; +} + +static int +encode_savefh(struct xdr_stream *xdr) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(OP_SAVEFH); + + return 0; +} + +static int 
encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server) +{ + int status; + uint32_t *p; + + RESERVE_SPACE(4+sizeof(arg->stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + + if ((status = encode_attrs(xdr, arg->iap, server))) + return status; + + return 0; +} + +static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid) +{ + uint32_t *p; + + RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data)); + WRITE32(OP_SETCLIENTID); + WRITEMEM(setclientid->sc_verifier->data, sizeof(setclientid->sc_verifier->data)); + + encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + RESERVE_SPACE(4); + WRITE32(setclientid->sc_prog); + encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); + encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); + RESERVE_SPACE(4); + WRITE32(setclientid->sc_cb_ident); + + return 0; +} + +static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state) +{ + uint32_t *p; + + RESERVE_SPACE(12 + sizeof(client_state->cl_confirm.data)); + WRITE32(OP_SETCLIENTID_CONFIRM); + WRITE64(client_state->cl_clientid); + WRITEMEM(client_state->cl_confirm.data, sizeof(client_state->cl_confirm.data)); + + return 0; +} + +static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(OP_WRITE); + + encode_stateid(xdr, args->context); + + RESERVE_SPACE(16); + WRITE64(args->offset); + WRITE32(args->stable); + WRITE32(args->count); + + xdr_write_pages(xdr, args->pages, args->pgbase, args->count); + + return 0; +} + +static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid) +{ + uint32_t *p; + + RESERVE_SPACE(20); + + WRITE32(OP_DELEGRETURN); + WRITEMEM(stateid->data, sizeof(stateid->data)); + return 0; + +} +/* + * END OF "GENERIC" ENCODE ROUTINES. 
+ */ + +/* + * Encode an ACCESS request + */ +static int nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, const struct nfs4_accessargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->fh)) == 0) + status = encode_access(&xdr, args->access); + return status; +} + +/* + * Encode LOOKUP request + */ +static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 4, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + goto out; + if ((status = encode_lookup(&xdr, args->name)) != 0) + goto out; + if ((status = encode_getfh(&xdr)) != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: + return status; +} + +/* + * Encode LOOKUP_ROOT request + */ +static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_root_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putrootfh(&xdr)) != 0) + goto out; + if ((status = encode_getfh(&xdr)) == 0) + status = encode_getfattr(&xdr, args->bitmask); +out: + return status; +} + +/* + * Encode REMOVE request + */ +static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct nfs4_remove_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->fh)) == 0) + status = encode_remove(&xdr, args->name); + return status; +} + +/* + * Encode RENAME request + */ +static int 
nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct nfs4_rename_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 4, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->old_dir)) != 0) + goto out; + if ((status = encode_savefh(&xdr)) != 0) + goto out; + if ((status = encode_putfh(&xdr, args->new_dir)) != 0) + goto out; + status = encode_rename(&xdr, args->old_name, args->new_name); +out: + return status; +} + +/* + * Encode LINK request + */ +static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs4_link_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 4, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->fh)) != 0) + goto out; + if ((status = encode_savefh(&xdr)) != 0) + goto out; + if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + goto out; + status = encode_link(&xdr, args->name); +out: + return status; +} + +/* + * Encode CREATE request + */ +static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 4, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + goto out; + if ((status = encode_create(&xdr, args)) != 0) + goto out; + if ((status = encode_getfh(&xdr)) != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: + return status; +} + +/* + * Encode SYMLINK request + */ +static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args) +{ + return nfs4_xdr_enc_create(req, p, args); +} + +/* + * Encode GETATTR request + */ +static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, const struct 
nfs4_getattr_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->fh)) == 0) + status = encode_getfattr(&xdr, args->bitmask); + return status; +} + +/* + * Encode a CLOSE request + */ +static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_close(&xdr, args); +out: + return status; +} + +/* + * Encode an OPEN request + */ +static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 4, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open(&xdr, args); + if (status) + goto out; + status = encode_getfh(&xdr); + if (status) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: + return status; +} + +/* + * Encode an OPEN_CONFIRM request + */ +static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_open_confirm(&xdr, args); +out: + return status; +} + +/* + * Encode an OPEN request with no attributes. 
+ */ +static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open(&xdr, args); +out: + return status; +} + +/* + * Encode an OPEN_DOWNGRADE request + */ +static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_downgrade(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCK request + */ +static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lock(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKT request + */ +static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lockt(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKU request + */ +static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, 
p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_locku(&xdr, args); +out: + return status; +} + +/* + * Encode a READLINK request + */ +static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readlink *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_readlink(&xdr, args, req); +out: + return status; +} + +/* + * Encode a READDIR request + */ +static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readdir_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_readdir(&xdr, args, req); +out: + return status; +} + +/* + * Encode a READ request + */ +static int nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_read(&xdr, args); + if (status) + goto out; + + /* set up reply kvec + * toplevel status + taglen=0 + rescount + OP_PUTFH + status + * + OP_READ + status + eof + datalen = 9 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, args->count); +out: + return status; +} + +/* + * Encode an SETATTR request + */ +static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, uint32_t *p, struct 
nfs_setattrargs *args) + +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_setattr(&xdr, args, args->server); + if(status) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: + return status; +} + +/* + * Encode a WRITE request + */ +static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_write(&xdr, args); +out: + return status; +} + +/* + * a COMMIT request + */ +static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_commit(&xdr, args); +out: + return status; +} + +/* + * FSINFO request + */ +static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fsinfo_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (!status) + status = encode_fsinfo(&xdr, args->bitmask); + return status; +} + +/* + * a PATHCONF request + */ +static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args) +{ + extern u32 nfs4_pathconf_bitmap[2]; + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + 
xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (!status) + status = encode_getattr_one(&xdr, + args->bitmask[0] & nfs4_pathconf_bitmap[0]); + return status; +} + +/* + * a STATFS request + */ +static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args) +{ + extern u32 nfs4_statfs_bitmap[]; + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status == 0) + status = encode_getattr_two(&xdr, + args->bitmask[0] & nfs4_statfs_bitmap[0], + args->bitmask[1] & nfs4_statfs_bitmap[1]); + return status; +} + +/* + * GETATTR_BITMAP request + */ +static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const struct nfs_fh *fhandle) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fhandle); + if (status == 0) + status = encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| + FATTR4_WORD0_LINK_SUPPORT| + FATTR4_WORD0_SYMLINK_SUPPORT| + FATTR4_WORD0_ACLSUPPORT); + return status; +} + +/* + * a RENEW request + */ +static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + return encode_renew(&xdr, clp); +} + +/* + * a SETCLIENTID request + */ +static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nfs4_setclientid *sc) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + return encode_setclientid(&xdr, sc); +} + +/* + * 
a SETCLIENTID_CONFIRM request + */ +static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_setclientid_confirm(&xdr, clp); + if (!status) + status = encode_putrootfh(&xdr); + if (!status) + status = encode_fsinfo(&xdr, lease_bitmap); + return status; +} + +/* + * DELEGRETURN request + */ +static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const struct nfs4_delegreturnargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->fhandle)) == 0) + status = encode_delegreturn(&xdr, args->stateid); + return status; +} + +/* + * START OF "GENERIC" DECODE ROUTINES. + * These may look a little ugly since they are imported from a "generic" + * set of XDR encode/decode routines which are intended to be shared by + * all of our NFSv4 implementations (OpenBSD, MacOS X...). + * + * If the pain of reading these is too great, it should be a straightforward + * task to translate them into Linux-specific versions which are more + * consistent with the style used in NFSv2/v3... 
+ */ +#define READ32(x) (x) = ntohl(*p++) +#define READ64(x) do { \ + (x) = (u64)ntohl(*p++) << 32; \ + (x) |= ntohl(*p++); \ +} while (0) +#define READTIME(x) do { \ + p++; \ + (x.tv_sec) = ntohl(*p++); \ + (x.tv_nsec) = ntohl(*p++); \ +} while (0) +#define COPYMEM(x,nbytes) do { \ + memcpy((x), p, nbytes); \ + p += XDR_QUADLEN(nbytes); \ +} while (0) + +#define READ_BUF(nbytes) do { \ + p = xdr_inline_decode(xdr, nbytes); \ + if (!p) { \ + printk(KERN_WARNING "%s: reply buffer overflowed in line %d.", \ + __FUNCTION__, __LINE__); \ + return -EIO; \ + } \ +} while (0) + +static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string) +{ + uint32_t *p; + + READ_BUF(4); + READ32(*len); + READ_BUF(*len); + *string = (char *)p; + return 0; +} + +static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ + uint32_t *p; + + READ_BUF(8); + READ32(hdr->status); + READ32(hdr->taglen); + + READ_BUF(hdr->taglen + 4); + hdr->tag = (char *)p; + p += XDR_QUADLEN(hdr->taglen); + READ32(hdr->nops); + return 0; +} + +static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +{ + uint32_t *p; + uint32_t opnum; + int32_t nfserr; + + READ_BUF(8); + READ32(opnum); + if (opnum != expected) { + printk(KERN_NOTICE + "nfs4_decode_op_hdr: Server returned operation" + " %d but we issued a request for %d\n", + opnum, expected); + return -EIO; + } + READ32(nfserr); + if (nfserr != NFS_OK) + return -nfs_stat_to_errno(nfserr); + return 0; +} + +/* Dummy routine */ +static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) +{ + uint32_t *p; + uint32_t strlen; + char *str; + + READ_BUF(12); + return decode_opaque_inline(xdr, &strlen, &str); +} + +static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +{ + uint32_t bmlen, *p; + + READ_BUF(4); + READ32(bmlen); + + bitmap[0] = bitmap[1] = 0; + READ_BUF((bmlen << 2)); + if (bmlen > 0) { + READ32(bitmap[0]); + if (bmlen > 1) + READ32(bitmap[1]); + 
} + return 0; +} + +static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, uint32_t **savep) +{ + uint32_t *p; + + READ_BUF(4); + READ32(*attrlen); + *savep = xdr->p; + return 0; +} + +static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) +{ + if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { + decode_attr_bitmap(xdr, bitmask); + bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; + } else + bitmask[0] = bitmask[1] = 0; + dprintk("%s: bitmask=0x%x%x\n", __FUNCTION__, bitmask[0], bitmask[1]); + return 0; +} + +static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type) +{ + uint32_t *p; + + *type = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { + READ_BUF(4); + READ32(*type); + if (*type < NF4REG || *type > NF4NAMEDATTR) { + dprintk("%s: bad type %d\n", __FUNCTION__, *type); + return -EIO; + } + bitmap[0] &= ~FATTR4_WORD0_TYPE; + } + dprintk("%s: type=0%o\n", __FUNCTION__, nfs_type2fmt[*type].nfs2type); + return 0; +} + +static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) +{ + uint32_t *p; + + *change = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { + READ_BUF(8); + READ64(*change); + bitmap[0] &= ~FATTR4_WORD0_CHANGE; + } + dprintk("%s: change attribute=%Lu\n", __FUNCTION__, + (unsigned long long)*change); + return 0; +} + +static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) +{ + uint32_t *p; + + *size = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { + READ_BUF(8); + READ64(*size); + bitmap[0] &= ~FATTR4_WORD0_SIZE; + } + dprintk("%s: file size=%Lu\n", __FUNCTION__, (unsigned long long)*size); + return 0; +} + +static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, 
uint32_t *res)
{
	uint32_t *p;

	/* *res stays 0 ("false") when the attribute is not in the reply */
	*res = 0;
	/* (BIT - 1U) masks every bit below this attribute's own bit */
	if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
		return -EIO;
	if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
		READ_BUF(4);
		READ32(*res);
		bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
	}
	dprintk("%s: link support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true");
	return 0;
}

/*
 * Decode the symlink-support flag into *res (0 when absent).
 * Returns -EIO if a lower bitmap[0] bit is still set or the reply is too
 * short, 0 otherwise.
 */
static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
{
	uint32_t *p;

	*res = 0;
	if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
		return -EIO;
	if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
		READ_BUF(4);
		READ32(*res);
		bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
	}
	dprintk("%s: symlink support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true");
	return 0;
}

/*
 * Decode the filesystem id (two 64-bit values, major then minor) into *fsid;
 * both fields are 0 when the attribute is absent.
 * Returns -EIO if a lower bitmap[0] bit is still set or the reply is too
 * short, 0 otherwise.
 */
static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid)
{
	uint32_t *p;

	fsid->major = 0;
	fsid->minor = 0;
	if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
		return -EIO;
	if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
		READ_BUF(16);
		READ64(fsid->major);
		READ64(fsid->minor);
		bitmap[0] &= ~FATTR4_WORD0_FSID;
	}
	dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __FUNCTION__,
			(unsigned long long)fsid->major,
			(unsigned long long)fsid->minor);
	return 0;
}

/*
 * Decode the lease time into *res; *res defaults to 60 when the attribute
 * is absent.
 * Returns -EIO if a lower bitmap[0] bit is still set or the reply is too
 * short, 0 otherwise.
 */
static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
{
	uint32_t *p;

	*res = 60;
	if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
		return -EIO;
	if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
		READ_BUF(4);
		READ32(*res);
		bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
	}
	/* label the value as what it is; it used to be logged as "file size" */
	dprintk("%s: lease time=%u\n", __FUNCTION__, (unsigned int)*res);
	return 0;
}

static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
{
	uint32_t *p;

	/* default when the attribute is absent: both ALLOW and DENY flags set */
	*res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL;
	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
		return -EIO;
	if (likely(bitmap[0] &
FATTR4_WORD0_ACLSUPPORT)) { + READ_BUF(4); + READ32(*res); + bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; + } + dprintk("%s: ACLs supported=%u\n", __FUNCTION__, (unsigned int)*res); + return 0; +} + +static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) +{ + uint32_t *p; + + *fileid = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { + READ_BUF(8); + READ64(*fileid); + bitmap[0] &= ~FATTR4_WORD0_FILEID; + } + dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid); + return 0; +} + +static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { + READ_BUF(8); + READ64(*res); + bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; + } + dprintk("%s: files avail=%Lu\n", __FUNCTION__, (unsigned long long)*res); + return status; +} + +static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { + READ_BUF(8); + READ64(*res); + bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; + } + dprintk("%s: files free=%Lu\n", __FUNCTION__, (unsigned long long)*res); + return status; +} + +static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { + READ_BUF(8); + READ64(*res); + bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; + } + dprintk("%s: files total=%Lu\n", __FUNCTION__, (unsigned long long)*res); + return status; +} + +static int decode_attr_maxfilesize(struct xdr_stream 
*xdr, uint32_t *bitmap, uint64_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { + READ_BUF(8); + READ64(*res); + bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; + } + dprintk("%s: maxfilesize=%Lu\n", __FUNCTION__, (unsigned long long)*res); + return status; +} + +static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) +{ + uint32_t *p; + int status = 0; + + *maxlink = 1; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { + READ_BUF(4); + READ32(*maxlink); + bitmap[0] &= ~FATTR4_WORD0_MAXLINK; + } + dprintk("%s: maxlink=%u\n", __FUNCTION__, *maxlink); + return status; +} + +static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) +{ + uint32_t *p; + int status = 0; + + *maxname = 1024; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { + READ_BUF(4); + READ32(*maxname); + bitmap[0] &= ~FATTR4_WORD0_MAXNAME; + } + dprintk("%s: maxname=%u\n", __FUNCTION__, *maxname); + return status; +} + +static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 1024; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXREAD - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) { + uint64_t maxread; + READ_BUF(8); + READ64(maxread); + if (maxread > 0x7FFFFFFF) + maxread = 0x7FFFFFFF; + *res = (uint32_t)maxread; + bitmap[0] &= ~FATTR4_WORD0_MAXREAD; + } + dprintk("%s: maxread=%lu\n", __FUNCTION__, (unsigned long)*res); + return status; +} + +static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 1024; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXWRITE - 1U))) + return -EIO; + if 
(likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) { + uint64_t maxwrite; + READ_BUF(8); + READ64(maxwrite); + if (maxwrite > 0x7FFFFFFF) + maxwrite = 0x7FFFFFFF; + *res = (uint32_t)maxwrite; + bitmap[0] &= ~FATTR4_WORD0_MAXWRITE; + } + dprintk("%s: maxwrite=%lu\n", __FUNCTION__, (unsigned long)*res); + return status; +} + +static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode) +{ + uint32_t *p; + + *mode = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { + READ_BUF(4); + READ32(*mode); + *mode &= ~S_IFMT; + bitmap[1] &= ~FATTR4_WORD1_MODE; + } + dprintk("%s: file mode=0%o\n", __FUNCTION__, (unsigned int)*mode); + return 0; +} + +static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) +{ + uint32_t *p; + + *nlink = 1; + if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { + READ_BUF(4); + READ32(*nlink); + bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; + } + dprintk("%s: nlink=%u\n", __FUNCTION__, (unsigned int)*nlink); + return 0; +} + +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid) +{ + uint32_t len, *p; + + *uid = -2; + if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { + READ_BUF(4); + READ32(len); + READ_BUF(len); + if (len < XDR_MAX_NETOBJ) { + if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0) + dprintk("%s: nfs_map_name_to_uid failed!\n", + __FUNCTION__); + } else + printk(KERN_WARNING "%s: name too long (%u)!\n", + __FUNCTION__, len); + bitmap[1] &= ~FATTR4_WORD1_OWNER; + } + dprintk("%s: uid=%d\n", __FUNCTION__, (int)*uid); + return 0; +} + +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid) +{ + uint32_t len, *p; + + *gid = -2; + if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 
1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { + READ_BUF(4); + READ32(len); + READ_BUF(len); + if (len < XDR_MAX_NETOBJ) { + if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0) + dprintk("%s: nfs_map_group_to_gid failed!\n", + __FUNCTION__); + } else + printk(KERN_WARNING "%s: name too long (%u)!\n", + __FUNCTION__, len); + bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; + } + dprintk("%s: gid=%d\n", __FUNCTION__, (int)*gid); + return 0; +} + +static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) +{ + uint32_t major = 0, minor = 0, *p; + + *rdev = MKDEV(0,0); + if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) { + dev_t tmp; + + READ_BUF(8); + READ32(major); + READ32(minor); + tmp = MKDEV(major, minor); + if (MAJOR(tmp) == major && MINOR(tmp) == minor) + *rdev = tmp; + bitmap[1] &= ~ FATTR4_WORD1_RAWDEV; + } + dprintk("%s: rdev=(0x%x:0x%x)\n", __FUNCTION__, major, minor); + return 0; +} + +static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { + READ_BUF(8); + READ64(*res); + bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; + } + dprintk("%s: space avail=%Lu\n", __FUNCTION__, (unsigned long long)*res); + return status; +} + +static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { + READ_BUF(8); + READ64(*res); + bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; + } + dprintk("%s: space free=%Lu\n", __FUNCTION__, (unsigned long long)*res); + return status; +} + +static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, 
uint64_t *res) +{ + uint32_t *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { + READ_BUF(8); + READ64(*res); + bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; + } + dprintk("%s: space total=%Lu\n", __FUNCTION__, (unsigned long long)*res); + return status; +} + +static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) +{ + uint32_t *p; + + *used = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { + READ_BUF(8); + READ64(*used); + bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; + } + dprintk("%s: space used=%Lu\n", __FUNCTION__, + (unsigned long long)*used); + return 0; +} + +static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) +{ + uint32_t *p; + uint64_t sec; + uint32_t nsec; + + READ_BUF(12); + READ64(sec); + READ32(nsec); + time->tv_sec = (time_t)sec; + time->tv_nsec = (long)nsec; + return 0; +} + +static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_ACCESS - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) { + status = decode_attr_time(xdr, time); + bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS; + } + dprintk("%s: atime=%ld\n", __FUNCTION__, (long)time->tv_sec); + return status; +} + +static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_METADATA - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) { + status = decode_attr_time(xdr, time); + bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA; + } + dprintk("%s: ctime=%ld\n", __FUNCTION__, (long)time->tv_sec); + return status; +} + +static 
int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_MODIFY - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) { + status = decode_attr_time(xdr, time); + bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY; + } + dprintk("%s: mtime=%ld\n", __FUNCTION__, (long)time->tv_sec); + return status; +} + +static int verify_attr_len(struct xdr_stream *xdr, uint32_t *savep, uint32_t attrlen) +{ + unsigned int attrwords = XDR_QUADLEN(attrlen); + unsigned int nwords = xdr->p - savep; + + if (unlikely(attrwords != nwords)) { + printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n", + __FUNCTION__, + attrwords << 2, + (attrwords < nwords) ? '<' : '>', + nwords << 2); + return -EIO; + } + return 0; +} + +static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) +{ + uint32_t *p; + + READ_BUF(20); + READ32(cinfo->atomic); + READ64(cinfo->before); + READ64(cinfo->after); + return 0; +} + +static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) +{ + uint32_t *p; + uint32_t supp, acc; + int status; + + status = decode_op_hdr(xdr, OP_ACCESS); + if (status) + return status; + READ_BUF(8); + READ32(supp); + READ32(acc); + access->supported = supp; + access->access = acc; + return 0; +} + +static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_CLOSE); + if (status) + return status; + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + return 0; +} + +static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_COMMIT); + if (status) + return status; + READ_BUF(8); + COPYMEM(res->verf->verifier, 8); + return 0; +} + +static int 
decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) +{ + uint32_t *p; + uint32_t bmlen; + int status; + + status = decode_op_hdr(xdr, OP_CREATE); + if (status) + return status; + if ((status = decode_change_info(xdr, cinfo))) + return status; + READ_BUF(4); + READ32(bmlen); + READ_BUF(bmlen << 2); + return 0; +} + +static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) + goto xdr_error; + if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) + goto xdr_error; + if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) + goto xdr_error; + if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0) + goto xdr_error; + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + if (status != 0) + printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status); + return status; +} + +static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + + if ((status = decode_attr_files_avail(xdr, bitmap, &fsstat->afiles)) != 0) + goto xdr_error; + if ((status = decode_attr_files_free(xdr, bitmap, &fsstat->ffiles)) != 0) + goto xdr_error; + if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0) + goto xdr_error; + if ((status = decode_attr_space_avail(xdr, 
bitmap, &fsstat->abytes)) != 0) + goto xdr_error; + if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0) + goto xdr_error; + if ((status = decode_attr_space_total(xdr, bitmap, &fsstat->tbytes)) != 0) + goto xdr_error; + + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + if (status != 0) + printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status); + return status; +} + +static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + + if ((status = decode_attr_maxlink(xdr, bitmap, &pathconf->max_link)) != 0) + goto xdr_error; + if ((status = decode_attr_maxname(xdr, bitmap, &pathconf->max_namelen)) != 0) + goto xdr_error; + + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + if (status != 0) + printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status); + return status; +} + +static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}, + type; + int status, fmode = 0; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + + fattr->bitmap[0] = bitmap[0]; + fattr->bitmap[1] = bitmap[1]; + + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + + + if ((status = decode_attr_type(xdr, bitmap, &type)) != 0) + goto xdr_error; + fattr->type = nfs_type2fmt[type].nfs2type; + fmode = nfs_type2fmt[type].mode; + + if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0) + goto xdr_error; + if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) + goto 
xdr_error; + if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0) + goto xdr_error; + if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) + goto xdr_error; + if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) + goto xdr_error; + fattr->mode |= fmode; + if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) + goto xdr_error; + if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0) + goto xdr_error; + if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0) + goto xdr_error; + if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) + goto xdr_error; + if ((status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used)) != 0) + goto xdr_error; + if ((status = decode_attr_time_access(xdr, bitmap, &fattr->atime)) != 0) + goto xdr_error; + if ((status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime)) != 0) + goto xdr_error; + if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) + goto xdr_error; + if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) { + fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; + fattr->timestamp = jiffies; + } +xdr_error: + if (status != 0) + printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status); + return status; +} + + +static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) +{ + uint32_t *savep; + uint32_t attrlen, bitmap[2]; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + + fsinfo->rtmult = fsinfo->wtmult = 512; /* ??? 
*/ + + if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0) + goto xdr_error; + if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0) + goto xdr_error; + if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0) + goto xdr_error; + fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax; + if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) + goto xdr_error; + fsinfo->wtpref = fsinfo->wtmax; + + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + if (status != 0) + printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status); + return status; +} + +static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh) +{ + uint32_t *p; + uint32_t len; + int status; + + status = decode_op_hdr(xdr, OP_GETFH); + if (status) + return status; + /* Zero handle first to allow comparisons */ + memset(fh, 0, sizeof(*fh)); + + READ_BUF(4); + READ32(len); + if (len > NFS4_FHSIZE) + return -EIO; + fh->size = len; + READ_BUF(len); + COPYMEM(fh->data, len); + return 0; +} + +static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_LINK); + if (status) + return status; + return decode_change_info(xdr, cinfo); +} + +/* + * We create the owner, so we know a proper owner.id length is 4. 
+ */ +static int decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied) +{ + uint32_t *p; + uint32_t namelen; + + READ_BUF(32); + READ64(denied->offset); + READ64(denied->length); + READ32(denied->type); + READ64(denied->owner.clientid); + READ32(namelen); + READ_BUF(namelen); + if (namelen == 4) + READ32(denied->owner.id); + return -NFS4ERR_DENIED; +} + +static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCK); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } else if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + int status; + status = decode_op_hdr(xdr, OP_LOCKT); + if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCKU); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } + return status; +} + +static int decode_lookup(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_LOOKUP); +} + +/* This is too sick! 
*/ +static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) +{ + uint32_t *p; + uint32_t limit_type, nblocks, blocksize; + + READ_BUF(12); + READ32(limit_type); + switch (limit_type) { + case 1: + READ64(*maxsize); + break; + case 2: + READ32(nblocks); + READ32(blocksize); + *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; + } + return 0; +} + +static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +{ + uint32_t *p; + uint32_t delegation_type; + + READ_BUF(4); + READ32(delegation_type); + if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { + res->delegation_type = 0; + return 0; + } + READ_BUF(20); + COPYMEM(res->delegation.data, sizeof(res->delegation.data)); + READ32(res->do_recall); + switch (delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + res->delegation_type = FMODE_READ; + break; + case NFS4_OPEN_DELEGATE_WRITE: + res->delegation_type = FMODE_WRITE|FMODE_READ; + if (decode_space_limit(xdr, &res->maxsize) < 0) + return -EIO; + } + return decode_ace(xdr, NULL, res->server->nfs4_state); +} + +static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) +{ + uint32_t *p; + uint32_t bmlen; + int status; + + status = decode_op_hdr(xdr, OP_OPEN); + if (status) + return status; + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + + decode_change_info(xdr, &res->cinfo); + + READ_BUF(8); + READ32(res->rflags); + READ32(bmlen); + if (bmlen > 10) + goto xdr_error; + + READ_BUF(bmlen << 2); + p += bmlen; + return decode_delegation(xdr, res); +xdr_error: + printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__); + return -EIO; +} + +static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); + if (status) + return status; + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + return 0; +} + +static int decode_open_downgrade(struct 
xdr_stream *xdr, struct nfs_closeres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); + if (status) + return status; + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + return 0; +} + +static int decode_putfh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_PUTFH); +} + +static int decode_putrootfh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_PUTROOTFH); +} + +static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) +{ + struct kvec *iov = req->rq_rcv_buf.head; + uint32_t *p; + uint32_t count, eof, recvd, hdrlen; + int status; + + status = decode_op_hdr(xdr, OP_READ); + if (status) + return status; + READ_BUF(8); + READ32(eof); + READ32(count); + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (count > recvd) { + printk(KERN_WARNING "NFS: server cheating in read reply: " + "count %u > recvd %u\n", count, recvd); + count = recvd; + eof = 0; + } + xdr_read_pages(xdr, count); + res->eof = eof; + res->count = count; + return 0; +} + +static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct page *page = *rcvbuf->pages; + struct kvec *iov = rcvbuf->head; + unsigned int nr, pglen = rcvbuf->page_len; + uint32_t *end, *entry, *p, *kaddr; + uint32_t len, attrlen; + int hdrlen, recvd, status; + + status = decode_op_hdr(xdr, OP_READDIR); + if (status) + return status; + READ_BUF(8); + COPYMEM(readdir->verifier.data, 8); + + hdrlen = (char *) p - (char *) iov->iov_base; + recvd = rcvbuf->len - hdrlen; + if (pglen > recvd) + pglen = recvd; + xdr_read_pages(xdr, pglen); + + BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); + kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); + end = (uint32_t *) ((char *)p + pglen + readdir->pgbase); + entry = p; + for (nr = 0; *p++; nr++) { + if (p + 3 > 
end) + goto short_pkt; + p += 2; /* cookie */ + len = ntohl(*p++); /* filename length */ + if (len > NFS4_MAXNAMLEN) { + printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); + goto err_unmap; + } + p += XDR_QUADLEN(len); + if (p + 1 > end) + goto short_pkt; + len = ntohl(*p++); /* bitmap length */ + p += len; + if (p + 1 > end) + goto short_pkt; + attrlen = XDR_QUADLEN(ntohl(*p++)); + p += attrlen; /* attributes */ + if (p + 2 > end) + goto short_pkt; + entry = p; + } + if (!nr && (entry[0] != 0 || entry[1] == 0)) + goto short_pkt; +out: + kunmap_atomic(kaddr, KM_USER0); + return 0; +short_pkt: + entry[0] = entry[1] = 0; + /* truncate listing ? */ + if (!nr) { + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + goto out; +err_unmap: + kunmap_atomic(kaddr, KM_USER0); + return -errno_NFSERR_IO; +} + +static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + int hdrlen, len, recvd; + uint32_t *p; + char *kaddr; + int status; + + status = decode_op_hdr(xdr, OP_READLINK); + if (status) + return status; + + /* Convert length of symlink */ + READ_BUF(4); + READ32(len); + if (len >= rcvbuf->page_len || len <= 0) { + dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); + return -ENAMETOOLONG; + } + hdrlen = (char *) xdr->p - (char *) iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (recvd < len) { + printk(KERN_WARNING "NFS: server cheating in readlink reply: " + "count %u > recvd %u\n", len, recvd); + return -EIO; + } + xdr_read_pages(xdr, len); + /* + * The XDR encode routine has set things up so that + * the link text will be copied directly into the + * buffer. We just have to do overflow-checking, + * and and null-terminate the text (the VFS expects + * null-termination). 
+ */ + kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); + kaddr[len+rcvbuf->page_base] = '\0'; + kunmap_atomic(kaddr, KM_USER0); + return 0; +} + +static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_REMOVE); + if (status) + goto out; + status = decode_change_info(xdr, cinfo); +out: + return status; +} + +static int decode_rename(struct xdr_stream *xdr, struct nfs4_change_info *old_cinfo, + struct nfs4_change_info *new_cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_RENAME); + if (status) + goto out; + if ((status = decode_change_info(xdr, old_cinfo))) + goto out; + status = decode_change_info(xdr, new_cinfo); +out: + return status; +} + +static int decode_renew(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_RENEW); +} + +static int +decode_savefh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_SAVEFH); +} + +static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) +{ + uint32_t *p; + uint32_t bmlen; + int status; + + + status = decode_op_hdr(xdr, OP_SETATTR); + if (status) + return status; + READ_BUF(4); + READ32(bmlen); + READ_BUF(bmlen << 2); + return 0; +} + +static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) +{ + uint32_t *p; + uint32_t opnum; + int32_t nfserr; + + READ_BUF(8); + READ32(opnum); + if (opnum != OP_SETCLIENTID) { + printk(KERN_NOTICE + "nfs4_decode_setclientid: Server returned operation" + " %d\n", opnum); + return -EIO; + } + READ32(nfserr); + if (nfserr == NFS_OK) { + READ_BUF(8 + sizeof(clp->cl_confirm.data)); + READ64(clp->cl_clientid); + COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data)); + } else if (nfserr == NFSERR_CLID_INUSE) { + uint32_t len; + + /* skip netid string */ + READ_BUF(4); + READ32(len); + READ_BUF(len); + + /* skip uaddr string */ + READ_BUF(4); + READ32(len); + READ_BUF(len); + return -NFSERR_CLID_INUSE; + } else + return 
-nfs_stat_to_errno(nfserr); + + return 0; +} + +static int decode_setclientid_confirm(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); +} + +static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_WRITE); + if (status) + return status; + + READ_BUF(16); + READ32(res->count); + READ32(res->verf->committed); + COPYMEM(res->verf->verifier, 8); + return 0; +} + +static int decode_delegreturn(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_DELEGRETURN); +} + +/* + * Decode OPEN_DOWNGRADE response + */ +static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_downgrade(&xdr, res); +out: + return status; +} + +/* + * END OF "GENERIC" DECODE ROUTINES. 
+ */ + +/* + * Decode ACCESS response + */ +static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_accessres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) == 0) + status = decode_access(&xdr, res); +out: + return status; +} + +/* + * Decode LOOKUP response + */ +static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_lookup(&xdr)) != 0) + goto out; + if ((status = decode_getfh(&xdr, res->fh)) != 0) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); +out: + return status; +} + +/* + * Decode LOOKUP_ROOT response + */ +static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + goto out; + if ((status = decode_putrootfh(&xdr)) != 0) + goto out; + if ((status = decode_getfh(&xdr, res->fh)) == 0) + status = decode_getfattr(&xdr, res->fattr, res->server); +out: + return status; +} + +/* + * Decode REMOVE response + */ +static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) == 0) + status = decode_remove(&xdr, cinfo); +out: + return status; +} + +/* + * Decode RENAME 
response + */ +static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_rename_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_savefh(&xdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo); +out: + return status; +} + +/* + * Decode LINK response + */ +static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_savefh(&xdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + status = decode_link(&xdr, cinfo); +out: + return status; +} + +/* + * Decode CREATE response + */ +static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0) + goto out; + if ((status = decode_getfh(&xdr, res->fh)) != 0) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); + if (status == NFS4ERR_DELAY) + status = 0; +out: + return status; +} + +/* + * Decode SYMLINK response + */ +static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res) +{ + return nfs4_xdr_dec_create(rqstp, p, res); +} + +/* + * Decode GETATTR response + */ +static int 
nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); +out: + return status; + +} + + +/* + * Decode CLOSE response + */ +static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_close(&xdr, res); +out: + return status; +} + +/* + * Decode OPEN response + */ +static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); + if (status) + goto out; + status = decode_getfh(&xdr, &res->fh); + if (status) + goto out; + status = decode_getfattr(&xdr, res->f_attr, res->server); + if (status == NFS4ERR_DELAY) + status = 0; +out: + return status; +} + +/* + * Decode OPEN_CONFIRM response + */ +static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_open_confirmres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_confirm(&xdr, res); +out: + return status; +} + +/* + * Decode OPEN response + */ 
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); +out: + return status; +} + +/* + * Decode SETATTR response + */ +static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_setattrres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); + if (status == NFS4ERR_DELAY) + status = 0; +out: + return status; +} + +/* + * Decode LOCK response + */ +static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lock(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKT response + */ +static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lockt(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKU response + */ +static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct 
xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_locku(&xdr, res); +out: + return status; +} + +/* + * Decode READLINK response + */ +static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_readlink(&xdr, rqstp); +out: + return status; +} + +/* + * Decode READDIR response + */ +static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_readdir_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_readdir(&xdr, rqstp, res); +out: + return status; +} + +/* + * Decode Read response + */ +static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_readres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_read(&xdr, rqstp, res); + if (!status) + status = res->count; +out: + return status; +} + +/* + * Decode WRITE response + */ +static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = 
decode_putfh(&xdr); + if (status) + goto out; + status = decode_write(&xdr, res); + if (!status) + status = res->count; +out: + return status; +} + +/* + * Decode COMMIT response + */ +static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_commit(&xdr, res); +out: + return status; +} + +/* + * FSINFO request + */ +static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + +/* + * PATHCONF request + */ +static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_pathconf *pathconf) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_pathconf(&xdr, pathconf); + return status; +} + +/* + * STATFS request + */ +static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fsstat *fsstat) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_statfs(&xdr, fsstat); + return status; +} + +/* + * GETATTR_BITMAP request + */ +static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, uint32_t *p, struct 
nfs4_server_caps_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + status = decode_server_caps(&xdr, res); +out: + return status; +} + +/* + * Decode RENEW response + */ +static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_renew(&xdr); + return status; +} + +/* + * a SETCLIENTID request + */ +static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, + struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid(&xdr, clp); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + +/* + * a SETCLIENTID_CONFIRM request + */ +static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid_confirm(&xdr); + if (!status) + status = decode_putrootfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + +/* + * DELEGRETURN request + */ +static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status == 0) { + status = 
decode_putfh(&xdr); + if (status == 0) + status = decode_delegreturn(&xdr); + } + return status; +} + +uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) +{ + uint32_t bitmap[2] = {0}; + uint32_t len; + + if (!*p++) { + if (!*p) + return ERR_PTR(-EAGAIN); + entry->eof = 1; + return ERR_PTR(-EBADCOOKIE); + } + + entry->prev_cookie = entry->cookie; + p = xdr_decode_hyper(p, &entry->cookie); + entry->len = ntohl(*p++); + entry->name = (const char *) p; + p += XDR_QUADLEN(entry->len); + + /* + * In case the server doesn't return an inode number, + * we fake one here. (We don't use inode number 0, + * since glibc seems to choke on it...) + */ + entry->ino = 1; + + len = ntohl(*p++); /* bitmap length */ + if (len-- > 0) { + bitmap[0] = ntohl(*p++); + if (len-- > 0) { + bitmap[1] = ntohl(*p++); + p += len; + } + } + len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ + if (len > 0) { + if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) + xdr_decode_hyper(p, &entry->ino); + else if (bitmap[0] == FATTR4_WORD0_FILEID) + xdr_decode_hyper(p, &entry->ino); + p += len; + } + + entry->eof = !p[0] && p[1]; + return p; +} + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. 
+ */ +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, EPERM }, + { NFS4ERR_NOENT, ENOENT }, + { NFS4ERR_IO, errno_NFSERR_IO }, + { NFS4ERR_NXIO, ENXIO }, + { NFS4ERR_ACCESS, EACCES }, + { NFS4ERR_EXIST, EEXIST }, + { NFS4ERR_XDEV, EXDEV }, + { NFS4ERR_NOTDIR, ENOTDIR }, + { NFS4ERR_ISDIR, EISDIR }, + { NFS4ERR_INVAL, EINVAL }, + { NFS4ERR_FBIG, EFBIG }, + { NFS4ERR_NOSPC, ENOSPC }, + { NFS4ERR_ROFS, EROFS }, + { NFS4ERR_MLINK, EMLINK }, + { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, ENOTEMPTY }, + { NFS4ERR_DQUOT, EDQUOT }, + { NFS4ERR_STALE, ESTALE }, + { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, + { NFS4ERR_NOTSUPP, ENOTSUPP }, + { NFS4ERR_TOOSMALL, ETOOSMALL }, + { NFS4ERR_SERVERFAULT, ESERVERFAULT }, + { NFS4ERR_BADTYPE, EBADTYPE }, + { NFS4ERR_LOCKED, EAGAIN }, + { NFS4ERR_RESOURCE, EREMOTEIO }, + { NFS4ERR_SYMLINK, ELOOP }, + { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, EDEADLK }, + { NFS4ERR_WRONGSEC, EPERM }, /* FIXME: this needs + * to be handled by a + * middle-layer. + */ + { -1, EIO } /* stat == -1 ends the lookup loop below */ +}; + +/* + * Convert an NFSv4 status code to a local errno value. + * A code listed in nfs_errtbl yields the (positive) errno next to it. + * An unlisted code outside the range 10001..10100 yields ESERVERFAULT; + * an unlisted code inside that range is returned unchanged. + */ +static int +nfs_stat_to_errno(int stat) +{ + int i; + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + if (stat <= 10000 || stat > 10100) { + /* The server is looney tunes. */ + return ESERVERFAULT; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return stat; +} + +#ifndef MAX +# define MAX(a, b) (((a) > (b))? 
(a) : (b)) +#endif + +#define PROC(proc, argtype, restype) \ +[NFSPROC4_CLNT_##proc] = { \ + .p_proc = NFSPROC4_COMPOUND, \ + .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ + .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ + } + +struct rpc_procinfo nfs4_procedures[] = { + PROC(READ, enc_read, dec_read), + PROC(WRITE, enc_write, dec_write), + PROC(COMMIT, enc_commit, dec_commit), + PROC(OPEN, enc_open, dec_open), + PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), + PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr), + PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), + PROC(CLOSE, enc_close, dec_close), + PROC(SETATTR, enc_setattr, dec_setattr), + PROC(FSINFO, enc_fsinfo, dec_fsinfo), + PROC(RENEW, enc_renew, dec_renew), + PROC(SETCLIENTID, enc_setclientid, dec_setclientid), + PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), + PROC(LOCK, enc_lock, dec_lock), + PROC(LOCKT, enc_lockt, dec_lockt), + PROC(LOCKU, enc_locku, dec_locku), + PROC(ACCESS, enc_access, dec_access), + PROC(GETATTR, enc_getattr, dec_getattr), + PROC(LOOKUP, enc_lookup, dec_lookup), + PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root), + PROC(REMOVE, enc_remove, dec_remove), + PROC(RENAME, enc_rename, dec_rename), + PROC(LINK, enc_link, dec_link), + PROC(SYMLINK, enc_symlink, dec_symlink), + PROC(CREATE, enc_create, dec_create), + PROC(PATHCONF, enc_pathconf, dec_pathconf), + PROC(STATFS, enc_statfs, dec_statfs), + PROC(READLINK, enc_readlink, dec_readlink), + PROC(READDIR, enc_readdir, dec_readdir), + PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), + PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), +}; + +struct rpc_version nfs_version4 = { + .number = 4, + .nrprocs = sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]), + .procs = nfs4_procedures +}; + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c new file mode 
100644 index 00000000000..fd5bc596fe8 --- /dev/null +++ b/fs/nfs/nfsroot.c @@ -0,0 +1,513 @@ +/* + * $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $ + * + * Copyright (C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de> + * + * Allow an NFS filesystem to be mounted as root. The way this works is: + * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. + * (2) Handle RPC negotiation with the system which replied to RARP or + * was reported as a boot server by BOOTP or manually. + * (3) The actual mounting is done later, when init() is running. + * + * + * Changes: + * + * Alan Cox : Removed get_address name clash with FPU. + * Alan Cox : Reformatted a bit. + * Gero Kuhlmann : Code cleanup + * Michael Rausch : Fixed recognition of an incoming RARP answer. + * Martin Mares : (2.0) Auto-configuration via BOOTP supported. + * Martin Mares : Manual selection of interface & BOOTP/RARP. + * Martin Mares : Using network routes instead of host routes, + * allowing the default configuration to be used + * for normal operation of the host. + * Martin Mares : Randomized timer with exponential backoff + * installed to minimize network congestion. + * Martin Mares : Code cleanup. + * Martin Mares : (2.1) BOOTP and RARP made configuration options. + * Martin Mares : Server hostname generation fixed. + * Gerd Knorr : Fixed wired inode handling + * Martin Mares : (2.2) "0.0.0.0" addresses from command line ignored. + * Martin Mares : RARP replies not tested for server address. + * Gero Kuhlmann : (2.3) Some bug fixes and code cleanup again (please + * send me your new patches _before_ bothering + * Linus so that I don' always have to cleanup + * _afterwards_ - thanks) + * Gero Kuhlmann : Last changes of Martin Mares undone. + * Gero Kuhlmann : RARP replies are tested for specified server + * again. However, it's now possible to have + * different RARP and NFS servers. + * Gero Kuhlmann : "0.0.0.0" addresses from command line are + * now mapped to INADDR_NONE. 
+ * Gero Kuhlmann : Fixed a bug which prevented BOOTP path name + * from being used (thanks to Leo Spiekman) + * Andy Walker : Allow to specify the NFS server in nfs_root + * without giving a path name + * Swen Thümmler : Allow to specify the NFS options in nfs_root + * without giving a path name. Fix BOOTP request + * for domainname (domainname is NIS domain, not + * DNS domain!). Skip dummy devices for BOOTP. + * Jacek Zapala : Fixed a bug which prevented server-ip address + * from nfsroot parameter from being used. + * Olaf Kirch : Adapted to new NFS code. + * Jakub Jelinek : Free used code segment. + * Marko Kohtala : Fixed some bugs. + * Martin Mares : Debug message cleanup + * Martin Mares : Changed to use the new generic IP layer autoconfig + * code. BOOTP and RARP moved there. + * Martin Mares : Default path now contains host name instead of + * host IP address (but host name defaults to IP + * address anyway). + * Martin Mares : Use root_server_addr appropriately during setup. + * Martin Mares : Rewrote parameter parsing, now hopefully giving + * correct overriding. + * Trond Myklebust : Add in preliminary support for NFSv3 and TCP. + * Fix bug in root_nfs_addr(). nfs_data.namlen + * is NOT for the length of the hostname. + * Hua Qin : Support for mounting root file system via + * NFS over TCP. 
+ * Fabian Frederick: Option parser rebuilt (using parser lib) +*/ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/in.h> +#include <linux/major.h> +#include <linux/utsname.h> +#include <linux/inet.h> +#include <linux/root_dev.h> +#include <net/ipconfig.h> +#include <linux/parser.h> + +/* Define this to allow debugging output */ +#undef NFSROOT_DEBUG +#define NFSDBG_FACILITY NFSDBG_ROOT + +/* Default path we try to mount. "%s" gets replaced by our IP address */ +#define NFS_ROOT "/tftpboot/%s" + +/* Parameters passed from the kernel command line */ +static char nfs_root_name[256] __initdata = ""; + +/* Address of NFS server */ +static __u32 servaddr __initdata = 0; + +/* Name of directory to mount */ +static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, }; + +/* NFS-related data */ +static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ +static int nfs_port __initdata = 0; /* Port to connect to for NFS */ +static int mount_port __initdata = 0; /* Mount daemon port number */ + + +/*************************************************************************** + + Parsing of options + + ***************************************************************************/ + +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin, + Opt_acregmax, Opt_acdirmin, Opt_acdirmax, + /* Options that take no arguments */ + Opt_soft, Opt_hard, Opt_intr, + Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, + Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, + /* Error token */ + Opt_err +}; + +static match_table_t __initdata tokens = { + {Opt_port, "port=%u"}, + {Opt_rsize, "rsize=%u"}, + {Opt_wsize, "wsize=%u"}, + 
{Opt_timeo, "timeo=%u"}, + {Opt_retrans, "retrans=%u"}, + {Opt_acregmin, "acregmin=%u"}, + {Opt_acregmax, "acregmax=%u"}, + {Opt_acdirmin, "acdirmin=%u"}, + {Opt_acdirmax, "acdirmax=%u"}, + {Opt_soft, "soft"}, + {Opt_hard, "hard"}, + {Opt_intr, "intr"}, + {Opt_nointr, "nointr"}, + {Opt_posix, "posix"}, + {Opt_noposix, "noposix"}, + {Opt_cto, "cto"}, + {Opt_nocto, "nocto"}, + {Opt_ac, "ac"}, + {Opt_noac, "noac"}, + {Opt_lock, "lock"}, + {Opt_nolock, "nolock"}, + {Opt_v2, "nfsvers=2"}, + {Opt_v2, "v2"}, + {Opt_v3, "nfsvers=3"}, + {Opt_v3, "v3"}, + {Opt_udp, "proto=udp"}, + {Opt_udp, "udp"}, + {Opt_tcp, "proto=tcp"}, + {Opt_tcp, "tcp"}, + {Opt_err, NULL} + +}; + +/* + * Parse option string. + */ + +static int __init root_nfs_parse(char *name, char *buf) +{ + + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + + if (!name) + return 1; + + /* Set the NFS remote path */ + p = strsep(&name, ","); + if (p[0] != '\0' && strcmp(p, "default") != 0) + strlcpy(buf, p, NFS_MAXPATHLEN); + + while ((p = strsep (&name, ",")) != NULL) { + int token; + if (!*p) + continue; + token = match_token(p, tokens, args); + + /* %u tokens only. Beware if you add new tokens! 
*/ + if (token < Opt_soft && match_int(&args[0], &option)) + return 0; + switch (token) { + case Opt_port: + nfs_port = option; + break; + case Opt_rsize: + nfs_data.rsize = option; + break; + case Opt_wsize: + nfs_data.wsize = option; + break; + case Opt_timeo: + nfs_data.timeo = option; + break; + case Opt_retrans: + nfs_data.retrans = option; + break; + case Opt_acregmin: + nfs_data.acregmin = option; + break; + case Opt_acregmax: + nfs_data.acregmax = option; + break; + case Opt_acdirmin: + nfs_data.acdirmin = option; + break; + case Opt_acdirmax: + nfs_data.acdirmax = option; + break; + case Opt_soft: + nfs_data.flags |= NFS_MOUNT_SOFT; + break; + case Opt_hard: + nfs_data.flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_intr: + nfs_data.flags |= NFS_MOUNT_INTR; + break; + case Opt_nointr: + nfs_data.flags &= ~NFS_MOUNT_INTR; + break; + case Opt_posix: + nfs_data.flags |= NFS_MOUNT_POSIX; + break; + case Opt_noposix: + nfs_data.flags &= ~NFS_MOUNT_POSIX; + break; + case Opt_cto: + nfs_data.flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_nocto: + nfs_data.flags |= NFS_MOUNT_NOCTO; + break; + case Opt_ac: + nfs_data.flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_noac: + nfs_data.flags |= NFS_MOUNT_NOAC; + break; + case Opt_lock: + nfs_data.flags &= ~NFS_MOUNT_NONLM; + break; + case Opt_nolock: + nfs_data.flags |= NFS_MOUNT_NONLM; + break; + case Opt_v2: + nfs_data.flags &= ~NFS_MOUNT_VER3; + break; + case Opt_v3: + nfs_data.flags |= NFS_MOUNT_VER3; + break; + case Opt_udp: + nfs_data.flags &= ~NFS_MOUNT_TCP; + break; + case Opt_tcp: + nfs_data.flags |= NFS_MOUNT_TCP; + break; + default : + return 0; + } + } + + return 1; +} + +/* + * Prepare the NFS data structure and parse all options. 
+ */ +static int __init root_nfs_name(char *name) +{ + static char buf[NFS_MAXPATHLEN] __initdata; + char *cp; + + /* Set some default values */ + memset(&nfs_data, 0, sizeof(nfs_data)); + nfs_port = -1; + nfs_data.version = NFS_MOUNT_VERSION; + nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ + nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + nfs_data.acregmin = 3; + nfs_data.acregmax = 60; + nfs_data.acdirmin = 30; + nfs_data.acdirmax = 60; + strcpy(buf, NFS_ROOT); + + /* Process options received from the remote server */ + root_nfs_parse(root_server_path, buf); + + /* Override them by options set on kernel command-line */ + root_nfs_parse(name, buf); + + cp = system_utsname.nodename; + if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { + printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); + return -1; + } + sprintf(nfs_path, buf, cp); + + return 1; +} + + +/* + * Get NFS server address. + */ +static int __init root_nfs_addr(void) +{ + if ((servaddr = root_server_addr) == INADDR_NONE) { + printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n"); + return -1; + } + + snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), + "%u.%u.%u.%u", NIPQUAD(servaddr)); + return 0; +} + +/* + * Tell the user what's going on. 
+ */ +#ifdef NFSROOT_DEBUG +static void __init root_nfs_print(void) +{ + printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", + nfs_path, nfs_data.hostname); + printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", + nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans); + printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n", + nfs_data.acregmin, nfs_data.acregmax, + nfs_data.acdirmin, nfs_data.acdirmax); + printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n", + nfs_port, mount_port, nfs_data.flags); +} +#endif + + +static int __init root_nfs_init(void) +{ +#ifdef NFSROOT_DEBUG + nfs_debug |= NFSDBG_ROOT; +#endif + + /* + * Decode the root directory path name and NFS options from + * the kernel command line. This has to go here in order to + * be able to use the client IP address for the remote root + * directory (necessary for pure RARP booting). + */ + if (root_nfs_name(nfs_root_name) < 0 || + root_nfs_addr() < 0) + return -1; + +#ifdef NFSROOT_DEBUG + root_nfs_print(); +#endif + + return 0; +} + + +/* + * Parse NFS server and directory information passed on the kernel + * command line. 
+ */ +static int __init nfs_root_setup(char *line) +{ + ROOT_DEV = Root_NFS; + if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { + strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); + } else { + int n = strlen(line) + sizeof(NFS_ROOT) - 1; + if (n >= sizeof(nfs_root_name)) + line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; + sprintf(nfs_root_name, NFS_ROOT, line); + } + root_server_addr = root_nfs_parse_addr(nfs_root_name); + return 1; +} + +__setup("nfsroot=", nfs_root_setup); + +/*************************************************************************** + + Routines to actually mount the root directory + + ***************************************************************************/ + +/* + * Construct sockaddr_in from address and port number. + */ +static inline void +set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port) +{ + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = addr; + sin->sin_port = port; +} + +/* + * Query server portmapper for the port of a daemon program. + */ +static int __init root_nfs_getport(int program, int version, int proto) +{ + struct sockaddr_in sin; + + printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", + program, version, NIPQUAD(servaddr)); + set_sockaddr(&sin, servaddr, 0); + return rpc_getport_external(&sin, program, version, proto); +} + + +/* + * Use portmapper to find mountd and nfsd port numbers if not overriden + * by the user. Use defaults if portmapper is not available. + * XXX: Is there any nfs server with no portmapper? 
+ */ +static int __init root_nfs_ports(void) +{ + int port; + int nfsd_ver, mountd_ver; + int nfsd_port, mountd_port; + int proto; + + if (nfs_data.flags & NFS_MOUNT_VER3) { + nfsd_ver = NFS3_VERSION; + mountd_ver = NFS_MNT3_VERSION; + nfsd_port = NFS_PORT; + mountd_port = NFS_MNT_PORT; + } else { + nfsd_ver = NFS2_VERSION; + mountd_ver = NFS_MNT_VERSION; + nfsd_port = NFS_PORT; + mountd_port = NFS_MNT_PORT; + } + + proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + + if (nfs_port < 0) { + if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) { + printk(KERN_ERR "Root-NFS: Unable to get nfsd port " + "number from server, using default\n"); + port = nfsd_port; + } + nfs_port = htons(port); + dprintk("Root-NFS: Portmapper on server returned %d " + "as nfsd port\n", port); + } + + if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) { + printk(KERN_ERR "Root-NFS: Unable to get mountd port " + "number from server, using default\n"); + port = mountd_port; + } + mount_port = htons(port); + dprintk("Root-NFS: mountd port is %d\n", port); + + return 0; +} + + +/* + * Get a file handle from the server for the directory which is to be + * mounted. + */ +static int __init root_nfs_get_handle(void) +{ + struct nfs_fh fh; + struct sockaddr_in sin; + int status; + int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? + IPPROTO_TCP : IPPROTO_UDP; + int version = (nfs_data.flags & NFS_MOUNT_VER3) ? + NFS_MNT3_VERSION : NFS_MNT_VERSION; + + set_sockaddr(&sin, servaddr, mount_port); + status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol); + if (status < 0) + printk(KERN_ERR "Root-NFS: Server returned error %d " + "while mounting %s\n", status, nfs_path); + else { + nfs_data.root.size = fh.size; + memcpy(nfs_data.root.data, fh.data, fh.size); + } + + return status; +} + +/* + * Get the NFS port numbers and file handle, and return the prepared 'data' + * argument for mount() if everything went OK. Return NULL otherwise. 
+ */ +void * __init nfs_root_data(void) +{ + if (root_nfs_init() < 0 + || root_nfs_ports() < 0 + || root_nfs_get_handle() < 0) + return NULL; + set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port); + return (void*)&nfs_data; +} diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c new file mode 100644 index 00000000000..4f1ba723848 --- /dev/null +++ b/fs/nfs/pagelist.c @@ -0,0 +1,309 @@ +/* + * linux/fs/nfs/pagelist.c + * + * A set of helper functions for managing NFS read and write requests. + * The main purpose of these routines is to provide support for the + * coalescing of several requests into a single RPC call. + * + * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no> + * + */ + +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs3.h> +#include <linux/nfs4.h> +#include <linux/nfs_page.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> + +#define NFS_PARANOIA 1 + +static kmem_cache_t *nfs_page_cachep; + +/* + * Allocate an nfs_page from nfs_page_cachep, zero it and initialise its + * wb_list. Returns NULL if the allocation fails. + */ +static inline struct nfs_page * +nfs_page_alloc(void) +{ + struct nfs_page *p; + p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->wb_list); + } + return p; +} + +/* Return an nfs_page to nfs_page_cachep. */ +static inline void +nfs_page_free(struct nfs_page *p) +{ + kmem_cache_free(nfs_page_cachep, p); +} + +/** + * nfs_create_request - Create an NFS read/write request. + * @ctx: open context to use + * @inode: inode to which the request is attached + * @page: page to write + * @offset: starting offset within the page for the write + * @count: number of bytes to read/write + * + * The page must be locked by the caller. This makes sure we never + * create two different requests for the same page, and avoids + * a possible deadlock when we reach the hard limit on the number + * of dirty pages. + * User should ensure it is safe to sleep in this function. 
+ */ +struct nfs_page * +nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, + struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_page *req; + + /* Deal with hard limits. */ + for (;;) { + /* try to allocate the request struct */ + req = nfs_page_alloc(); + if (req != NULL) + break; + + /* Try to free up at least one request in order to stay + * below the hard limit + */ + if (signalled() && (server->flags & NFS_MOUNT_INTR)) + return ERR_PTR(-ERESTARTSYS); + yield(); + } + + /* Initialize the request struct. Initially, we assume a + * long write-back delay. This will be adjusted in + * update_nfs_request below if the region is not locked. */ + req->wb_page = page; + atomic_set(&req->wb_complete, 0); + req->wb_index = page->index; + page_cache_get(page); + req->wb_offset = offset; + req->wb_pgbase = offset; + req->wb_bytes = count; + atomic_set(&req->wb_count, 1); + req->wb_context = get_nfs_open_context(ctx); + + return req; +} + +/** + * nfs_unlock_request - Unlock request and wake up sleepers. + * @req: + */ +void nfs_unlock_request(struct nfs_page *req) +{ + if (!NFS_WBACK_BUSY(req)) { + printk(KERN_ERR "NFS: Invalid unlock attempted\n"); + BUG(); + } + smp_mb__before_clear_bit(); + clear_bit(PG_BUSY, &req->wb_flags); + smp_mb__after_clear_bit(); + wake_up_all(&req->wb_context->waitq); + nfs_release_request(req); +} + +/** + * nfs_clear_request - Free up all resources allocated to the request + * @req: + * + * Release page resources associated with a write request after it + * has completed. + */ +void nfs_clear_request(struct nfs_page *req) +{ + if (req->wb_page) { + page_cache_release(req->wb_page); + req->wb_page = NULL; + } +} + + +/** + * nfs_release_request - Release the count on an NFS read/write request + * @req: request to release + * + * Note: Should never be called with the spinlock held! 
+ */ +void +nfs_release_request(struct nfs_page *req) +{ + if (!atomic_dec_and_test(&req->wb_count)) + return; + +#ifdef NFS_PARANOIA + BUG_ON (!list_empty(&req->wb_list)); + BUG_ON (NFS_WBACK_BUSY(req)); +#endif + + /* Release struct file or cached credential */ + nfs_clear_request(req); + put_nfs_open_context(req->wb_context); + nfs_page_free(req); +} + +/** + * nfs_list_add_request - Insert a request into a sorted list + * @req: request + * @head: head of list into which to insert the request. + * + * Note that the wb_list is sorted by page index in order to facilitate + * coalescing of requests. + * We use an insertion sort that is optimized for the case of appended + * writes. + */ +void +nfs_list_add_request(struct nfs_page *req, struct list_head *head) +{ + struct list_head *pos; + +#ifdef NFS_PARANOIA + if (!list_empty(&req->wb_list)) { + printk(KERN_ERR "NFS: Add to list failed!\n"); + BUG(); + } +#endif + list_for_each_prev(pos, head) { + struct nfs_page *p = nfs_list_entry(pos); + if (p->wb_index < req->wb_index) + break; + } + list_add(&req->wb_list, pos); + req->wb_list_head = head; +} + +/** + * nfs_wait_on_request - Wait for a request to complete. + * @req: request to wait upon. + * + * Interruptible by signals only if mounted with intr flag. + * The user is responsible for holding a count on the request. + */ +int +nfs_wait_on_request(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + + if (!NFS_WBACK_BUSY(req)) + return 0; + return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req)); +} + +/** + * nfs_coalesce_requests - Split coalesced requests out from a list. + * @head: source list + * @dst: destination list + * @nmax: maximum number of requests to coalesce + * + * Moves a maximum of 'nmax' elements from one list to another. + * The elements are checked to ensure that they form a contiguous set + * of pages, and that the RPC credentials are the same. 
+ */
int
nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
		      unsigned int nmax)
{
	struct nfs_page *prev = NULL;	/* request moved in the previous round */
	unsigned int moved = 0;

	while (!list_empty(head)) {
		struct nfs_page *req = nfs_list_entry(head->next);

		if (prev != NULL) {
			struct nfs_open_context *prev_ctx = prev->wb_context;
			struct nfs_open_context *req_ctx = req->wb_context;

			/* Same credential, lock owner and state as before? */
			if (req_ctx->cred != prev_ctx->cred ||
			    req_ctx->lockowner != prev_ctx->lockowner ||
			    req_ctx->state != prev_ctx->state)
				break;
			/* Next page, starting at the beginning of that page? */
			if (req->wb_index != (prev->wb_index + 1) ||
			    req->wb_pgbase != 0)
				break;
		}

		nfs_list_remove_request(req);
		nfs_list_add_request(req, dst);
		moved++;

		/* A request that stops short of the page end closes the run. */
		if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE)
			break;
		if (moved >= nmax)
			break;
		prev = req;
	}
	return moved;
}

/** + * nfs_scan_list - Scan a list for matching requests + * @head: One of the NFS inode request lists + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. 
+ * You must be holding the inode's req_lock when calling this function + */ +int +nfs_scan_list(struct list_head *head, struct list_head *dst, + unsigned long idx_start, unsigned int npages) +{ + struct list_head *pos, *tmp; + struct nfs_page *req; + unsigned long idx_end; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + list_for_each_safe(pos, tmp, head) { + + req = nfs_list_entry(pos); + + if (req->wb_index < idx_start) + continue; + if (req->wb_index > idx_end) + break; + + if (!nfs_lock_request(req)) + continue; + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + return res; +} + +int nfs_init_nfspagecache(void) +{ + nfs_page_cachep = kmem_cache_create("nfs_page", + sizeof(struct nfs_page), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (nfs_page_cachep == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_nfspagecache(void) +{ + if (kmem_cache_destroy(nfs_page_cachep)) + printk(KERN_INFO "nfs_page: not all structures were freed\n"); +} + diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c new file mode 100644 index 00000000000..d31b4d6e5a5 --- /dev/null +++ b/fs/nfs/proc.c @@ -0,0 +1,655 @@ +/* + * linux/fs/nfs/proc.c + * + * Copyright (C) 1992, 1993, 1994 Rick Sladkey + * + * OS-independent nfs remote procedure call functions + * + * Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers + * so at last we can have decent(ish) throughput off a + * Sun server. + * + * Coding optimized and cleaned up by Florian La Roche. + * Note: Error returns are optimized for NFS_OK, which isn't translated via + * nfs_stat_to_errno(), but happens to be already the right return code. + * + * Also, the code currently doesn't check the size of the packet, when + * it decodes the packet. + * + * Feel free to fix it and mail me the diffs if it worries you. 
+ * + * Completely rewritten to support the new RPC call interface; + * rewrote and moved the entire XDR stuff to xdr.c + * --Olaf Kirch June 1996 + * + * The code below initializes all auto variables explicitly, otherwise + * it will fail to work as a module (gcc generates a memset call for an + * incomplete struct). + */ + +#include <linux/types.h> +#include <linux/param.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/mm.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/lockd/bind.h> +#include <linux/smp_lock.h> + +#define NFSDBG_FACILITY NFSDBG_PROC + +extern struct rpc_procinfo nfs_procedures[]; + +/* + * Bare-bones access to getattr: this is for nfs_read_super. + */ +static int +nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs_fattr *fattr = info->fattr; + struct nfs2_fsstat fsinfo; + int status; + + dprintk("%s: call getattr\n", __FUNCTION__); + fattr->valid = 0; + status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); + dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); + if (status) + return status; + dprintk("%s: call statfs\n", __FUNCTION__); + status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); + dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); + if (status) + return status; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->lease_time = 0; + return 0; +} + +/* + * One function for each procedure in the NFS protocol. 
+ */ +static int +nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + + dprintk("NFS call getattr\n"); + fattr->valid = 0; + status = rpc_call(server->client, NFSPROC_GETATTR, + fhandle, fattr, 0); + dprintk("NFS reply getattr: %d\n", status); + return status; +} + +static int +nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) +{ + struct inode *inode = dentry->d_inode; + struct nfs_sattrargs arg = { + .fh = NFS_FH(inode), + .sattr = sattr + }; + int status; + + dprintk("NFS call setattr\n"); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); + dprintk("NFS reply setattr: %d\n", status); + return status; +} + +static int +nfs_proc_lookup(struct inode *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + struct nfs_diropok res = { + .fh = fhandle, + .fattr = fattr + }; + int status; + + dprintk("NFS call lookup %s\n", name->name); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0); + dprintk("NFS reply lookup: %d\n", status); + return status; +} + +static int nfs_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) +{ + struct nfs_readlinkargs args = { + .fh = NFS_FH(inode), + .pgbase = pgbase, + .pglen = pglen, + .pages = &page + }; + int status; + + dprintk("NFS call readlink\n"); + status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0); + dprintk("NFS reply readlink: %d\n", status); + return status; +} + +static int nfs_proc_read(struct nfs_read_data *rdata) +{ + int flags = rdata->flags; + struct inode * inode = rdata->inode; + struct nfs_fattr * fattr = rdata->res.fattr; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_READ], + .rpc_argp = &rdata->args, + .rpc_resp = &rdata->res, + 
.rpc_cred = rdata->cred, + }; + int status; + + dprintk("NFS call read %d @ %Ld\n", rdata->args.count, + (long long) rdata->args.offset); + fattr->valid = 0; + status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); + if (status >= 0) { + nfs_refresh_inode(inode, fattr); + /* Emulate the eof flag, which isn't normally needed in NFSv2 + * as it is guaranteed to always return the file attributes + */ + if (rdata->args.offset + rdata->args.count >= fattr->size) + rdata->res.eof = 1; + } + dprintk("NFS reply read: %d\n", status); + return status; +} + +static int nfs_proc_write(struct nfs_write_data *wdata) +{ + int flags = wdata->flags; + struct inode * inode = wdata->inode; + struct nfs_fattr * fattr = wdata->res.fattr; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_WRITE], + .rpc_argp = &wdata->args, + .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, + }; + int status; + + dprintk("NFS call write %d @ %Ld\n", wdata->args.count, + (long long) wdata->args.offset); + fattr->valid = 0; + status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); + if (status >= 0) { + nfs_refresh_inode(inode, fattr); + wdata->res.count = wdata->args.count; + wdata->verf.committed = NFS_FILE_SYNC; + } + dprintk("NFS reply write: %d\n", status); + return status < 0? 
status : wdata->res.count; +} + +static int +nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + int flags) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs_createargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr + }; + struct nfs_diropok res = { + .fh = &fhandle, + .fattr = &fattr + }; + int status; + + fattr.valid = 0; + dprintk("NFS call create %s\n", dentry->d_name.name); + status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); + dprintk("NFS reply create: %d\n", status); + return status; +} + +/* + * In NFSv2, mknod is grafted onto the create call. + */ +static int +nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + dev_t rdev) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs_createargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr + }; + struct nfs_diropok res = { + .fh = &fhandle, + .fattr = &fattr + }; + int status, mode; + + dprintk("NFS call mknod %s\n", dentry->d_name.name); + + mode = sattr->ia_mode; + if (S_ISFIFO(mode)) { + sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR; + sattr->ia_valid &= ~ATTR_SIZE; + } else if (S_ISCHR(mode) || S_ISBLK(mode)) { + sattr->ia_valid |= ATTR_SIZE; + sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */ + } + + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + + if (status == -EINVAL && S_ISFIFO(mode)) { + sattr->ia_mode = mode; + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + } + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); + dprintk("NFS reply mknod: %d\n", status); + return status; +} + +static int +nfs_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs_diropargs arg = { + .fh = NFS_FH(dir), + 
.name = name->name, + .len = name->len + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], + .rpc_argp = &arg, + .rpc_resp = NULL, + .rpc_cred = NULL + }; + int status; + + dprintk("NFS call remove %s\n", name->name); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + dprintk("NFS reply remove: %d\n", status); + return status; +} + +static int +nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name) +{ + struct nfs_diropargs *arg; + + arg = (struct nfs_diropargs *)kmalloc(sizeof(*arg), GFP_KERNEL); + if (!arg) + return -ENOMEM; + arg->fh = NFS_FH(dir->d_inode); + arg->name = name->name; + arg->len = name->len; + msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; + msg->rpc_argp = arg; + return 0; +} + +static int +nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task) +{ + struct rpc_message *msg = &task->tk_msg; + + if (msg->rpc_argp) + kfree(msg->rpc_argp); + return 0; +} + +static int +nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs_renameargs arg = { + .fromfh = NFS_FH(old_dir), + .fromname = old_name->name, + .fromlen = old_name->len, + .tofh = NFS_FH(new_dir), + .toname = new_name->name, + .tolen = new_name->len + }; + int status; + + dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); + status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0); + dprintk("NFS reply rename: %d\n", status); + return status; +} + +static int +nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs_linkargs arg = { + .fromfh = NFS_FH(inode), + .tofh = NFS_FH(dir), + .toname = name->name, + .tolen = name->len + }; + int status; + + dprintk("NFS call link %s\n", name->name); + status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0); + dprintk("NFS reply link: %d\n", status); + return status; +} + +static int +nfs_proc_symlink(struct inode *dir, struct qstr 
*name, struct qstr *path, + struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs_symlinkargs arg = { + .fromfh = NFS_FH(dir), + .fromname = name->name, + .fromlen = name->len, + .topath = path->name, + .tolen = path->len, + .sattr = sattr + }; + int status; + + if (path->len > NFS2_MAXPATHLEN) + return -ENAMETOOLONG; + dprintk("NFS call symlink %s -> %s\n", name->name, path->name); + fattr->valid = 0; + fhandle->size = 0; + status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); + dprintk("NFS reply symlink: %d\n", status); + return status; +} + +static int +nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs_createargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr + }; + struct nfs_diropok res = { + .fh = &fhandle, + .fattr = &fattr + }; + int status; + + dprintk("NFS call mkdir %s\n", dentry->d_name.name); + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); + dprintk("NFS reply mkdir: %d\n", status); + return status; +} + +static int +nfs_proc_rmdir(struct inode *dir, struct qstr *name) +{ + struct nfs_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + int status; + + dprintk("NFS call rmdir %s\n", name->name); + status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0); + dprintk("NFS reply rmdir: %d\n", status); + return status; +} + +/* + * The READDIR implementation is somewhat hackish - we pass a temporary + * buffer to the encode function, which installs it in the receive + * the receive iovec. The decode function just parses the reply to make + * sure it is syntactically correct; the entries itself are decoded + * from nfs_readdir by calling the decode_entry function directly. 
+ */ +static int +nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) +{ + struct inode *dir = dentry->d_inode; + struct nfs_readdirargs arg = { + .fh = NFS_FH(dir), + .cookie = cookie, + .count = count, + .pages = &page + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_READDIR], + .rpc_argp = &arg, + .rpc_resp = NULL, + .rpc_cred = cred + }; + int status; + + lock_kernel(); + + dprintk("NFS call readdir %d\n", (unsigned int)cookie); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + dprintk("NFS reply readdir: %d\n", status); + unlock_kernel(); + return status; +} + +static int +nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsstat *stat) +{ + struct nfs2_fsstat fsinfo; + int status; + + dprintk("NFS call statfs\n"); + stat->fattr->valid = 0; + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + dprintk("NFS reply statfs: %d\n", status); + if (status) + goto out; + stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize; + stat->fbytes = (u64)fsinfo.bfree * fsinfo.bsize; + stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize; + stat->tfiles = 0; + stat->ffiles = 0; + stat->afiles = 0; +out: + return status; +} + +static int +nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs2_fsstat fsinfo; + int status; + + dprintk("NFS call fsinfo\n"); + info->fattr->valid = 0; + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + dprintk("NFS reply fsinfo: %d\n", status); + if (status) + goto out; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->lease_time = 0; +out: + return status; +} + +static int +nfs_proc_pathconf(struct nfs_server *server, struct 
nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + info->max_link = 0; + info->max_namelen = NFS2_MAXNAMLEN; + return 0; +} + +extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); + +static void +nfs_read_done(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + + if (task->tk_status >= 0) { + nfs_refresh_inode(data->inode, data->res.fattr); + /* Emulate the eof flag, which isn't normally needed in NFSv2 + * as it is guaranteed to always return the file attributes + */ + if (data->args.offset + data->args.count >= data->res.fattr->size) + data->res.eof = 1; + } + nfs_readpage_result(task); +} + +static void +nfs_proc_read_setup(struct nfs_read_data *data) +{ + struct rpc_task *task = &data->task; + struct inode *inode = data->inode; + int flags; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_READ], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + + /* N.B. Do we need to test? Never called for swapfile inode */ + flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + + /* Finalize the task. */ + rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags); + rpc_call_setup(task, &msg, 0); +} + +static void +nfs_write_done(struct rpc_task *task) +{ + struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + + if (task->tk_status >= 0) + nfs_refresh_inode(data->inode, data->res.fattr); + nfs_writeback_done(task); +} + +static void +nfs_proc_write_setup(struct nfs_write_data *data, int how) +{ + struct rpc_task *task = &data->task; + struct inode *inode = data->inode; + int flags; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_WRITE], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + + /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ + data->args.stable = NFS_FILE_SYNC; + + /* Set the initial flags for the task. */ + flags = (how & FLUSH_SYNC) ? 
0 : RPC_TASK_ASYNC; + + /* Finalize the task. */ + rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags); + rpc_call_setup(task, &msg, 0); +} + +static void +nfs_proc_commit_setup(struct nfs_write_data *data, int how) +{ + BUG(); +} + +static int +nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + + +struct nfs_rpc_ops nfs_v2_clientops = { + .version = 2, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, + .getroot = nfs_proc_get_root, + .getattr = nfs_proc_getattr, + .setattr = nfs_proc_setattr, + .lookup = nfs_proc_lookup, + .access = NULL, /* access */ + .readlink = nfs_proc_readlink, + .read = nfs_proc_read, + .write = nfs_proc_write, + .commit = NULL, /* commit */ + .create = nfs_proc_create, + .remove = nfs_proc_remove, + .unlink_setup = nfs_proc_unlink_setup, + .unlink_done = nfs_proc_unlink_done, + .rename = nfs_proc_rename, + .link = nfs_proc_link, + .symlink = nfs_proc_symlink, + .mkdir = nfs_proc_mkdir, + .rmdir = nfs_proc_rmdir, + .readdir = nfs_proc_readdir, + .mknod = nfs_proc_mknod, + .statfs = nfs_proc_statfs, + .fsinfo = nfs_proc_fsinfo, + .pathconf = nfs_proc_pathconf, + .decode_dirent = nfs_decode_dirent, + .read_setup = nfs_proc_read_setup, + .write_setup = nfs_proc_write_setup, + .commit_setup = nfs_proc_commit_setup, + .file_open = nfs_open, + .file_release = nfs_release, + .lock = nfs_proc_lock, +}; diff --git a/fs/nfs/read.c b/fs/nfs/read.c new file mode 100644 index 00000000000..a0042fb5863 --- /dev/null +++ b/fs/nfs/read.c @@ -0,0 +1,618 @@ +/* + * linux/fs/nfs/read.c + * + * Block I/O for NFS + * + * Partial copy of Linus' read cache modifications to fs/nfs/file.c + * modified for async RPC by okir@monad.swb.de + * + * We do an ugly hack here in order to return proper error codes to the + * user program when a read request failed: since generic_file_read + * only checks the return value of 
inode->i_op->readpage() which is always 0 + * for async RPC, we set the error bit of the page to 1 when an error occurs, + * and make nfs_readpage transmit requests synchronously when encountering this. + * This is only a small problem, though, since we now retry all operations + * within the RPC code when root squashing is suspected. + */ + +#include <linux/config.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/smp_lock.h> + +#include <asm/system.h> + +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + +static int nfs_pagein_one(struct list_head *, struct inode *); +static void nfs_readpage_result_partial(struct nfs_read_data *, int); +static void nfs_readpage_result_full(struct nfs_read_data *, int); + +static kmem_cache_t *nfs_rdata_cachep; +mempool_t *nfs_rdata_mempool; + +#define MIN_POOL_READ (32) + +void nfs_readdata_release(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + nfs_readdata_free(data); +} + +static +unsigned int nfs_page_length(struct inode *inode, struct page *page) +{ + loff_t i_size = i_size_read(inode); + unsigned long idx; + + if (i_size <= 0) + return 0; + idx = (i_size - 1) >> PAGE_CACHE_SHIFT; + if (page->index > idx) + return 0; + if (page->index != idx) + return PAGE_CACHE_SIZE; + return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1)); +} + +static +int nfs_return_empty_page(struct page *page) +{ + memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + unlock_page(page); + return 0; +} + +/* + * Read a page synchronously. 
+ */ +static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, + struct page *page) +{ + unsigned int rsize = NFS_SERVER(inode)->rsize; + unsigned int count = PAGE_CACHE_SIZE; + int result; + struct nfs_read_data *rdata; + + rdata = nfs_readdata_alloc(); + if (!rdata) + return -ENOMEM; + + memset(rdata, 0, sizeof(*rdata)); + rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + rdata->cred = ctx->cred; + rdata->inode = inode; + INIT_LIST_HEAD(&rdata->pages); + rdata->args.fh = NFS_FH(inode); + rdata->args.context = ctx; + rdata->args.pages = &page; + rdata->args.pgbase = 0UL; + rdata->args.count = rsize; + rdata->res.fattr = &rdata->fattr; + + dprintk("NFS: nfs_readpage_sync(%p)\n", page); + + /* + * This works now because the socket layer never tries to DMA + * into this buffer directly. + */ + do { + if (count < rsize) + rdata->args.count = count; + rdata->res.count = rdata->args.count; + rdata->args.offset = page_offset(page) + rdata->args.pgbase; + + dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", + NFS_SERVER(inode)->hostname, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + (unsigned long long)rdata->args.pgbase, + rdata->args.count); + + lock_kernel(); + result = NFS_PROTO(inode)->read(rdata); + unlock_kernel(); + + /* + * Even if we had a partial success we can't mark the page + * cache valid. + */ + if (result < 0) { + if (result == -EISDIR) + result = -EINVAL; + goto io_error; + } + count -= result; + rdata->args.pgbase += result; + /* Note: result == 0 should only happen if we're caching + * a write that extends the file and punches a hole. 
+ */ + if (rdata->res.eof != 0 || result == 0) + break; + } while (count); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + + if (count) + memclear_highpage_flush(page, rdata->args.pgbase, count); + SetPageUptodate(page); + if (PageError(page)) + ClearPageError(page); + result = 0; + +io_error: + unlock_page(page); + nfs_readdata_free(rdata); + return result; +} + +static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, + struct page *page) +{ + LIST_HEAD(one_request); + struct nfs_page *new; + unsigned int len; + + len = nfs_page_length(inode, page); + if (len == 0) + return nfs_return_empty_page(page); + new = nfs_create_request(ctx, inode, page, 0, len); + if (IS_ERR(new)) { + unlock_page(page); + return PTR_ERR(new); + } + if (len < PAGE_CACHE_SIZE) + memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); + + nfs_lock_request(new); + nfs_list_add_request(new, &one_request); + nfs_pagein_one(&one_request, inode); + return 0; +} + +static void nfs_readpage_release(struct nfs_page *req) +{ + unlock_page(req->wb_page); + + nfs_clear_request(req); + nfs_release_request(req); + nfs_unlock_request(req); + + dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); +} + +/* + * Set up the NFS read request struct + */ +static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + unsigned int count, unsigned int offset) +{ + struct inode *inode; + + data->req = req; + data->inode = inode = req->wb_context->dentry->d_inode; + data->cred = req->wb_context->cred; + + data->args.fh = NFS_FH(inode); + data->args.offset = req_offset(req) + offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.pages = data->pagevec; + data->args.count = count; + data->args.context = req->wb_context; + + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.eof = 0; + + 
NFS_PROTO(inode)->read_setup(data); + + data->task.tk_cookie = (unsigned long)inode; + data->task.tk_calldata = data; + /* Release requests */ + data->task.tk_release = nfs_readdata_release; + + dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + count, + (unsigned long long)data->args.offset); +} + +static void +nfs_async_read_error(struct list_head *head) +{ + struct nfs_page *req; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + SetPageError(req->wb_page); + nfs_readpage_release(req); + } +} + +/* + * Start an async read operation + */ +static void nfs_execute_read(struct nfs_read_data *data) +{ + struct rpc_clnt *clnt = NFS_CLIENT(data->inode); + sigset_t oldset; + + rpc_clnt_sigmask(clnt, &oldset); + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); + rpc_clnt_sigunmask(clnt, &oldset); +} + +/* + * Generate multiple requests to fill a single page. + * + * We optimize to reduce the number of read operations on the wire. If we + * detect that we're reading a page, or an area of a page, that is past the + * end of file, we do not generate NFS read operations but just clear the + * parts of the page that would have come back zero from the server anyway. + * + * We rely on the cached value of i_size to make this determination; another + * client can fill pages on the server past our cached end-of-file, but we + * won't see the new data until our attribute cache is updated. This is more + * or less conventional NFS client behavior. 
+ */ +static int nfs_pagein_multi(struct list_head *head, struct inode *inode) +{ + struct nfs_page *req = nfs_list_entry(head->next); + struct page *page = req->wb_page; + struct nfs_read_data *data; + unsigned int rsize = NFS_SERVER(inode)->rsize; + unsigned int nbytes, offset; + int requests = 0; + LIST_HEAD(list); + + nfs_list_remove_request(req); + + nbytes = req->wb_bytes; + for(;;) { + data = nfs_readdata_alloc(); + if (!data) + goto out_bad; + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, &list); + requests++; + if (nbytes <= rsize) + break; + nbytes -= rsize; + } + atomic_set(&req->wb_complete, requests); + + ClearPageError(page); + offset = 0; + nbytes = req->wb_bytes; + do { + data = list_entry(list.next, struct nfs_read_data, pages); + list_del_init(&data->pages); + + data->pagevec[0] = page; + data->complete = nfs_readpage_result_partial; + + if (nbytes > rsize) { + nfs_read_rpcsetup(req, data, rsize, offset); + offset += rsize; + nbytes -= rsize; + } else { + nfs_read_rpcsetup(req, data, nbytes, offset); + nbytes = 0; + } + nfs_execute_read(data); + } while (nbytes != 0); + + return 0; + +out_bad: + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_read_data, pages); + list_del(&data->pages); + nfs_readdata_free(data); + } + SetPageError(page); + nfs_readpage_release(req); + return -ENOMEM; +} + +static int nfs_pagein_one(struct list_head *head, struct inode *inode) +{ + struct nfs_page *req; + struct page **pages; + struct nfs_read_data *data; + unsigned int count; + + if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(head, inode); + + data = nfs_readdata_alloc(); + if (!data) + goto out_bad; + + INIT_LIST_HEAD(&data->pages); + pages = data->pagevec; + count = 0; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + ClearPageError(req->wb_page); + *pages++ = req->wb_page; + count += req->wb_bytes; + } + req = 
nfs_list_entry(data->pages.next); + + data->complete = nfs_readpage_result_full; + nfs_read_rpcsetup(req, data, count, 0); + + nfs_execute_read(data); + return 0; +out_bad: + nfs_async_read_error(head); + return -ENOMEM; +} + +static int +nfs_pagein_list(struct list_head *head, int rpages) +{ + LIST_HEAD(one_request); + struct nfs_page *req; + int error = 0; + unsigned int pages = 0; + + while (!list_empty(head)) { + pages += nfs_coalesce_requests(head, &one_request, rpages); + req = nfs_list_entry(one_request.next); + error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode); + if (error < 0) + break; + } + if (error >= 0) + return pages; + + nfs_async_read_error(head); + return error; +} + +/* + * Handle a read reply that fills part of a page. + */ +static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) +{ + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + + if (status >= 0) { + unsigned int request = data->args.count; + unsigned int result = data->res.count; + + if (result < request) { + memclear_highpage_flush(page, + data->args.pgbase + result, + request - result); + } + } else + SetPageError(page); + + if (atomic_dec_and_test(&req->wb_complete)) { + if (!PageError(page)) + SetPageUptodate(page); + nfs_readpage_release(req); + } +} + +/* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). 
+ */ +static void nfs_readpage_result_full(struct nfs_read_data *data, int status) +{ + unsigned int count = data->res.count; + + while (!list_empty(&data->pages)) { + struct nfs_page *req = nfs_list_entry(data->pages.next); + struct page *page = req->wb_page; + nfs_list_remove_request(req); + + if (status >= 0) { + if (count < PAGE_CACHE_SIZE) { + if (count < req->wb_bytes) + memclear_highpage_flush(page, + req->wb_pgbase + count, + req->wb_bytes - count); + count = 0; + } else + count -= PAGE_CACHE_SIZE; + SetPageUptodate(page); + } else + SetPageError(page); + nfs_readpage_release(req); + } +} + +/* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). + */ +void nfs_readpage_result(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + struct nfs_readargs *argp = &data->args; + struct nfs_readres *resp = &data->res; + int status = task->tk_status; + + dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", + task->tk_pid, status); + + /* Is this a short read? */ + if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { + /* Has the server at least made some progress? */ + if (resp->count != 0) { + /* Yes, so retry the read at the end of the data */ + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + rpc_restart_call(task); + return; + } + task->tk_status = -EIO; + } + NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; + data->complete(data, status); +} + +/* + * Read a page over NFS. + * We read the page synchronously in the following case: + * - The error flag is set for this page. This happens only when a + * previous async read operation failed. 
+ */ +int nfs_readpage(struct file *file, struct page *page) +{ + struct nfs_open_context *ctx; + struct inode *inode = page->mapping->host; + int error; + + dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", + page, PAGE_CACHE_SIZE, page->index); + /* + * Try to flush any pending writes to the file.. + * + * NOTE! Because we own the page lock, there cannot + * be any new pending writes generated at this point + * for this page (other pages can be written to). + */ + error = nfs_wb_page(inode, page); + if (error) + goto out_error; + + if (file == NULL) { + ctx = nfs_find_open_context(inode, FMODE_READ); + if (ctx == NULL) + return -EBADF; + } else + ctx = get_nfs_open_context((struct nfs_open_context *) + file->private_data); + if (!IS_SYNC(inode)) { + error = nfs_readpage_async(ctx, inode, page); + goto out; + } + + error = nfs_readpage_sync(ctx, inode, page); + if (error < 0 && IS_SWAPFILE(inode)) + printk("Aiee.. nfs swap-in of page failed!\n"); +out: + put_nfs_open_context(ctx); + return error; + +out_error: + unlock_page(page); + return error; +} + +struct nfs_readdesc { + struct list_head *head; + struct nfs_open_context *ctx; +}; + +static int +readpage_async_filler(void *data, struct page *page) +{ + struct nfs_readdesc *desc = (struct nfs_readdesc *)data; + struct inode *inode = page->mapping->host; + struct nfs_page *new; + unsigned int len; + + nfs_wb_page(inode, page); + len = nfs_page_length(inode, page); + if (len == 0) + return nfs_return_empty_page(page); + new = nfs_create_request(desc->ctx, inode, page, 0, len); + if (IS_ERR(new)) { + SetPageError(page); + unlock_page(page); + return PTR_ERR(new); + } + if (len < PAGE_CACHE_SIZE) + memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); + nfs_lock_request(new); + nfs_list_add_request(new, desc->head); + return 0; +} + +int nfs_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + LIST_HEAD(head); + struct nfs_readdesc desc = { + .head = &head, + 
}; + struct inode *inode = mapping->host; + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + nr_pages); + + if (filp == NULL) { + desc.ctx = nfs_find_open_context(inode, FMODE_READ); + if (desc.ctx == NULL) + return -EBADF; + } else + desc.ctx = get_nfs_open_context((struct nfs_open_context *) + filp->private_data); + ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); + if (!list_empty(&head)) { + int err = nfs_pagein_list(&head, server->rpages); + if (!ret) + ret = err; + } + put_nfs_open_context(desc.ctx); + return ret; +} + +int nfs_init_readpagecache(void) +{ + nfs_rdata_cachep = kmem_cache_create("nfs_read_data", + sizeof(struct nfs_read_data), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (nfs_rdata_cachep == NULL) + return -ENOMEM; + + nfs_rdata_mempool = mempool_create(MIN_POOL_READ, + mempool_alloc_slab, + mempool_free_slab, + nfs_rdata_cachep); + if (nfs_rdata_mempool == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_readpagecache(void) +{ + mempool_destroy(nfs_rdata_mempool); + if (kmem_cache_destroy(nfs_rdata_cachep)) + printk(KERN_INFO "nfs_read_data: not all structures were freed\n"); +} diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c new file mode 100644 index 00000000000..35f10659914 --- /dev/null +++ b/fs/nfs/symlink.c @@ -0,0 +1,117 @@ +/* + * linux/fs/nfs/symlink.c + * + * Copyright (C) 1992 Rick Sladkey + * + * Optimization changes Copyright (C) 1994 Florian La Roche + * + * Jun 7 1999, cache symlink lookups in the page cache. 
-DaveM + * + * nfs symlink handling code + */ + +#define NFS_NEED_XDR_TYPES +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs_fs.h> +#include <linux/pagemap.h> +#include <linux/stat.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/namei.h> + +/* Symlink caching in the page cache is even more simplistic + * and straight-forward than readdir caching. + * + * At the beginning of the page we store pointer to struct page in question, + * simplifying nfs_put_link() (if inode got invalidated we can't find the page + * to be freed via pagecache lookup). + * The NUL-terminated string follows immediately thereafter. + */ + +struct nfs_symlink { + struct page *page; + char body[0]; +}; + +static int nfs_symlink_filler(struct inode *inode, struct page *page) +{ + const unsigned int pgbase = offsetof(struct nfs_symlink, body); + const unsigned int pglen = PAGE_SIZE - pgbase; + int error; + + lock_kernel(); + error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen); + unlock_kernel(); + if (error < 0) + goto error; + SetPageUptodate(page); + unlock_page(page); + return 0; + +error: + SetPageError(page); + unlock_page(page); + return -EIO; +} + +static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + struct page *page; + struct nfs_symlink *p; + void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode)); + if (err) + goto read_failed; + page = read_cache_page(&inode->i_data, 0, + (filler_t *)nfs_symlink_filler, inode); + if (IS_ERR(page)) { + err = page; + goto read_failed; + } + if (!PageUptodate(page)) { + err = ERR_PTR(-EIO); + goto getlink_read_error; + } + p = kmap(page); + p->page = page; + nd_set_link(nd, p->body); + return 0; + +getlink_read_error: + page_cache_release(page); +read_failed: + nd_set_link(nd, err); + return 
0; +} + +static void nfs_put_link(struct dentry *dentry, struct nameidata *nd) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) { + struct nfs_symlink *p; + struct page *page; + + p = container_of(s, struct nfs_symlink, body[0]); + page = p->page; + + kunmap(page); + page_cache_release(page); + } +} + +/* + * symlinks can't do much... + */ +struct inode_operations nfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = nfs_follow_link, + .put_link = nfs_put_link, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c new file mode 100644 index 00000000000..f732541a333 --- /dev/null +++ b/fs/nfs/unlink.c @@ -0,0 +1,227 @@ +/* + * linux/fs/nfs/unlink.c + * + * nfs sillydelete handling + * + * NOTE: we rely on holding the BKL for list manipulation protection. + */ + +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/dcache.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> + + +struct nfs_unlinkdata { + struct nfs_unlinkdata *next; + struct dentry *dir, *dentry; + struct qstr name; + struct rpc_task task; + struct rpc_cred *cred; + unsigned int count; +}; + +static struct nfs_unlinkdata *nfs_deletes; +static RPC_WAITQ(nfs_delete_queue, "nfs_delete_queue"); + +/** + * nfs_detach_unlinkdata - Remove asynchronous unlink from global list + * @data: pointer to descriptor + */ +static inline void +nfs_detach_unlinkdata(struct nfs_unlinkdata *data) +{ + struct nfs_unlinkdata **q; + + for (q = &nfs_deletes; *q != NULL; q = &((*q)->next)) { + if (*q == data) { + *q = data->next; + break; + } + } +} + +/** + * nfs_put_unlinkdata - release data from a sillydelete operation. + * @data: pointer to unlink structure. 
+ */ +static void +nfs_put_unlinkdata(struct nfs_unlinkdata *data) +{ + if (--data->count == 0) { + nfs_detach_unlinkdata(data); + if (data->name.name != NULL) + kfree(data->name.name); + kfree(data); + } +} + +#define NAME_ALLOC_LEN(len) ((len+16) & ~15) +/** + * nfs_copy_dname - copy dentry name to data structure + * @dentry: pointer to dentry + * @data: nfs_unlinkdata + */ +static inline void +nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data) +{ + char *str; + int len = dentry->d_name.len; + + str = kmalloc(NAME_ALLOC_LEN(len), GFP_KERNEL); + if (!str) + return; + memcpy(str, dentry->d_name.name, len); + if (!data->name.len) { + data->name.len = len; + data->name.name = str; + } else + kfree(str); +} + +/** + * nfs_async_unlink_init - Initialize the RPC info + * @task: rpc_task of the sillydelete + * + * We delay initializing RPC info until after the call to dentry_iput() + * in order to minimize races against rename(). + */ +static void +nfs_async_unlink_init(struct rpc_task *task) +{ + struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + struct dentry *dir = data->dir; + struct rpc_message msg = { + .rpc_cred = data->cred, + }; + int status = -ENOENT; + + if (!data->name.len) + goto out_err; + + status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name); + if (status < 0) + goto out_err; + nfs_begin_data_update(dir->d_inode); + rpc_call_setup(task, &msg, 0); + return; + out_err: + rpc_exit(task, status); +} + +/** + * nfs_async_unlink_done - Sillydelete post-processing + * @task: rpc_task of the sillydelete + * + * Do the directory attribute update. 
+ */ +static void +nfs_async_unlink_done(struct rpc_task *task) +{ + struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + struct dentry *dir = data->dir; + struct inode *dir_i; + + if (!dir) + return; + dir_i = dir->d_inode; + nfs_end_data_update(dir_i); + if (NFS_PROTO(dir_i)->unlink_done(dir, task)) + return; + put_rpccred(data->cred); + data->cred = NULL; + dput(dir); +} + +/** + * nfs_async_unlink_release - Release the sillydelete data. + * @task: rpc_task of the sillydelete + * + * We need to call nfs_put_unlinkdata as a 'tk_release' task since the + * rpc_task would be freed too. + */ +static void +nfs_async_unlink_release(struct rpc_task *task) +{ + struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + nfs_put_unlinkdata(data); +} + +/** + * nfs_async_unlink - asynchronous unlinking of a file + * @dentry: dentry to unlink + */ +int +nfs_async_unlink(struct dentry *dentry) +{ + struct dentry *dir = dentry->d_parent; + struct nfs_unlinkdata *data; + struct rpc_task *task; + struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode); + int status = -ENOMEM; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out; + memset(data, 0, sizeof(*data)); + + data->cred = rpcauth_lookupcred(clnt->cl_auth, 0); + if (IS_ERR(data->cred)) { + status = PTR_ERR(data->cred); + goto out_free; + } + data->dir = dget(dir); + data->dentry = dentry; + + data->next = nfs_deletes; + nfs_deletes = data; + data->count = 1; + + task = &data->task; + rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC); + task->tk_calldata = data; + task->tk_action = nfs_async_unlink_init; + task->tk_release = nfs_async_unlink_release; + + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_NFSFS_RENAMED; + spin_unlock(&dentry->d_lock); + + rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL); + status = 0; + out: + return status; +out_free: + kfree(data); + return status; +} + +/** + * nfs_complete_unlink - Initialize completion of the 
sillydelete + * @dentry: dentry to delete + * + * Since we're most likely to be called by dentry_iput(), we + * only use the dentry to find the sillydelete. We then copy the name + * into the qstr. + */ +void +nfs_complete_unlink(struct dentry *dentry) +{ + struct nfs_unlinkdata *data; + + for(data = nfs_deletes; data != NULL; data = data->next) { + if (dentry == data->dentry) + break; + } + if (!data) + return; + data->count++; + nfs_copy_dname(dentry, data); + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + spin_unlock(&dentry->d_lock); + rpc_wake_up_task(&data->task); + nfs_put_unlinkdata(data); +} diff --git a/fs/nfs/write.c b/fs/nfs/write.c new file mode 100644 index 00000000000..6f7a4af3bc4 --- /dev/null +++ b/fs/nfs/write.c @@ -0,0 +1,1431 @@ +/* + * linux/fs/nfs/write.c + * + * Writing file data over NFS. + * + * We do it like this: When a (user) process wishes to write data to an + * NFS file, a write request is allocated that contains the RPC task data + * plus some info on the page to be written, and added to the inode's + * write chain. If the process writes past the end of the page, an async + * RPC call to write the page is scheduled immediately; otherwise, the call + * is delayed for a few seconds. + * + * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE. + * + * Write requests are kept on the inode's writeback list. Each entry in + * that list references the page (portion) to be written. When the + * cache timeout has expired, the RPC task is woken up, and tries to + * lock the page. As soon as it manages to do so, the request is moved + * from the writeback list to the writelock list. + * + * Note: we must make sure never to confuse the inode passed in the + * write_page request with the one in page->inode. As far as I understand + * it, these are different when doing a swap-out. 
+ * + * To understand everything that goes on here and in the NFS read code, + * one should be aware that a page is locked in exactly one of the following + * cases: + * + * - A write request is in progress. + * - A user process is in generic_file_write/nfs_update_page + * - A user process is in generic_file_read + * + * Also note that because of the way pages are invalidated in + * nfs_revalidate_inode, the following assertions hold: + * + * - If a page is dirty, there will be no read requests (a page will + * not be re-read unless invalidated by nfs_revalidate_inode). + * - If the page is not uptodate, there will be no pending write + * requests, and no process will be in nfs_update_page. + * + * FIXME: Interaction with the vmscan routines is not optimal yet. + * Either vmscan must be made nfs-savvy, or we need a different page + * reclaim concept that supports something like FS-independent + * buffer_heads with a b_ops-> field. + * + * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/file.h> +#include <linux/mpage.h> +#include <linux/writeback.h> + +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs_page.h> +#include <asm/uaccess.h> +#include <linux/smp_lock.h> + +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + +#define MIN_POOL_WRITE (32) +#define MIN_POOL_COMMIT (4) + +/* + * Local function declarations + */ +static struct nfs_page * nfs_update_request(struct nfs_open_context*, + struct inode *, + struct page *, + unsigned int, unsigned int); +static void nfs_writeback_done_partial(struct nfs_write_data *, int); +static void nfs_writeback_done_full(struct nfs_write_data *, int); +static int nfs_wait_on_write_congestion(struct address_space *, int); +static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned 
int); +static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how); + +static kmem_cache_t *nfs_wdata_cachep; +mempool_t *nfs_wdata_mempool; +static mempool_t *nfs_commit_mempool; + +static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); + +static inline struct nfs_write_data *nfs_commit_alloc(void) +{ + struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + } + return p; +} + +static inline void nfs_commit_free(struct nfs_write_data *p) +{ + mempool_free(p, nfs_commit_mempool); +} + +static void nfs_writedata_release(struct rpc_task *task) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; + nfs_writedata_free(wdata); +} + +/* Adjust the file length if we're writing beyond the end */ +static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) +{ + struct inode *inode = page->mapping->host; + loff_t end, i_size = i_size_read(inode); + unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + + if (i_size > 0 && page->index < end_index) + return; + end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); + if (i_size >= end) + return; + i_size_write(inode, end); +} + +/* We can set the PG_uptodate flag if we see that a write request + * covers the full page. + */ +static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) +{ + loff_t end_offs; + + if (PageUptodate(page)) + return; + if (base != 0) + return; + if (count == PAGE_CACHE_SIZE) { + SetPageUptodate(page); + return; + } + + end_offs = i_size_read(page->mapping->host) - 1; + if (end_offs < 0) + return; + /* Is this the last page? */ + if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT)) + return; + /* This is the last page: set PG_uptodate if we cover the entire + * extent of the data, then zero the rest of the page. 
+ */ + if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) { + memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); + SetPageUptodate(page); + } +} + +/* + * Write a page synchronously. + * Offset is the data offset within the page. + */ +static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, + struct page *page, unsigned int offset, unsigned int count, + int how) +{ + unsigned int wsize = NFS_SERVER(inode)->wsize; + int result, written = 0; + struct nfs_write_data *wdata; + + wdata = nfs_writedata_alloc(); + if (!wdata) + return -ENOMEM; + + wdata->flags = how; + wdata->cred = ctx->cred; + wdata->inode = inode; + wdata->args.fh = NFS_FH(inode); + wdata->args.context = ctx; + wdata->args.pages = &page; + wdata->args.stable = NFS_FILE_SYNC; + wdata->args.pgbase = offset; + wdata->args.count = wsize; + wdata->res.fattr = &wdata->fattr; + wdata->res.verf = &wdata->verf; + + dprintk("NFS: nfs_writepage_sync(%s/%Ld %d@%Ld)\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + count, (long long)(page_offset(page) + offset)); + + nfs_begin_data_update(inode); + do { + if (count < wsize) + wdata->args.count = count; + wdata->args.offset = page_offset(page) + wdata->args.pgbase; + + result = NFS_PROTO(inode)->write(wdata); + + if (result < 0) { + /* Must mark the page invalid after I/O error */ + ClearPageUptodate(page); + goto io_error; + } + if (result < wdata->args.count) + printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n", + wdata->args.count, result); + + wdata->args.offset += result; + wdata->args.pgbase += result; + written += result; + count -= result; + } while (count); + /* Update file length */ + nfs_grow_file(page, offset, written); + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, offset, written); + + if (PageError(page)) + ClearPageError(page); + +io_error: + nfs_end_data_update_defer(inode); + nfs_writedata_free(wdata); + return written ? 
written : result; +} + +static int nfs_writepage_async(struct nfs_open_context *ctx, + struct inode *inode, struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_page *req; + int status; + + req = nfs_update_request(ctx, inode, page, offset, count); + status = (IS_ERR(req)) ? PTR_ERR(req) : 0; + if (status < 0) + goto out; + /* Update file length */ + nfs_grow_file(page, offset, count); + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, offset, count); + nfs_unlock_request(req); + out: + return status; +} + +static int wb_priority(struct writeback_control *wbc) +{ + if (wbc->for_reclaim) + return FLUSH_HIGHPRI; + if (wbc->for_kupdate) + return FLUSH_LOWPRI; + return 0; +} + +/* + * Write an mmapped page to the server. + */ +int nfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct nfs_open_context *ctx; + struct inode *inode = page->mapping->host; + unsigned long end_index; + unsigned offset = PAGE_CACHE_SIZE; + loff_t i_size = i_size_read(inode); + int inode_referenced = 0; + int priority = wb_priority(wbc); + int err; + + /* + * Note: We need to ensure that we have a reference to the inode + * if we are to do asynchronous writes. If not, waiting + * in nfs_wait_on_request() may deadlock with clear_inode(). + * + * If igrab() fails here, then it is in any case safe to + * call nfs_wb_page(), since there will be no pending writes. + */ + if (igrab(inode) != 0) + inode_referenced = 1; + end_index = i_size >> PAGE_CACHE_SHIFT; + + /* Ensure we've flushed out any previous writes */ + nfs_wb_page_priority(inode, page, priority); + + /* easy case */ + if (page->index < end_index) + goto do_it; + /* things got complicated... */ + offset = i_size & (PAGE_CACHE_SIZE-1); + + /* OK, are we completely out? 
*/ + err = 0; /* potential race with truncate - ignore */ + if (page->index >= end_index+1 || !offset) + goto out; +do_it: + ctx = nfs_find_open_context(inode, FMODE_WRITE); + if (ctx == NULL) { + err = -EBADF; + goto out; + } + lock_kernel(); + if (!IS_SYNC(inode) && inode_referenced) { + err = nfs_writepage_async(ctx, inode, page, 0, offset); + if (err >= 0) { + err = 0; + if (wbc->for_reclaim) + nfs_flush_inode(inode, 0, 0, FLUSH_STABLE); + } + } else { + err = nfs_writepage_sync(ctx, inode, page, 0, + offset, priority); + if (err >= 0) { + if (err != offset) + redirty_page_for_writepage(wbc, page); + err = 0; + } + } + unlock_kernel(); + put_nfs_open_context(ctx); +out: + unlock_page(page); + if (inode_referenced) + iput(inode); + return err; +} + +/* + * Note: causes nfs_update_request() to block on the assumption + * that the writeback is generated due to memory pressure. + */ +int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + struct inode *inode = mapping->host; + int err; + + err = generic_writepages(mapping, wbc); + if (err) + return err; + while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { + if (wbc->nonblocking) + return 0; + nfs_wait_on_write_congestion(mapping, 0); + } + err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); + if (err < 0) + goto out; + wbc->nr_to_write -= err; + if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { + err = nfs_wait_on_requests(inode, 0, 0); + if (err < 0) + goto out; + } + err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); + if (err > 0) { + wbc->nr_to_write -= err; + err = 0; + } +out: + clear_bit(BDI_write_congested, &bdi->state); + wake_up_all(&nfs_write_congestion); + return err; +} + +/* + * Insert a write request into an inode + */ +static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int error; + + error = 
radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); + BUG_ON(error == -EEXIST); + if (error) + return error; + if (!nfsi->npages) { + igrab(inode); + nfs_begin_data_update(inode); + if (nfs_have_delegation(inode, FMODE_WRITE)) + nfsi->change_attr++; + } + nfsi->npages++; + atomic_inc(&req->wb_count); + return 0; +} + +/* + * Insert a write request into an inode + */ +static void nfs_inode_remove_request(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + BUG_ON (!NFS_WBACK_BUSY(req)); + + spin_lock(&nfsi->req_lock); + radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); + nfsi->npages--; + if (!nfsi->npages) { + spin_unlock(&nfsi->req_lock); + nfs_end_data_update_defer(inode); + iput(inode); + } else + spin_unlock(&nfsi->req_lock); + nfs_clear_request(req); + nfs_release_request(req); +} + +/* + * Find a request + */ +static inline struct nfs_page * +_nfs_find_request(struct inode *inode, unsigned long index) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *req; + + req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index); + if (req) + atomic_inc(&req->wb_count); + return req; +} + +static struct nfs_page * +nfs_find_request(struct inode *inode, unsigned long index) +{ + struct nfs_page *req; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&nfsi->req_lock); + req = _nfs_find_request(inode, index); + spin_unlock(&nfsi->req_lock); + return req; +} + +/* + * Add a request to the inode's dirty list. 
+ */ +static void +nfs_mark_request_dirty(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&nfsi->req_lock); + nfs_list_add_request(req, &nfsi->dirty); + nfsi->ndirty++; + spin_unlock(&nfsi->req_lock); + inc_page_state(nr_dirty); + mark_inode_dirty(inode); +} + +/* + * Check if a request is dirty + */ +static inline int +nfs_dirty_request(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty; +} + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +/* + * Add a request to the inode's commit list. + */ +static void +nfs_mark_request_commit(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&nfsi->req_lock); + nfs_list_add_request(req, &nfsi->commit); + nfsi->ncommit++; + spin_unlock(&nfsi->req_lock); + inc_page_state(nr_unstable); + mark_inode_dirty(inode); +} +#endif + +/* + * Wait for a request to complete. + * + * Interruptible by signals only if mounted with intr flag. 
+ */ +static int +nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *req; + unsigned long idx_end, next; + unsigned int res = 0; + int error; + + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + spin_lock(&nfsi->req_lock); + next = idx_start; + while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) { + if (req->wb_index > idx_end) + break; + + next = req->wb_index + 1; + if (!NFS_WBACK_BUSY(req)) + continue; + + atomic_inc(&req->wb_count); + spin_unlock(&nfsi->req_lock); + error = nfs_wait_on_request(req); + nfs_release_request(req); + if (error < 0) + return error; + spin_lock(&nfsi->req_lock); + res++; + } + spin_unlock(&nfsi->req_lock); + return res; +} + +/* + * nfs_scan_dirty - Scan an inode for dirty requests + * @inode: NFS inode to scan + * @dst: destination list + * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves requests from the inode's dirty page list. + * The requests are *not* checked to ensure that they form a contiguous set. + */ +static int +nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int res; + res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); + nfsi->ndirty -= res; + sub_page_state(nr_dirty,res); + if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + return res; +} + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +/* + * nfs_scan_commit - Scan an inode for commit requests + * @inode: NFS inode to scan + * @dst: destination list + * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves requests from the inode's 'commit' request list. 
+ * The requests are *not* checked to ensure that they form a contiguous set. + */ +static int +nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int res; + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + nfsi->ncommit -= res; + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + return res; +} +#endif + +static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + DEFINE_WAIT(wait); + int ret = 0; + + might_sleep(); + + if (!bdi_write_congested(bdi)) + return 0; + if (intr) { + struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); + sigset_t oldset; + + rpc_clnt_sigmask(clnt, &oldset); + prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE); + if (bdi_write_congested(bdi)) { + if (signalled()) + ret = -ERESTARTSYS; + else + schedule(); + } + rpc_clnt_sigunmask(clnt, &oldset); + } else { + prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE); + if (bdi_write_congested(bdi)) + schedule(); + } + finish_wait(&nfs_write_congestion, &wait); + return ret; +} + + +/* + * Try to update any existing write request, or create one if there is none. + * In order to match, the request's credentials must match those of + * the calling process. + * + * Note: Should always be called with the Page Lock held! 
+ */ +static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, + struct inode *inode, struct page *page, + unsigned int offset, unsigned int bytes) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *req, *new = NULL; + unsigned long rqend, end; + + end = offset + bytes; + + if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR)) + return ERR_PTR(-ERESTARTSYS); + for (;;) { + /* Loop over all inode entries and see if we find + * A request for the page we wish to update + */ + spin_lock(&nfsi->req_lock); + req = _nfs_find_request(inode, page->index); + if (req) { + if (!nfs_lock_request_dontget(req)) { + int error; + spin_unlock(&nfsi->req_lock); + error = nfs_wait_on_request(req); + nfs_release_request(req); + if (error < 0) + return ERR_PTR(error); + continue; + } + spin_unlock(&nfsi->req_lock); + if (new) + nfs_release_request(new); + break; + } + + if (new) { + int error; + nfs_lock_request_dontget(new); + error = nfs_inode_add_request(inode, new); + if (error) { + spin_unlock(&nfsi->req_lock); + nfs_unlock_request(new); + return ERR_PTR(error); + } + spin_unlock(&nfsi->req_lock); + nfs_mark_request_dirty(new); + return new; + } + spin_unlock(&nfsi->req_lock); + + new = nfs_create_request(ctx, inode, page, offset, bytes); + if (IS_ERR(new)) + return new; + } + + /* We have a request for our page. + * If the creds don't match, or the + * page addresses don't match, + * tell the caller to wait on the conflicting + * request. + */ + rqend = req->wb_offset + req->wb_bytes; + if (req->wb_context != ctx + || req->wb_page != page + || !nfs_dirty_request(req) + || offset > rqend || end < req->wb_offset) { + nfs_unlock_request(req); + return ERR_PTR(-EBUSY); + } + + /* Okay, the request matches. 
Update the region */ + if (offset < req->wb_offset) { + req->wb_offset = offset; + req->wb_pgbase = offset; + req->wb_bytes = rqend - req->wb_offset; + } + + if (end > rqend) + req->wb_bytes = end - req->wb_offset; + + return req; +} + +int nfs_flush_incompatible(struct file *file, struct page *page) +{ + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct inode *inode = page->mapping->host; + struct nfs_page *req; + int status = 0; + /* + * Look for a request corresponding to this page. If there + * is one, and it belongs to another file, we flush it out + * before we try to copy anything into the page. Do this + * due to the lack of an ACCESS-type call in NFSv2. + * Also do the same if we find a request from an existing + * dropped page. + */ + req = nfs_find_request(inode, page->index); + if (req) { + if (req->wb_page != page || ctx != req->wb_context) + status = nfs_wb_page(inode, page); + nfs_release_request(req); + } + return (status < 0) ? status : 0; +} + +/* + * Update and possibly write a cached page of an NFS file. + * + * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad + * things with a page scheduled for an RPC call (e.g. invalidate it). 
+ */ +int nfs_updatepage(struct file *file, struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct dentry *dentry = file->f_dentry; + struct inode *inode = page->mapping->host; + struct nfs_page *req; + int status = 0; + + dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + count, (long long)(page_offset(page) +offset)); + + if (IS_SYNC(inode)) { + status = nfs_writepage_sync(ctx, inode, page, offset, count, 0); + if (status > 0) { + if (offset == 0 && status == PAGE_CACHE_SIZE) + SetPageUptodate(page); + return 0; + } + return status; + } + + /* If we're not using byte range locks, and we know the page + * is entirely in cache, it may be more efficient to avoid + * fragmenting write requests. + */ + if (PageUptodate(page) && inode->i_flock == NULL) { + loff_t end_offs = i_size_read(inode) - 1; + unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; + + count += offset; + offset = 0; + if (unlikely(end_offs < 0)) { + /* Do nothing */ + } else if (page->index == end_index) { + unsigned int pglen; + pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1; + if (count < pglen) + count = pglen; + } else if (page->index < end_index) + count = PAGE_CACHE_SIZE; + } + + /* + * Try to find an NFS request corresponding to this page + * and update it. + * If the existing request cannot be updated, we must flush + * it out now. + */ + do { + req = nfs_update_request(ctx, inode, page, offset, count); + status = (IS_ERR(req)) ? PTR_ERR(req) : 0; + if (status != -EBUSY) + break; + /* Request could not be updated. Flush it out and try again */ + status = nfs_wb_page(inode, page); + } while (status >= 0); + if (status < 0) + goto done; + + status = 0; + + /* Update file length */ + nfs_grow_file(page, offset, count); + /* Set the PG_uptodate flag? 
*/ + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_unlock_request(req); +done: + dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", + status, (long long)i_size_read(inode)); + if (status < 0) + ClearPageUptodate(page); + return status; +} + +static void nfs_writepage_release(struct nfs_page *req) +{ + end_page_writeback(req->wb_page); + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (!PageError(req->wb_page)) { + if (NFS_NEED_RESCHED(req)) { + nfs_mark_request_dirty(req); + goto out; + } else if (NFS_NEED_COMMIT(req)) { + nfs_mark_request_commit(req); + goto out; + } + } + nfs_inode_remove_request(req); + +out: + nfs_clear_commit(req); + nfs_clear_reschedule(req); +#else + nfs_inode_remove_request(req); +#endif + nfs_unlock_request(req); +} + +static inline int flush_task_priority(int how) +{ + switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { + case FLUSH_HIGHPRI: + return RPC_PRIORITY_HIGH; + case FLUSH_LOWPRI: + return RPC_PRIORITY_LOW; + } + return RPC_PRIORITY_NORMAL; +} + +/* + * Set up the argument/result storage required for the RPC call. + */ +static void nfs_write_rpcsetup(struct nfs_page *req, + struct nfs_write_data *data, + unsigned int count, unsigned int offset, + int how) +{ + struct rpc_task *task = &data->task; + struct inode *inode; + + /* Set up the RPC argument and reply structs + * NB: take care not to mess about with data->commit et al. 
*/ + + data->req = req; + data->inode = inode = req->wb_context->dentry->d_inode; + data->cred = req->wb_context->cred; + + data->args.fh = NFS_FH(inode); + data->args.offset = req_offset(req) + offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.pages = data->pagevec; + data->args.count = count; + data->args.context = req->wb_context; + + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.verf = &data->verf; + + NFS_PROTO(inode)->write_setup(data, how); + + data->task.tk_priority = flush_task_priority(how); + data->task.tk_cookie = (unsigned long)inode; + data->task.tk_calldata = data; + /* Release requests */ + data->task.tk_release = nfs_writedata_release; + + dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + task->tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + count, + (unsigned long long)data->args.offset); +} + +static void nfs_execute_write(struct nfs_write_data *data) +{ + struct rpc_clnt *clnt = NFS_CLIENT(data->inode); + sigset_t oldset; + + rpc_clnt_sigmask(clnt, &oldset); + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); + rpc_clnt_sigunmask(clnt, &oldset); +} + +/* + * Generate multiple small requests to write out a single + * contiguous dirty area on one page. 
+ */ +static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) +{ + struct nfs_page *req = nfs_list_entry(head->next); + struct page *page = req->wb_page; + struct nfs_write_data *data; + unsigned int wsize = NFS_SERVER(inode)->wsize; + unsigned int nbytes, offset; + int requests = 0; + LIST_HEAD(list); + + nfs_list_remove_request(req); + + nbytes = req->wb_bytes; + for (;;) { + data = nfs_writedata_alloc(); + if (!data) + goto out_bad; + list_add(&data->pages, &list); + requests++; + if (nbytes <= wsize) + break; + nbytes -= wsize; + } + atomic_set(&req->wb_complete, requests); + + ClearPageError(page); + SetPageWriteback(page); + offset = 0; + nbytes = req->wb_bytes; + do { + data = list_entry(list.next, struct nfs_write_data, pages); + list_del_init(&data->pages); + + data->pagevec[0] = page; + data->complete = nfs_writeback_done_partial; + + if (nbytes > wsize) { + nfs_write_rpcsetup(req, data, wsize, offset, how); + offset += wsize; + nbytes -= wsize; + } else { + nfs_write_rpcsetup(req, data, nbytes, offset, how); + nbytes = 0; + } + nfs_execute_write(data); + } while (nbytes != 0); + + return 0; + +out_bad: + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_free(data); + } + nfs_mark_request_dirty(req); + nfs_unlock_request(req); + return -ENOMEM; +} + +/* + * Create an RPC task for the given write request and kick it. + * The page must have been locked by the caller. + * + * It may happen that the page we're passed is not marked dirty. + * This is the case if nfs_updatepage detects a conflicting request + * that has been written but not committed. 
+ */ +static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) +{ + struct nfs_page *req; + struct page **pages; + struct nfs_write_data *data; + unsigned int count; + + if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) + return nfs_flush_multi(head, inode, how); + + data = nfs_writedata_alloc(); + if (!data) + goto out_bad; + + pages = data->pagevec; + count = 0; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + ClearPageError(req->wb_page); + SetPageWriteback(req->wb_page); + *pages++ = req->wb_page; + count += req->wb_bytes; + } + req = nfs_list_entry(data->pages.next); + + data->complete = nfs_writeback_done_full; + /* Set up the argument struct */ + nfs_write_rpcsetup(req, data, count, 0, how); + + nfs_execute_write(data); + return 0; + out_bad: + while (!list_empty(head)) { + struct nfs_page *req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_mark_request_dirty(req); + nfs_unlock_request(req); + } + return -ENOMEM; +} + +static int +nfs_flush_list(struct list_head *head, int wpages, int how) +{ + LIST_HEAD(one_request); + struct nfs_page *req; + int error = 0; + unsigned int pages = 0; + + while (!list_empty(head)) { + pages += nfs_coalesce_requests(head, &one_request, wpages); + req = nfs_list_entry(one_request.next); + error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how); + if (error < 0) + break; + } + if (error >= 0) + return pages; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_mark_request_dirty(req); + nfs_unlock_request(req); + } + return error; +} + +/* + * Handle a write reply that flushed part of a page. 
+ */ +static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) +{ + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + + dprintk("NFS: write (%s/%Ld %d@%Ld)", + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + + if (status < 0) { + ClearPageUptodate(page); + SetPageError(page); + req->wb_context->error = status; + dprintk(", error = %d\n", status); + } else { +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (data->verf.committed < NFS_FILE_SYNC) { + if (!NFS_NEED_COMMIT(req)) { + nfs_defer_commit(req); + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + dprintk(" defer commit\n"); + } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { + nfs_defer_reschedule(req); + dprintk(" server reboot detected\n"); + } + } else +#endif + dprintk(" OK\n"); + } + + if (atomic_dec_and_test(&req->wb_complete)) + nfs_writepage_release(req); +} + +/* + * Handle a write reply that flushes a whole page. + * + * FIXME: There is an inherent race with invalidate_inode_pages and + * writebacks since the page->count is kept > 1 for as long + * as the page has a write request pending. + */ +static void nfs_writeback_done_full(struct nfs_write_data *data, int status) +{ + struct nfs_page *req; + struct page *page; + + /* Update attributes as result of writeback. 
*/ + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + page = req->wb_page; + + dprintk("NFS: write (%s/%Ld %d@%Ld)", + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + + if (status < 0) { + ClearPageUptodate(page); + SetPageError(page); + req->wb_context->error = status; + end_page_writeback(page); + nfs_inode_remove_request(req); + dprintk(", error = %d\n", status); + goto next; + } + end_page_writeback(page); + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { + nfs_inode_remove_request(req); + dprintk(" OK\n"); + goto next; + } + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + nfs_mark_request_commit(req); + dprintk(" marked for commit\n"); +#else + nfs_inode_remove_request(req); +#endif + next: + nfs_unlock_request(req); + } +} + +/* + * This function is called when the WRITE call is complete. + */ +void nfs_writeback_done(struct rpc_task *task) +{ + struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_writeargs *argp = &data->args; + struct nfs_writeres *resp = &data->res; + + dprintk("NFS: %4d nfs_writeback_done (status %d)\n", + task->tk_pid, task->tk_status); + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (resp->verf->committed < argp->stable && task->tk_status >= 0) { + /* We tried a write call, but the server did not + * commit data to stable storage even though we + * requested it. + * Note: There is a known bug in Tru64 < 5.0 in which + * the server reports NFS_DATA_SYNC, but performs + * NFS_FILE_SYNC. We therefore implement this checking + * as a dprintk() in order to avoid filling syslog. 
+ */ + static unsigned long complain; + + if (time_before(complain, jiffies)) { + dprintk("NFS: faulty NFS server %s:" + " (committed = %d) != (stable = %d)\n", + NFS_SERVER(data->inode)->hostname, + resp->verf->committed, argp->stable); + complain = jiffies + 300 * HZ; + } + } +#endif + /* Is this a short write? */ + if (task->tk_status >= 0 && resp->count < argp->count) { + static unsigned long complain; + + /* Has the server at least made some progress? */ + if (resp->count != 0) { + /* Was this an NFSv2 write or an NFSv3 stable write? */ + if (resp->verf->committed != NFS_UNSTABLE) { + /* Resend from where the server left off */ + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + } else { + /* Resend as a stable write in order to avoid + * headaches in the case of a server crash. + */ + argp->stable = NFS_FILE_SYNC; + } + rpc_restart_call(task); + return; + } + if (time_before(complain, jiffies)) { + printk(KERN_WARNING + "NFS: Server wrote zero bytes, expected %u.\n", + argp->count); + complain = jiffies + 300 * HZ; + } + /* Can't do anything about it except throw an error. */ + task->tk_status = -EIO; + } + + /* + * Process the nfs_page list + */ + data->complete(data, task->tk_status); +} + + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static void nfs_commit_release(struct rpc_task *task) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; + nfs_commit_free(wdata); +} + +/* + * Set up the argument/result storage required for the RPC call. + */ +static void nfs_commit_rpcsetup(struct list_head *head, + struct nfs_write_data *data, int how) +{ + struct rpc_task *task = &data->task; + struct nfs_page *first, *last; + struct inode *inode; + loff_t start, end, len; + + /* Set up the RPC argument and reply structs + * NB: take care not to mess about with data->commit et al. 
*/ + + list_splice_init(head, &data->pages); + first = nfs_list_entry(data->pages.next); + last = nfs_list_entry(data->pages.prev); + inode = first->wb_context->dentry->d_inode; + + /* + * Determine the offset range of requests in the COMMIT call. + * We rely on the fact that data->pages is an ordered list... + */ + start = req_offset(first); + end = req_offset(last) + last->wb_bytes; + len = end - start; + /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ + if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1)) + len = 0; + + data->inode = inode; + data->cred = first->wb_context->cred; + + data->args.fh = NFS_FH(data->inode); + data->args.offset = start; + data->args.count = len; + data->res.count = len; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; + + NFS_PROTO(inode)->commit_setup(data, how); + + data->task.tk_priority = flush_task_priority(how); + data->task.tk_cookie = (unsigned long)inode; + data->task.tk_calldata = data; + /* Release requests */ + data->task.tk_release = nfs_commit_release; + + dprintk("NFS: %4d initiated commit call\n", task->tk_pid); +} + +/* + * Commit dirty pages + */ +static int +nfs_commit_list(struct list_head *head, int how) +{ + struct nfs_write_data *data; + struct nfs_page *req; + + data = nfs_commit_alloc(); + + if (!data) + goto out_bad; + + /* Set up the argument struct */ + nfs_commit_rpcsetup(head, data, how); + + nfs_execute_write(data); + return 0; + out_bad: + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_mark_request_commit(req); + nfs_unlock_request(req); + } + return -ENOMEM; +} + +/* + * COMMIT call returned + */ +void +nfs_commit_done(struct rpc_task *task) +{ + struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; + struct nfs_page *req; + int res = 0; + + dprintk("NFS: %4d nfs_commit_done (status %d)\n", + task->tk_pid, task->tk_status); + + while (!list_empty(&data->pages)) { + req = 
nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + + dprintk("NFS: commit (%s/%Ld %d@%Ld)", + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + if (task->tk_status < 0) { + req->wb_context->error = task->tk_status; + nfs_inode_remove_request(req); + dprintk(", error = %d\n", task->tk_status); + goto next; + } + + /* Okay, COMMIT succeeded, apparently. Check the verifier + * returned by the server against all stored verfs. */ + if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { + /* We have a match */ + nfs_inode_remove_request(req); + dprintk(" OK\n"); + goto next; + } + /* We have a mismatch. Write the page again */ + dprintk(" mismatch\n"); + nfs_mark_request_dirty(req); + next: + nfs_unlock_request(req); + res++; + } + sub_page_state(nr_unstable,res); +} +#endif + +static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) +{ + struct nfs_inode *nfsi = NFS_I(inode); + LIST_HEAD(head); + int res, + error = 0; + + spin_lock(&nfsi->req_lock); + res = nfs_scan_dirty(inode, &head, idx_start, npages); + spin_unlock(&nfsi->req_lock); + if (res) + error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); + if (error < 0) + return error; + return res; +} + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +int nfs_commit_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) +{ + struct nfs_inode *nfsi = NFS_I(inode); + LIST_HEAD(head); + int res, + error = 0; + + spin_lock(&nfsi->req_lock); + res = nfs_scan_commit(inode, &head, idx_start, npages); + if (res) { + res += nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&nfsi->req_lock); + error = nfs_commit_list(&head, how); + } else + spin_unlock(&nfsi->req_lock); + if (error < 0) + return error; + return res; +} +#endif + +int nfs_sync_inode(struct inode *inode, unsigned long 
idx_start, + unsigned int npages, int how) +{ + int error, + wait; + + wait = how & FLUSH_WAIT; + how &= ~FLUSH_WAIT; + + do { + error = 0; + if (wait) + error = nfs_wait_on_requests(inode, idx_start, npages); + if (error == 0) + error = nfs_flush_inode(inode, idx_start, npages, how); +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (error == 0) + error = nfs_commit_inode(inode, idx_start, npages, how); +#endif + } while (error > 0); + return error; +} + +int nfs_init_writepagecache(void) +{ + nfs_wdata_cachep = kmem_cache_create("nfs_write_data", + sizeof(struct nfs_write_data), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (nfs_wdata_cachep == NULL) + return -ENOMEM; + + nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE, + mempool_alloc_slab, + mempool_free_slab, + nfs_wdata_cachep); + if (nfs_wdata_mempool == NULL) + return -ENOMEM; + + nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT, + mempool_alloc_slab, + mempool_free_slab, + nfs_wdata_cachep); + if (nfs_commit_mempool == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_writepagecache(void) +{ + mempool_destroy(nfs_commit_mempool); + mempool_destroy(nfs_wdata_mempool); + if (kmem_cache_destroy(nfs_wdata_cachep)) + printk(KERN_INFO "nfs_write_data: not all structures were freed\n"); +} + |