diff options
Diffstat (limited to 'fs/nfs')
45 files changed, 37226 insertions, 0 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig new file mode 100644 index 00000000000..59e5673b459 --- /dev/null +++ b/fs/nfs/Kconfig @@ -0,0 +1,103 @@ +config NFS_FS + tristate "NFS client support" + depends on INET && FILE_LOCKING + select LOCKD + select SUNRPC + select NFS_ACL_SUPPORT if NFS_V3_ACL + help + Choose Y here if you want to access files residing on other + computers using Sun's Network File System protocol. To compile + this file system support as a module, choose M here: the module + will be called nfs. + + To mount file systems exported by NFS servers, you also need to + install the user space mount.nfs command which can be found in + the Linux nfs-utils package, available from http://linux-nfs.org/. + Information about using the mount command is available in the + mount(8) man page. More detail about the Linux NFS client + implementation is available via the nfs(5) man page. + + Below you can choose which versions of the NFS protocol are + available in the kernel to mount NFS servers. Support for NFS + version 2 (RFC 1094) is always available when NFS_FS is selected. + + To configure a system which mounts its root file system via NFS + at boot time, say Y here, select "Kernel level IP + autoconfiguration" in the NETWORK menu, and select "Root file + system on NFS" below. You cannot compile this file system as a + module in this case. + + If unsure, say N. + +config NFS_V3 + bool "NFS client support for NFS version 3" + depends on NFS_FS + help + This option enables support for version 3 of the NFS protocol + (RFC 1813) in the kernel's NFS client. + + If unsure, say Y. + +config NFS_V3_ACL + bool "NFS client support for the NFSv3 ACL protocol extension" + depends on NFS_V3 + help + Some NFS servers support an auxiliary NFSv3 ACL protocol that + Sun added to Solaris but never became an official part of the + NFS version 3 protocol. This protocol extension allows + applications on NFS clients to manipulate POSIX Access Control + Lists on files residing on NFS servers. NFS servers enforce + ACLs on local files whether this protocol is available or not. + + Choose Y here if your NFS server supports the Solaris NFSv3 ACL + protocol extension and you want your NFS client to allow + applications to access and modify ACLs on files on the server. + + Most NFS servers don't support the Solaris NFSv3 ACL protocol + extension. You can choose N here or specify the "noacl" mount + option to prevent your NFS client from trying to use the NFSv3 + ACL protocol. + + If unsure, say N. + +config NFS_V4 + bool "NFS client support for NFS version 4 (EXPERIMENTAL)" + depends on NFS_FS && EXPERIMENTAL + select RPCSEC_GSS_KRB5 + help + This option enables support for version 4 of the NFS protocol + (RFC 3530) in the kernel's NFS client. + + To mount NFS servers using NFSv4, you also need to install user + space programs which can be found in the Linux nfs-utils package, + available from http://linux-nfs.org/. + + If unsure, say N. + +config NFS_V4_1 + bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)" + depends on NFS_V4 && EXPERIMENTAL + help + This option enables support for minor version 1 of the NFSv4 protocol + (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. + + Unless you're an NFS developer, say N. + +config ROOT_NFS + bool "Root file system on NFS" + depends on NFS_FS=y && IP_PNP + help + If you want your system to mount its root file system via NFS, + choose Y here. This is common practice for managing systems + without local permanent storage. For details, read + <file:Documentation/filesystems/nfs/nfsroot.txt>. + + Most people say N here. + +config NFS_FSCACHE + bool "Provide NFS client caching support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y + help + Say Y here if you want NFS data to be cached locally on disc through + the general filesystem cache manager diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile new file mode 100644 index 00000000000..da7fda639ea --- /dev/null +++ b/fs/nfs/Makefile @@ -0,0 +1,19 @@ +# +# Makefile for the Linux nfs filesystem routines. +# + +obj-$(CONFIG_NFS_FS) += nfs.o + +nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ + direct.o pagelist.o proc.o read.o symlink.o unlink.o \ + write.o namespace.o mount_clnt.o \ + dns_resolve.o cache_lib.o +nfs-$(CONFIG_ROOT_NFS) += nfsroot.o +nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o +nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ + delegation.o idmap.o \ + callback.o callback_xdr.o callback_proc.o \ + nfs4namespace.o +nfs-$(CONFIG_SYSCTL) += sysctl.o +nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c new file mode 100644 index 00000000000..b4ffd0146ea --- /dev/null +++ b/fs/nfs/cache_lib.c @@ -0,0 +1,140 @@ +/* + * linux/fs/nfs/cache_lib.c + * + * Helper routines for the NFS client caches + * + * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com> + */ +#include <linux/kmod.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/sunrpc/cache.h> +#include <linux/sunrpc/rpc_pipe_fs.h> + +#include "cache_lib.h" + +#define NFS_CACHE_UPCALL_PATHLEN 256 +#define NFS_CACHE_UPCALL_TIMEOUT 15 + +static char nfs_cache_getent_prog[NFS_CACHE_UPCALL_PATHLEN] = + "/sbin/nfs_cache_getent"; +static unsigned long nfs_cache_getent_timeout = NFS_CACHE_UPCALL_TIMEOUT; + +module_param_string(cache_getent, nfs_cache_getent_prog, + sizeof(nfs_cache_getent_prog), 0600); +MODULE_PARM_DESC(cache_getent, "Path to the client cache upcall program"); +module_param_named(cache_getent_timeout, nfs_cache_getent_timeout, ulong, 0600); +MODULE_PARM_DESC(cache_getent_timeout, "Timeout (in seconds) after which " + "the cache upcall is assumed to have failed"); + +int nfs_cache_upcall(struct cache_detail *cd, char *entry_name) +{ + static char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL + }; + char *argv[] = { + nfs_cache_getent_prog, + cd->name, + entry_name, + NULL + }; + int ret = -EACCES; + + if (nfs_cache_getent_prog[0] == '\0') + goto out; + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + /* + * Disable the upcall mechanism if we're getting an ENOENT or + * EACCES error. The admin can re-enable it on the fly by using + * sysfs to set the 'cache_getent' parameter once the problem + * has been fixed. + */ + if (ret == -ENOENT || ret == -EACCES) + nfs_cache_getent_prog[0] = '\0'; +out: + return ret > 0 ? 0 : ret; +} + +/* + * Deferred request handling + */ +void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq) +{ + if (atomic_dec_and_test(&dreq->count)) + kfree(dreq); +} + +static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany) +{ + struct nfs_cache_defer_req *dreq; + + dreq = container_of(d, struct nfs_cache_defer_req, deferred_req); + + complete_all(&dreq->completion); + nfs_cache_defer_req_put(dreq); +} + +static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req) +{ + struct nfs_cache_defer_req *dreq; + + dreq = container_of(req, struct nfs_cache_defer_req, req); + dreq->deferred_req.revisit = nfs_dns_cache_revisit; + atomic_inc(&dreq->count); + + return &dreq->deferred_req; +} + +struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void) +{ + struct nfs_cache_defer_req *dreq; + + dreq = kzalloc(sizeof(*dreq), GFP_KERNEL); + if (dreq) { + init_completion(&dreq->completion); + atomic_set(&dreq->count, 1); + dreq->req.defer = nfs_dns_cache_defer; + } + return dreq; +} + +int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) +{ + if (wait_for_completion_timeout(&dreq->completion, + nfs_cache_getent_timeout * HZ) == 0) + return -ETIMEDOUT; + return 0; +} + +int nfs_cache_register(struct cache_detail *cd) +{ + struct nameidata nd; + struct vfsmount *mnt; + int ret; + + mnt = rpc_get_mount(); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd); + if (ret) + goto err; + ret = sunrpc_cache_register_pipefs(nd.path.dentry, + cd->name, 0600, cd); + path_put(&nd.path); + if (!ret) + return ret; +err: + rpc_put_mount(); + return ret; +} + +void nfs_cache_unregister(struct cache_detail *cd) +{ + sunrpc_cache_unregister_pipefs(cd); + rpc_put_mount(); +} + diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h new file mode 100644 index 00000000000..76f856e284e --- /dev/null +++ b/fs/nfs/cache_lib.h @@ -0,0 +1,27 @@ +/* + * Helper routines for the NFS client caches + * + * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com> + */ + +#include <linux/completion.h> +#include <linux/sunrpc/cache.h> +#include <asm/atomic.h> + +/* + * Deferred request handling + */ +struct nfs_cache_defer_req { + struct cache_req req; + struct cache_deferred_req deferred_req; + struct completion completion; + atomic_t count; +}; + +extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name); +extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); +extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); +extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); + +extern int nfs_cache_register(struct cache_detail *cd); +extern void nfs_cache_unregister(struct cache_detail *cd); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c new file mode 100644 index 00000000000..73ab220354d --- /dev/null +++ b/fs/nfs/callback.c @@ -0,0 +1,401 @@ +/* + * linux/fs/nfs/callback.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback handling + */ + +#include <linux/completion.h> +#include <linux/ip.h> +#include <linux/module.h> +#include <linux/smp_lock.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svcsock.h> +#include <linux/nfs_fs.h> +#include <linux/mutex.h> +#include <linux/freezer.h> +#include <linux/kthread.h> +#include <linux/sunrpc/svcauth_gss.h> +#if defined(CONFIG_NFS_V4_1) +#include <linux/sunrpc/bc_xprt.h> +#endif + +#include <net/inet_sock.h> + +#include "nfs4_fs.h" +#include "callback.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +struct nfs_callback_data { + unsigned int users; + struct svc_serv *serv; + struct svc_rqst *rqst; + struct task_struct *task; +}; + +static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; +static DEFINE_MUTEX(nfs_callback_mutex); +static struct svc_program nfs4_callback_program; + +unsigned int nfs_callback_set_tcpport; +unsigned short nfs_callback_tcpport; +unsigned short nfs_callback_tcpport6; +#define NFS_CALLBACK_MAXPORTNR (65535U) + +static int param_set_portnr(const char *val, struct kernel_param *kp) +{ + unsigned long num; + int ret; + + if (!val) + return -EINVAL; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} + +static int param_get_portnr(char *buffer, struct kernel_param *kp) +{ + return param_get_uint(buffer, kp); +} +#define param_check_portnr(name, p) __param_check(name, p, unsigned int); + +module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); + +/* + * This is the NFSv4 callback kernel thread. + */ +static int +nfs4_callback_svc(void *vrqstp) +{ + int err, preverr = 0; + struct svc_rqst *rqstp = vrqstp; + + set_freezable(); + + while (!kthread_should_stop()) { + /* + * Listen for a request on the socket + */ + err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); + if (err == -EAGAIN || err == -EINTR) { + preverr = err; + continue; + } + if (err < 0) { + if (err != preverr) { + printk(KERN_WARNING "%s: unexpected error " + "from svc_recv (%d)\n", __func__, err); + preverr = err; + } + schedule_timeout_uninterruptible(HZ); + continue; + } + preverr = err; + svc_process(rqstp); + } + return 0; +} + +/* + * Prepare to bring up the NFSv4 callback service + */ +struct svc_rqst * +nfs4_callback_up(struct svc_serv *serv) +{ + int ret; + + ret = svc_create_xprt(serv, "tcp", PF_INET, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret <= 0) + goto out_err; + nfs_callback_tcpport = ret; + dprintk("NFS: Callback listener port = %u (af %u)\n", + nfs_callback_tcpport, PF_INET); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + ret = svc_create_xprt(serv, "tcp", PF_INET6, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret > 0) { + nfs_callback_tcpport6 = ret; + dprintk("NFS: Callback listener port = %u (af %u)\n", + nfs_callback_tcpport6, PF_INET6); + } else if (ret == -EAFNOSUPPORT) + ret = 0; + else + goto out_err; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ + + return svc_prepare_thread(serv, &serv->sv_pools[0]); + +out_err: + if (ret == 0) + ret = -ENOMEM; + return ERR_PTR(ret); +} + +#if defined(CONFIG_NFS_V4_1) +/* + * The callback service for NFSv4.1 callbacks + */ +static int +nfs41_callback_svc(void *vrqstp) +{ + struct svc_rqst *rqstp = vrqstp; + struct svc_serv *serv = rqstp->rq_server; + struct rpc_rqst *req; + int error; + DEFINE_WAIT(wq); + + set_freezable(); + + while (!kthread_should_stop()) { + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + spin_lock_bh(&serv->sv_cb_lock); + if (!list_empty(&serv->sv_cb_list)) { + req = list_first_entry(&serv->sv_cb_list, + struct rpc_rqst, rq_bc_list); + list_del(&req->rq_bc_list); + spin_unlock_bh(&serv->sv_cb_lock); + dprintk("Invoking bc_svc_process()\n"); + error = bc_svc_process(serv, req, rqstp); + dprintk("bc_svc_process() returned w/ error code= %d\n", + error); + } else { + spin_unlock_bh(&serv->sv_cb_lock); + schedule(); + } + finish_wait(&serv->sv_cb_waitq, &wq); + } + return 0; +} + +/* + * Bring up the NFSv4.1 callback service + */ +struct svc_rqst * +nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +{ + struct svc_xprt *bc_xprt; + struct svc_rqst *rqstp = ERR_PTR(-ENOMEM); + + dprintk("--> %s\n", __func__); + /* Create a svc_sock for the service */ + bc_xprt = svc_sock_create(serv, xprt->prot); + if (!bc_xprt) + goto out; + + /* + * Save the svc_serv in the transport so that it can + * be referenced when the session backchannel is initialized + */ + serv->bc_xprt = bc_xprt; + xprt->bc_serv = serv; + + INIT_LIST_HEAD(&serv->sv_cb_list); + spin_lock_init(&serv->sv_cb_lock); + init_waitqueue_head(&serv->sv_cb_waitq); + rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(rqstp)) + svc_sock_destroy(bc_xprt); +out: + dprintk("--> %s return %p\n", __func__, rqstp); + return rqstp; +} + +static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, + struct svc_serv *serv, struct rpc_xprt *xprt, + struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +{ + if (minorversion) { + *rqstpp = nfs41_callback_up(serv, xprt); + *callback_svc = nfs41_callback_svc; + } + return minorversion; +} + +static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, + struct nfs_callback_data *cb_info) +{ + if (minorversion) + xprt->bc_serv = cb_info->serv; +} +#else +static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, + struct svc_serv *serv, struct rpc_xprt *xprt, + struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +{ + return 0; +} + +static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, + struct nfs_callback_data *cb_info) +{ +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Bring up the callback thread if it is not already up. + */ +int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) +{ + struct svc_serv *serv = NULL; + struct svc_rqst *rqstp; + int (*callback_svc)(void *vrqstp); + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + char svc_name[12]; + int ret = 0; + int minorversion_setup; + + mutex_lock(&nfs_callback_mutex); + if (cb_info->users++ || cb_info->task != NULL) { + nfs_callback_bc_serv(minorversion, xprt, cb_info); + goto out; + } + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); + if (!serv) { + ret = -ENOMEM; + goto out_err; + } + + minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, + serv, xprt, &rqstp, &callback_svc); + if (!minorversion_setup) { + /* v4.0 callback setup */ + rqstp = nfs4_callback_up(serv); + callback_svc = nfs4_callback_svc; + } + + if (IS_ERR(rqstp)) { + ret = PTR_ERR(rqstp); + goto out_err; + } + + svc_sock_update_bufs(serv); + + sprintf(svc_name, "nfsv4.%u-svc", minorversion); + cb_info->serv = serv; + cb_info->rqst = rqstp; + cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); + if (IS_ERR(cb_info->task)) { + ret = PTR_ERR(cb_info->task); + svc_exit_thread(cb_info->rqst); + cb_info->rqst = NULL; + cb_info->task = NULL; + goto out_err; + } +out: + /* + * svc_create creates the svc_serv with sv_nrthreads == 1, and then + * svc_prepare_thread increments that. So we need to call svc_destroy + * on both success and failure so that the refcount is 1 when the + * thread exits. + */ + if (serv) + svc_destroy(serv); + mutex_unlock(&nfs_callback_mutex); + return ret; +out_err: + dprintk("NFS: Couldn't create callback socket or server thread; " + "err = %d\n", ret); + cb_info->users--; + goto out; +} + +/* + * Kill the callback thread if it's no longer being used. + */ +void nfs_callback_down(int minorversion) +{ + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + + mutex_lock(&nfs_callback_mutex); + cb_info->users--; + if (cb_info->users == 0 && cb_info->task != NULL) { + kthread_stop(cb_info->task); + svc_exit_thread(cb_info->rqst); + cb_info->serv = NULL; + cb_info->rqst = NULL; + cb_info->task = NULL; + } + mutex_unlock(&nfs_callback_mutex); +} + +static int check_gss_callback_principal(struct nfs_client *clp, + struct svc_rqst *rqstp) +{ + struct rpc_clnt *r = clp->cl_rpcclient; + char *p = svc_gss_principal(rqstp); + + /* + * It might just be a normal user principal, in which case + * userspace won't bother to tell us the name at all. + */ + if (p == NULL) + return SVC_DENIED; + + /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ + + if (memcmp(p, "nfs@", 4) != 0) + return SVC_DENIED; + p += 4; + if (strcmp(p, r->cl_server) != 0) + return SVC_DENIED; + return SVC_OK; +} + +static int nfs_callback_authenticate(struct svc_rqst *rqstp) +{ + struct nfs_client *clp; + RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); + int ret = SVC_OK; + + /* Don't talk to strangers */ + clp = nfs_find_client(svc_addr(rqstp), 4); + if (clp == NULL) + return SVC_DROP; + + dprintk("%s: %s NFSv4 callback!\n", __func__, + svc_print_addr(rqstp, buf, sizeof(buf))); + + switch (rqstp->rq_authop->flavour) { + case RPC_AUTH_NULL: + if (rqstp->rq_proc != CB_NULL) + ret = SVC_DENIED; + break; + case RPC_AUTH_UNIX: + break; + case RPC_AUTH_GSS: + ret = check_gss_callback_principal(clp, rqstp); + break; + default: + ret = SVC_DENIED; + } + nfs_put_client(clp); + return ret; +} + +/* + * Define NFS4 callback program + */ +static struct svc_version *nfs4_callback_version[] = { + [1] = &nfs4_callback_version1, + [4] = &nfs4_callback_version4, +}; + +static struct svc_stat nfs4_callback_stats; + +static struct svc_program nfs4_callback_program = { + .pg_prog = NFS4_CALLBACK, /* RPC service number */ + .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */ + .pg_vers = nfs4_callback_version, /* version table */ + .pg_name = "NFSv4 callback", /* service name */ + .pg_class = "nfs", /* authentication class */ + .pg_stats = &nfs4_callback_stats, + .pg_authenticate = nfs_callback_authenticate, +}; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h new file mode 100644 index 00000000000..d4036be0b58 --- /dev/null +++ b/fs/nfs/callback.h @@ -0,0 +1,145 @@ +/* + * linux/fs/nfs/callback.h + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback definitions + */ +#ifndef __LINUX_FS_NFS_CALLBACK_H +#define __LINUX_FS_NFS_CALLBACK_H + +#define NFS4_CALLBACK 0x40000000 +#define NFS4_CALLBACK_XDRSIZE 2048 +#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE) + +enum nfs4_callback_procnum { + CB_NULL = 0, + CB_COMPOUND = 1, +}; + +enum nfs4_callback_opnum { + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, +/* Callback operations new to NFSv4.1 */ + OP_CB_LAYOUTRECALL = 5, + OP_CB_NOTIFY = 6, + OP_CB_PUSH_DELEG = 7, + OP_CB_RECALL_ANY = 8, + OP_CB_RECALLABLE_OBJ_AVAIL = 9, + OP_CB_RECALL_SLOT = 10, + OP_CB_SEQUENCE = 11, + OP_CB_WANTS_CANCELLED = 12, + OP_CB_NOTIFY_LOCK = 13, + OP_CB_NOTIFY_DEVICEID = 14, + OP_CB_ILLEGAL = 10044, +}; + +struct cb_compound_hdr_arg { + unsigned int taglen; + const char *tag; + unsigned int minorversion; + unsigned nops; +}; + +struct cb_compound_hdr_res { + __be32 *status; + unsigned int taglen; + const char *tag; + __be32 *nops; +}; + +struct cb_getattrargs { + struct sockaddr *addr; + struct nfs_fh fh; + uint32_t bitmap[2]; +}; + +struct cb_getattrres { + __be32 status; + uint32_t bitmap[2]; + uint64_t size; + uint64_t change_attr; + struct timespec ctime; + struct timespec mtime; +}; + +struct cb_recallargs { + struct sockaddr *addr; + struct nfs_fh fh; + nfs4_stateid stateid; + uint32_t truncate; +}; + +#if defined(CONFIG_NFS_V4_1) + +struct referring_call { + uint32_t rc_sequenceid; + uint32_t rc_slotid; +}; + +struct referring_call_list { + struct nfs4_sessionid rcl_sessionid; + uint32_t rcl_nrefcalls; + struct referring_call *rcl_refcalls; +}; + +struct cb_sequenceargs { + struct sockaddr *csa_addr; + struct nfs4_sessionid csa_sessionid; + uint32_t csa_sequenceid; + uint32_t csa_slotid; + uint32_t csa_highestslotid; + uint32_t csa_cachethis; + uint32_t csa_nrclists; + struct referring_call_list *csa_rclists; +}; + +struct cb_sequenceres { + __be32 csr_status; + struct nfs4_sessionid csr_sessionid; + uint32_t csr_sequenceid; + uint32_t csr_slotid; + uint32_t csr_highestslotid; + uint32_t csr_target_highestslotid; +}; + +extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, + struct cb_sequenceres *res); + +extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, + const nfs4_stateid *stateid); + +#define RCA4_TYPE_MASK_RDATA_DLG 0 +#define RCA4_TYPE_MASK_WDATA_DLG 1 + +struct cb_recallanyargs { + struct sockaddr *craa_addr; + uint32_t craa_objs_to_keep; + uint32_t craa_type_mask; +}; + +extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy); +#endif /* CONFIG_NFS_V4_1 */ + +extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); +extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy); + +#ifdef CONFIG_NFS_V4 +extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); +extern void nfs_callback_down(int minorversion); +extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, + const nfs4_stateid *stateid); +#endif /* CONFIG_NFS_V4 */ +/* + * nfs41: Callbacks are expected to not cause substantial latency, + * so we limit their concurrency to 1 by setting up the maximum number + * of slots for the backchannel. + */ +#define NFS41_BC_MIN_CALLBACKS 1 +#define NFS41_BC_MAX_CALLBACKS 1 + +extern unsigned int nfs_callback_set_tcpport; +extern unsigned short nfs_callback_tcpport; +extern unsigned short nfs_callback_tcpport6; + +#endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c new file mode 100644 index 00000000000..defa9b4c470 --- /dev/null +++ b/fs/nfs/callback_proc.c @@ -0,0 +1,292 @@ +/* + * linux/fs/nfs/callback_proc.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback procedures + */ +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "internal.h" + +#ifdef NFS_DEBUG +#define NFSDBG_FACILITY NFSDBG_CALLBACK +#endif + +__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) +{ + struct nfs_client *clp; + struct nfs_delegation *delegation; + struct nfs_inode *nfsi; + struct inode *inode; + + res->bitmap[0] = res->bitmap[1] = 0; + res->status = htonl(NFS4ERR_BADHANDLE); + clp = nfs_find_client(args->addr, 4); + if (clp == NULL) + goto out; + + dprintk("NFS: GETATTR callback request from %s\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode == NULL) + goto out_putclient; + nfsi = NFS_I(inode); + down_read(&nfsi->rwsem); + delegation = nfsi->delegation; + if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) + goto out_iput; + res->size = i_size_read(inode); + res->change_attr = delegation->change_attr; + if (nfsi->npages != 0) + res->change_attr++; + res->ctime = inode->i_ctime; + res->mtime = inode->i_mtime; + res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & + args->bitmap[0]; + res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & + args->bitmap[1]; + res->status = 0; +out_iput: + up_read(&nfsi->rwsem); + iput(inode); +out_putclient: + nfs_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status)); + return res->status; +} + +static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *) +{ +#if defined(CONFIG_NFS_V4_1) + if (clp->cl_minorversion > 0) + return nfs41_validate_delegation_stateid; +#endif + return nfs4_validate_delegation_stateid; +} + + +__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) +{ + struct nfs_client *clp; + struct inode *inode; + __be32 res; + + res = htonl(NFS4ERR_BADHANDLE); + clp = nfs_find_client(args->addr, 4); + if (clp == NULL) + goto out; + + dprintk("NFS: RECALL callback request from %s\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + do { + struct nfs_client *prev = clp; + + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode != NULL) { + /* Set up a helper thread to actually return the delegation */ + switch (nfs_async_inode_return_delegation(inode, &args->stateid, + nfs_validate_delegation_stateid(clp))) { + case 0: + res = 0; + break; + case -ENOENT: + if (res != 0) + res = htonl(NFS4ERR_BAD_STATEID); + break; + default: + res = htonl(NFS4ERR_RESOURCE); + } + iput(inode); + } + clp = nfs_find_client_next(prev); + nfs_put_client(prev); + } while (clp != NULL); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); + return res; +} + +int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) +{ + if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) + return 0; + return 1; +} + +#if defined(CONFIG_NFS_V4_1) + +int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) +{ + if (delegation == NULL) + return 0; + + /* seqid is 4-bytes long */ + if (((u32 *) &stateid->data)[0] != 0) + return 0; + if (memcmp(&delegation->stateid.data[4], &stateid->data[4], + sizeof(stateid->data)-4)) + return 0; + + return 1; +} + +/* + * Validate the sequenceID sent by the server. + * Return success if the sequenceID is one more than what we last saw on + * this slot, accounting for wraparound. Increments the slot's sequence. + * + * We don't yet implement a duplicate request cache, so at this time + * we will log replays, and process them as if we had not seen them before, + * but we don't bump the sequence in the slot. Not too worried about it, + * since we only currently implement idempotent callbacks anyway. + * + * We have a single slot backchannel at this time, so we don't bother + * checking the used_slots bit array on the table. The lower layer guarantees + * a single outstanding callback request at a time. + */ +static int +validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid) +{ + struct nfs4_slot *slot; + + dprintk("%s enter. slotid %d seqid %d\n", + __func__, slotid, seqid); + + if (slotid > NFS41_BC_MAX_CALLBACKS) + return htonl(NFS4ERR_BADSLOT); + + slot = tbl->slots + slotid; + dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr); + + /* Normal */ + if (likely(seqid == slot->seq_nr + 1)) { + slot->seq_nr++; + return htonl(NFS4_OK); + } + + /* Replay */ + if (seqid == slot->seq_nr) { + dprintk("%s seqid %d is a replay - no DRC available\n", + __func__, seqid); + return htonl(NFS4_OK); + } + + /* Wraparound */ + if (seqid == 1 && (slot->seq_nr + 1) == 0) { + slot->seq_nr = 1; + return htonl(NFS4_OK); + } + + /* Misordered request */ + return htonl(NFS4ERR_SEQ_MISORDERED); +} + +/* + * Returns a pointer to a held 'struct nfs_client' that matches the server's + * address, major version number, and session ID. It is the caller's + * responsibility to release the returned reference. + * + * Returns NULL if there are no connections with sessions, or if no session + * matches the one of interest. + */ + static struct nfs_client *find_client_with_session( + const struct sockaddr *addr, u32 nfsversion, + struct nfs4_sessionid *sessionid) +{ + struct nfs_client *clp; + + clp = nfs_find_client(addr, 4); + if (clp == NULL) + return NULL; + + do { + struct nfs_client *prev = clp; + + if (clp->cl_session != NULL) { + if (memcmp(clp->cl_session->sess_id.data, + sessionid->data, + NFS4_MAX_SESSIONID_LEN) == 0) { + /* Returns a held reference to clp */ + return clp; + } + } + clp = nfs_find_client_next(prev); + nfs_put_client(prev); + } while (clp != NULL); + + return NULL; +} + +/* FIXME: referring calls should be processed */ +unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, + struct cb_sequenceres *res) +{ + struct nfs_client *clp; + int i, status; + + for (i = 0; i < args->csa_nrclists; i++) + kfree(args->csa_rclists[i].rcl_refcalls); + kfree(args->csa_rclists); + + status = htonl(NFS4ERR_BADSESSION); + clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); + if (clp == NULL) + goto out; + + status = validate_seqid(&clp->cl_session->bc_slot_table, + args->csa_slotid, args->csa_sequenceid); + if (status) + goto out_putclient; + + memcpy(&res->csr_sessionid, &args->csa_sessionid, + sizeof(res->csr_sessionid)); + res->csr_sequenceid = args->csa_sequenceid; + res->csr_slotid = args->csa_slotid; + res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + +out_putclient: + nfs_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + res->csr_status = status; + return res->csr_status; +} + +unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) +{ + struct nfs_client *clp; + int status; + fmode_t flags = 0; + + status = htonl(NFS4ERR_OP_NOT_IN_SESSION); + clp = nfs_find_client(args->craa_addr, 4); + if (clp == NULL) + goto out; + + dprintk("NFS: RECALL_ANY callback request from %s\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *) + &args->craa_type_mask)) + flags = FMODE_READ; + if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *) + &args->craa_type_mask)) + flags |= FMODE_WRITE; + + if (flags) + nfs_expire_all_delegation_types(clp, flags); + status = htonl(NFS4_OK); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} +#endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c new file mode 100644 index 00000000000..8e1a2511c8b --- /dev/null +++ b/fs/nfs/callback_xdr.c @@ -0,0 +1,752 @@ +/* + * linux/fs/nfs/callback_xdr.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback encode/decode procedures + */ +#include <linux/kernel.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" +#include "callback.h" + +#define CB_OP_TAGLEN_MAXSZ (512) +#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) +#define CB_OP_GETATTR_BITMAP_MAXSZ (4) +#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + CB_OP_GETATTR_BITMAP_MAXSZ + \ + 2 + 2 + 3 + 3) +#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) + +#if defined(CONFIG_NFS_V4_1) +#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + 4 + 1 + 3) +#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#endif /* CONFIG_NFS_V4_1 */ + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +typedef __be32 (*callback_process_op_t)(void *, void *); +typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); +typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); + + +struct callback_op { + callback_process_op_t process_op; + callback_decode_arg_t decode_args; + callback_encode_res_t encode_res; + long res_maxsize; +}; + +static struct callback_op callback_ops[]; + +static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return htonl(NFS4_OK); +} + +static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) +{ + return xdr_argsize_check(rqstp, p); +} + +static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, nbytes); + if (unlikely(p == NULL)) + printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); + return p; +} + +static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str) +{ + __be32 *p; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *len = ntohl(*p); + + if (*len != 0) { + p = read_buf(xdr, *len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *str = (const char *)p; + } else + *str = NULL; + + return 0; +} + +static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh) +{ + __be32 *p; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + fh->size = ntohl(*p); + if (fh->size > NFS4_FHSIZE) + return htonl(NFS4ERR_BADHANDLE); + p = read_buf(xdr, fh->size); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + memcpy(&fh->data[0], p, fh->size); + memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size); + return 0; +} + +static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +{ + __be32 *p; + unsigned int attrlen; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + attrlen = ntohl(*p); + p = read_buf(xdr, attrlen << 2); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + if (likely(attrlen > 0)) + bitmap[0] = ntohl(*p++); + if (attrlen > 1) + bitmap[1] = ntohl(*p); + return 0; +} + +static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +{ + __be32 *p; + + p = read_buf(xdr, 16); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + memcpy(stateid->data, p, 16); + return 0; +} + +static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr) +{ + __be32 *p; + __be32 status; + + status = decode_string(xdr, &hdr->taglen, &hdr->tag); + if (unlikely(status != 0)) + return status; + /* We do not like overly long tags! */ + if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { + printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", + __func__, hdr->taglen); + return htonl(NFS4ERR_RESOURCE); + } + p = read_buf(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + hdr->minorversion = ntohl(*p++); + /* Check minor version is zero or one. */ + if (hdr->minorversion <= 1) { + p++; /* skip callback_ident */ + } else { + printk(KERN_WARNING "%s: NFSv4 server callback with " + "illegal minor version %u!\n", + __func__, hdr->minorversion); + return htonl(NFS4ERR_MINOR_VERS_MISMATCH); + } + hdr->nops = ntohl(*p); + dprintk("%s: minorversion %d nops %d\n", __func__, + hdr->minorversion, hdr->nops); + return 0; +} + +static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op) +{ + __be32 *p; + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *op = ntohl(*p); + return 0; +} + +static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args) +{ + __be32 status; + + status = decode_fh(xdr, &args->fh); + if (unlikely(status != 0)) + goto out; + args->addr = svc_addr(rqstp); + status = decode_bitmap(xdr, args->bitmap); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + +static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args) +{ + __be32 *p; + __be32 status; + + args->addr = svc_addr(rqstp); + status = decode_stateid(xdr, &args->stateid); + if (unlikely(status != 0)) + goto out; + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_RESOURCE); + goto out; + } + args->truncate = ntohl(*p); + status = decode_fh(xdr, &args->fh); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + +#if defined(CONFIG_NFS_V4_1) + +static unsigned decode_sessionid(struct xdr_stream *xdr, + struct nfs4_sessionid *sid) +{ + uint32_t *p; + int len = NFS4_MAX_SESSIONID_LEN; + + p = read_buf(xdr, len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + + memcpy(sid->data, p, len); + return 0; +} + +static unsigned decode_rc_list(struct xdr_stream *xdr, + struct referring_call_list *rc_list) +{ + uint32_t *p; + int i; + unsigned status; + + status = decode_sessionid(xdr, &rc_list->rcl_sessionid); + if (status) + goto out; + + status = htonl(NFS4ERR_RESOURCE); + p = read_buf(xdr, sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto out; + + rc_list->rcl_nrefcalls = ntohl(*p++); + if (rc_list->rcl_nrefcalls) { + p = read_buf(xdr, + rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto out; + rc_list->rcl_refcalls = kmalloc(rc_list->rcl_nrefcalls * + sizeof(*rc_list->rcl_refcalls), + GFP_KERNEL); + if (unlikely(rc_list->rcl_refcalls == NULL)) + goto out; + for (i = 0; i < rc_list->rcl_nrefcalls; i++) { + rc_list->rcl_refcalls[i].rc_sequenceid = ntohl(*p++); + rc_list->rcl_refcalls[i].rc_slotid = ntohl(*p++); + } + } + status = 0; + +out: + return status; +} + +static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_sequenceargs *args) +{ + uint32_t *p; + int i; + unsigned status; + + status = decode_sessionid(xdr, &args->csa_sessionid); + if (status) + goto out; + + status = htonl(NFS4ERR_RESOURCE); + p = read_buf(xdr, 5 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto out; + + args->csa_addr = svc_addr(rqstp); + args->csa_sequenceid = ntohl(*p++); + args->csa_slotid = ntohl(*p++); + args->csa_highestslotid = ntohl(*p++); + args->csa_cachethis = ntohl(*p++); + args->csa_nrclists = ntohl(*p++); + args->csa_rclists = NULL; + if (args->csa_nrclists) { + args->csa_rclists = kmalloc(args->csa_nrclists * + sizeof(*args->csa_rclists), + GFP_KERNEL); + if (unlikely(args->csa_rclists == NULL)) + goto out; + + for (i = 0; i < args->csa_nrclists; i++) { + status = decode_rc_list(xdr, &args->csa_rclists[i]); + if (status) + goto out_free; + } + } + status = 0; + + dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u " + "highestslotid %u cachethis %d nrclists %u\n", + __func__, + ((u32 *)&args->csa_sessionid)[0], + ((u32 *)&args->csa_sessionid)[1], + ((u32 *)&args->csa_sessionid)[2], + ((u32 *)&args->csa_sessionid)[3], + args->csa_sequenceid, args->csa_slotid, + args->csa_highestslotid, args->csa_cachethis, + args->csa_nrclists); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; + +out_free: + for (i = 0; i < args->csa_nrclists; i++) + kfree(args->csa_rclists[i].rcl_refcalls); + kfree(args->csa_rclists); + goto out; +} + +static unsigned decode_recallany_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_recallanyargs *args) +{ + uint32_t *p; + + args->craa_addr = svc_addr(rqstp); + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + args->craa_objs_to_keep = ntohl(*p++); + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + args->craa_type_mask = ntohl(*p); + + return 0; +} + +#endif /* CONFIG_NFS_V4_1 */ + +static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4 + len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + xdr_encode_opaque(p, str, len); + return 0; +} + +#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) +#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) +static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, __be32 **savep) +{ + __be32 bm[2]; + __be32 *p; + + bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0); + bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1); + if (bm[1] != 0) { + p = xdr_reserve_space(xdr, 16); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(2); + *p++ = bm[0]; + *p++ = bm[1]; + } else if (bm[0] != 0) { + p = xdr_reserve_space(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(1); + *p++ = bm[0]; + } else { + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(0); + } + *savep = p; + return 0; +} + +static __be32 encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change) +{ + __be32 *p; + + if (!(bitmap[0] & FATTR4_WORD0_CHANGE)) + return 0; + p = xdr_reserve_space(xdr, 8); + if (unlikely(!p)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, change); + return 0; +} + +static __be32 encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size) +{ + __be32 *p; + + if (!(bitmap[0] & FATTR4_WORD0_SIZE)) + return 0; + p = xdr_reserve_space(xdr, 8); + if (unlikely(!p)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, size); + return 0; +} + +static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *time) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 12); + if (unlikely(!p)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, time->tv_sec); + *p = htonl(time->tv_nsec); + return 0; +} + +static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) + return 0; + return encode_attr_time(xdr,time); +} + +static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) + return 0; + return encode_attr_time(xdr,time); +} + +static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr) +{ + __be32 status; + + hdr->status = xdr_reserve_space(xdr, 4); + if (unlikely(hdr->status == NULL)) + return htonl(NFS4ERR_RESOURCE); + status = encode_string(xdr, hdr->taglen, hdr->tag); + if (unlikely(status != 0)) + return status; + hdr->nops = xdr_reserve_space(xdr, 4); + if (unlikely(hdr->nops == NULL)) + return htonl(NFS4ERR_RESOURCE); + return 0; +} + +static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(op); + *p = res; + return 0; +} + +static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) +{ + __be32 *savep = NULL; + __be32 status = res->status; + + if (unlikely(status != 0)) + goto out; + status = encode_attr_bitmap(xdr, res->bitmap, &savep); + if (unlikely(status != 0)) + goto out; + status = encode_attr_change(xdr, res->bitmap, res->change_attr); + if (unlikely(status != 0)) + goto out; + status = encode_attr_size(xdr, res->bitmap, res->size); + if (unlikely(status != 0)) + goto out; + status = encode_attr_ctime(xdr, res->bitmap, &res->ctime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); + *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + +#if defined(CONFIG_NFS_V4_1) + +static unsigned encode_sessionid(struct xdr_stream *xdr, + const struct nfs4_sessionid *sid) +{ + uint32_t *p; + int len = NFS4_MAX_SESSIONID_LEN; + + p = xdr_reserve_space(xdr, len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + + memcpy(p, sid, len); + return 0; +} + +static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + const struct cb_sequenceres *res) +{ + uint32_t *p; + unsigned status = res->csr_status; + + if (unlikely(status != 0)) + goto out; + + encode_sessionid(xdr, &res->csr_sessionid); + + p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + + *p++ = htonl(res->csr_sequenceid); + *p++ = htonl(res->csr_slotid); + *p++ = htonl(res->csr_highestslotid); + *p++ = htonl(res->csr_target_highestslotid); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + +static __be32 +preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) +{ + if (op_nr == OP_CB_SEQUENCE) { + if (nop != 0) + return htonl(NFS4ERR_SEQUENCE_POS); + } else { + if (nop == 0) + return htonl(NFS4ERR_OP_NOT_IN_SESSION); + } + + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + case OP_CB_SEQUENCE: + case OP_CB_RECALL_ANY: + *op = &callback_ops[op_nr]; + break; + + case OP_CB_LAYOUTRECALL: + case OP_CB_NOTIFY_DEVICEID: + case OP_CB_NOTIFY: + case OP_CB_PUSH_DELEG: + case OP_CB_RECALLABLE_OBJ_AVAIL: + case OP_CB_RECALL_SLOT: + case OP_CB_WANTS_CANCELLED: + case OP_CB_NOTIFY_LOCK: + return htonl(NFS4ERR_NOTSUPP); + + default: + return htonl(NFS4ERR_OP_ILLEGAL); + } + + return htonl(NFS_OK); +} + +#else /* CONFIG_NFS_V4_1 */ + +static __be32 +preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) +{ + return htonl(NFS4ERR_MINOR_VERS_MISMATCH); +} + +#endif /* CONFIG_NFS_V4_1 */ + +static __be32 +preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) +{ + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + *op = &callback_ops[op_nr]; + break; + default: + return htonl(NFS4ERR_OP_ILLEGAL); + } + + return htonl(NFS_OK); +} + +static __be32 process_op(uint32_t minorversion, int nop, + struct svc_rqst *rqstp, + struct xdr_stream *xdr_in, void *argp, + struct xdr_stream *xdr_out, void *resp) +{ + struct callback_op *op = &callback_ops[0]; + unsigned int op_nr = OP_CB_ILLEGAL; + __be32 status; + long maxlen; + __be32 res; + + dprintk("%s: start\n", __func__); + status = decode_op_hdr(xdr_in, &op_nr); + if (unlikely(status)) { + status = htonl(NFS4ERR_OP_ILLEGAL); + goto out; + } + + dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", + __func__, minorversion, nop, op_nr); + + status = minorversion ? preprocess_nfs41_op(nop, op_nr, &op) : + preprocess_nfs4_op(op_nr, &op); + if (status == htonl(NFS4ERR_OP_ILLEGAL)) + op_nr = OP_CB_ILLEGAL; +out: + maxlen = xdr_out->end - xdr_out->p; + if (maxlen > 0 && maxlen < PAGE_SIZE) { + if (likely(status == 0 && op->decode_args != NULL)) + status = op->decode_args(rqstp, xdr_in, argp); + if (likely(status == 0 && op->process_op != NULL)) + status = op->process_op(argp, resp); + } else + status = htonl(NFS4ERR_RESOURCE); + + res = encode_op_hdr(xdr_out, op_nr, status); + if (status == 0) + status = res; + if (op->encode_res != NULL && status == 0) + status = op->encode_res(rqstp, xdr_out, resp); + dprintk("%s: done, status = %d\n", __func__, ntohl(status)); + return status; +} + +/* + * Decode, process and encode a COMPOUND + */ +static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp) +{ + struct cb_compound_hdr_arg hdr_arg = { 0 }; + struct cb_compound_hdr_res hdr_res = { NULL }; + struct xdr_stream xdr_in, xdr_out; + __be32 *p; + __be32 status; + unsigned int nops = 0; + + dprintk("%s: start\n", __func__); + + xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); + + p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); + xdr_init_encode(&xdr_out, &rqstp->rq_res, p); + + status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); + if (status == __constant_htonl(NFS4ERR_RESOURCE)) + return rpc_garbage_args; + + hdr_res.taglen = hdr_arg.taglen; + hdr_res.tag = hdr_arg.tag; + if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) + return rpc_system_err; + + while (status == 0 && nops != hdr_arg.nops) { + status = process_op(hdr_arg.minorversion, nops, + rqstp, &xdr_in, argp, &xdr_out, resp); + nops++; + } + + *hdr_res.status = status; + *hdr_res.nops = htonl(nops); + dprintk("%s: done, status = %u\n", __func__, ntohl(status)); + return rpc_success; +} + +/* + * Define NFS4 callback COMPOUND ops. + */ +static struct callback_op callback_ops[] = { + [0] = { + .res_maxsize = CB_OP_HDR_RES_MAXSZ, + }, + [OP_CB_GETATTR] = { + .process_op = (callback_process_op_t)nfs4_callback_getattr, + .decode_args = (callback_decode_arg_t)decode_getattr_args, + .encode_res = (callback_encode_res_t)encode_getattr_res, + .res_maxsize = CB_OP_GETATTR_RES_MAXSZ, + }, + [OP_CB_RECALL] = { + .process_op = (callback_process_op_t)nfs4_callback_recall, + .decode_args = (callback_decode_arg_t)decode_recall_args, + .res_maxsize = CB_OP_RECALL_RES_MAXSZ, + }, +#if defined(CONFIG_NFS_V4_1) + [OP_CB_SEQUENCE] = { + .process_op = (callback_process_op_t)nfs4_callback_sequence, + .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, + .encode_res = (callback_encode_res_t)encode_cb_sequence_res, + .res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ, + }, + [OP_CB_RECALL_ANY] = { + .process_op = (callback_process_op_t)nfs4_callback_recallany, + .decode_args = (callback_decode_arg_t)decode_recallany_args, + .res_maxsize = CB_OP_RECALLANY_RES_MAXSZ, + }, +#endif /* CONFIG_NFS_V4_1 */ +}; + +/* + * Define NFS4 callback procedures + */ +static struct svc_procedure nfs4_callback_procedures1[] = { + [CB_NULL] = { + .pc_func = nfs4_callback_null, + .pc_decode = (kxdrproc_t)nfs4_decode_void, + .pc_encode = (kxdrproc_t)nfs4_encode_void, + .pc_xdrressize = 1, + }, + [CB_COMPOUND] = { + .pc_func = nfs4_callback_compound, + .pc_encode = (kxdrproc_t)nfs4_encode_void, + .pc_argsize = 256, + .pc_ressize = 256, + .pc_xdrressize = NFS4_CALLBACK_BUFSIZE, + } +}; + +struct svc_version nfs4_callback_version1 = { + .vs_vers = 1, + .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1), + .vs_proc = nfs4_callback_procedures1, + .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, + .vs_dispatch = NULL, +}; + +struct svc_version nfs4_callback_version4 = { + .vs_vers = 4, + .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1), + .vs_proc = nfs4_callback_procedures1, + .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, + .vs_dispatch = NULL, +}; diff --git a/fs/nfs/client.c b/fs/nfs/client.c new file mode 100644 index 00000000000..37d555c32cd --- /dev/null +++ b/fs/nfs/client.c @@ -0,0 +1,1783 @@ +/* client.c: NFS client sharing and management code + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/stats.h> +#include <linux/sunrpc/metrics.h> +#include <linux/sunrpc/xprtsock.h> +#include <linux/sunrpc/xprtrdma.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include <linux/lockd/bind.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/nfs_idmap.h> +#include <linux/vfs.h> +#include <linux/inet.h> +#include <linux/in6.h> +#include <net/ipv6.h> +#include <linux/nfs_xdr.h> +#include <linux/sunrpc/bc_xprt.h> + +#include <asm/system.h> + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +static DEFINE_SPINLOCK(nfs_client_lock); +static LIST_HEAD(nfs_client_list); +static LIST_HEAD(nfs_volume_list); +static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); + +/* + * RPC cruft for NFS + */ +static struct rpc_version *nfs_version[5] = { + [2] = &nfs_version2, +#ifdef CONFIG_NFS_V3 + [3] = &nfs_version3, +#endif +#ifdef CONFIG_NFS_V4 + [4] = &nfs_version4, +#endif +}; + +struct rpc_program nfs_program = { + .name = "nfs", + .number = NFS_PROGRAM, + .nrvers = ARRAY_SIZE(nfs_version), + .version = nfs_version, + .stats = &nfs_rpcstat, + .pipe_dir_name = "/nfs", +}; + +struct rpc_stat nfs_rpcstat = { + .program = &nfs_program +}; + + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + +struct nfs_client_initdata { + const char *hostname; + const struct sockaddr *addr; + size_t addrlen; + const struct nfs_rpc_ops *rpc_ops; + int proto; + u32 minorversion; +}; + +/* + * Allocate a shared client record + * + * Since these are allocated/deallocated very rarely, we don't + * bother putting them in a slab cache... + */ +static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) +{ + struct nfs_client *clp; + struct rpc_cred *cred; + int err = -ENOMEM; + + if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) + goto error_0; + + clp->rpc_ops = cl_init->rpc_ops; + + atomic_set(&clp->cl_count, 1); + clp->cl_cons_state = NFS_CS_INITING; + + memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen); + clp->cl_addrlen = cl_init->addrlen; + + if (cl_init->hostname) { + err = -ENOMEM; + clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL); + if (!clp->cl_hostname) + goto error_cleanup; + } + + INIT_LIST_HEAD(&clp->cl_superblocks); + clp->cl_rpcclient = ERR_PTR(-EINVAL); + + clp->cl_proto = cl_init->proto; + +#ifdef CONFIG_NFS_V4 + INIT_LIST_HEAD(&clp->cl_delegations); + spin_lock_init(&clp->cl_lock); + INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_boot_time = CURRENT_TIME; + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; + clp->cl_minorversion = cl_init->minorversion; +#endif + cred = rpc_lookup_machine_cred(); + if (!IS_ERR(cred)) + clp->cl_machine_cred = cred; + + nfs_fscache_get_client_cookie(clp); + + return clp; + +error_cleanup: + kfree(clp); +error_0: + return ERR_PTR(err); +} + +static void nfs4_shutdown_client(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) + nfs4_kill_renewd(clp); + BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) + nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); +#endif +} + +/* + * Destroy the NFS4 callback service + */ +static void nfs4_destroy_callback(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(clp->cl_minorversion); +#endif /* CONFIG_NFS_V4 */ +} + +/* + * Clears/puts all minor version specific parts from an nfs_client struct + * reverting it to minorversion 0. + */ +static void nfs4_clear_client_minor_version(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp)) { + nfs4_destroy_session(clp->cl_session); + clp->cl_session = NULL; + } + + clp->cl_call_sync = _nfs4_call_sync; +#endif /* CONFIG_NFS_V4_1 */ + + nfs4_destroy_callback(clp); +} + +/* + * Destroy a shared client record + */ +static void nfs_free_client(struct nfs_client *clp) +{ + dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); + + nfs4_clear_client_minor_version(clp); + nfs4_shutdown_client(clp); + + nfs_fscache_release_client_cookie(clp); + + /* -EIO all pending I/O */ + if (!IS_ERR(clp->cl_rpcclient)) + rpc_shutdown_client(clp->cl_rpcclient); + + if (clp->cl_machine_cred != NULL) + put_rpccred(clp->cl_machine_cred); + + kfree(clp->cl_hostname); + kfree(clp); + + dprintk("<-- nfs_free_client()\n"); +} + +/* + * Release a reference to a shared client record + */ +void nfs_put_client(struct nfs_client *clp) +{ + if (!clp) + return; + + dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); + + if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { + list_del(&clp->cl_share_link); + spin_unlock(&nfs_client_lock); + + BUG_ON(!list_empty(&clp->cl_superblocks)); + + nfs_free_client(clp); + } +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/* + * Test if two ip6 socket addresses refer to the same socket by + * comparing relevant fields. The padding bytes specifically, are not + * compared. sin6_flowinfo is not compared because it only affects QoS + * and sin6_scope_id is only compared if the address is "link local" + * because "link local" addresses need only be unique to a specific + * link. Conversely, ordinary unicast addresses might have different + * sin6_scope_id. + * + * The caller should ensure both socket addresses are AF_INET6. + */ +static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; + + if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL && + sin1->sin6_scope_id != sin2->sin6_scope_id) + return 0; + + return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr); +} +#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ +static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + return 0; +} +#endif + +/* + * Test if two ip4 socket addresses refer to the same socket, by + * comparing relevant fields. The padding bytes specifically, are + * not compared. + * + * The caller should ensure both socket addresses are AF_INET. + */ +static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; + + return nfs_sockaddr_match_ipaddr6(sa1, sa2) && + (sin1->sin6_port == sin2->sin6_port); +} + +static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2; + + return nfs_sockaddr_match_ipaddr4(sa1, sa2) && + (sin1->sin_port == sin2->sin_port); +} + +/* + * Test if two socket addresses represent the same actual socket, + * by comparing (only) relevant fields, excluding the port number. + */ +static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + if (sa1->sa_family != sa2->sa_family) + return 0; + + switch (sa1->sa_family) { + case AF_INET: + return nfs_sockaddr_match_ipaddr4(sa1, sa2); + case AF_INET6: + return nfs_sockaddr_match_ipaddr6(sa1, sa2); + } + return 0; +} + +/* + * Test if two socket addresses represent the same actual socket, + * by comparing (only) relevant fields, including the port number. + */ +static int nfs_sockaddr_cmp(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + if (sa1->sa_family != sa2->sa_family) + return 0; + + switch (sa1->sa_family) { + case AF_INET: + return nfs_sockaddr_cmp_ip4(sa1, sa2); + case AF_INET6: + return nfs_sockaddr_cmp_ip6(sa1, sa2); + } + return 0; +} + +/* + * Find a client by IP address and protocol version + * - returns NULL if no such client + */ +struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + + /* Don't match clients that failed to initialise properly */ + if (!(clp->cl_cons_state == NFS_CS_READY || + clp->cl_cons_state == NFS_CS_SESSION_INITING)) + continue; + + /* Different NFS versions cannot share the same nfs_client */ + if (clp->rpc_ops->version != nfsversion) + continue; + + /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(addr, clap)) + continue; + + atomic_inc(&clp->cl_count); + spin_unlock(&nfs_client_lock); + return clp; + } + spin_unlock(&nfs_client_lock); + return NULL; +} + +/* + * Find a client by IP address and protocol version + * - returns NULL if no such client + */ +struct nfs_client *nfs_find_client_next(struct nfs_client *clp) +{ + struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr; + u32 nfsvers = clp->rpc_ops->version; + + spin_lock(&nfs_client_lock); + list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) { + struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + + /* Don't match clients that failed to initialise properly */ + if (clp->cl_cons_state != NFS_CS_READY) + continue; + + /* Different NFS versions cannot share the same nfs_client */ + if (clp->rpc_ops->version != nfsvers) + continue; + + /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(sap, clap)) + continue; + + atomic_inc(&clp->cl_count); + spin_unlock(&nfs_client_lock); + return clp; + } + spin_unlock(&nfs_client_lock); + return NULL; +} + +/* + * Find an nfs_client on the list that matches the initialisation data + * that is supplied. + */ +static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data) +{ + struct nfs_client *clp; + const struct sockaddr *sap = data->addr; + + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + /* Don't match clients that failed to initialise properly */ + if (clp->cl_cons_state < 0) + continue; + + /* Different NFS versions cannot share the same nfs_client */ + if (clp->rpc_ops != data->rpc_ops) + continue; + + if (clp->cl_proto != data->proto) + continue; + /* Match nfsv4 minorversion */ + if (clp->cl_minorversion != data->minorversion) + continue; + /* Match the full socket address */ + if (!nfs_sockaddr_cmp(sap, clap)) + continue; + + atomic_inc(&clp->cl_count); + return clp; + } + return NULL; +} + +/* + * Look up a client by IP address and protocol version + * - creates a new record if one doesn't yet exist + */ +static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) +{ + struct nfs_client *clp, *new = NULL; + int error; + + dprintk("--> nfs_get_client(%s,v%u)\n", + cl_init->hostname ?: "", cl_init->rpc_ops->version); + + /* see if the client already exists */ + do { + spin_lock(&nfs_client_lock); + + clp = nfs_match_client(cl_init); + if (clp) + goto found_client; + if (new) + goto install_client; + + spin_unlock(&nfs_client_lock); + + new = nfs_alloc_client(cl_init); + } while (!IS_ERR(new)); + + dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new)); + return new; + + /* install a new client and return with it unready */ +install_client: + clp = new; + list_add(&clp->cl_share_link, &nfs_client_list); + spin_unlock(&nfs_client_lock); + dprintk("--> nfs_get_client() = %p [new]\n", clp); + return clp; + + /* found an existing client + * - make sure it's ready before returning + */ +found_client: + spin_unlock(&nfs_client_lock); + + if (new) + nfs_free_client(new); + + error = wait_event_killable(nfs_client_active_wq, + clp->cl_cons_state < NFS_CS_INITING); + if (error < 0) { + nfs_put_client(clp); + return ERR_PTR(-ERESTARTSYS); + } + + if (clp->cl_cons_state < NFS_CS_READY) { + error = clp->cl_cons_state; + nfs_put_client(clp); + return ERR_PTR(error); + } + + BUG_ON(clp->cl_cons_state != NFS_CS_READY); + + dprintk("--> nfs_get_client() = %p [share]\n", clp); + return clp; +} + +/* + * Mark a server as ready or failed + */ +void nfs_mark_client_ready(struct nfs_client *clp, int state) +{ + clp->cl_cons_state = state; + wake_up_all(&nfs_client_active_wq); +} + +/* + * With sessions, the client is not marked ready until after a + * successful EXCHANGE_ID and CREATE_SESSION. + * + * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate + * other versions of NFS can be tried. + */ +int nfs4_check_client_ready(struct nfs_client *clp) +{ + if (!nfs4_has_session(clp)) + return 0; + if (clp->cl_cons_state < NFS_CS_READY) + return -EPROTONOSUPPORT; + return 0; +} + +/* + * Initialise the timeout values for a connection + */ +static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, + unsigned int timeo, unsigned int retrans) +{ + to->to_initval = timeo * HZ / 10; + to->to_retries = retrans; + + switch (proto) { + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + if (to->to_retries == 0) + to->to_retries = NFS_DEF_TCP_RETRANS; + if (to->to_initval == 0) + to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; + to->to_increment = to->to_initval; + to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + if (to->to_maxval > NFS_MAX_TCP_TIMEOUT) + to->to_maxval = NFS_MAX_TCP_TIMEOUT; + if (to->to_maxval < to->to_initval) + to->to_maxval = to->to_initval; + to->to_exponential = 0; + break; + case XPRT_TRANSPORT_UDP: + if (to->to_retries == 0) + to->to_retries = NFS_DEF_UDP_RETRANS; + if (!to->to_initval) + to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; + to->to_exponential = 1; + break; + default: + BUG(); + } +} + +/* + * Create an RPC client handle + */ +static int nfs_create_rpc_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + rpc_authflavor_t flavor, + int discrtry, int noresvport) +{ + struct rpc_clnt *clnt = NULL; + struct rpc_create_args args = { + .protocol = clp->cl_proto, + .address = (struct sockaddr *)&clp->cl_addr, + .addrsize = clp->cl_addrlen, + .timeout = timeparms, + .servername = clp->cl_hostname, + .program = &nfs_program, + .version = clp->rpc_ops->version, + .authflavor = flavor, + }; + + if (discrtry) + args.flags |= RPC_CLNT_CREATE_DISCRTRY; + if (noresvport) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + + if (!IS_ERR(clp->cl_rpcclient)) + return 0; + + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("%s: cannot create RPC client. Error = %ld\n", + __func__, PTR_ERR(clnt)); + return PTR_ERR(clnt); + } + + clp->cl_rpcclient = clnt; + return 0; +} + +/* + * Version 2 or 3 client destruction + */ +static void nfs_destroy_server(struct nfs_server *server) +{ + if (!(server->flags & NFS_MOUNT_NONLM)) + nlmclnt_done(server->nlm_host); +} + +/* + * Version 2 or 3 lockd setup + */ +static int nfs_start_lockd(struct nfs_server *server) +{ + struct nlm_host *host; + struct nfs_client *clp = server->nfs_client; + struct nlmclnt_initdata nlm_init = { + .hostname = clp->cl_hostname, + .address = (struct sockaddr *)&clp->cl_addr, + .addrlen = clp->cl_addrlen, + .nfs_version = clp->rpc_ops->version, + .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? + 1 : 0, + }; + + if (nlm_init.nfs_version > 3) + return 0; + if (server->flags & NFS_MOUNT_NONLM) + return 0; + + switch (clp->cl_proto) { + default: + nlm_init.protocol = IPPROTO_TCP; + break; + case XPRT_TRANSPORT_UDP: + nlm_init.protocol = IPPROTO_UDP; + } + + host = nlmclnt_init(&nlm_init); + if (IS_ERR(host)) + return PTR_ERR(host); + + server->nlm_host = host; + server->destroy = nfs_destroy_server; + return 0; +} + +/* + * Initialise an NFSv3 ACL client connection + */ +#ifdef CONFIG_NFS_V3_ACL +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->nfs_client->rpc_ops->version != 3) + goto out_noacl; + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No errors! Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +/* + * Create a general RPC client + */ +static int nfs_init_server_rpcclient(struct nfs_server *server, + const struct rpc_timeout *timeo, + rpc_authflavor_t pseudoflavour) +{ + struct nfs_client *clp = server->nfs_client; + + server->client = rpc_clone_client(clp->cl_rpcclient); + if (IS_ERR(server->client)) { + dprintk("%s: couldn't create rpc_client!\n", __func__); + return PTR_ERR(server->client); + } + + memcpy(&server->client->cl_timeout_default, + timeo, + sizeof(server->client->cl_timeout_default)); + server->client->cl_timeout = &server->client->cl_timeout_default; + + if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(pseudoflavour, server->client); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __func__); + return PTR_ERR(auth); + } + } + server->client->cl_softrtry = 0; + if (server->flags & NFS_MOUNT_SOFT) + server->client->cl_softrtry = 1; + + return 0; +} + +/* + * Initialise an NFS2 or NFS3 client + */ +static int nfs_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const struct nfs_parsed_mount_data *data) +{ + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is already initialised */ + dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* + * Create a client RPC handle for doing FSSTAT with UNIX auth only + * - RFC 2623, sec 2.3.2 + */ + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, + 0, data->flags & NFS_MOUNT_NORESVPORT); + if (error < 0) + goto error; + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs_init_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 2 or 3 client + */ +static int nfs_init_server(struct nfs_server *server, + const struct nfs_parsed_mount_data *data) +{ + struct nfs_client_initdata cl_init = { + .hostname = data->nfs_server.hostname, + .addr = (const struct sockaddr *)&data->nfs_server.address, + .addrlen = data->nfs_server.addrlen, + .rpc_ops = &nfs_v2_clientops, + .proto = data->nfs_server.protocol, + }; + struct rpc_timeout timeparms; + struct nfs_client *clp; + int error; + + dprintk("--> nfs_init_server()\n"); + +#ifdef CONFIG_NFS_V3 + if (data->version == 3) + cl_init.rpc_ops = &nfs_v3_clientops; +#endif + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(&cl_init); + if (IS_ERR(clp)) { + dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); + return PTR_ERR(clp); + } + + nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, + data->timeo, data->retrans); + error = nfs_init_client(clp, &timeparms, data); + if (error < 0) + goto error; + + server->nfs_client = clp; + + /* Initialise the client representation from the mount data */ + server->flags = data->flags; + server->options = data->options; + server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| + NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + /* Start lockd here, before we might error out */ + error = nfs_start_lockd(server); + if (error < 0) + goto error; + + server->port = data->nfs_server.port; + + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + if (error < 0) + goto error; + + /* Preserve the values of mount_server-related mount options */ + if (data->mount_server.addrlen) { + memcpy(&server->mountd_address, &data->mount_server.address, + data->mount_server.addrlen); + server->mountd_addrlen = data->mount_server.addrlen; + } + server->mountd_version = data->mount_server.version; + server->mountd_port = data->mount_server.port; + server->mountd_protocol = data->mount_server.protocol; + + server->namelen = data->namlen; + /* Create a client RPC handle for the NFSv3 ACL management interface */ + nfs_init_server_aclclient(server); + dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); + return 0; + +error: + server->nfs_client = NULL; + nfs_put_client(clp); + dprintk("<-- nfs_init_server() = xerror %d\n", error); + return error; +} + +/* + * Load up the server record from information gained in an fsinfo record + */ +static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) +{ + unsigned long max_rpc_payload; + + /* Work out a lot of parameters */ + if (server->rsize == 0) + server->rsize = nfs_block_size(fsinfo->rtpref, NULL); + if (server->wsize == 0) + server->wsize = nfs_block_size(fsinfo->wtpref, NULL); + + if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax) + server->rsize = nfs_block_size(fsinfo->rtmax, NULL); + if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) + server->wsize = nfs_block_size(fsinfo->wtmax, NULL); + + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + server->backing_dev_info.name = "nfs"; + server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); + + server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + if (server->flags & NFS_MOUNT_NOAC) { + server->acregmin = server->acregmax = 0; + server->acdirmin = server->acdirmax = 0; + } + + server->maxfilesize = fsinfo->maxfilesize; + + /* We're airborne Set socket buffersize */ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); +} + +/* + * Probe filesystem information, including the FSID on v2/v3 + */ +static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +{ + struct nfs_fsinfo fsinfo; + struct nfs_client *clp = server->nfs_client; + int error; + + dprintk("--> nfs_probe_fsinfo()\n"); + + if (clp->rpc_ops->set_capabilities != NULL) { + error = clp->rpc_ops->set_capabilities(server, mntfh); + if (error < 0) + goto out_error; + } + + fsinfo.fattr = fattr; + nfs_fattr_init(fattr); + error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); + if (error < 0) + goto out_error; + + nfs_server_set_fsinfo(server, &fsinfo); + + /* Get some general file system info */ + if (server->namelen == 0) { + struct nfs_pathconf pathinfo; + + pathinfo.fattr = fattr; + nfs_fattr_init(fattr); + + if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; + } + + dprintk("<-- nfs_probe_fsinfo() = 0\n"); + return 0; + +out_error: + dprintk("nfs_probe_fsinfo: error = %d\n", -error); + return error; +} + +/* + * Copy useful information when duplicating a server record + */ +static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) +{ + target->flags = source->flags; + target->rsize = source->rsize; + target->wsize = source->wsize; + target->acregmin = source->acregmin; + target->acregmax = source->acregmax; + target->acdirmin = source->acdirmin; + target->acdirmax = source->acdirmax; + target->caps = source->caps; + target->options = source->options; +} + +/* + * Allocate and initialise a server record + */ +static struct nfs_server *nfs_alloc_server(void) +{ + struct nfs_server *server; + + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return NULL; + + server->client = server->client_acl = ERR_PTR(-EINVAL); + + /* Zero out the NFS state stuff */ + INIT_LIST_HEAD(&server->client_link); + INIT_LIST_HEAD(&server->master_link); + + atomic_set(&server->active, 0); + + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + kfree(server); + return NULL; + } + + if (bdi_init(&server->backing_dev_info)) { + nfs_free_iostats(server->io_stats); + kfree(server); + return NULL; + } + + return server; +} + +/* + * Free up a server record + */ +void nfs_free_server(struct nfs_server *server) +{ + dprintk("--> nfs_free_server()\n"); + + spin_lock(&nfs_client_lock); + list_del(&server->client_link); + list_del(&server->master_link); + spin_unlock(&nfs_client_lock); + + if (server->destroy != NULL) + server->destroy(server); + + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); + if (!IS_ERR(server->client)) + rpc_shutdown_client(server->client); + + nfs_put_client(server->nfs_client); + + nfs_free_iostats(server->io_stats); + bdi_destroy(&server->backing_dev_info); + kfree(server); + nfs_release_automount_timer(); + dprintk("<-- nfs_free_server()\n"); +} + +/* + * Create a version 2 or 3 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, + struct nfs_fh *mntfh) +{ + struct nfs_server *server; + struct nfs_fattr fattr; + int error; + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client representation */ + error = nfs_init_server(server, data); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + if (server->nfs_client->rpc_ops->version == 3) { + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + + if (!(fattr.valid & NFS_ATTR_FATTR)) { + error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_create_server: getattr error = %d\n", -error); + goto error; + } + } + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + return server; + +error: + nfs_free_server(server); + return ERR_PTR(error); +} + +#ifdef CONFIG_NFS_V4 +/* + * Initialize the NFS4 callback service + */ +static int nfs4_init_callback(struct nfs_client *clp) +{ + int error; + + if (clp->rpc_ops->version == 4) { + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel( + clp->cl_rpcclient->cl_xprt, + NFS41_BC_MIN_CALLBACKS); + if (error < 0) + return error; + } + + error = nfs_callback_up(clp->cl_minorversion, + clp->cl_rpcclient->cl_xprt); + if (error < 0) { + dprintk("%s: failed to start callback. Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + } + return 0; +} + +/* + * Initialize the minor version specific parts of an NFS4 client record + */ +static int nfs4_init_client_minor_version(struct nfs_client *clp) +{ + clp->cl_call_sync = _nfs4_call_sync; + +#if defined(CONFIG_NFS_V4_1) + if (clp->cl_minorversion) { + struct nfs4_session *session = NULL; + /* + * Create the session and mark it expired. + * When a SEQUENCE operation encounters the expired session + * it will do session recovery to initialize it. + */ + session = nfs4_alloc_session(clp); + if (!session) + return -ENOMEM; + + clp->cl_session = session; + clp->cl_call_sync = _nfs4_call_sync_session; + } +#endif /* CONFIG_NFS_V4_1 */ + + return nfs4_init_callback(clp); +} + +/* + * Initialise an NFS4 client record + */ +static int nfs4_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour, + int flags) +{ + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is initialised already */ + dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v4_clientops; + + error = nfs_create_rpc_client(clp, timeparms, authflavour, + 1, flags & NFS_MOUNT_NORESVPORT); + if (error < 0) + goto error; + strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); + + error = nfs_idmap_new(clp); + if (error < 0) { + dprintk("%s: failed to create idmapper. Error = %d\n", + __func__, error); + goto error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + + error = nfs4_init_client_minor_version(clp); + if (error < 0) + goto error; + + if (!nfs4_has_session(clp)) + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs4_init_client() = xerror %d\n", error); + return error; +} + +/* + * Set up an NFS4 client + */ +static int nfs4_set_client(struct nfs_server *server, + const char *hostname, + const struct sockaddr *addr, + const size_t addrlen, + const char *ip_addr, + rpc_authflavor_t authflavour, + int proto, const struct rpc_timeout *timeparms, + u32 minorversion) +{ + struct nfs_client_initdata cl_init = { + .hostname = hostname, + .addr = addr, + .addrlen = addrlen, + .rpc_ops = &nfs_v4_clientops, + .proto = proto, + .minorversion = minorversion, + }; + struct nfs_client *clp; + int error; + + dprintk("--> nfs4_set_client()\n"); + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(&cl_init); + if (IS_ERR(clp)) { + error = PTR_ERR(clp); + goto error; + } + error = nfs4_init_client(clp, timeparms, ip_addr, authflavour, + server->flags); + if (error < 0) + goto error_put; + + server->nfs_client = clp; + dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); + return 0; + +error_put: + nfs_put_client(clp); +error: + dprintk("<-- nfs4_set_client() = xerror %d\n", error); + return error; +} + + +/* + * Session has been established, and the client marked ready. + * Set the mount rsize and wsize with negotiated fore channel + * attributes which will be bound checked in nfs_server_set_fsinfo. + */ +static void nfs4_session_set_rwsize(struct nfs_server *server) +{ +#ifdef CONFIG_NFS_V4_1 + struct nfs4_session *sess; + u32 server_resp_sz; + u32 server_rqst_sz; + + if (!nfs4_has_session(server->nfs_client)) + return; + sess = server->nfs_client->cl_session; + server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; + server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; + + if (server->rsize > server_resp_sz) + server->rsize = server_resp_sz; + if (server->wsize > server_rqst_sz) + server->wsize = server_rqst_sz; +#endif /* CONFIG_NFS_V4_1 */ +} + +/* + * Create a version 4 volume record + */ +static int nfs4_init_server(struct nfs_server *server, + const struct nfs_parsed_mount_data *data) +{ + struct rpc_timeout timeparms; + int error; + + dprintk("--> nfs4_init_server()\n"); + + nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, + data->timeo, data->retrans); + + /* Initialise the client representation from the mount data */ + server->flags = data->flags; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| + NFS_CAP_POSIX_LOCK; + server->options = data->options; + + /* Get a client record */ + error = nfs4_set_client(server, + data->nfs_server.hostname, + (const struct sockaddr *)&data->nfs_server.address, + data->nfs_server.addrlen, + data->client_address, + data->auth_flavors[0], + data->nfs_server.protocol, + &timeparms, + data->minorversion); + if (error < 0) + goto error; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + server->port = data->nfs_server.port; + + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + +error: + /* Done */ + dprintk("<-- nfs4_init_server() = %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, + struct nfs_fh *mntfh) +{ + struct nfs_fattr fattr; + struct nfs_server *server; + int error; + + dprintk("--> nfs4_create_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* set up the general RPC client */ + error = nfs4_init_server(server, data); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + error = nfs4_init_session(server); + if (error < 0) + goto error; + + /* Probe the root fh to retrieve its FSID */ + error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); + if (error < 0) + goto error; + + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + nfs4_session_set_rwsize(server); + + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + dprintk("<-- nfs4_create_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_server() = error %d\n", error); + return ERR_PTR(error); +} + +/* + * Create an NFS4 referral server record + */ +struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, + struct nfs_fh *mntfh) +{ + struct nfs_client *parent_client; + struct nfs_server *server, *parent_server; + struct nfs_fattr fattr; + int error; + + dprintk("--> nfs4_create_referral_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + parent_server = NFS_SB(data->sb); + parent_client = parent_server->nfs_client; + + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + + /* Get a client representation. + * Note: NFSv4 always uses TCP, */ + error = nfs4_set_client(server, data->hostname, + data->addr, + data->addrlen, + parent_client->cl_ipaddr, + data->authflavor, + parent_server->client->cl_xprt->prot, + parent_server->client->cl_timeout, + parent_client->cl_minorversion); + if (error < 0) + goto error; + + error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID and filehandle */ + error = nfs4_path_walk(server, mntfh, data->mnt_path); + if (error < 0) + goto error; + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + + dprintk("Referral FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_create_referral_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_referral_server() = error %d\n", error); + return ERR_PTR(error); +} + +#endif /* CONFIG_NFS_V4 */ + +/* + * Clone an NFS2, NFS3 or NFS4 server record + */ +struct nfs_server *nfs_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_server *server; + struct nfs_fattr fattr_fsinfo; + int error; + + dprintk("--> nfs_clone_server(,%llx:%llx,)\n", + (unsigned long long) fattr->fsid.major, + (unsigned long long) fattr->fsid.minor); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Copy data from the source */ + server->nfs_client = source->nfs_client; + atomic_inc(&server->nfs_client->cl_count); + nfs_server_copy_userdata(server, source); + + server->fsid = fattr->fsid; + + error = nfs_init_server_rpcclient(server, + source->client->cl_timeout, + source->client->cl_auth->au_flavor); + if (error < 0) + goto out_free_server; + if (!IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo); + if (error < 0) + goto out_free_server; + + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + + dprintk("Cloned FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + + error = nfs_start_lockd(server); + if (error < 0) + goto out_free_server; + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_clone_server() = %p\n", server); + return server; + +out_free_server: + nfs_free_server(server); + dprintk("<-- nfs_clone_server() = error %d\n", error); + return ERR_PTR(error); +} + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *proc_fs_nfs; + +static int nfs_server_list_open(struct inode *inode, struct file *file); +static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); +static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); +static void nfs_server_list_stop(struct seq_file *p, void *v); +static int nfs_server_list_show(struct seq_file *m, void *v); + +static const struct seq_operations nfs_server_list_ops = { + .start = nfs_server_list_start, + .next = nfs_server_list_next, + .stop = nfs_server_list_stop, + .show = nfs_server_list_show, +}; + +static const struct file_operations nfs_server_list_fops = { + .open = nfs_server_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +static int nfs_volume_list_open(struct inode *inode, struct file *file); +static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos); +static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); +static void nfs_volume_list_stop(struct seq_file *p, void *v); +static int nfs_volume_list_show(struct seq_file *m, void *v); + +static const struct seq_operations nfs_volume_list_ops = { + .start = nfs_volume_list_start, + .next = nfs_volume_list_next, + .stop = nfs_volume_list_stop, + .show = nfs_volume_list_show, +}; + +static const struct file_operations nfs_volume_list_fops = { + .open = nfs_volume_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +/* + * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which + * we're dealing + */ +static int nfs_server_list_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &nfs_server_list_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = PDE(inode)->data; + + return 0; +} + +/* + * set up the iterator to start reading from the server list and return the first item + */ +static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) +{ + /* lock the list against modification */ + spin_lock(&nfs_client_lock); + return seq_list_start_head(&nfs_client_list, *_pos); +} + +/* + * move to next server + */ +static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) +{ + return seq_list_next(v, &nfs_client_list, pos); +} + +/* + * clean up after reading from the transports list + */ +static void nfs_server_list_stop(struct seq_file *p, void *v) +{ + spin_unlock(&nfs_client_lock); +} + +/* + * display a header line followed by a load of call lines + */ +static int nfs_server_list_show(struct seq_file *m, void *v) +{ + struct nfs_client *clp; + + /* display header on line 1 */ + if (v == &nfs_client_list) { + seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); + return 0; + } + + /* display one transport per line on subsequent lines */ + clp = list_entry(v, struct nfs_client, cl_share_link); + + seq_printf(m, "v%u %s %s %3d %s\n", + clp->rpc_ops->version, + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), + atomic_read(&clp->cl_count), + clp->cl_hostname); + + return 0; +} + +/* + * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes + */ +static int nfs_volume_list_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &nfs_volume_list_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = PDE(inode)->data; + + return 0; +} + +/* + * set up the iterator to start reading from the volume list and return the first item + */ +static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) +{ + /* lock the list against modification */ + spin_lock(&nfs_client_lock); + return seq_list_start_head(&nfs_volume_list, *_pos); +} + +/* + * move to next volume + */ +static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) +{ + return seq_list_next(v, &nfs_volume_list, pos); +} + +/* + * clean up after reading from the transports list + */ +static void nfs_volume_list_stop(struct seq_file *p, void *v) +{ + spin_unlock(&nfs_client_lock); +} + +/* + * display a header line followed by a load of call lines + */ +static int nfs_volume_list_show(struct seq_file *m, void *v) +{ + struct nfs_server *server; + struct nfs_client *clp; + char dev[8], fsid[17]; + + /* display header on line 1 */ + if (v == &nfs_volume_list) { + seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); + return 0; + } + /* display one transport per line on subsequent lines */ + server = list_entry(v, struct nfs_server, master_link); + clp = server->nfs_client; + + snprintf(dev, 8, "%u:%u", + MAJOR(server->s_dev), MINOR(server->s_dev)); + + snprintf(fsid, 17, "%llx:%llx", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + + seq_printf(m, "v%u %s %s %-7s %-17s %s\n", + clp->rpc_ops->version, + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), + dev, + fsid, + nfs_server_fscache_state(server)); + + return 0; +} + +/* + * initialise the /proc/fs/nfsfs/ directory + */ +int __init nfs_fs_proc_init(void) +{ + struct proc_dir_entry *p; + + proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); + if (!proc_fs_nfs) + goto error_0; + + /* a file of servers with which we're dealing */ + p = proc_create("servers", S_IFREG|S_IRUGO, + proc_fs_nfs, &nfs_server_list_fops); + if (!p) + goto error_1; + + /* a file of volumes that we have mounted */ + p = proc_create("volumes", S_IFREG|S_IRUGO, + proc_fs_nfs, &nfs_volume_list_fops); + if (!p) + goto error_2; + return 0; + +error_2: + remove_proc_entry("servers", proc_fs_nfs); +error_1: + remove_proc_entry("fs/nfsfs", NULL); +error_0: + return -ENOMEM; +} + +/* + * clean up the /proc/fs/nfsfs/ directory + */ +void nfs_fs_proc_exit(void) +{ + remove_proc_entry("volumes", proc_fs_nfs); + remove_proc_entry("servers", proc_fs_nfs); + remove_proc_entry("fs/nfsfs", NULL); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c new file mode 100644 index 00000000000..2563bebc4c6 --- /dev/null +++ b/fs/nfs/delegation.c @@ -0,0 +1,553 @@ +/* + * linux/fs/nfs/delegation.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFS file delegation management + * + */ +#include <linux/completion.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> + +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_xdr.h> + +#include "nfs4_fs.h" +#include "delegation.h" +#include "internal.h" + +static void nfs_do_free_delegation(struct nfs_delegation *delegation) +{ + kfree(delegation); +} + +static void nfs_free_delegation_callback(struct rcu_head *head) +{ + struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); + + nfs_do_free_delegation(delegation); +} + +static void nfs_free_delegation(struct nfs_delegation *delegation) +{ + struct rpc_cred *cred; + + cred = rcu_dereference(delegation->cred); + rcu_assign_pointer(delegation->cred, NULL); + call_rcu(&delegation->rcu, nfs_free_delegation_callback); + if (cred) + put_rpccred(cred); +} + +void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); +} + +int nfs_have_delegation(struct inode *inode, fmode_t flags) +{ + struct nfs_delegation *delegation; + int ret = 0; + + flags &= FMODE_READ|FMODE_WRITE; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL && (delegation->type & flags) == flags) { + nfs_mark_delegation_referenced(delegation); + ret = 1; + } + rcu_read_unlock(); + return ret; +} + +static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct file_lock *fl; + int status = 0; + + if (inode->i_flock == NULL) + goto out; + + /* Protect inode->i_flock using the BKL */ + lock_kernel(); + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) + continue; + if (nfs_file_open_context(fl->fl_file) != ctx) + continue; + unlock_kernel(); + status = nfs4_lock_delegation_recall(state, fl); + if (status < 0) + goto out; + lock_kernel(); + } + unlock_kernel(); +out: + return status; +} + +static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + struct nfs4_state *state; + int err; + +again: + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + state = ctx->state; + if (state == NULL) + continue; + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + continue; + if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + continue; + get_nfs_open_context(ctx); + spin_unlock(&inode->i_lock); + err = nfs4_open_delegation_recall(ctx, state, stateid); + if (err >= 0) + err = nfs_delegation_claim_locks(ctx, state); + put_nfs_open_context(ctx); + if (err != 0) + return err; + goto again; + } + spin_unlock(&inode->i_lock); + return 0; +} + +/* + * Set up a delegation on an inode + */ +void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) +{ + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct rpc_cred *oldcred; + + if (delegation == NULL) + return; + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); + delegation->type = res->delegation_type; + delegation->maxsize = res->maxsize; + oldcred = delegation->cred; + delegation->cred = get_rpccred(cred); + clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + NFS_I(inode)->delegation_state = delegation->type; + smp_wmb(); + put_rpccred(oldcred); +} + +static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) +{ + int res = 0; + + res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); + nfs_free_delegation(delegation); + return res; +} + +static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation) +{ + struct inode *inode = NULL; + + spin_lock(&delegation->lock); + if (delegation->inode != NULL) + inode = igrab(delegation->inode); + spin_unlock(&delegation->lock); + return inode; +} + +static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) +{ + struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); + + if (delegation == NULL) + goto nomatch; + spin_lock(&delegation->lock); + if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) + goto nomatch_unlock; + list_del_rcu(&delegation->super_list); + delegation->inode = NULL; + nfsi->delegation_state = 0; + rcu_assign_pointer(nfsi->delegation, NULL); + spin_unlock(&delegation->lock); + return delegation; +nomatch_unlock: + spin_unlock(&delegation->lock); +nomatch: + return NULL; +} + +/* + * Set up a delegation on an inode + */ +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + struct nfs_delegation *freeme = NULL; + int status = 0; + + delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); + if (delegation == NULL) + return -ENOMEM; + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); + delegation->type = res->delegation_type; + delegation->maxsize = res->maxsize; + delegation->change_attr = nfsi->change_attr; + delegation->cred = get_rpccred(cred); + delegation->inode = inode; + delegation->flags = 1<<NFS_DELEGATION_REFERENCED; + spin_lock_init(&delegation->lock); + + spin_lock(&clp->cl_lock); + if (rcu_dereference(nfsi->delegation) != NULL) { + if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, + sizeof(delegation->stateid)) == 0 && + delegation->type == nfsi->delegation->type) { + goto out; + } + /* + * Deal with broken servers that hand out two + * delegations for the same file. + */ + dfprintk(FILE, "%s: server %s handed out " + "a duplicate delegation!\n", + __func__, clp->cl_hostname); + if (delegation->type <= nfsi->delegation->type) { + freeme = delegation; + delegation = NULL; + goto out; + } + freeme = nfs_detach_delegation_locked(nfsi, NULL); + } + list_add_rcu(&delegation->super_list, &clp->cl_delegations); + nfsi->delegation_state = delegation->type; + rcu_assign_pointer(nfsi->delegation, delegation); + delegation = NULL; + + /* Ensure we revalidate the attributes and page cache! */ + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_REVAL_FORCED; + spin_unlock(&inode->i_lock); + +out: + spin_unlock(&clp->cl_lock); + if (delegation != NULL) + nfs_free_delegation(delegation); + if (freeme != NULL) + nfs_do_return_delegation(inode, freeme, 0); + return status; +} + +/* Sync all data to disk upon delegation return */ +static void nfs_msync_inode(struct inode *inode) +{ + filemap_fdatawrite(inode->i_mapping); + nfs_wb_all(inode); + filemap_fdatawait(inode->i_mapping); +} + +/* + * Basic procedure for returning a delegation to the server + */ +static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int err; + + /* + * Guard against new delegated open/lock/unlock calls and against + * state recovery + */ + down_write(&nfsi->rwsem); + err = nfs_delegation_claim_opens(inode, &delegation->stateid); + up_write(&nfsi->rwsem); + if (err) + goto out; + + err = nfs_do_return_delegation(inode, delegation, issync); +out: + return err; +} + +/* + * Return all delegations that have been marked for return + */ +int nfs_client_return_marked_delegations(struct nfs_client *clp) +{ + struct nfs_delegation *delegation; + struct inode *inode; + int err = 0; + +restart: + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) + continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); + spin_unlock(&clp->cl_lock); + rcu_read_unlock(); + if (delegation != NULL) { + filemap_flush(inode->i_mapping); + err = __nfs_inode_return_delegation(inode, delegation, 0); + } + iput(inode); + if (!err) + goto restart; + set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + return err; + } + rcu_read_unlock(); + return 0; +} + +/* + * This function returns the delegation without reclaiming opens + * or protecting against delegation reclaims. + * It is therefore really only safe to be called from + * nfs4_clear_inode() + */ +void nfs_inode_return_delegation_noreclaim(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + + if (rcu_dereference(nfsi->delegation) != NULL) { + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(nfsi, NULL); + spin_unlock(&clp->cl_lock); + if (delegation != NULL) + nfs_do_return_delegation(inode, delegation, 0); + } +} + +int nfs_inode_return_delegation(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int err = 0; + + if (rcu_dereference(nfsi->delegation) != NULL) { + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(nfsi, NULL); + spin_unlock(&clp->cl_lock); + if (delegation != NULL) { + nfs_msync_inode(inode); + err = __nfs_inode_return_delegation(inode, delegation, 1); + } + } + return err; +} + +static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); +} + +/* + * Return all delegations associated to a super block + */ +void nfs_super_return_all_delegations(struct super_block *sb) +{ + struct nfs_client *clp = NFS_SB(sb)->nfs_client; + struct nfs_delegation *delegation; + + if (clp == NULL) + return; + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + spin_lock(&delegation->lock); + if (delegation->inode != NULL && delegation->inode->i_sb == sb) + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + spin_unlock(&delegation->lock); + } + rcu_read_unlock(); + if (nfs_client_return_marked_delegations(clp) != 0) + nfs4_schedule_state_manager(clp); +} + +static +void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) + continue; + if (delegation->type & flags) + nfs_mark_return_delegation(clp, delegation); + } + rcu_read_unlock(); +} + +static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) +{ + nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); +} + +static void nfs_delegation_run_state_manager(struct nfs_client *clp) +{ + if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) + nfs4_schedule_state_manager(clp); +} + +void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) +{ + nfs_client_mark_return_all_delegation_types(clp, flags); + nfs_delegation_run_state_manager(clp); +} + +void nfs_expire_all_delegations(struct nfs_client *clp) +{ + nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); +} + +/* + * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. + */ +void nfs_handle_cb_pathdown(struct nfs_client *clp) +{ + if (clp == NULL) + return; + nfs_client_mark_return_all_delegations(clp); +} + +static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) + continue; + nfs_mark_return_delegation(clp, delegation); + } + rcu_read_unlock(); +} + +void nfs_expire_unreferenced_delegations(struct nfs_client *clp) +{ + nfs_client_mark_return_unreferenced_delegations(clp); + nfs_delegation_run_state_manager(clp); +} + +/* + * Asynchronous delegation recall! + */ +int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid, + int (*validate_stateid)(struct nfs_delegation *delegation, + const nfs4_stateid *stateid)) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + + if (!validate_stateid(delegation, stateid)) { + rcu_read_unlock(); + return -ENOENT; + } + + nfs_mark_return_delegation(clp, delegation); + rcu_read_unlock(); + nfs_delegation_run_state_manager(clp); + return 0; +} + +/* + * Retrieve the inode associated with a delegation + */ +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle) +{ + struct nfs_delegation *delegation; + struct inode *res = NULL; + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + spin_lock(&delegation->lock); + if (delegation->inode != NULL && + nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { + res = igrab(delegation->inode); + } + spin_unlock(&delegation->lock); + if (res != NULL) + break; + } + rcu_read_unlock(); + return res; +} + +/* + * Mark all delegations as needing to be reclaimed + */ +void nfs_delegation_mark_reclaim(struct nfs_client *clp) +{ + struct nfs_delegation *delegation; + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) + set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + rcu_read_unlock(); +} + +/* + * Reap all unclaimed delegations after reboot recovery is done + */ +void nfs_delegation_reap_unclaimed(struct nfs_client *clp) +{ + struct nfs_delegation *delegation; + struct inode *inode; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) + continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); + spin_unlock(&clp->cl_lock); + rcu_read_unlock(); + if (delegation != NULL) + nfs_free_delegation(delegation); + iput(inode); + goto restart; + } + rcu_read_unlock(); +} + +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int ret = 0; + + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + if (delegation != NULL) { + memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); + ret = 1; + } + rcu_read_unlock(); + return ret; +} diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h new file mode 100644 index 00000000000..69e7b814012 --- /dev/null +++ b/fs/nfs/delegation.h @@ -0,0 +1,80 @@ +/* + * linux/fs/nfs/delegation.h + * + * Copyright (c) Trond Myklebust + * + * Definitions pertaining to NFS delegated files + */ +#ifndef FS_NFS_DELEGATION_H +#define FS_NFS_DELEGATION_H + +#if defined(CONFIG_NFS_V4) +/* + * NFSv4 delegation + */ +struct nfs_delegation { + struct list_head super_list; + struct rpc_cred *cred; + struct inode *inode; + nfs4_stateid stateid; + fmode_t type; + loff_t maxsize; + __u64 change_attr; + unsigned long flags; + spinlock_t lock; + struct rcu_head rcu; +}; + +enum { + NFS_DELEGATION_NEED_RECLAIM = 0, + NFS_DELEGATION_RETURN, + NFS_DELEGATION_REFERENCED, +}; + +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +int nfs_inode_return_delegation(struct inode *inode); +int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid, + int (*validate_stateid)(struct nfs_delegation *delegation, + const nfs4_stateid *stateid)); +void nfs_inode_return_delegation_noreclaim(struct inode *inode); + +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); +void nfs_super_return_all_delegations(struct super_block *sb); +void nfs_expire_all_delegations(struct nfs_client *clp); +void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); +void nfs_expire_unreferenced_delegations(struct nfs_client *clp); +void nfs_handle_cb_pathdown(struct nfs_client *clp); +int nfs_client_return_marked_delegations(struct nfs_client *clp); + +void nfs_delegation_mark_reclaim(struct nfs_client *clp); +void nfs_delegation_reap_unclaimed(struct nfs_client *clp); + +/* NFSv4 delegation-related procedures */ +int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); +int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); + +void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); +int nfs_have_delegation(struct inode *inode, fmode_t flags); + +#else +static inline int nfs_have_delegation(struct inode *inode, fmode_t flags) +{ + return 0; +} + +static inline int nfs_inode_return_delegation(struct inode *inode) +{ + return 0; +} +#endif + +static inline int nfs_have_delegated_attributes(struct inode *inode) +{ + return nfs_have_delegation(inode, FMODE_READ) && + !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED); +} + +#endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c new file mode 100644 index 00000000000..b5d55d39fb7 --- /dev/null +++ b/fs/nfs/dir.c @@ -0,0 +1,1983 @@ +/* + * linux/fs/nfs/dir.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs directory handling functions + * + * 10 Apr 1996 Added silly rename for unlink --okir + * 28 Sep 1996 Improved directory cache --okir + * 23 Aug 1997 Claus Heine claus@momo.math.rwth-aachen.de + * Re-implemented silly rename for unlink, newly implemented + * silly rename for nfs_rename() following the suggestions + * of Olaf Kirch (okir) found in this file. + * Following Linus comments on my original hack, this version + * depends only on the dcache stuff and doesn't touch the inode + * layer (iput() and friends). + * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM + */ + +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/pagemap.h> +#include <linux/pagevec.h> +#include <linux/namei.h> +#include <linux/mount.h> +#include <linux/sched.h> + +#include "nfs4_fs.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" + +/* #define NFS_DEBUG_VERBOSE 1 */ + +static int nfs_opendir(struct inode *, struct file *); +static int nfs_readdir(struct file *, void *, filldir_t); +static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); +static int nfs_mkdir(struct inode *, struct dentry *, int); +static int nfs_rmdir(struct inode *, struct dentry *); +static int nfs_unlink(struct inode *, struct dentry *); +static int nfs_symlink(struct inode *, struct dentry *, const char *); +static int nfs_link(struct dentry *, struct inode *, struct dentry *); +static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); +static int nfs_rename(struct inode *, struct dentry *, + struct inode *, struct dentry *); +static int nfs_fsync_dir(struct file *, struct dentry *, int); +static loff_t nfs_llseek_dir(struct file *, loff_t, int); + +const struct file_operations nfs_dir_operations = { + .llseek = nfs_llseek_dir, + .read = generic_read_dir, + .readdir = nfs_readdir, + .open = nfs_opendir, + .release = nfs_release, + .fsync = nfs_fsync_dir, +}; + +const struct inode_operations nfs_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#ifdef CONFIG_NFS_V3 +const struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_V3 */ + +#ifdef CONFIG_NFS_V4 + +static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +const struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_atomic_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + +#endif /* CONFIG_NFS_V4 */ + +/* + * Open file + */ +static int +nfs_opendir(struct inode *inode, struct file *filp) +{ + int res; + + dfprintk(FILE, "NFS: open dir(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); + + nfs_inc_stats(inode, NFSIOS_VFSOPEN); + + /* Call generic open code in order to cache credentials */ + res = nfs_open(inode, filp); + return res; +} + +typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); +typedef struct { + struct file *file; + struct page *page; + unsigned long page_index; + __be32 *ptr; + u64 *dir_cookie; + loff_t current_index; + struct nfs_entry *entry; + decode_dirent_t decode; + int plus; + unsigned long timestamp; + unsigned long gencount; + int timestamp_valid; +} nfs_readdir_descriptor_t; + +/* Now we cache directories properly, by stuffing the dirent + * data directly in the page cache. + * + * Inode invalidation due to refresh etc. takes care of + * _everything_, no sloppy entry flushing logic, no extraneous + * copying, network direct to page cache, the way it was meant + * to be. + * + * NOTE: Dirent information verification is done always by the + * page-in of the RPC reply, nowhere else, this simplies + * things substantially. + */ +static +int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) +{ + struct file *file = desc->file; + struct inode *inode = file->f_path.dentry->d_inode; + struct rpc_cred *cred = nfs_file_cred(file); + unsigned long timestamp, gencount; + int error; + + dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", + __func__, (long long)desc->entry->cookie, + page->index); + + again: + timestamp = jiffies; + gencount = nfs_inc_attr_generation_counter(); + error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, + NFS_SERVER(inode)->dtsize, desc->plus); + if (error < 0) { + /* We requested READDIRPLUS, but the server doesn't grok it */ + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); + desc->plus = 0; + goto again; + } + goto error; + } + desc->timestamp = timestamp; + desc->gencount = gencount; + desc->timestamp_valid = 1; + SetPageUptodate(page); + /* Ensure consistent page alignment of the data. + * Note: assumes we have exclusive access to this mapping either + * through inode->i_mutex or some other mechanism. + */ + if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { + /* Should never happen */ + nfs_zap_mapping(inode, inode->i_mapping); + } + unlock_page(page); + return 0; + error: + unlock_page(page); + return -EIO; +} + +static inline +int dir_decode(nfs_readdir_descriptor_t *desc) +{ + __be32 *p = desc->ptr; + p = desc->decode(p, desc->entry, desc->plus); + if (IS_ERR(p)) + return PTR_ERR(p); + desc->ptr = p; + if (desc->timestamp_valid) { + desc->entry->fattr->time_start = desc->timestamp; + desc->entry->fattr->gencount = desc->gencount; + } else + desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; + return 0; +} + +static inline +void dir_page_release(nfs_readdir_descriptor_t *desc) +{ + kunmap(desc->page); + page_cache_release(desc->page); + desc->page = NULL; + desc->ptr = NULL; +} + +/* + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the next entry with cookie '*desc->dir_cookie'. + * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. + */ +static inline +int find_dirent(nfs_readdir_descriptor_t *desc) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + while((status = dir_decode(desc)) == 0) { + dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", + __func__, (unsigned long long)entry->cookie); + if (entry->prev_cookie == *desc->dir_cookie) + break; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + return status; +} + +/* + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the entry at offset 'desc->file->f_pos'. + * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. + */ +static inline +int find_dirent_index(nfs_readdir_descriptor_t *desc) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + for(;;) { + status = dir_decode(desc); + if (status) + break; + + dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", + (unsigned long long)entry->cookie, desc->current_index); + + if (desc->file->f_pos == desc->current_index) { + *desc->dir_cookie = entry->cookie; + break; + } + desc->current_index++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + return status; +} + +/* + * Find the given page, and call find_dirent() or find_dirent_index in + * order to try to return the next entry. + */ +static inline +int find_dirent_page(nfs_readdir_descriptor_t *desc) +{ + struct inode *inode = desc->file->f_path.dentry->d_inode; + struct page *page; + int status; + + dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", + __func__, desc->page_index, + (long long) *desc->dir_cookie); + + /* If we find the page in the page_cache, we cannot be sure + * how fresh the data is, so we will ignore readdir_plus attributes. + */ + desc->timestamp_valid = 0; + page = read_cache_page(inode->i_mapping, desc->page_index, + (filler_t *)nfs_readdir_filler, desc); + if (IS_ERR(page)) { + status = PTR_ERR(page); + goto out; + } + + /* NOTE: Someone else may have changed the READDIRPLUS flag */ + desc->page = page; + desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ + if (*desc->dir_cookie != 0) + status = find_dirent(desc); + else + status = find_dirent_index(desc); + if (status < 0) + dir_page_release(desc); + out: + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); + return status; +} + +/* + * Recurse through the page cache pages, and return a + * filled nfs_entry structure of the next directory entry if possible. + * + * The target for the search is '*desc->dir_cookie' if non-0, + * 'desc->file->f_pos' otherwise + */ +static inline +int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) +{ + int loop_count = 0; + int res; + + /* Always search-by-index from the beginning of the cache */ + if (*desc->dir_cookie == 0) { + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n", + (long long)desc->file->f_pos); + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + desc->current_index = 0; + } else + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); + + for (;;) { + res = find_dirent_page(desc); + if (res != -EAGAIN) + break; + /* Align to beginning of next page */ + desc->page_index ++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res); + return res; +} + +static inline unsigned int dt_type(struct inode *inode) +{ + return (inode->i_mode >> 12) & 15; +} + +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc); + +/* + * Once we've found the start of the dirent within a page: fill 'er up... + */ +static +int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, + filldir_t filldir) +{ + struct file *file = desc->file; + struct nfs_entry *entry = desc->entry; + struct dentry *dentry = NULL; + u64 fileid; + int loop_count = 0, + res; + + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", + (unsigned long long)entry->cookie); + + for(;;) { + unsigned d_type = DT_UNKNOWN; + /* Note: entry->prev_cookie contains the cookie for + * retrieving the current dirent on the server */ + fileid = entry->ino; + + /* Get a dentry if we have one */ + if (dentry != NULL) + dput(dentry); + dentry = nfs_readdir_lookup(desc); + + /* Use readdirplus info */ + if (dentry != NULL && dentry->d_inode != NULL) { + d_type = dt_type(dentry->d_inode); + fileid = NFS_FILEID(dentry->d_inode); + } + + res = filldir(dirent, entry->name, entry->len, + file->f_pos, nfs_compat_user_ino64(fileid), + d_type); + if (res < 0) + break; + file->f_pos++; + *desc->dir_cookie = entry->cookie; + if (dir_decode(desc) != 0) { + desc->page_index ++; + break; + } + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dir_page_release(desc); + if (dentry != NULL) + dput(dentry); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", + (unsigned long long)*desc->dir_cookie, res); + return res; +} + +/* + * If we cannot find a cookie in our cache, we suspect that this is + * because it points to a deleted file, so we ask the server to return + * whatever it thinks is the next entry. We then feed this to filldir. + * If all goes well, we should then be able to find our way round the + * cache on the next call to readdir_search_pagecache(); + * + * NOTE: we cannot add the anonymous page to the pagecache because + * the data it contains might not be page aligned. Besides, + * we should already have a complete representation of the + * directory in the page cache by the time we get here. + */ +static inline +int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, + filldir_t filldir) +{ + struct file *file = desc->file; + struct inode *inode = file->f_path.dentry->d_inode; + struct rpc_cred *cred = nfs_file_cred(file); + struct page *page = NULL; + int status; + unsigned long timestamp, gencount; + + dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); + + page = alloc_page(GFP_HIGHUSER); + if (!page) { + status = -ENOMEM; + goto out; + } + timestamp = jiffies; + gencount = nfs_inc_attr_generation_counter(); + status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, + *desc->dir_cookie, page, + NFS_SERVER(inode)->dtsize, + desc->plus); + desc->page = page; + desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ + if (status >= 0) { + desc->timestamp = timestamp; + desc->gencount = gencount; + desc->timestamp_valid = 1; + if ((status = dir_decode(desc)) == 0) + desc->entry->prev_cookie = *desc->dir_cookie; + } else + status = -EIO; + if (status < 0) + goto out_release; + + status = nfs_do_filldir(desc, dirent, filldir); + + /* Reset read descriptor so it searches the page cache from + * the start upon the next call to readdir_search_pagecache() */ + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + out: + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", + __func__, status); + return status; + out_release: + dir_page_release(desc); + goto out; +} + +/* The file offset position represents the dirent entry number. A + last cookie cache takes care of the common case of reading the + whole directory. + */ +static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + nfs_readdir_descriptor_t my_desc, + *desc = &my_desc; + struct nfs_entry my_entry; + struct nfs_fh fh; + struct nfs_fattr fattr; + long res; + + dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (long long)filp->f_pos); + nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); + + /* + * filp->f_pos points to the dirent entry number. + * *desc->dir_cookie has the cookie for the next entry. We have + * to either find the entry with the appropriate number or + * revalidate the cookie. + */ + memset(desc, 0, sizeof(*desc)); + + desc->file = filp; + desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; + desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); + + my_entry.cookie = my_entry.prev_cookie = 0; + my_entry.eof = 0; + my_entry.fh = &fh; + my_entry.fattr = &fattr; + nfs_fattr_init(&fattr); + desc->entry = &my_entry; + + nfs_block_sillyrename(dentry); + res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); + if (res < 0) + goto out; + + while(!desc->entry->eof) { + res = readdir_search_pagecache(desc); + + if (res == -EBADCOOKIE) { + /* This means either end of directory */ + if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { + /* Or that the server has 'lost' a cookie */ + res = uncached_readdir(desc, dirent, filldir); + if (res >= 0) + continue; + } + res = 0; + break; + } + if (res == -ETOOSMALL && desc->plus) { + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); + nfs_zap_caches(inode); + desc->plus = 0; + desc->entry->eof = 0; + continue; + } + if (res < 0) + break; + + res = nfs_do_filldir(desc, dirent, filldir); + if (res < 0) { + res = 0; + break; + } + } +out: + nfs_unblock_sillyrename(dentry); + if (res > 0) + res = 0; + dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + res); + return res; +} + +static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + + dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, + offset, origin); + + mutex_lock(&inode->i_mutex); + switch (origin) { + case 1: + offset += filp->f_pos; + case 0: + if (offset >= 0) + break; + default: + offset = -EINVAL; + goto out; + } + if (offset != filp->f_pos) { + filp->f_pos = offset; + nfs_file_open_context(filp)->dir_cookie = 0; + } +out: + mutex_unlock(&inode->i_mutex); + return offset; +} + +/* + * All directory operations under NFS are synchronous, so fsync() + * is a dummy operation. + */ +static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) +{ + dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); + + nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); + return 0; +} + +/** + * nfs_force_lookup_revalidate - Mark the directory as having changed + * @dir - pointer to directory inode + * + * This forces the revalidation code in nfs_lookup_revalidate() to do a + * full lookup on all child dentries of 'dir' whenever a change occurs + * on the server that might have invalidated our dcache. + * + * The caller should be holding dir->i_lock + */ +void nfs_force_lookup_revalidate(struct inode *dir) +{ + NFS_I(dir)->cache_change_attribute++; +} + +/* + * A check for whether or not the parent directory has changed. + * In the case it has, we assume that the dentries are untrustworthy + * and may need to be looked up again. + */ +static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +{ + if (IS_ROOT(dentry)) + return 1; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) + return 0; + if (!nfs_verify_change_attribute(dir, dentry->d_time)) + return 0; + /* Revalidate nfsi->cache_change_attribute before we declare a match */ + if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + return 0; + if (!nfs_verify_change_attribute(dir, dentry->d_time)) + return 0; + return 1; +} + +/* + * Return the intent data that applies to this particular path component + * + * Note that the current set of intents only apply to the very last + * component of the path. + * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. + */ +static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask) +{ + if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) + return 0; + return nd->flags & mask; +} + +/* + * Use intent information to check whether or not we're going to do + * an O_EXCL create using this path component. + */ +static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +{ + if (NFS_PROTO(dir)->version == 2) + return 0; + return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); +} + +/* + * Inode and filehandle revalidation for lookups. + * + * We force revalidation in the cases where the VFS sets LOOKUP_REVAL, + * or if the intent information indicates that we're about to open this + * particular file and the "nocto" mount flag is not set. + * + */ +static inline +int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) +{ + struct nfs_server *server = NFS_SERVER(inode); + + if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags)) + return 0; + if (nd != NULL) { + /* VFS wants an on-the-wire revalidation */ + if (nd->flags & LOOKUP_REVAL) + goto out_force; + /* This is an open(2) */ + if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && + !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || + S_ISDIR(inode->i_mode))) + goto out_force; + return 0; + } + return nfs_revalidate_inode(server, inode); +out_force: + return __nfs_revalidate_inode(server, inode); +} + +/* + * We judge how long we want to trust negative + * dentries by looking at the parent inode mtime. + * + * If parent mtime has changed, we revalidate, else we wait for a + * period corresponding to the parent's attribute cache timeout value. + */ +static inline +int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + /* Don't revalidate a negative dentry if we're creating a new file */ + if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) + return 0; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) + return 1; + return !nfs_check_verifier(dir, dentry); +} + +/* + * This is called every time the dcache has a lookup hit, + * and we should check whether we can really trust that + * lookup. + * + * NOTE! The hit can be a negative hit too, don't assume + * we have an inode! + * + * If the parent directory is seen to have changed, we throw out the + * cached dentry and do a new lookup. + */ +static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct inode *dir; + struct inode *inode; + struct dentry *parent; + int error; + struct nfs_fh fhandle; + struct nfs_fattr fattr; + + parent = dget_parent(dentry); + dir = parent->d_inode; + nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); + inode = dentry->d_inode; + + if (!inode) { + if (nfs_neg_need_reval(dir, dentry, nd)) + goto out_bad; + goto out_valid; + } + + if (is_bad_inode(inode)) { + dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); + goto out_bad; + } + + if (nfs_have_delegation(inode, FMODE_READ)) + goto out_set_verifier; + + /* Force a full look up iff the parent directory has changed */ + if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { + if (nfs_lookup_verify_inode(inode, nd)) + goto out_zap_parent; + goto out_valid; + } + + if (NFS_STALE(inode)) + goto out_bad; + + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); + if (error) + goto out_bad; + if (nfs_compare_fh(NFS_FH(inode), &fhandle)) + goto out_bad; + if ((error = nfs_refresh_inode(inode, &fattr)) != 0) + goto out_bad; + +out_set_verifier: + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + out_valid: + dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); + return 1; +out_zap_parent: + nfs_zap_caches(dir); + out_bad: + nfs_mark_for_revalidate(dir); + if (inode && S_ISDIR(inode->i_mode)) { + /* Purge readdir caches. */ + nfs_zap_caches(inode); + /* If we have submounts, don't unhash ! */ + if (have_submounts(dentry)) + goto out_valid; + if (dentry->d_flags & DCACHE_DISCONNECTED) + goto out_valid; + shrink_dcache_parent(dentry); + } + d_drop(dentry); + dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); + return 0; +} + +/* + * This is called from dput() when d_count is going to 0. + */ +static int nfs_dentry_delete(struct dentry *dentry) +{ + dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + dentry->d_flags); + + /* Unhash any dentry with a stale inode */ + if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode)) + return 1; + + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + /* Unhash it, so that ->d_iput() would be called */ + return 1; + } + if (!(dentry->d_sb->s_flags & MS_ACTIVE)) { + /* Unhash it, so that ancestors of killed async unlink + * files will be cleaned up during umount */ + return 1; + } + return 0; + +} + +static void nfs_drop_nlink(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&inode->i_lock); +} + +/* + * Called when the dentry loses inode. + * We use it to clean up silly-renamed files. + */ +static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) +{ + if (S_ISDIR(inode->i_mode)) + /* drop any readdir cache as it could easily be old */ + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + drop_nlink(inode); + nfs_complete_unlink(dentry, inode); + } + iput(inode); +} + +const struct dentry_operations nfs_dentry_operations = { + .d_revalidate = nfs_lookup_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +{ + struct dentry *res; + struct dentry *parent; + struct inode *inode = NULL; + int error; + struct nfs_fh fhandle; + struct nfs_fattr fattr; + + dfprintk(VFS, "NFS: lookup(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); + + res = ERR_PTR(-ENAMETOOLONG); + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + goto out; + + res = ERR_PTR(-ENOMEM); + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* + * If we're doing an exclusive create, optimize away the lookup + * but don't hash the dentry. + */ + if (nfs_is_exclusive_create(dir, nd)) { + d_instantiate(dentry, NULL); + res = NULL; + goto out; + } + + parent = dentry->d_parent; + /* Protect against concurrent sillydeletes */ + nfs_block_sillyrename(parent); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); + if (error == -ENOENT) + goto no_entry; + if (error < 0) { + res = ERR_PTR(error); + goto out_unblock_sillyrename; + } + inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); + res = (struct dentry *)inode; + if (IS_ERR(res)) + goto out_unblock_sillyrename; + +no_entry: + res = d_materialise_unique(dentry, inode); + if (res != NULL) { + if (IS_ERR(res)) + goto out_unblock_sillyrename; + dentry = res; + } + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); +out_unblock_sillyrename: + nfs_unblock_sillyrename(parent); +out: + return res; +} + +#ifdef CONFIG_NFS_V4 +static int nfs_open_revalidate(struct dentry *, struct nameidata *); + +const struct dentry_operations nfs4_dentry_operations = { + .d_revalidate = nfs_open_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +/* + * Use intent information to determine whether we need to substitute + * the NFSv4-style stateful OPEN for the LOOKUP call + */ +static int is_atomic_open(struct nameidata *nd) +{ + if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0) + return 0; + /* NFS does not (yet) have a stateful open for directories */ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ + if (__mnt_is_readonly(nd->path.mnt) && + (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; +} + +static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *res = NULL; + int error; + + dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + + /* Check that we are indeed trying to open this file */ + if (!is_atomic_open(nd)) + goto no_open; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { + res = ERR_PTR(-ENAMETOOLONG); + goto out; + } + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash + * the dentry. */ + if (nd->flags & LOOKUP_EXCL) { + d_instantiate(dentry, NULL); + goto out; + } + + /* Open the file on the server */ + res = nfs4_atomic_open(dir, dentry, nd); + if (IS_ERR(res)) { + error = PTR_ERR(res); + switch (error) { + /* Make a negative dentry */ + case -ENOENT: + res = NULL; + goto out; + /* This turned out not to be a regular file */ + case -EISDIR: + case -ENOTDIR: + goto no_open; + case -ELOOP: + if (!(nd->intent.open.flags & O_NOFOLLOW)) + goto no_open; + /* case -EINVAL: */ + default: + goto out; + } + } else if (res != NULL) + dentry = res; +out: + return res; +no_open: + return nfs_lookup(dir, dentry, nd); +} + +static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; + struct inode *dir; + int openflags, ret = 0; + + if (!is_atomic_open(nd)) + goto no_open; + parent = dget_parent(dentry); + dir = parent->d_inode; + /* We can't create new files in nfs_open_revalidate(), so we + * optimize away revalidation of negative dentries. + */ + if (inode == NULL) { + if (!nfs_neg_need_reval(dir, dentry, nd)) + ret = 1; + goto out; + } + + /* NFS only supports OPEN on regular files */ + if (!S_ISREG(inode->i_mode)) + goto no_open_dput; + openflags = nd->intent.open.flags; + /* We cannot do exclusive creation on a positive dentry */ + if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + goto no_open_dput; + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_TRUNC); + + /* + * Note: we're not holding inode->i_mutex and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. + */ + ret = nfs4_open_revalidate(dir, dentry, openflags, nd); +out: + dput(parent); + if (!ret) + d_drop(dentry); + return ret; +no_open_dput: + dput(parent); +no_open: + return nfs_lookup_revalidate(dentry, nd); +} +#endif /* CONFIG_NFSV4 */ + +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) +{ + struct dentry *parent = desc->file->f_path.dentry; + struct inode *dir = parent->d_inode; + struct nfs_entry *entry = desc->entry; + struct dentry *dentry, *alias; + struct qstr name = { + .name = entry->name, + .len = entry->len, + }; + struct inode *inode; + unsigned long verf = nfs_save_change_attribute(dir); + + switch (name.len) { + case 2: + if (name.name[0] == '.' && name.name[1] == '.') + return dget_parent(parent); + break; + case 1: + if (name.name[0] == '.') + return dget(parent); + } + + spin_lock(&dir->i_lock); + if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { + spin_unlock(&dir->i_lock); + return NULL; + } + spin_unlock(&dir->i_lock); + + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry != NULL) { + /* Is this a positive dentry that matches the readdir info? */ + if (dentry->d_inode != NULL && + (NFS_FILEID(dentry->d_inode) == entry->ino || + d_mountpoint(dentry))) { + if (!desc->plus || entry->fh->size == 0) + return dentry; + if (nfs_compare_fh(NFS_FH(dentry->d_inode), + entry->fh) == 0) + goto out_renew; + } + /* No, so d_drop to allow one to be created */ + d_drop(dentry); + dput(dentry); + } + if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) + return NULL; + if (name.len > NFS_SERVER(dir)->namelen) + return NULL; + /* Note: caller is already holding the dir->i_mutex! */ + dentry = d_alloc(parent, &name); + if (dentry == NULL) + return NULL; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); + if (IS_ERR(inode)) { + dput(dentry); + return NULL; + } + + alias = d_materialise_unique(dentry, inode); + if (alias != NULL) { + dput(dentry); + if (IS_ERR(alias)) + return NULL; + dentry = alias; + } + +out_renew: + nfs_set_verifier(dentry, verf); + return dentry; +} + +/* + * Code common to create, mkdir, and mknod. + */ +int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; + struct inode *inode; + int error = -EACCES; + + d_drop(dentry); + + /* We may have been initialized further down */ + if (dentry->d_inode) + goto out; + if (fhandle->size == 0) { + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); + if (error) + goto out_error; + } + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + if (!(fattr->valid & NFS_ATTR_FATTR)) { + struct nfs_server *server = NFS_SB(dentry->d_sb); + error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); + if (error < 0) + goto out_error; + } + inode = nfs_fhget(dentry->d_sb, fhandle, fattr); + error = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_error; + d_add(dentry, inode); +out: + dput(parent); + return 0; +out_error: + nfs_mark_for_revalidate(dir); + dput(parent); + return error; +} + +/* + * Following a failed create operation, we drop the dentry rather + * than retain a negative dentry. This avoids a problem in the event + * that the operation succeeded on the server, but an error in the + * reply path made it appear to have failed. + */ +static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct iattr attr; + int error; + int open_flags = 0; + + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; + + if ((nd->flags & LOOKUP_CREATE) != 0) + open_flags = nd->intent.open.flags; + + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); + if (error != 0) + goto out_err; + return 0; +out_err: + d_drop(dentry); + return error; +} + +/* + * See comments for nfs_proc_create regarding failed operations. + */ +static int +nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) +{ + struct iattr attr; + int status; + + dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + + if (!new_valid_dev(rdev)) + return -EINVAL; + + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; + + status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); + if (status != 0) + goto out_err; + return 0; +out_err: + d_drop(dentry); + return status; +} + +/* + * See comments for nfs_proc_create regarding failed operations. + */ +static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct iattr attr; + int error; + + dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + + attr.ia_valid = ATTR_MODE; + attr.ia_mode = mode | S_IFDIR; + + error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); + if (error != 0) + goto out_err; + return 0; +out_err: + d_drop(dentry); + return error; +} + +static void nfs_dentry_handle_enoent(struct dentry *dentry) +{ + if (dentry->d_inode != NULL && !d_unhashed(dentry)) + d_delete(dentry); +} + +static int nfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + int error; + + dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + + error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); + /* Ensure the VFS deletes this inode */ + if (error == 0 && dentry->d_inode != NULL) + clear_nlink(dentry->d_inode); + else if (error == -ENOENT) + nfs_dentry_handle_enoent(dentry); + + return error; +} + +static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) +{ + static unsigned int sillycounter; + const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; + const int countersize = sizeof(sillycounter)*2; + const int slen = sizeof(".nfs")+fileidsize+countersize-1; + char silly[slen+1]; + struct qstr qsilly; + struct dentry *sdentry; + int error = -EIO; + + dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); + + /* + * We don't allow a dentry to be silly-renamed twice. + */ + error = -EBUSY; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + goto out; + + sprintf(silly, ".nfs%*.*Lx", + fileidsize, fileidsize, + (unsigned long long)NFS_FILEID(dentry->d_inode)); + + /* Return delegation in anticipation of the rename */ + nfs_inode_return_delegation(dentry->d_inode); + + sdentry = NULL; + do { + char *suffix = silly + slen - countersize; + + dput(sdentry); + sillycounter++; + sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); + + dfprintk(VFS, "NFS: trying to rename %s to %s\n", + dentry->d_name.name, silly); + + sdentry = lookup_one_len(silly, dentry->d_parent, slen); + /* + * N.B. Better to return EBUSY here ... it could be + * dangerous to delete the file while it's in use. + */ + if (IS_ERR(sdentry)) + goto out; + } while(sdentry->d_inode != NULL); /* need negative lookup */ + + qsilly.name = silly; + qsilly.len = strlen(silly); + if (dentry->d_inode) { + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_mark_for_revalidate(dentry->d_inode); + } else + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + if (!error) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + d_move(dentry, sdentry); + error = nfs_async_unlink(dir, dentry); + /* If we return 0 we don't unlink */ + } + dput(sdentry); +out: + return error; +} + +/* + * Remove a file after making sure there are no pending writes, + * and after checking that the file has only one user. + * + * We invalidate the attribute cache and free the inode prior to the operation + * to avoid possible races if the server reuses the inode. + */ +static int nfs_safe_remove(struct dentry *dentry) +{ + struct inode *dir = dentry->d_parent->d_inode; + struct inode *inode = dentry->d_inode; + int error = -EBUSY; + + dfprintk(VFS, "NFS: safe_remove(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + + /* If the dentry was sillyrenamed, we simply call d_delete() */ + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + error = 0; + goto out; + } + + if (inode != NULL) { + nfs_inode_return_delegation(inode); + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + /* The VFS may want to delete this inode */ + if (error == 0) + nfs_drop_nlink(inode); + nfs_mark_for_revalidate(inode); + } else + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + if (error == -ENOENT) + nfs_dentry_handle_enoent(dentry); +out: + return error; +} + +/* We do silly rename. In case sillyrename() returns -EBUSY, the inode + * belongs to an active ".nfs..." file and we return -EBUSY. + * + * If sillyrename() returns 0, we do nothing, otherwise we unlink. + */ +static int nfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int error; + int need_rehash = 0; + + dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name); + + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + if (atomic_read(&dentry->d_count) > 1) { + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + /* Start asynchronous writeout of the inode */ + write_inode_now(dentry->d_inode, 0); + error = nfs_sillyrename(dir, dentry); + return error; + } + if (!d_unhashed(dentry)) { + __d_drop(dentry); + need_rehash = 1; + } + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + error = nfs_safe_remove(dentry); + if (!error || error == -ENOENT) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } else if (need_rehash) + d_rehash(dentry); + return error; +} + +/* + * To create a symbolic link, most file systems instantiate a new inode, + * add a page to it containing the path, then write it out to the disk + * using prepare_write/commit_write. + * + * Unfortunately the NFS client can't create the in-core inode first + * because it needs a file handle to create an in-core inode (see + * fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the + * symlink request has completed on the server. + * + * So instead we allocate a raw page, copy the symname into it, then do + * the SYMLINK request with the page as the buffer. If it succeeds, we + * now have a new file handle and can instantiate an in-core NFS inode + * and move the raw page into its mapping. + */ +static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +{ + struct pagevec lru_pvec; + struct page *page; + char *kaddr; + struct iattr attr; + unsigned int pathlen = strlen(symname); + int error; + + dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name, symname); + + if (pathlen > PAGE_SIZE) + return -ENAMETOOLONG; + + attr.ia_mode = S_IFLNK | S_IRWXUGO; + attr.ia_valid = ATTR_MODE; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return -ENOMEM; + + kaddr = kmap_atomic(page, KM_USER0); + memcpy(kaddr, symname, pathlen); + if (pathlen < PAGE_SIZE) + memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); + kunmap_atomic(kaddr, KM_USER0); + + error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); + if (error != 0) { + dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", + dir->i_sb->s_id, dir->i_ino, + dentry->d_name.name, symname, error); + d_drop(dentry); + __free_page(page); + return error; + } + + /* + * No big deal if we can't add this page to the page cache here. + * READLINK will get the missing page from the server if needed. + */ + pagevec_init(&lru_pvec, 0); + if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, + GFP_KERNEL)) { + pagevec_add(&lru_pvec, page); + pagevec_lru_add_file(&lru_pvec); + SetPageUptodate(page); + unlock_page(page); + } else + __free_page(page); + + return 0; +} + +static int +nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + int error; + + dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n", + old_dentry->d_parent->d_name.name, old_dentry->d_name.name, + dentry->d_parent->d_name.name, dentry->d_name.name); + + nfs_inode_return_delegation(inode); + + d_drop(dentry); + error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); + if (error == 0) { + atomic_inc(&inode->i_count); + d_add(dentry, inode); + } + return error; +} + +/* + * RENAME + * FIXME: Some nfsds, like the Linux user space nfsd, may generate a + * different file handle for the same inode after a rename (e.g. when + * moving to a different directory). A fail-safe method to do so would + * be to look up old_dir/old_name, create a link to new_dir/new_name and + * rename the old file using the sillyrename stuff. This way, the original + * file in old_dir will go away when the last process iput()s the inode. + * + * FIXED. + * + * It actually works quite well. One needs to have the possibility for + * at least one ".nfs..." file in each directory the file ever gets + * moved or linked to which happens automagically with the new + * implementation that only depends on the dcache stuff instead of + * using the inode layer + * + * Unfortunately, things are a little more complicated than indicated + * above. For a cross-directory move, we want to make sure we can get + * rid of the old inode after the operation. This means there must be + * no pending writes (if it's a file), and the use count must be 1. + * If these conditions are met, we can drop the dentries before doing + * the rename. + */ +static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct dentry *dentry = NULL, *rehash = NULL; + int error = -EBUSY; + + dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", + old_dentry->d_parent->d_name.name, old_dentry->d_name.name, + new_dentry->d_parent->d_name.name, new_dentry->d_name.name, + atomic_read(&new_dentry->d_count)); + + /* + * For non-directories, check whether the target is busy and if so, + * make a copy of the dentry and then do a silly-rename. If the + * silly-rename succeeds, the copied dentry is hashed and becomes + * the new target. + */ + if (new_inode && !S_ISDIR(new_inode->i_mode)) { + /* + * To prevent any new references to the target during the + * rename, we unhash the dentry in advance. + */ + if (!d_unhashed(new_dentry)) { + d_drop(new_dentry); + rehash = new_dentry; + } + + if (atomic_read(&new_dentry->d_count) > 2) { + int err; + + /* copy the target dentry's name */ + dentry = d_alloc(new_dentry->d_parent, + &new_dentry->d_name); + if (!dentry) + goto out; + + /* silly-rename the existing target ... */ + err = nfs_sillyrename(new_dir, new_dentry); + if (err) + goto out; + + new_dentry = dentry; + rehash = NULL; + new_inode = NULL; + } + } + + /* + * ... prune child dentries and writebacks if needed. + */ + if (atomic_read(&old_dentry->d_count) > 1) { + if (S_ISREG(old_inode->i_mode)) + nfs_wb_all(old_inode); + shrink_dcache_parent(old_dentry); + } + nfs_inode_return_delegation(old_inode); + + if (new_inode != NULL) + nfs_inode_return_delegation(new_inode); + + error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, + new_dir, &new_dentry->d_name); + nfs_mark_for_revalidate(old_inode); +out: + if (rehash) + d_rehash(rehash); + if (!error) { + if (new_inode != NULL) + nfs_drop_nlink(new_inode); + d_move(old_dentry, new_dentry); + nfs_set_verifier(new_dentry, + nfs_save_change_attribute(new_dir)); + } else if (error == -ENOENT) + nfs_dentry_handle_enoent(old_dentry); + + /* new dentry created? */ + if (dentry) + dput(dentry); + return error; +} + +static DEFINE_SPINLOCK(nfs_access_lru_lock); +static LIST_HEAD(nfs_access_lru_list); +static atomic_long_t nfs_access_nr_entries; + +static void nfs_access_free_entry(struct nfs_access_entry *entry) +{ + put_rpccred(entry->cred); + kfree(entry); + smp_mb__before_atomic_dec(); + atomic_long_dec(&nfs_access_nr_entries); + smp_mb__after_atomic_dec(); +} + +int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +{ + LIST_HEAD(head); + struct nfs_inode *nfsi; + struct nfs_access_entry *cache; + +restart: + spin_lock(&nfs_access_lru_lock); + list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { + struct rw_semaphore *s_umount; + struct inode *inode; + + if (nr_to_scan-- == 0) + break; + s_umount = &nfsi->vfs_inode.i_sb->s_umount; + if (!down_read_trylock(s_umount)) + continue; + inode = igrab(&nfsi->vfs_inode); + if (inode == NULL) { + up_read(s_umount); + continue; + } + spin_lock(&inode->i_lock); + if (list_empty(&nfsi->access_cache_entry_lru)) + goto remove_lru_entry; + cache = list_entry(nfsi->access_cache_entry_lru.next, + struct nfs_access_entry, lru); + list_move(&cache->lru, &head); + rb_erase(&cache->rb_node, &nfsi->access_cache); + if (!list_empty(&nfsi->access_cache_entry_lru)) + list_move_tail(&nfsi->access_cache_inode_lru, + &nfs_access_lru_list); + else { +remove_lru_entry: + list_del_init(&nfsi->access_cache_inode_lru); + clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); + } + spin_unlock(&inode->i_lock); + spin_unlock(&nfs_access_lru_lock); + iput(inode); + up_read(s_umount); + goto restart; + } + spin_unlock(&nfs_access_lru_lock); + while (!list_empty(&head)) { + cache = list_entry(head.next, struct nfs_access_entry, lru); + list_del(&cache->lru); + nfs_access_free_entry(cache); + } + return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; +} + +static void __nfs_access_zap_cache(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct rb_root *root_node = &nfsi->access_cache; + struct rb_node *n, *dispose = NULL; + struct nfs_access_entry *entry; + + /* Unhook entries from the cache */ + while ((n = rb_first(root_node)) != NULL) { + entry = rb_entry(n, struct nfs_access_entry, rb_node); + rb_erase(n, root_node); + list_del(&entry->lru); + n->rb_left = dispose; + dispose = n; + } + nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); + + /* Now kill them all! */ + while (dispose != NULL) { + n = dispose; + dispose = n->rb_left; + nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node)); + } +} + +void nfs_access_zap_cache(struct inode *inode) +{ + /* Remove from global LRU init */ + if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { + spin_lock(&nfs_access_lru_lock); + list_del_init(&NFS_I(inode)->access_cache_inode_lru); + spin_unlock(&nfs_access_lru_lock); + } + + spin_lock(&inode->i_lock); + /* This will release the spinlock */ + __nfs_access_zap_cache(inode); +} + +static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) +{ + struct rb_node *n = NFS_I(inode)->access_cache.rb_node; + struct nfs_access_entry *entry; + + while (n != NULL) { + entry = rb_entry(n, struct nfs_access_entry, rb_node); + + if (cred < entry->cred) + n = n->rb_left; + else if (cred > entry->cred) + n = n->rb_right; + else + return entry; + } + return NULL; +} + +static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache; + int err = -ENOENT; + + spin_lock(&inode->i_lock); + if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) + goto out_zap; + cache = nfs_access_search_rbtree(inode, cred); + if (cache == NULL) + goto out; + if (!nfs_have_delegated_attributes(inode) && + !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) + goto out_stale; + res->jiffies = cache->jiffies; + res->cred = cache->cred; + res->mask = cache->mask; + list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); + err = 0; +out: + spin_unlock(&inode->i_lock); + return err; +out_stale: + rb_erase(&cache->rb_node, &nfsi->access_cache); + list_del(&cache->lru); + spin_unlock(&inode->i_lock); + nfs_access_free_entry(cache); + return -ENOENT; +out_zap: + /* This will release the spinlock */ + __nfs_access_zap_cache(inode); + return -ENOENT; +} + +static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct rb_root *root_node = &nfsi->access_cache; + struct rb_node **p = &root_node->rb_node; + struct rb_node *parent = NULL; + struct nfs_access_entry *entry; + + spin_lock(&inode->i_lock); + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct nfs_access_entry, rb_node); + + if (set->cred < entry->cred) + p = &parent->rb_left; + else if (set->cred > entry->cred) + p = &parent->rb_right; + else + goto found; + } + rb_link_node(&set->rb_node, parent, p); + rb_insert_color(&set->rb_node, root_node); + list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); + spin_unlock(&inode->i_lock); + return; +found: + rb_replace_node(parent, &set->rb_node, root_node); + list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); + list_del(&entry->lru); + spin_unlock(&inode->i_lock); + nfs_access_free_entry(entry); +} + +static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +{ + struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); + if (cache == NULL) + return; + RB_CLEAR_NODE(&cache->rb_node); + cache->jiffies = set->jiffies; + cache->cred = get_rpccred(set->cred); + cache->mask = set->mask; + + nfs_access_add_rbtree(inode, cache); + + /* Update accounting */ + smp_mb__before_atomic_inc(); + atomic_long_inc(&nfs_access_nr_entries); + smp_mb__after_atomic_inc(); + + /* Add inode to global LRU list */ + if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { + spin_lock(&nfs_access_lru_lock); + list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); + spin_unlock(&nfs_access_lru_lock); + } +} + +static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) +{ + struct nfs_access_entry cache; + int status; + + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = NFS_PROTO(inode)->access(inode, &cache); + if (status != 0) { + if (status == -ESTALE) { + nfs_zap_caches(inode); + if (!S_ISDIR(inode->i_mode)) + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + } + return status; + } + nfs_access_add_cache(inode, &cache); +out: + if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + return 0; + return -EACCES; +} + +static int nfs_open_permission_mask(int openflags) +{ + int mask = 0; + + if (openflags & FMODE_READ) + mask |= MAY_READ; + if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; + return mask; +} + +int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) +{ + return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); +} + +int nfs_permission(struct inode *inode, int mask) +{ + struct rpc_cred *cred; + int res = 0; + + nfs_inc_stats(inode, NFSIOS_VFSACCESS); + + if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + goto out; + /* Is this sys_access() ? */ + if (mask & MAY_ACCESS) + goto force_lookup; + + switch (inode->i_mode & S_IFMT) { + case S_IFLNK: + goto out; + case S_IFREG: + /* NFSv4 has atomic_open... */ + if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) + && (mask & MAY_OPEN) + && !(mask & MAY_EXEC)) + goto out; + break; + case S_IFDIR: + /* + * Optimize away all write operations, since the server + * will check permissions when we perform the op. + */ + if ((mask & MAY_WRITE) && !(mask & MAY_READ)) + goto out; + } + +force_lookup: + if (!NFS_PROTO(inode)->access) + goto out_notsup; + + cred = rpc_lookup_cred(); + if (!IS_ERR(cred)) { + res = nfs_do_access(inode, cred, mask); + put_rpccred(cred); + } else + res = PTR_ERR(cred); +out: + if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) + res = -EACCES; + + dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", + inode->i_sb->s_id, inode->i_ino, mask, res); + return res; +out_notsup: + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (res == 0) + res = generic_permission(inode, mask, NULL); + goto out; +} + +/* + * Local variables: + * version-control: t + * kept-new-versions: 5 + * End: + */ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c new file mode 100644 index 00000000000..0d289823e85 --- /dev/null +++ b/fs/nfs/direct.c @@ -0,0 +1,1013 @@ +/* + * linux/fs/nfs/direct.c + * + * Copyright (C) 2003 by Chuck Lever <cel@netapp.com> + * + * High-performance uncached I/O for the Linux NFS client + * + * There are important applications whose performance or correctness + * depends on uncached access to file data. Database clusters + * (multiple copies of the same instance running on separate hosts) + * implement their own cache coherency protocol that subsumes file + * system cache protocols. Applications that process datasets + * considerably larger than the client's memory do not always benefit + * from a local cache. A streaming video server, for instance, has no + * need to cache the contents of a file. + * + * When an application requests uncached I/O, all read and write requests + * are made directly to the server; data stored or fetched via these + * requests is not cached in the Linux page cache. The client does not + * correct unaligned requests from applications. All requested bytes are + * held on permanent storage before a direct write system call returns to + * an application. + * + * Solaris implements an uncached I/O facility called directio() that + * is used for backups and sequential I/O to very large files. Solaris + * also supports uncaching whole NFS partitions with "-o forcedirectio," + * an undocumented mount option. + * + * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with + * help from Andrew Morton. + * + * 18 Dec 2001 Initial implementation for 2.4 --cel + * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy + * 08 Jun 2003 Port to 2.5 APIs --cel + * 31 Mar 2004 Handle direct I/O without VFS support --cel + * 15 Sep 2004 Parallel async reads --cel + * 04 May 2005 support O_DIRECT with aio --cel + * + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/file.h> +#include <linux/pagemap.h> +#include <linux/kref.h> + +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/sunrpc/clnt.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/atomic.h> + +#include "internal.h" +#include "iostat.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +static struct kmem_cache *nfs_direct_cachep; + +/* + * This represents a set of asynchronous requests that we're waiting on + */ +struct nfs_direct_req { + struct kref kref; /* release manager */ + + /* I/O parameters */ + struct nfs_open_context *ctx; /* file open context info */ + struct kiocb * iocb; /* controlling i/o request */ + struct inode * inode; /* target file of i/o */ + + /* completion state */ + atomic_t io_count; /* i/os we're waiting for */ + spinlock_t lock; /* protect completion state */ + ssize_t count, /* bytes actually processed */ + error; /* any reported error */ + struct completion completion; /* wait for i/o completion */ + + /* commit state */ + struct list_head rewrite_list; /* saved nfs_write_data structs */ + struct nfs_write_data * commit_data; /* special write_data for commits */ + int flags; +#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ +#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + struct nfs_writeverf verf; /* unstable write verifier */ +}; + +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); +static const struct rpc_call_ops nfs_write_direct_ops; + +static inline void get_dreq(struct nfs_direct_req *dreq) +{ + atomic_inc(&dreq->io_count); +} + +static inline int put_dreq(struct nfs_direct_req *dreq) +{ + return atomic_dec_and_test(&dreq->io_count); +} + +/** + * nfs_direct_IO - NFS address space operation for direct I/O + * @rw: direction (read or write) + * @iocb: target I/O control block + * @iov: array of vectors that define I/O buffer + * @pos: offset in file to begin the operation + * @nr_segs: size of iovec array + * + * The presence of this routine in the address space ops vector means + * the NFS client supports direct I/O. However, we shunt off direct + * read and write requests before the VFS gets them, so this method + * should never be called. + */ +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +{ + dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", + iocb->ki_filp->f_path.dentry->d_name.name, + (long long) pos, nr_segs); + + return -EINVAL; +} + +static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count) +{ + unsigned int npages; + unsigned int i; + + if (count == 0) + return; + pages += (pgbase >> PAGE_SHIFT); + npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + struct page *page = pages[i]; + if (!PageCompound(page)) + set_page_dirty(page); + } +} + +static void nfs_direct_release_pages(struct page **pages, unsigned int npages) +{ + unsigned int i; + for (i = 0; i < npages; i++) + page_cache_release(pages[i]); +} + +static inline struct nfs_direct_req *nfs_direct_req_alloc(void) +{ + struct nfs_direct_req *dreq; + + dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); + if (!dreq) + return NULL; + + kref_init(&dreq->kref); + kref_get(&dreq->kref); + init_completion(&dreq->completion); + INIT_LIST_HEAD(&dreq->rewrite_list); + dreq->iocb = NULL; + dreq->ctx = NULL; + spin_lock_init(&dreq->lock); + atomic_set(&dreq->io_count, 0); + dreq->count = 0; + dreq->error = 0; + dreq->flags = 0; + + return dreq; +} + +static void nfs_direct_req_free(struct kref *kref) +{ + struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + + if (dreq->ctx != NULL) + put_nfs_open_context(dreq->ctx); + kmem_cache_free(nfs_direct_cachep, dreq); +} + +static void nfs_direct_req_release(struct nfs_direct_req *dreq) +{ + kref_put(&dreq->kref, nfs_direct_req_free); +} + +/* + * Collects and returns the final error value/byte-count. + */ +static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) +{ + ssize_t result = -EIOCBQUEUED; + + /* Async requests don't wait here */ + if (dreq->iocb) + goto out; + + result = wait_for_completion_killable(&dreq->completion); + + if (!result) + result = dreq->error; + if (!result) + result = dreq->count; + +out: + return (ssize_t) result; +} + +/* + * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust + * the iocb is still valid here if this is a synchronous request. + */ +static void nfs_direct_complete(struct nfs_direct_req *dreq) +{ + if (dreq->iocb) { + long res = (long) dreq->error; + if (!res) + res = (long) dreq->count; + aio_complete(dreq->iocb, res, 0); + } + complete_all(&dreq->completion); + + nfs_direct_req_release(dreq); +} + +/* + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). + */ +static void nfs_direct_read_result(struct rpc_task *task, void *calldata) +{ + struct nfs_read_data *data = calldata; + + nfs_readpage_result(task, data); +} + +static void nfs_direct_read_release(void *calldata) +{ + + struct nfs_read_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = data->task.tk_status; + + spin_lock(&dreq->lock); + if (unlikely(status < 0)) { + dreq->error = status; + spin_unlock(&dreq->lock); + } else { + dreq->count += data->res.count; + spin_unlock(&dreq->lock); + nfs_direct_dirty_pages(data->pagevec, + data->args.pgbase, + data->res.count); + } + nfs_direct_release_pages(data->pagevec, data->npages); + + if (put_dreq(dreq)) + nfs_direct_complete(dreq); + nfs_readdata_free(data); +} + +static const struct rpc_call_ops nfs_read_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs_direct_read_result, + .rpc_release = nfs_direct_read_release, +}; + +/* + * For each rsize'd chunk of the user's buffer, dispatch an NFS READ + * operation. If nfs_readdata_alloc() or get_user_pages() fails, + * bail and stop sending more reads. Read length accounting is + * handled automatically by nfs_direct_read_result(). Otherwise, if + * no requests have been sent, just return an error. + */ +static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, + const struct iovec *iov, + loff_t pos) +{ + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->path.dentry->d_inode; + unsigned long user_addr = (unsigned long)iov->iov_base; + size_t count = iov->iov_len; + size_t rsize = NFS_SERVER(inode)->rsize; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_cred = ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = &nfs_read_direct_ops, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + unsigned int pgbase; + int result; + ssize_t started = 0; + + do { + struct nfs_read_data *data; + size_t bytes; + + pgbase = user_addr & ~PAGE_MASK; + bytes = min(rsize,count); + + result = -ENOMEM; + data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); + if (unlikely(!data)) + break; + + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + data->npages, 1, 0, data->pagevec, NULL); + up_read(¤t->mm->mmap_sem); + if (result < 0) { + nfs_readdata_free(data); + break; + } + if ((unsigned)result < data->npages) { + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_readdata_free(data); + break; + } + bytes -= pgbase; + data->npages = result; + } + + get_dreq(dreq); + + data->req = (struct nfs_page *) dreq; + data->inode = inode; + data->cred = msg.rpc_cred; + data->args.fh = NFS_FH(inode); + data->args.context = ctx; + data->args.offset = pos; + data->args.pgbase = pgbase; + data->args.pages = data->pagevec; + data->args.count = bytes; + data->res.fattr = &data->fattr; + data->res.eof = 0; + data->res.count = bytes; + nfs_fattr_init(&data->fattr); + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + + task_setup_data.task = &data->task; + task_setup_data.callback_data = data; + NFS_PROTO(inode)->read_setup(data, &msg); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + break; + rpc_put_task(task); + + dprintk("NFS: %5u initiated direct read call " + "(req %s/%Ld, %zu bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + bytes, + (unsigned long long)data->args.offset); + + started += bytes; + user_addr += bytes; + pos += bytes; + /* FIXME: Remove this unnecessary math from final patch */ + pgbase += bytes; + pgbase &= ~PAGE_MASK; + BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); + + count -= bytes; + } while (count != 0); + + if (started) + return started; + return result < 0 ? (ssize_t) result : -EFAULT; +} + +static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, + const struct iovec *iov, + unsigned long nr_segs, + loff_t pos) +{ + ssize_t result = -EINVAL; + size_t requested_bytes = 0; + unsigned long seg; + + get_dreq(dreq); + + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *vec = &iov[seg]; + result = nfs_direct_read_schedule_segment(dreq, vec, pos); + if (result < 0) + break; + requested_bytes += result; + if ((size_t)result < vec->iov_len) + break; + pos += vec->iov_len; + } + + if (put_dreq(dreq)) + nfs_direct_complete(dreq); + + if (requested_bytes != 0) + return 0; + + if (result < 0) + return result; + return -EIO; +} + +static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + ssize_t result = 0; + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct nfs_direct_req *dreq; + + dreq = nfs_direct_req_alloc(); + if (!dreq) + return -ENOMEM; + + dreq->inode = inode; + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; + + result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); + if (!result) + result = nfs_direct_wait(dreq); + nfs_direct_req_release(dreq); + + return result; +} + +static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) +{ + while (!list_empty(&dreq->rewrite_list)) { + struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); + list_del(&data->pages); + nfs_direct_release_pages(data->pagevec, data->npages); + nfs_writedata_free(data); + } +} + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) +{ + struct inode *inode = dreq->inode; + struct list_head *p; + struct nfs_write_data *data; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_cred = dreq->ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = &nfs_write_direct_ops, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + + dreq->count = 0; + get_dreq(dreq); + + list_for_each(p, &dreq->rewrite_list) { + data = list_entry(p, struct nfs_write_data, pages); + + get_dreq(dreq); + + /* Use stable writes */ + data->args.stable = NFS_FILE_SYNC; + + /* + * Reset data->res. + */ + nfs_fattr_init(&data->fattr); + data->res.count = data->args.count; + memset(&data->verf, 0, sizeof(data->verf)); + + /* + * Reuse data->task; data->args should not have changed + * since the original request was sent. + */ + task_setup_data.task = &data->task; + task_setup_data.callback_data = data; + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + NFS_PROTO(inode)->write_setup(data, &msg); + + /* + * We're called via an RPC callback, so BKL is already held. + */ + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); + + dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + data->args.count, + (unsigned long long)data->args.offset); + } + + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, inode); +} + +static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + + /* Call the NFS version-specific code */ + NFS_PROTO(data->inode)->commit_done(task, data); +} + +static void nfs_direct_commit_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = data->task.tk_status; + + if (status < 0) { + dprintk("NFS: %5u commit failed with error %d.\n", + data->task.tk_pid, status); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + + dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); + nfs_direct_write_complete(dreq, data->inode); + nfs_commit_free(data); +} + +static const struct rpc_call_ops nfs_commit_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs_direct_commit_result, + .rpc_release = nfs_direct_commit_release, +}; + +static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) +{ + struct nfs_write_data *data = dreq->commit_data; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = dreq->ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .task = &data->task, + .rpc_client = NFS_CLIENT(dreq->inode), + .rpc_message = &msg, + .callback_ops = &nfs_commit_direct_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + + data->inode = dreq->inode; + data->cred = msg.rpc_cred; + + data->args.fh = NFS_FH(data->inode); + data->args.offset = 0; + data->args.count = 0; + data->args.context = dreq->ctx; + data->res.count = 0; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); + + NFS_PROTO(data->inode)->commit_setup(data, &msg); + + /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ + dreq->commit_data = NULL; + + dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); +} + +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + int flags = dreq->flags; + + dreq->flags = 0; + switch (flags) { + case NFS_ODIRECT_DO_COMMIT: + nfs_direct_commit_schedule(dreq); + break; + case NFS_ODIRECT_RESCHED_WRITES: + nfs_direct_write_reschedule(dreq); + break; + default: + if (dreq->commit_data != NULL) + nfs_commit_free(dreq->commit_data); + nfs_direct_free_writedata(dreq); + nfs_zap_mapping(inode, inode->i_mapping); + nfs_direct_complete(dreq); + } +} + +static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = nfs_commitdata_alloc(); + if (dreq->commit_data != NULL) + dreq->commit_data->req = (struct nfs_page *) dreq; +} +#else +static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = NULL; +} + +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + nfs_direct_free_writedata(dreq); + nfs_zap_mapping(inode, inode->i_mapping); + nfs_direct_complete(dreq); +} +#endif + +static void nfs_direct_write_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + + if (nfs_writeback_done(task, data) != 0) + return; +} + +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ +static void nfs_direct_write_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = data->task.tk_status; + + spin_lock(&dreq->lock); + + if (unlikely(status < 0)) { + /* An error has occurred, so we should not commit */ + dreq->flags = 0; + dreq->error = status; + } + if (unlikely(dreq->error != 0)) + goto out_unlock; + + dreq->count += data->res.count; + + if (data->res.verf->committed != NFS_FILE_SYNC) { + switch (dreq->flags) { + case 0: + memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); + dreq->flags = NFS_ODIRECT_DO_COMMIT; + break; + case NFS_ODIRECT_DO_COMMIT: + if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { + dprintk("NFS: %5u write verify failed\n", data->task.tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + } + } +out_unlock: + spin_unlock(&dreq->lock); + + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, data->inode); +} + +static const struct rpc_call_ops nfs_write_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs_direct_write_result, + .rpc_release = nfs_direct_write_release, +}; + +/* + * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE + * operation. If nfs_writedata_alloc() or get_user_pages() fails, + * bail and stop sending more writes. Write length accounting is + * handled automatically by nfs_direct_write_result(). Otherwise, if + * no requests have been sent, just return an error. + */ +static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, + const struct iovec *iov, + loff_t pos, int sync) +{ + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->path.dentry->d_inode; + unsigned long user_addr = (unsigned long)iov->iov_base; + size_t count = iov->iov_len; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_cred = ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = &nfs_write_direct_ops, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + size_t wsize = NFS_SERVER(inode)->wsize; + unsigned int pgbase; + int result; + ssize_t started = 0; + + do { + struct nfs_write_data *data; + size_t bytes; + + pgbase = user_addr & ~PAGE_MASK; + bytes = min(wsize,count); + + result = -ENOMEM; + data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); + if (unlikely(!data)) + break; + + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + data->npages, 0, 0, data->pagevec, NULL); + up_read(¤t->mm->mmap_sem); + if (result < 0) { + nfs_writedata_free(data); + break; + } + if ((unsigned)result < data->npages) { + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_writedata_free(data); + break; + } + bytes -= pgbase; + data->npages = result; + } + + get_dreq(dreq); + + list_move_tail(&data->pages, &dreq->rewrite_list); + + data->req = (struct nfs_page *) dreq; + data->inode = inode; + data->cred = msg.rpc_cred; + data->args.fh = NFS_FH(inode); + data->args.context = ctx; + data->args.offset = pos; + data->args.pgbase = pgbase; + data->args.pages = data->pagevec; + data->args.count = bytes; + data->args.stable = sync; + data->res.fattr = &data->fattr; + data->res.count = bytes; + data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); + + task_setup_data.task = &data->task; + task_setup_data.callback_data = data; + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + NFS_PROTO(inode)->write_setup(data, &msg); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + break; + rpc_put_task(task); + + dprintk("NFS: %5u initiated direct write call " + "(req %s/%Ld, %zu bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + bytes, + (unsigned long long)data->args.offset); + + started += bytes; + user_addr += bytes; + pos += bytes; + + /* FIXME: Remove this useless math from the final patch */ + pgbase += bytes; + pgbase &= ~PAGE_MASK; + BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); + + count -= bytes; + } while (count != 0); + + if (started) + return started; + return result < 0 ? (ssize_t) result : -EFAULT; +} + +static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, + const struct iovec *iov, + unsigned long nr_segs, + loff_t pos, int sync) +{ + ssize_t result = 0; + size_t requested_bytes = 0; + unsigned long seg; + + get_dreq(dreq); + + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *vec = &iov[seg]; + result = nfs_direct_write_schedule_segment(dreq, vec, + pos, sync); + if (result < 0) + break; + requested_bytes += result; + if ((size_t)result < vec->iov_len) + break; + pos += vec->iov_len; + } + + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, dreq->inode); + + if (requested_bytes != 0) + return 0; + + if (result < 0) + return result; + return -EIO; +} + +static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos, + size_t count) +{ + ssize_t result = 0; + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct nfs_direct_req *dreq; + size_t wsize = NFS_SERVER(inode)->wsize; + int sync = NFS_UNSTABLE; + + dreq = nfs_direct_req_alloc(); + if (!dreq) + return -ENOMEM; + nfs_alloc_commit_data(dreq); + + if (dreq->commit_data == NULL || count < wsize) + sync = NFS_FILE_SYNC; + + dreq->inode = inode; + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; + + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); + if (!result) + result = nfs_direct_wait(dreq); + nfs_direct_req_release(dreq); + + return result; +} + +/** + * nfs_file_direct_read - file direct read operation for NFS files + * @iocb: target I/O control block + * @iov: vector of user buffers into which to read data + * @nr_segs: size of iov vector + * @pos: byte offset in file where reading starts + * + * We use this function for direct reads instead of calling + * generic_file_aio_read() in order to avoid gfar's check to see if + * the request starts before the end of the file. For that check + * to work, we must generate a GETATTR before each direct read, and + * even then there is a window between the GETATTR and the subsequent + * READ where the file size could change. Our preference is simply + * to do all reads the application wants, and the server will take + * care of managing the end of file boundary. + * + * This function also eliminates unnecessarily updating the file's + * atime locally, as the NFS server sets the file's atime, and this + * client must read the updated atime from the server back into its + * cache. + */ +ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + ssize_t retval = -EINVAL; + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + size_t count; + + count = iov_length(iov, nr_segs); + nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); + + dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + count, (long long) pos); + + retval = 0; + if (!count) + goto out; + + retval = nfs_sync_mapping(mapping); + if (retval) + goto out; + + retval = nfs_direct_read(iocb, iov, nr_segs, pos); + if (retval > 0) + iocb->ki_pos = pos + retval; + +out: + return retval; +} + +/** + * nfs_file_direct_write - file direct write operation for NFS files + * @iocb: target I/O control block + * @iov: vector of user buffers from which to write data + * @nr_segs: size of iov vector + * @pos: byte offset in file where writing starts + * + * We use this function for direct writes instead of calling + * generic_file_aio_write() in order to avoid taking the inode + * semaphore and updating the i_size. The NFS server will set + * the new i_size and this client must read the updated size + * back into its cache. We let the server do generic write + * parameter checking and report problems. + * + * We eliminate local atime updates, see direct read above. + * + * We avoid unnecessary page cache invalidations for normal cached + * readers of this file. + * + * Note that O_APPEND is not supported for NFS direct writes, as there + * is no atomic O_APPEND write facility in the NFS protocol. + */ +ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + ssize_t retval = -EINVAL; + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + size_t count; + + count = iov_length(iov, nr_segs); + nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); + + dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + count, (long long) pos); + + retval = generic_write_checks(file, &pos, &count, 0); + if (retval) + goto out; + + retval = -EINVAL; + if ((ssize_t) count < 0) + goto out; + retval = 0; + if (!count) + goto out; + + retval = nfs_sync_mapping(mapping); + if (retval) + goto out; + + retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + + if (retval > 0) + iocb->ki_pos = pos + retval; + +out: + return retval; +} + +/** + * nfs_init_directcache - create a slab cache for nfs_direct_req structures + * + */ +int __init nfs_init_directcache(void) +{ + nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", + sizeof(struct nfs_direct_req), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL); + if (nfs_direct_cachep == NULL) + return -ENOMEM; + + return 0; +} + +/** + * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures + * + */ +void nfs_destroy_directcache(void) +{ + kmem_cache_destroy(nfs_direct_cachep); +} diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c new file mode 100644 index 00000000000..3f0cd4dfdda --- /dev/null +++ b/fs/nfs/dns_resolve.c @@ -0,0 +1,347 @@ +/* + * linux/fs/nfs/dns_resolve.c + * + * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com> + * + * Resolves DNS hostnames into valid ip addresses + */ + +#include <linux/hash.h> +#include <linux/string.h> +#include <linux/kmod.h> +#include <linux/module.h> +#include <linux/socket.h> +#include <linux/seq_file.h> +#include <linux/inet.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/cache.h> +#include <linux/sunrpc/svcauth.h> + +#include "dns_resolve.h" +#include "cache_lib.h" + +#define NFS_DNS_HASHBITS 4 +#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) + +static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE]; + +struct nfs_dns_ent { + struct cache_head h; + + char *hostname; + size_t namelen; + + struct sockaddr_storage addr; + size_t addrlen; +}; + + +static void nfs_dns_ent_update(struct cache_head *cnew, + struct cache_head *ckey) +{ + struct nfs_dns_ent *new; + struct nfs_dns_ent *key; + + new = container_of(cnew, struct nfs_dns_ent, h); + key = container_of(ckey, struct nfs_dns_ent, h); + + memcpy(&new->addr, &key->addr, key->addrlen); + new->addrlen = key->addrlen; +} + +static void nfs_dns_ent_init(struct cache_head *cnew, + struct cache_head *ckey) +{ + struct nfs_dns_ent *new; + struct nfs_dns_ent *key; + + new = container_of(cnew, struct nfs_dns_ent, h); + key = container_of(ckey, struct nfs_dns_ent, h); + + kfree(new->hostname); + new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL); + if (new->hostname) { + new->namelen = key->namelen; + nfs_dns_ent_update(cnew, ckey); + } else { + new->namelen = 0; + new->addrlen = 0; + } +} + +static void nfs_dns_ent_put(struct kref *ref) +{ + struct nfs_dns_ent *item; + + item = container_of(ref, struct nfs_dns_ent, h.ref); + kfree(item->hostname); + kfree(item); +} + +static struct cache_head *nfs_dns_ent_alloc(void) +{ + struct nfs_dns_ent *item = kmalloc(sizeof(*item), GFP_KERNEL); + + if (item != NULL) { + item->hostname = NULL; + item->namelen = 0; + item->addrlen = 0; + return &item->h; + } + return NULL; +}; + +static unsigned int nfs_dns_hash(const struct nfs_dns_ent *key) +{ + return hash_str(key->hostname, NFS_DNS_HASHBITS); +} + +static void nfs_dns_request(struct cache_detail *cd, + struct cache_head *ch, + char **bpp, int *blen) +{ + struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h); + + qword_add(bpp, blen, key->hostname); + (*bpp)[-1] = '\n'; +} + +static int nfs_dns_upcall(struct cache_detail *cd, + struct cache_head *ch) +{ + struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h); + int ret; + + ret = nfs_cache_upcall(cd, key->hostname); + if (ret) + ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request); + return ret; +} + +static int nfs_dns_match(struct cache_head *ca, + struct cache_head *cb) +{ + struct nfs_dns_ent *a; + struct nfs_dns_ent *b; + + a = container_of(ca, struct nfs_dns_ent, h); + b = container_of(cb, struct nfs_dns_ent, h); + + if (a->namelen == 0 || a->namelen != b->namelen) + return 0; + return memcmp(a->hostname, b->hostname, a->namelen) == 0; +} + +static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, + struct cache_head *h) +{ + struct nfs_dns_ent *item; + long ttl; + + if (h == NULL) { + seq_puts(m, "# ip address hostname ttl\n"); + return 0; + } + item = container_of(h, struct nfs_dns_ent, h); + ttl = (long)item->h.expiry_time - (long)get_seconds(); + if (ttl < 0) + ttl = 0; + + if (!test_bit(CACHE_NEGATIVE, &h->flags)) { + char buf[INET6_ADDRSTRLEN+IPV6_SCOPE_ID_LEN+1]; + + rpc_ntop((struct sockaddr *)&item->addr, buf, sizeof(buf)); + seq_printf(m, "%15s ", buf); + } else + seq_puts(m, "<none> "); + seq_printf(m, "%15s %ld\n", item->hostname, ttl); + return 0; +} + +static struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd, + struct nfs_dns_ent *key) +{ + struct cache_head *ch; + + ch = sunrpc_cache_lookup(cd, + &key->h, + nfs_dns_hash(key)); + if (!ch) + return NULL; + return container_of(ch, struct nfs_dns_ent, h); +} + +static struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd, + struct nfs_dns_ent *new, + struct nfs_dns_ent *key) +{ + struct cache_head *ch; + + ch = sunrpc_cache_update(cd, + &new->h, &key->h, + nfs_dns_hash(key)); + if (!ch) + return NULL; + return container_of(ch, struct nfs_dns_ent, h); +} + +static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) +{ + char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; + struct nfs_dns_ent key, *item; + unsigned long ttl; + ssize_t len; + int ret = -EINVAL; + + if (buf[buflen-1] != '\n') + goto out; + buf[buflen-1] = '\0'; + + len = qword_get(&buf, buf1, sizeof(buf1)); + if (len <= 0) + goto out; + key.addrlen = rpc_pton(buf1, len, + (struct sockaddr *)&key.addr, + sizeof(key.addr)); + + len = qword_get(&buf, buf1, sizeof(buf1)); + if (len <= 0) + goto out; + + key.hostname = buf1; + key.namelen = len; + memset(&key.h, 0, sizeof(key.h)); + + ttl = get_expiry(&buf); + if (ttl == 0) + goto out; + key.h.expiry_time = ttl + get_seconds(); + + ret = -ENOMEM; + item = nfs_dns_lookup(cd, &key); + if (item == NULL) + goto out; + + if (key.addrlen == 0) + set_bit(CACHE_NEGATIVE, &key.h.flags); + + item = nfs_dns_update(cd, &key, item); + if (item == NULL) + goto out; + + ret = 0; + cache_put(&item->h, cd); +out: + return ret; +} + +static struct cache_detail nfs_dns_resolve = { + .owner = THIS_MODULE, + .hash_size = NFS_DNS_HASHTBL_SIZE, + .hash_table = nfs_dns_table, + .name = "dns_resolve", + .cache_put = nfs_dns_ent_put, + .cache_upcall = nfs_dns_upcall, + .cache_parse = nfs_dns_parse, + .cache_show = nfs_dns_show, + .match = nfs_dns_match, + .init = nfs_dns_ent_init, + .update = nfs_dns_ent_update, + .alloc = nfs_dns_ent_alloc, +}; + +static int do_cache_lookup(struct cache_detail *cd, + struct nfs_dns_ent *key, + struct nfs_dns_ent **item, + struct nfs_cache_defer_req *dreq) +{ + int ret = -ENOMEM; + + *item = nfs_dns_lookup(cd, key); + if (*item) { + ret = cache_check(cd, &(*item)->h, &dreq->req); + if (ret) + *item = NULL; + } + return ret; +} + +static int do_cache_lookup_nowait(struct cache_detail *cd, + struct nfs_dns_ent *key, + struct nfs_dns_ent **item) +{ + int ret = -ENOMEM; + + *item = nfs_dns_lookup(cd, key); + if (!*item) + goto out_err; + ret = -ETIMEDOUT; + if (!test_bit(CACHE_VALID, &(*item)->h.flags) + || (*item)->h.expiry_time < get_seconds() + || cd->flush_time > (*item)->h.last_refresh) + goto out_put; + ret = -ENOENT; + if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags)) + goto out_put; + return 0; +out_put: + cache_put(&(*item)->h, cd); +out_err: + *item = NULL; + return ret; +} + +static int do_cache_lookup_wait(struct cache_detail *cd, + struct nfs_dns_ent *key, + struct nfs_dns_ent **item) +{ + struct nfs_cache_defer_req *dreq; + int ret = -ENOMEM; + + dreq = nfs_cache_defer_req_alloc(); + if (!dreq) + goto out; + ret = do_cache_lookup(cd, key, item, dreq); + if (ret == -EAGAIN) { + ret = nfs_cache_wait_for_upcall(dreq); + if (!ret) + ret = do_cache_lookup_nowait(cd, key, item); + } + nfs_cache_defer_req_put(dreq); +out: + return ret; +} + +ssize_t nfs_dns_resolve_name(char *name, size_t namelen, + struct sockaddr *sa, size_t salen) +{ + struct nfs_dns_ent key = { + .hostname = name, + .namelen = namelen, + }; + struct nfs_dns_ent *item = NULL; + ssize_t ret; + + ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item); + if (ret == 0) { + if (salen >= item->addrlen) { + memcpy(sa, &item->addr, item->addrlen); + ret = item->addrlen; + } else + ret = -EOVERFLOW; + cache_put(&item->h, &nfs_dns_resolve); + } else if (ret == -ENOENT) + ret = -ESRCH; + return ret; +} + +int nfs_dns_resolver_init(void) +{ + return nfs_cache_register(&nfs_dns_resolve); +} + +void nfs_dns_resolver_destroy(void) +{ + nfs_cache_unregister(&nfs_dns_resolve); +} + diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h new file mode 100644 index 00000000000..a3f0938babf --- /dev/null +++ b/fs/nfs/dns_resolve.h @@ -0,0 +1,14 @@ +/* + * Resolve DNS hostnames into valid ip addresses + */ +#ifndef __LINUX_FS_NFS_DNS_RESOLVE_H +#define __LINUX_FS_NFS_DNS_RESOLVE_H + +#define NFS_DNS_HOSTNAME_MAXLEN (128) + +extern int nfs_dns_resolver_init(void); +extern void nfs_dns_resolver_destroy(void); +extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, + struct sockaddr *sa, size_t salen); + +#endif diff --git a/fs/nfs/file.c b/fs/nfs/file.c new file mode 100644 index 00000000000..bdd2142c1b3 --- /dev/null +++ b/fs/nfs/file.c @@ -0,0 +1,839 @@ +/* + * linux/fs/nfs/file.c + * + * Copyright (C) 1992 Rick Sladkey + * + * Changes Copyright (C) 1994 by Florian La Roche + * - Do not copy data too often around in the kernel. + * - In nfs_file_read the return value of kmalloc wasn't checked. + * - Put in a better version of read look-ahead buffering. Original idea + * and implementation by Wai S Kok elekokws@ee.nus.sg. + * + * Expire cache on write to a file by Wai S Kok (Oct 1994). + * + * Total rewrite of read side for new NFS buffer cache.. Linus. + * + * nfs regular file handling functions + */ + +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/aio.h> + +#include <asm/uaccess.h> +#include <asm/system.h> + +#include "delegation.h" +#include "internal.h" +#include "iostat.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_FILE + +static int nfs_file_open(struct inode *, struct file *); +static int nfs_file_release(struct inode *, struct file *); +static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); +static int nfs_file_mmap(struct file *, struct vm_area_struct *); +static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t count, unsigned int flags); +static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags); +static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static int nfs_file_flush(struct file *, fl_owner_t id); +static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync); +static int nfs_check_flags(int flags); +static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); +static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); +static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); + +static const struct vm_operations_struct nfs_file_vm_ops; + +const struct file_operations nfs_file_operations = { + .llseek = nfs_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = nfs_file_read, + .aio_write = nfs_file_write, + .mmap = nfs_file_mmap, + .open = nfs_file_open, + .flush = nfs_file_flush, + .release = nfs_file_release, + .fsync = nfs_file_fsync, + .lock = nfs_lock, + .flock = nfs_flock, + .splice_read = nfs_file_splice_read, + .splice_write = nfs_file_splice_write, + .check_flags = nfs_check_flags, + .setlease = nfs_setlease, +}; + +const struct inode_operations nfs_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#ifdef CONFIG_NFS_V3 +const struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_v3 */ + +/* Hack for future NFS swap support */ +#ifndef IS_SWAPFILE +# define IS_SWAPFILE(inode) (0) +#endif + +static int nfs_check_flags(int flags) +{ + if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT)) + return -EINVAL; + + return 0; +} + +/* + * Open file + */ +static int +nfs_file_open(struct inode *inode, struct file *filp) +{ + int res; + + dprintk("NFS: open file(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); + + res = nfs_check_flags(filp->f_flags); + if (res) + return res; + + nfs_inc_stats(inode, NFSIOS_VFSOPEN); + res = nfs_open(inode, filp); + return res; +} + +static int +nfs_file_release(struct inode *inode, struct file *filp) +{ + struct dentry *dentry = filp->f_path.dentry; + + dprintk("NFS: release(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + + nfs_inc_stats(inode, NFSIOS_VFSRELEASE); + return nfs_release(inode, filp); +} + +/** + * nfs_revalidate_size - Revalidate the file size + * @inode - pointer to inode struct + * @file - pointer to struct file + * + * Revalidates the file length. This is basically a wrapper around + * nfs_revalidate_inode() that takes into account the fact that we may + * have cached writes (in which case we don't care about the server's + * idea of what the file length is), or O_DIRECT (in which case we + * shouldn't trust the cache). + */ +static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); + + if (server->flags & NFS_MOUNT_NOAC) + goto force_reval; + if (filp->f_flags & O_DIRECT) + goto force_reval; + if (nfsi->npages != 0) + return 0; + if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + return 0; +force_reval: + return __nfs_revalidate_inode(server, inode); +} + +static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) +{ + loff_t loff; + + dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, + offset, origin); + + /* origin == SEEK_END => we must revalidate the cached file length */ + if (origin == SEEK_END) { + struct inode *inode = filp->f_mapping->host; + + int retval = nfs_revalidate_file_size(inode, filp); + if (retval < 0) + return (loff_t)retval; + + spin_lock(&inode->i_lock); + loff = generic_file_llseek_unlocked(filp, offset, origin); + spin_unlock(&inode->i_lock); + } else + loff = generic_file_llseek_unlocked(filp, offset, origin); + return loff; +} + +/* + * Helper for nfs_file_flush() and nfs_file_fsync() + * + * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to + * disk, but it retrieves and clears ctx->error after synching, despite + * the two being set at the same time in nfs_context_set_write_error(). + * This is because the former is used to notify the _next_ call to + * nfs_file_write() that a write error occured, and hence cause it to + * fall back to doing a synchronous write. + */ +static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) +{ + int have_error, status; + int ret = 0; + + have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + status = nfs_wb_all(inode); + have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + if (have_error) + ret = xchg(&ctx->error, 0); + if (!ret) + ret = status; + return ret; +} + +/* + * Flush all dirty pages, and check for write errors. + */ +static int +nfs_file_flush(struct file *file, fl_owner_t id) +{ + struct nfs_open_context *ctx = nfs_file_open_context(file); + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + + dprintk("NFS: flush(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); + + /* Flush writes to the server and return any errors */ + return nfs_do_fsync(ctx, inode); +} + +static ssize_t +nfs_file_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct dentry * dentry = iocb->ki_filp->f_path.dentry; + struct inode * inode = dentry->d_inode; + ssize_t result; + size_t count = iov_length(iov, nr_segs); + + if (iocb->ki_filp->f_flags & O_DIRECT) + return nfs_file_direct_read(iocb, iov, nr_segs, pos); + + dprintk("NFS: read(%s/%s, %lu@%lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long) pos); + + result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); + if (!result) + result = generic_file_aio_read(iocb, iov, nr_segs, pos); + return result; +} + +static ssize_t +nfs_file_splice_read(struct file *filp, loff_t *ppos, + struct pipe_inode_info *pipe, size_t count, + unsigned int flags) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + ssize_t res; + + dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long long) *ppos); + + res = nfs_revalidate_mapping(inode, filp->f_mapping); + if (!res) + res = generic_file_splice_read(filp, ppos, pipe, count, flags); + return res; +} + +static int +nfs_file_mmap(struct file * file, struct vm_area_struct * vma) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + int status; + + dprintk("NFS: mmap(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + + /* Note: generic_file_mmap() returns ENOSYS on nommu systems + * so we call that before revalidating the mapping + */ + status = generic_file_mmap(file, vma); + if (!status) { + vma->vm_ops = &nfs_file_vm_ops; + status = nfs_revalidate_mapping(inode, file->f_mapping); + } + return status; +} + +/* + * Flush any dirty pages for this process, and check for write errors. + * The return status from this call provides a reliable indication of + * whether any write errors occurred for this process. + */ +static int +nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct nfs_open_context *ctx = nfs_file_open_context(file); + struct inode *inode = dentry->d_inode; + + dprintk("NFS: fsync file(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); + + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); + return nfs_do_fsync(ctx, inode); +} + +/* + * Decide whether a read/modify/write cycle may be more efficient + * then a modify/write/read cycle when writing to a page in the + * page cache. + * + * The modify/write/read cycle may occur if a page is read before + * being completely filled by the writer. In this situation, the + * page must be completely written to stable storage on the server + * before it can be refilled by reading in the page from the server. + * This can lead to expensive, small, FILE_SYNC mode writes being + * done. + * + * It may be more efficient to read the page first if the file is + * open for reading in addition to writing, the page is not marked + * as Uptodate, it is not dirty or waiting to be committed, + * indicating that it was previously allocated and then modified, + * that there were valid bytes of data in that range of the file, + * and that the new data won't completely replace the old data in + * that range of the file. + */ +static int nfs_want_read_modify_write(struct file *file, struct page *page, + loff_t pos, unsigned len) +{ + unsigned int pglen = nfs_page_length(page); + unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); + unsigned int end = offset + len; + + if ((file->f_mode & FMODE_READ) && /* open for read? */ + !PageUptodate(page) && /* Uptodate? */ + !PagePrivate(page) && /* i/o request already? */ + pglen && /* valid bytes of file? */ + (end < pglen || offset)) /* replace all valid bytes? */ + return 1; + return 0; +} + +/* + * This does the "real" work of the write. We must allocate and lock the + * page to be sent back to the generic routine, which then copies the + * data from user space. + * + * If the writer ends up delaying the write, the writer needs to + * increment the page use counts until he is done with the page. + */ +static int nfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + int ret; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + struct page *page; + int once_thru = 0; + + dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + mapping->host->i_ino, len, (long long) pos); + +start: + /* + * Prevent starvation issues if someone is doing a consistency + * sync-to-disk + */ + ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (ret) + return ret; + + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + ret = nfs_flush_incompatible(file, page); + if (ret) { + unlock_page(page); + page_cache_release(page); + } else if (!once_thru && + nfs_want_read_modify_write(file, page, pos, len)) { + once_thru = 1; + ret = nfs_readpage(file, page); + page_cache_release(page); + if (!ret) + goto start; + } + return ret; +} + +static int nfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + int status; + + dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + mapping->host->i_ino, len, (long long) pos); + + /* + * Zero any uninitialised parts of the page, and then mark the page + * as up to date if it turns out that we're extending the file. + */ + if (!PageUptodate(page)) { + unsigned pglen = nfs_page_length(page); + unsigned end = offset + len; + + if (pglen == 0) { + zero_user_segments(page, 0, offset, + end, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } else if (end >= pglen) { + zero_user_segment(page, end, PAGE_CACHE_SIZE); + if (offset == 0) + SetPageUptodate(page); + } else + zero_user_segment(page, pglen, PAGE_CACHE_SIZE); + } + + status = nfs_updatepage(file, page, offset, copied); + + unlock_page(page); + page_cache_release(page); + + if (status < 0) + return status; + return copied; +} + +/* + * Partially or wholly invalidate a page + * - Release the private state associated with a page if undergoing complete + * page invalidation + * - Called if either PG_private or PG_fscache is set on the page + * - Caller holds page lock + */ +static void nfs_invalidate_page(struct page *page, unsigned long offset) +{ + dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); + + if (offset != 0) + return; + /* Cancel any unstarted writes on this page */ + nfs_wb_page_cancel(page->mapping->host, page); + + nfs_fscache_invalidate_page(page, page->mapping->host); +} + +/* + * Attempt to release the private state associated with a page + * - Called if either PG_private or PG_fscache is set on the page + * - Caller holds page lock + * - Return true (may release page) or false (may not) + */ +static int nfs_release_page(struct page *page, gfp_t gfp) +{ + dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); + + /* Only do I/O if gfp is a superset of GFP_KERNEL */ + if ((gfp & GFP_KERNEL) == GFP_KERNEL) + nfs_wb_page(page->mapping->host, page); + /* If PagePrivate() is set, then the page is not freeable */ + if (PagePrivate(page)) + return 0; + return nfs_fscache_release_page(page, gfp); +} + +/* + * Attempt to clear the private state associated with a page when an error + * occurs that requires the cached contents of an inode to be written back or + * destroyed + * - Called if either PG_private or fscache is set on the page + * - Caller holds page lock + * - Return 0 if successful, -error otherwise + */ +static int nfs_launder_page(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct nfs_inode *nfsi = NFS_I(inode); + + dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", + inode->i_ino, (long long)page_offset(page)); + + nfs_fscache_wait_on_page_write(nfsi, page); + return nfs_wb_page(inode, page); +} + +const struct address_space_operations nfs_file_aops = { + .readpage = nfs_readpage, + .readpages = nfs_readpages, + .set_page_dirty = __set_page_dirty_nobuffers, + .writepage = nfs_writepage, + .writepages = nfs_writepages, + .write_begin = nfs_write_begin, + .write_end = nfs_write_end, + .invalidatepage = nfs_invalidate_page, + .releasepage = nfs_release_page, + .direct_IO = nfs_direct_IO, + .migratepage = nfs_migrate_page, + .launder_page = nfs_launder_page, + .error_remove_page = generic_error_remove_page, +}; + +/* + * Notification that a PTE pointing to an NFS page is about to be made + * writable, implying that someone is about to modify the page through a + * shared-writable mapping + */ +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *page = vmf->page; + struct file *filp = vma->vm_file; + struct dentry *dentry = filp->f_path.dentry; + unsigned pagelen; + int ret = -EINVAL; + struct address_space *mapping; + + dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + filp->f_mapping->host->i_ino, + (long long)page_offset(page)); + + /* make sure the cache has finished storing the page */ + nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); + + lock_page(page); + mapping = page->mapping; + if (mapping != dentry->d_inode->i_mapping) + goto out_unlock; + + ret = 0; + pagelen = nfs_page_length(page); + if (pagelen == 0) + goto out_unlock; + + ret = nfs_flush_incompatible(filp, page); + if (ret != 0) + goto out_unlock; + + ret = nfs_updatepage(filp, page, 0, pagelen); +out_unlock: + if (!ret) + return VM_FAULT_LOCKED; + unlock_page(page); + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct nfs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = nfs_vm_page_mkwrite, +}; + +static int nfs_need_sync_write(struct file *filp, struct inode *inode) +{ + struct nfs_open_context *ctx; + + if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC)) + return 1; + ctx = nfs_file_open_context(filp); + if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) + return 1; + return 0; +} + +static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct dentry * dentry = iocb->ki_filp->f_path.dentry; + struct inode * inode = dentry->d_inode; + ssize_t result; + size_t count = iov_length(iov, nr_segs); + + if (iocb->ki_filp->f_flags & O_DIRECT) + return nfs_file_direct_write(iocb, iov, nr_segs, pos); + + dprintk("NFS: write(%s/%s, %lu@%Ld)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (long long) pos); + + result = -EBUSY; + if (IS_SWAPFILE(inode)) + goto out_swapfile; + /* + * O_APPEND implies that we must revalidate the file length. + */ + if (iocb->ki_filp->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (result) + goto out; + } + + result = count; + if (!count) + goto out; + + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); + result = generic_file_aio_write(iocb, iov, nr_segs, pos); + /* Return error values for O_DSYNC and IS_SYNC() */ + if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { + int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); + if (err < 0) + result = err; + } +out: + return result; + +out_swapfile: + printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); + goto out; +} + +static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + ssize_t ret; + + dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long long) *ppos); + + /* + * The combination of splice and an O_APPEND destination is disallowed. + */ + + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); + + ret = generic_file_splice_write(pipe, filp, ppos, count, flags); + if (ret >= 0 && nfs_need_sync_write(filp, inode)) { + int err = nfs_do_fsync(nfs_file_open_context(filp), inode); + if (err < 0) + ret = err; + } + return ret; +} + +static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status = 0; + + /* Try local locking first */ + posix_test_lock(filp, fl); + if (fl->fl_type != F_UNLCK) { + /* found a conflict */ + goto out; + } + + if (nfs_have_delegation(inode, FMODE_READ)) + goto out_noconflict; + + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + goto out_noconflict; + + status = NFS_PROTO(inode)->lock(filp, cmd, fl); +out: + return status; +out_noconflict: + fl->fl_type = F_UNLCK; + goto out; +} + +static int do_vfs_lock(struct file *file, struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_file_wait(file, fl); + break; + case FL_FLOCK: + res = flock_lock_file_wait(file, fl); + break; + default: + BUG(); + } + if (res < 0) + dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager" + " - error %d!\n", + __func__, res); + return res; +} + +static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status; + + /* + * Flush all pending writes before doing anything + * with locks.. + */ + nfs_sync_mapping(filp->f_mapping); + + /* NOTE: special case + * If we're signalled while cleaning up locks on process exit, we + * still need to complete the unlock. + */ + /* Use local locking if mounted with "-onolock" */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + else + status = do_vfs_lock(filp, fl); + return status; +} + +static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status; + + /* + * Flush all pending writes before doing anything + * with locks.. + */ + status = nfs_sync_mapping(filp->f_mapping); + if (status != 0) + goto out; + + /* Use local locking if mounted with "-onolock" */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + else + status = do_vfs_lock(filp, fl); + if (status < 0) + goto out; + /* + * Make sure we clear the cache whenever we try to get the lock. + * This makes locking act as a cache coherency point. + */ + nfs_sync_mapping(filp->f_mapping); + if (!nfs_have_delegation(inode, FMODE_READ)) + nfs_zap_caches(inode); +out: + return status; +} + +/* + * Lock a (portion of) a file + */ +static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int ret = -ENOLCK; + + dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, + fl->fl_type, fl->fl_flags, + (long long)fl->fl_start, (long long)fl->fl_end); + + nfs_inc_stats(inode, NFSIOS_VFSLOCK); + + /* No mandatory locks over NFS */ + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) + goto out_err; + + if (NFS_PROTO(inode)->lock_check_bounds != NULL) { + ret = NFS_PROTO(inode)->lock_check_bounds(fl); + if (ret < 0) + goto out_err; + } + + if (IS_GETLK(cmd)) + ret = do_getlk(filp, cmd, fl); + else if (fl->fl_type == F_UNLCK) + ret = do_unlk(filp, cmd, fl); + else + ret = do_setlk(filp, cmd, fl); +out_err: + return ret; +} + +/* + * Lock a (portion of) a file + */ +static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) +{ + dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, + fl->fl_type, fl->fl_flags); + + if (!(fl->fl_flags & FL_FLOCK)) + return -ENOLCK; + + /* We're simulating flock() locks using posix locks on the server */ + fl->fl_owner = (fl_owner_t)filp; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + + if (fl->fl_type == F_UNLCK) + return do_unlk(filp, cmd, fl); + return do_setlk(filp, cmd, fl); +} + +/* + * There is no protocol support for leases, so we have no way to implement + * them correctly in the face of opens by other clients. + */ +static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) +{ + dprintk("NFS: setlease(%s/%s, arg=%ld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, arg); + + return -EINVAL; +} diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c new file mode 100644 index 00000000000..5b1006480bc --- /dev/null +++ b/fs/nfs/fscache-index.c @@ -0,0 +1,337 @@ +/* NFS FS-Cache index structure definition + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_fs_sb.h> +#include <linux/in6.h> + +#include "internal.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_FSCACHE + +/* + * Define the NFS filesystem for FS-Cache. Upon registration FS-Cache sticks + * the cookie for the top-level index object for NFS into here. The top-level + * index can than have other cache objects inserted into it. + */ +struct fscache_netfs nfs_fscache_netfs = { + .name = "nfs", + .version = 0, +}; + +/* + * Register NFS for caching + */ +int nfs_fscache_register(void) +{ + return fscache_register_netfs(&nfs_fscache_netfs); +} + +/* + * Unregister NFS for caching + */ +void nfs_fscache_unregister(void) +{ + fscache_unregister_netfs(&nfs_fscache_netfs); +} + +/* + * Layout of the key for an NFS server cache object. + */ +struct nfs_server_key { + uint16_t nfsversion; /* NFS protocol version */ + uint16_t family; /* address family */ + uint16_t port; /* IP port */ + union { + struct in_addr ipv4_addr; /* IPv4 address */ + struct in6_addr ipv6_addr; /* IPv6 address */ + } addr[0]; +}; + +/* + * Generate a key to describe a server in the main NFS index + * - We return the length of the key, or 0 if we can't generate one + */ +static uint16_t nfs_server_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_client *clp = cookie_netfs_data; + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr; + const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr; + struct nfs_server_key *key = buffer; + uint16_t len = sizeof(struct nfs_server_key); + + key->nfsversion = clp->rpc_ops->version; + key->family = clp->cl_addr.ss_family; + + memset(key, 0, len); + + switch (clp->cl_addr.ss_family) { + case AF_INET: + key->port = sin->sin_port; + key->addr[0].ipv4_addr = sin->sin_addr; + len += sizeof(key->addr[0].ipv4_addr); + break; + + case AF_INET6: + key->port = sin6->sin6_port; + key->addr[0].ipv6_addr = sin6->sin6_addr; + len += sizeof(key->addr[0].ipv6_addr); + break; + + default: + printk(KERN_WARNING "NFS: Unknown network family '%d'\n", + clp->cl_addr.ss_family); + len = 0; + break; + } + + return len; +} + +/* + * Define the server object for FS-Cache. This is used to describe a server + * object to fscache_acquire_cookie(). It is keyed by the NFS protocol and + * server address parameters. + */ +const struct fscache_cookie_def nfs_fscache_server_index_def = { + .name = "NFS.server", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = nfs_server_get_key, +}; + +/* + * Generate a key to describe a superblock key in the main NFS index + */ +static uint16_t nfs_super_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_fscache_key *key; + const struct nfs_server *nfss = cookie_netfs_data; + uint16_t len; + + key = nfss->fscache_key; + len = sizeof(key->key) + key->key.uniq_len; + if (len > bufmax) { + len = 0; + } else { + memcpy(buffer, &key->key, sizeof(key->key)); + memcpy(buffer + sizeof(key->key), + key->key.uniquifier, key->key.uniq_len); + } + + return len; +} + +/* + * Define the superblock object for FS-Cache. This is used to describe a + * superblock object to fscache_acquire_cookie(). It is keyed by all the NFS + * parameters that might cause a separate superblock. + */ +const struct fscache_cookie_def nfs_fscache_super_index_def = { + .name = "NFS.super", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = nfs_super_get_key, +}; + +/* + * Definition of the auxiliary data attached to NFS inode storage objects + * within the cache. + * + * The contents of this struct are recorded in the on-disk local cache in the + * auxiliary data attached to the data storage object backing an inode. This + * permits coherency to be managed when a new inode binds to an already extant + * cache object. + */ +struct nfs_fscache_inode_auxdata { + struct timespec mtime; + struct timespec ctime; + loff_t size; + u64 change_attr; +}; + +/* + * Generate a key to describe an NFS inode in an NFS server's index + */ +static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_inode *nfsi = cookie_netfs_data; + uint16_t nsize; + + /* use the inode's NFS filehandle as the key */ + nsize = nfsi->fh.size; + memcpy(buffer, nfsi->fh.data, nsize); + return nsize; +} + +/* + * Get certain file attributes from the netfs data + * - This function can be absent for an index + * - Not permitted to return an error + * - The netfs data from the cookie being used as the source is presented + */ +static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data, + uint64_t *size) +{ + const struct nfs_inode *nfsi = cookie_netfs_data; + + *size = nfsi->vfs_inode.i_size; +} + +/* + * Get the auxiliary data from netfs data + * - This function can be absent if the index carries no state data + * - Should store the auxiliary data in the buffer + * - Should return the amount of amount stored + * - Not permitted to return an error + * - The netfs data from the cookie being used as the source is presented + */ +static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + struct nfs_fscache_inode_auxdata auxdata; + const struct nfs_inode *nfsi = cookie_netfs_data; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.size = nfsi->vfs_inode.i_size; + auxdata.mtime = nfsi->vfs_inode.i_mtime; + auxdata.ctime = nfsi->vfs_inode.i_ctime; + + if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) + auxdata.change_attr = nfsi->change_attr; + + if (bufmax > sizeof(auxdata)) + bufmax = sizeof(auxdata); + + memcpy(buffer, &auxdata, bufmax); + return bufmax; +} + +/* + * Consult the netfs about the state of an object + * - This function can be absent if the index carries no state data + * - The netfs data from the cookie being used as the target is + * presented, as is the auxiliary data + */ +static +enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen) +{ + struct nfs_fscache_inode_auxdata auxdata; + struct nfs_inode *nfsi = cookie_netfs_data; + + if (datalen != sizeof(auxdata)) + return FSCACHE_CHECKAUX_OBSOLETE; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.size = nfsi->vfs_inode.i_size; + auxdata.mtime = nfsi->vfs_inode.i_mtime; + auxdata.ctime = nfsi->vfs_inode.i_ctime; + + if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) + auxdata.change_attr = nfsi->change_attr; + + if (memcmp(data, &auxdata, datalen) != 0) + return FSCACHE_CHECKAUX_OBSOLETE; + + return FSCACHE_CHECKAUX_OKAY; +} + +/* + * Indication from FS-Cache that the cookie is no longer cached + * - This function is called when the backing store currently caching a cookie + * is removed + * - The netfs should use this to clean up any markers indicating cached pages + * - This is mandatory for any object that may have data + */ +static void nfs_fscache_inode_now_uncached(void *cookie_netfs_data) +{ + struct nfs_inode *nfsi = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + pagevec_init(&pvec, 0); + first = 0; + + dprintk("NFS: nfs_inode_now_uncached: nfs_inode 0x%p\n", nfsi); + + for (;;) { + /* grab a bunch of pages to unmark */ + nr_pages = pagevec_lookup(&pvec, + nfsi->vfs_inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + ClearPageFsCache(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } +} + +/* + * Get an extra reference on a read context. + * - This function can be absent if the completion function doesn't require a + * context. + * - The read context is passed back to NFS in the event that a data read on the + * cache fails with EIO - in which case the server must be contacted to + * retrieve the data, which requires the read context for security. + */ +static void nfs_fh_get_context(void *cookie_netfs_data, void *context) +{ + get_nfs_open_context(context); +} + +/* + * Release an extra reference on a read context. + * - This function can be absent if the completion function doesn't require a + * context. + */ +static void nfs_fh_put_context(void *cookie_netfs_data, void *context) +{ + if (context) + put_nfs_open_context(context); +} + +/* + * Define the inode object for FS-Cache. This is used to describe an inode + * object to fscache_acquire_cookie(). It is keyed by the NFS file handle for + * an inode. + * + * Coherency is managed by comparing the copies of i_size, i_mtime and i_ctime + * held in the cache auxiliary data for the data storage object with those in + * the inode struct in memory. + */ +const struct fscache_cookie_def nfs_fscache_inode_object_def = { + .name = "NFS.fh", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = nfs_fscache_inode_get_key, + .get_attr = nfs_fscache_inode_get_attr, + .get_aux = nfs_fscache_inode_get_aux, + .check_aux = nfs_fscache_inode_check_aux, + .now_uncached = nfs_fscache_inode_now_uncached, + .get_context = nfs_fh_get_context, + .put_context = nfs_fh_put_context, +}; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c new file mode 100644 index 00000000000..237874f1af2 --- /dev/null +++ b/fs/nfs/fscache.c @@ -0,0 +1,535 @@ +/* NFS filesystem cache interface + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_fs_sb.h> +#include <linux/in6.h> +#include <linux/seq_file.h> + +#include "internal.h" +#include "iostat.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_FSCACHE + +static struct rb_root nfs_fscache_keys = RB_ROOT; +static DEFINE_SPINLOCK(nfs_fscache_keys_lock); + +/* + * Get the per-client index cookie for an NFS client if the appropriate mount + * flag was set + * - We always try and get an index cookie for the client, but get filehandle + * cookies on a per-superblock basis, depending on the mount flags + */ +void nfs_fscache_get_client_cookie(struct nfs_client *clp) +{ + /* create a cache index for looking up filehandles */ + clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index, + &nfs_fscache_server_index_def, + clp); + dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n", + clp, clp->fscache); +} + +/* + * Dispose of a per-client cookie + */ +void nfs_fscache_release_client_cookie(struct nfs_client *clp) +{ + dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n", + clp, clp->fscache); + + fscache_relinquish_cookie(clp->fscache, 0); + clp->fscache = NULL; +} + +/* + * Get the cache cookie for an NFS superblock. We have to handle + * uniquification here because the cache doesn't do it for us. + * + * The default uniquifier is just an empty string, but it may be overridden + * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent + * superblock across an automount point of some nature. + */ +void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, + struct nfs_clone_mount *mntdata) +{ + struct nfs_fscache_key *key, *xkey; + struct nfs_server *nfss = NFS_SB(sb); + struct rb_node **p, *parent; + int diff, ulen; + + if (uniq) { + ulen = strlen(uniq); + } else if (mntdata) { + struct nfs_server *mnt_s = NFS_SB(mntdata->sb); + if (mnt_s->fscache_key) { + uniq = mnt_s->fscache_key->key.uniquifier; + ulen = mnt_s->fscache_key->key.uniq_len; + } + } + + if (!uniq) { + uniq = ""; + ulen = 1; + } + + key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL); + if (!key) + return; + + key->nfs_client = nfss->nfs_client; + key->key.super.s_flags = sb->s_flags & NFS_MS_MASK; + key->key.nfs_server.flags = nfss->flags; + key->key.nfs_server.rsize = nfss->rsize; + key->key.nfs_server.wsize = nfss->wsize; + key->key.nfs_server.acregmin = nfss->acregmin; + key->key.nfs_server.acregmax = nfss->acregmax; + key->key.nfs_server.acdirmin = nfss->acdirmin; + key->key.nfs_server.acdirmax = nfss->acdirmax; + key->key.nfs_server.fsid = nfss->fsid; + key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor; + + key->key.uniq_len = ulen; + memcpy(key->key.uniquifier, uniq, ulen); + + spin_lock(&nfs_fscache_keys_lock); + p = &nfs_fscache_keys.rb_node; + parent = NULL; + while (*p) { + parent = *p; + xkey = rb_entry(parent, struct nfs_fscache_key, node); + + if (key->nfs_client < xkey->nfs_client) + goto go_left; + if (key->nfs_client > xkey->nfs_client) + goto go_right; + + diff = memcmp(&key->key, &xkey->key, sizeof(key->key)); + if (diff < 0) + goto go_left; + if (diff > 0) + goto go_right; + + if (key->key.uniq_len == 0) + goto non_unique; + diff = memcmp(key->key.uniquifier, + xkey->key.uniquifier, + key->key.uniq_len); + if (diff < 0) + goto go_left; + if (diff > 0) + goto go_right; + goto non_unique; + + go_left: + p = &(*p)->rb_left; + continue; + go_right: + p = &(*p)->rb_right; + } + + rb_link_node(&key->node, parent, p); + rb_insert_color(&key->node, &nfs_fscache_keys); + spin_unlock(&nfs_fscache_keys_lock); + nfss->fscache_key = key; + + /* create a cache index for looking up filehandles */ + nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, + &nfs_fscache_super_index_def, + nfss); + dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", + nfss, nfss->fscache); + return; + +non_unique: + spin_unlock(&nfs_fscache_keys_lock); + kfree(key); + nfss->fscache_key = NULL; + nfss->fscache = NULL; + printk(KERN_WARNING "NFS:" + " Cache request denied due to non-unique superblock keys\n"); +} + +/* + * release a per-superblock cookie + */ +void nfs_fscache_release_super_cookie(struct super_block *sb) +{ + struct nfs_server *nfss = NFS_SB(sb); + + dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n", + nfss, nfss->fscache); + + fscache_relinquish_cookie(nfss->fscache, 0); + nfss->fscache = NULL; + + if (nfss->fscache_key) { + spin_lock(&nfs_fscache_keys_lock); + rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys); + spin_unlock(&nfs_fscache_keys_lock); + kfree(nfss->fscache_key); + nfss->fscache_key = NULL; + } +} + +/* + * Initialise the per-inode cache cookie pointer for an NFS inode. + */ +void nfs_fscache_init_inode_cookie(struct inode *inode) +{ + NFS_I(inode)->fscache = NULL; + if (S_ISREG(inode->i_mode)) + set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); +} + +/* + * Get the per-inode cache cookie for an NFS inode. + */ +static void nfs_fscache_enable_inode_cookie(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfsi->fscache || !NFS_FSCACHE(inode)) + return; + + if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) { + nfsi->fscache = fscache_acquire_cookie( + NFS_SB(sb)->fscache, + &nfs_fscache_inode_object_def, + nfsi); + + dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", + sb, nfsi, nfsi->fscache); + } +} + +/* + * Release a per-inode cookie. + */ +void nfs_fscache_release_inode_cookie(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 0); + nfsi->fscache = NULL; +} + +/* + * Retire a per-inode cookie, destroying the data attached to it. + */ +void nfs_fscache_zap_inode_cookie(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 1); + nfsi->fscache = NULL; +} + +/* + * Turn off the cache with regard to a per-inode cookie if opened for writing, + * invalidating all the pages in the page cache relating to the associated + * inode to clear the per-page caching. + */ +static void nfs_fscache_disable_inode_cookie(struct inode *inode) +{ + clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); + + if (NFS_I(inode)->fscache) { + dfprintk(FSCACHE, + "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); + + /* Need to invalidate any mapped pages that were read in before + * turning off the cache. + */ + if (inode->i_mapping && inode->i_mapping->nrpages) + invalidate_inode_pages2(inode->i_mapping); + + nfs_fscache_zap_inode_cookie(inode); + } +} + +/* + * wait_on_bit() sleep function for uninterruptible waiting + */ +static int nfs_fscache_wait_bit(void *flags) +{ + schedule(); + return 0; +} + +/* + * Lock against someone else trying to also acquire or relinquish a cookie + */ +static inline void nfs_fscache_inode_lock(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags)) + wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK, + nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE); +} + +/* + * Unlock cookie management lock + */ +static inline void nfs_fscache_inode_unlock(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + smp_mb__before_clear_bit(); + clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK); +} + +/* + * Decide if we should enable or disable local caching for this inode. + * - For now, with NFS, only regular files that are open read-only will be able + * to use the cache. + * - May be invoked multiple times in parallel by parallel nfs_open() functions. + */ +void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) +{ + if (NFS_FSCACHE(inode)) { + nfs_fscache_inode_lock(inode); + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) + nfs_fscache_disable_inode_cookie(inode); + else + nfs_fscache_enable_inode_cookie(inode); + nfs_fscache_inode_unlock(inode); + } +} + +/* + * Replace a per-inode cookie due to revalidation detecting a file having + * changed on the server. + */ +void nfs_fscache_reset_inode_cookie(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_server *nfss = NFS_SERVER(inode); + struct fscache_cookie *old = nfsi->fscache; + + nfs_fscache_inode_lock(inode); + if (nfsi->fscache) { + /* retire the current fscache cache and get a new one */ + fscache_relinquish_cookie(nfsi->fscache, 1); + + nfsi->fscache = fscache_acquire_cookie( + nfss->nfs_client->fscache, + &nfs_fscache_inode_object_def, + nfsi); + + dfprintk(FSCACHE, + "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", + nfss, nfsi, old, nfsi->fscache); + } + nfs_fscache_inode_unlock(inode); +} + +/* + * Release the caching state associated with a page, if the page isn't busy + * interacting with the cache. + * - Returns true (can release page) or false (page busy). + */ +int nfs_fscache_release_page(struct page *page, gfp_t gfp) +{ + if (PageFsCache(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + struct fscache_cookie *cookie = nfsi->fscache; + + BUG_ON(!cookie); + dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", + cookie, page, nfsi); + + if (!fscache_maybe_release_page(cookie, page, gfp)) + return 0; + + nfs_add_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + } + + return 1; +} + +/* + * Release the caching state associated with a page if undergoing complete page + * invalidation. + */ +void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct fscache_cookie *cookie = nfsi->fscache; + + BUG_ON(!cookie); + + dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", + cookie, page, nfsi); + + fscache_wait_on_page_write(cookie, page); + + BUG_ON(!PageLocked(page)); + fscache_uncache_page(cookie, page); + nfs_add_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED, 1); +} + +/* + * Handle completion of a page being read from the cache. + * - Called in process (keventd) context. + */ +static void nfs_readpage_from_fscache_complete(struct page *page, + void *context, + int error) +{ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n", + page, context, error); + + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) { + SetPageUptodate(page); + unlock_page(page); + } else { + error = nfs_readpage_async(context, page->mapping->host, page); + if (error) + unlock_page(page); + } +} + +/* + * Retrieve a page from fscache + */ +int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, struct page *page) +{ + int ret; + + dfprintk(FSCACHE, + "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", + NFS_I(inode)->fscache, page, page->index, page->flags, inode); + + ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, + page, + nfs_readpage_from_fscache_complete, + ctx, + GFP_KERNEL); + + switch (ret) { + case 0: /* read BIO submitted (page in fscache) */ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache: BIO submitted\n"); + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1); + return ret; + + case -ENOBUFS: /* inode not in cache */ + case -ENODATA: /* page not in cache */ + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + dfprintk(FSCACHE, + "NFS: readpage_from_fscache %d\n", ret); + return 1; + + default: + dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + } + return ret; +} + +/* + * Retrieve a set of pages from fscache + */ +int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + int ret, npages = *nr_pages; + + dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", + NFS_I(inode)->fscache, npages, inode); + + ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, + mapping, pages, nr_pages, + nfs_readpage_from_fscache_complete, + ctx, + mapping_gfp_mask(mapping)); + if (*nr_pages < npages) + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, + npages); + if (*nr_pages > 0) + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, + *nr_pages); + + switch (ret) { + case 0: /* read submitted to the cache for all pages */ + BUG_ON(!list_empty(pages)); + BUG_ON(*nr_pages != 0); + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: submitted\n"); + + return ret; + + case -ENOBUFS: /* some pages aren't cached and can't be */ + case -ENODATA: /* some pages aren't cached */ + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: no page: %d\n", ret); + return 1; + + default: + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: ret %d\n", ret); + } + + return ret; +} + +/* + * Store a newly fetched page in fscache + * - PG_fscache must be set on the page + */ +void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) +{ + int ret; + + dfprintk(FSCACHE, + "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", + NFS_I(inode)->fscache, page, page->index, page->flags, sync); + + ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL); + dfprintk(FSCACHE, + "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", + page, page->index, page->flags, ret); + + if (ret != 0) { + fscache_uncache_page(NFS_I(inode)->fscache, page); + nfs_add_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + } else { + nfs_add_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1); + } +} diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h new file mode 100644 index 00000000000..b9c572d0679 --- /dev/null +++ b/fs/nfs/fscache.h @@ -0,0 +1,222 @@ +/* NFS filesystem cache interface definitions + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _NFS_FSCACHE_H +#define _NFS_FSCACHE_H + +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include <linux/fscache.h> + +#ifdef CONFIG_NFS_FSCACHE + +/* + * set of NFS FS-Cache objects that form a superblock key + */ +struct nfs_fscache_key { + struct rb_node node; + struct nfs_client *nfs_client; /* the server */ + + /* the elements of the unique key - as used by nfs_compare_super() and + * nfs_compare_mount_options() to distinguish superblocks */ + struct { + struct { + unsigned long s_flags; /* various flags + * (& NFS_MS_MASK) */ + } super; + + struct { + struct nfs_fsid fsid; + int flags; + unsigned int rsize; /* read size */ + unsigned int wsize; /* write size */ + unsigned int acregmin; /* attr cache timeouts */ + unsigned int acregmax; + unsigned int acdirmin; + unsigned int acdirmax; + } nfs_server; + + struct { + rpc_authflavor_t au_flavor; + } rpc_auth; + + /* uniquifier - can be used if nfs_server.flags includes + * NFS_MOUNT_UNSHARED */ + u8 uniq_len; + char uniquifier[0]; + } key; +}; + +/* + * fscache-index.c + */ +extern struct fscache_netfs nfs_fscache_netfs; +extern const struct fscache_cookie_def nfs_fscache_server_index_def; +extern const struct fscache_cookie_def nfs_fscache_super_index_def; +extern const struct fscache_cookie_def nfs_fscache_inode_object_def; + +extern int nfs_fscache_register(void); +extern void nfs_fscache_unregister(void); + +/* + * fscache.c + */ +extern void nfs_fscache_get_client_cookie(struct nfs_client *); +extern void nfs_fscache_release_client_cookie(struct nfs_client *); + +extern void nfs_fscache_get_super_cookie(struct super_block *, + const char *, + struct nfs_clone_mount *); +extern void nfs_fscache_release_super_cookie(struct super_block *); + +extern void nfs_fscache_init_inode_cookie(struct inode *); +extern void nfs_fscache_release_inode_cookie(struct inode *); +extern void nfs_fscache_zap_inode_cookie(struct inode *); +extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *); +extern void nfs_fscache_reset_inode_cookie(struct inode *); + +extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); +extern int nfs_fscache_release_page(struct page *, gfp_t); + +extern int __nfs_readpage_from_fscache(struct nfs_open_context *, + struct inode *, struct page *); +extern int __nfs_readpages_from_fscache(struct nfs_open_context *, + struct inode *, struct address_space *, + struct list_head *, unsigned *); +extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int); + +/* + * wait for a page to complete writing to the cache + */ +static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi, + struct page *page) +{ + if (PageFsCache(page)) + fscache_wait_on_page_write(nfsi->fscache, page); +} + +/* + * release the caching state associated with a page if undergoing complete page + * invalidation + */ +static inline void nfs_fscache_invalidate_page(struct page *page, + struct inode *inode) +{ + if (PageFsCache(page)) + __nfs_fscache_invalidate_page(page, inode); +} + +/* + * Retrieve a page from an inode data storage object. + */ +static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct page *page) +{ + if (NFS_I(inode)->fscache) + return __nfs_readpage_from_fscache(ctx, inode, page); + return -ENOBUFS; +} + +/* + * Retrieve a set of pages from an inode data storage object. + */ +static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + if (NFS_I(inode)->fscache) + return __nfs_readpages_from_fscache(ctx, inode, mapping, pages, + nr_pages); + return -ENOBUFS; +} + +/* + * Store a page newly fetched from the server in an inode data storage object + * in the cache. + */ +static inline void nfs_readpage_to_fscache(struct inode *inode, + struct page *page, + int sync) +{ + if (PageFsCache(page)) + __nfs_readpage_to_fscache(inode, page, sync); +} + +/* + * indicate the client caching state as readable text + */ +static inline const char *nfs_server_fscache_state(struct nfs_server *server) +{ + if (server->fscache && (server->options & NFS_OPTION_FSCACHE)) + return "yes"; + return "no "; +} + + +#else /* CONFIG_NFS_FSCACHE */ +static inline int nfs_fscache_register(void) { return 0; } +static inline void nfs_fscache_unregister(void) {} + +static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} +static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} + +static inline void nfs_fscache_get_super_cookie( + struct super_block *sb, + const char *uniq, + struct nfs_clone_mount *mntdata) +{ +} +static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} + +static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_set_inode_cookie(struct inode *inode, + struct file *filp) {} +static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {} + +static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) +{ + return 1; /* True: may release page */ +} +static inline void nfs_fscache_invalidate_page(struct page *page, + struct inode *inode) {} +static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi, + struct page *page) {} + +static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct page *page) +{ + return -ENOBUFS; +} +static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + return -ENOBUFS; +} +static inline void nfs_readpage_to_fscache(struct inode *inode, + struct page *page, int sync) {} + +static inline const char *nfs_server_fscache_state(struct nfs_server *server) +{ + return "no "; +} + +#endif /* CONFIG_NFS_FSCACHE */ +#endif /* _NFS_FSCACHE_H */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c new file mode 100644 index 00000000000..b35d2a61606 --- /dev/null +++ b/fs/nfs/getroot.c @@ -0,0 +1,293 @@ +/* getroot.c: get the root dentry for an NFS mount + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/stats.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include <linux/lockd/bind.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/nfs_idmap.h> +#include <linux/vfs.h> +#include <linux/namei.h> +#include <linux/security.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include "nfs4_fs.h" +#include "delegation.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +/* + * Set the superblock root dentry. + * Note that this function frees the inode in case of error. + */ +static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *inode) +{ + /* The mntroot acts as the dummy root dentry for this superblock */ + if (sb->s_root == NULL) { + sb->s_root = d_alloc_root(inode); + if (sb->s_root == NULL) { + iput(inode); + return -ENOMEM; + } + /* Circumvent igrab(): we know the inode is not being freed */ + atomic_inc(&inode->i_count); + /* + * Ensure that this dentry is invisible to d_find_alias(). + * Otherwise, it may be spliced into the tree by + * d_materialise_unique if a parent directory from the same + * filesystem gets mounted at a later time. + * This again causes shrink_dcache_for_umount_subtree() to + * Oops, since the test for IS_ROOT() will fail. + */ + spin_lock(&dcache_lock); + list_del_init(&sb->s_root->d_alias); + spin_unlock(&dcache_lock); + } + return 0; +} + +/* + * get an NFS2/NFS3 root dentry from the root filehandle + */ +struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + /* get the actual root for this mount */ + fsinfo.fattr = &fattr; + + error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, fsinfo.fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_CAST(inode); + } + + error = nfs_superblock_set_dummy_root(sb, inode); + if (error != 0) + return ERR_PTR(error); + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_obtain_alias(inode); + if (IS_ERR(mntroot)) { + dprintk("nfs_get_root: get root dentry failed\n"); + return mntroot; + } + + security_d_instantiate(mntroot, inode); + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + return mntroot; +} + +#ifdef CONFIG_NFS_V4 + +/* + * Do a simple pathwalk from the root FH of the server to the nominated target + * of the mountpoint + * - give error on symlinks + * - give error on ".." occurring in the path + * - follow traversals + */ +int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path) +{ + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct nfs_fh lastfh; + struct qstr name; + int ret; + + dprintk("--> nfs4_path_walk(,,%s)\n", path); + + fsinfo.fattr = &fattr; + nfs_fattr_init(&fattr); + + /* Eat leading slashes */ + while (*path == '/') + path++; + + /* Start by getting the root filehandle from the server */ + ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (!S_ISDIR(fattr.mode)) { + printk(KERN_ERR "nfs4_get_root:" + " getroot encountered non-directory\n"); + return -ENOTDIR; + } + + /* FIXME: It is quite valid for the server to return a referral here */ + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " getroot obtained referral\n"); + return -EREMOTE; + } + +next_component: + dprintk("Next: %s\n", path); + + /* extract the next bit of the path */ + if (!*path) + goto path_walk_complete; + + name.name = path; + while (*path && *path != '/') + path++; + name.len = path - (const char *) name.name; + + if (name.len > NFS4_MAXNAMLEN) + return -ENAMETOOLONG; + +eat_dot_dir: + while (*path == '/') + path++; + + if (path[0] == '.' && (path[1] == '/' || !path[1])) { + path += 2; + goto eat_dot_dir; + } + + /* FIXME: Why shouldn't the user be able to use ".." in the path? */ + if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2]) + ) { + printk(KERN_ERR "nfs4_get_root:" + " Mount path contains reference to \"..\"\n"); + return -EINVAL; + } + + /* lookup the next FH in the sequence */ + memcpy(&lastfh, mntfh, sizeof(lastfh)); + + dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path); + + ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name, + mntfh, &fattr); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (!S_ISDIR(fattr.mode)) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh encountered non-directory\n"); + return -ENOTDIR; + } + + /* FIXME: Referrals are quite valid here too */ + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh obtained referral\n"); + return -EREMOTE; + } + + goto next_component; + +path_walk_complete: + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + dprintk("<-- nfs4_path_walk() = 0\n"); + return 0; +} + +/* + * get an NFS4 root dentry from the root filehandle + */ +struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + dprintk("--> nfs4_get_root()\n"); + + /* get the info about the server and filesystem */ + error = nfs4_server_capabilities(server, mntfh); + if (error < 0) { + dprintk("nfs_get_root: getcaps error = %d\n", + -error); + return ERR_PTR(error); + } + + /* get the actual root for this mount */ + error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, &fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_CAST(inode); + } + + error = nfs_superblock_set_dummy_root(sb, inode); + if (error != 0) + return ERR_PTR(error); + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_obtain_alias(inode); + if (IS_ERR(mntroot)) { + dprintk("nfs_get_root: get root dentry failed\n"); + return mntroot; + } + + security_d_instantiate(mntroot, inode); + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + dprintk("<-- nfs4_get_root()\n"); + return mntroot; +} + +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c new file mode 100644 index 00000000000..21a84d45916 --- /dev/null +++ b/fs/nfs/idmap.c @@ -0,0 +1,518 @@ +/* + * fs/nfs/idmap.c + * + * UID and GID to name mapping for clients. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen <marius@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/sched.h> + +#include <linux/sunrpc/clnt.h> +#include <linux/workqueue.h> +#include <linux/sunrpc/rpc_pipe_fs.h> + +#include <linux/nfs_fs.h> + +#include <linux/nfs_idmap.h> +#include "nfs4_fs.h" + +#define IDMAP_HASH_SZ 128 + +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600 * HZ; + +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + +struct idmap_hashent { + unsigned long ih_expires; + __u32 ih_id; + size_t ih_namelen; + char ih_name[IDMAP_NAMESZ]; +}; + +struct idmap_hashtable { + __u8 h_type; + struct idmap_hashent h_entries[IDMAP_HASH_SZ]; +}; + +struct idmap { + struct dentry *idmap_dentry; + wait_queue_head_t idmap_wq; + struct idmap_msg idmap_im; + struct mutex idmap_lock; /* Serializes upcalls */ + struct mutex idmap_im_lock; /* Protects the hashtable */ + struct idmap_hashtable idmap_user_hash; + struct idmap_hashtable idmap_group_hash; +}; + +static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); +static ssize_t idmap_pipe_downcall(struct file *, const char __user *, + size_t); +static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); + +static unsigned int fnvhash32(const void *, size_t); + +static const struct rpc_pipe_ops idmap_upcall_ops = { + .upcall = idmap_pipe_upcall, + .downcall = idmap_pipe_downcall, + .destroy_msg = idmap_pipe_destroy_msg, +}; + +int +nfs_idmap_new(struct nfs_client *clp) +{ + struct idmap *idmap; + int error; + + BUG_ON(clp->cl_idmap != NULL); + + idmap = kzalloc(sizeof(*idmap), GFP_KERNEL); + if (idmap == NULL) + return -ENOMEM; + + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, + "idmap", idmap, &idmap_upcall_ops, 0); + if (IS_ERR(idmap->idmap_dentry)) { + error = PTR_ERR(idmap->idmap_dentry); + kfree(idmap); + return error; + } + + mutex_init(&idmap->idmap_lock); + mutex_init(&idmap->idmap_im_lock); + init_waitqueue_head(&idmap->idmap_wq); + idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; + idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; + + clp->cl_idmap = idmap; + return 0; +} + +void +nfs_idmap_delete(struct nfs_client *clp) +{ + struct idmap *idmap = clp->cl_idmap; + + if (!idmap) + return; + rpc_unlink(idmap->idmap_dentry); + clp->cl_idmap = NULL; + kfree(idmap); +} + +/* + * Helper routines for manipulating the hashtable + */ +static inline struct idmap_hashent * +idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) +{ + return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) +{ + struct idmap_hashent *he = idmap_name_hash(h, name, len); + + if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) + return NULL; + if (time_after(jiffies, he->ih_expires)) + return NULL; + return he; +} + +static inline struct idmap_hashent * +idmap_id_hash(struct idmap_hashtable* h, __u32 id) +{ + return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_id(struct idmap_hashtable *h, __u32 id) +{ + struct idmap_hashent *he = idmap_id_hash(h, id); + if (he->ih_id != id || he->ih_namelen == 0) + return NULL; + if (time_after(jiffies, he->ih_expires)) + return NULL; + return he; +} + +/* + * Routines for allocating new entries in the hashtable. + * For now, we just have 1 entry per bucket, so it's all + * pretty trivial. + */ +static inline struct idmap_hashent * +idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) +{ + return idmap_name_hash(h, name, len); +} + +static inline struct idmap_hashent * +idmap_alloc_id(struct idmap_hashtable *h, __u32 id) +{ + return idmap_id_hash(h, id); +} + +static void +idmap_update_entry(struct idmap_hashent *he, const char *name, + size_t namelen, __u32 id) +{ + he->ih_id = id; + memcpy(he->ih_name, name, namelen); + he->ih_name[namelen] = '\0'; + he->ih_namelen = namelen; + he->ih_expires = jiffies + nfs_idmap_cache_timeout; +} + +/* + * Name -> ID + */ +static int +nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, + const char *name, size_t namelen, __u32 *id) +{ + struct rpc_pipe_msg msg; + struct idmap_msg *im; + struct idmap_hashent *he; + DECLARE_WAITQUEUE(wq, current); + int ret = -EIO; + + im = &idmap->idmap_im; + + /* + * String sanity checks + * Note that the userland daemon expects NUL terminated strings + */ + for (;;) { + if (namelen == 0) + return -EINVAL; + if (name[namelen-1] != '\0') + break; + namelen--; + } + if (namelen >= IDMAP_NAMESZ) + return -EINVAL; + + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); + + he = idmap_lookup_name(h, name, namelen); + if (he != NULL) { + *id = he->ih_id; + ret = 0; + goto out; + } + + memset(im, 0, sizeof(*im)); + memcpy(im->im_name, name, namelen); + + im->im_type = h->h_type; + im->im_conv = IDMAP_CONV_NAMETOID; + + memset(&msg, 0, sizeof(msg)); + msg.data = im; + msg.len = sizeof(*im); + + add_wait_queue(&idmap->idmap_wq, &wq); + if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { + remove_wait_queue(&idmap->idmap_wq, &wq); + goto out; + } + + set_current_state(TASK_UNINTERRUPTIBLE); + mutex_unlock(&idmap->idmap_im_lock); + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&idmap->idmap_wq, &wq); + mutex_lock(&idmap->idmap_im_lock); + + if (im->im_status & IDMAP_STATUS_SUCCESS) { + *id = im->im_id; + ret = 0; + } + + out: + memset(im, 0, sizeof(*im)); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); + return ret; +} + +/* + * ID -> Name + */ +static int +nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, + __u32 id, char *name) +{ + struct rpc_pipe_msg msg; + struct idmap_msg *im; + struct idmap_hashent *he; + DECLARE_WAITQUEUE(wq, current); + int ret = -EIO; + unsigned int len; + + im = &idmap->idmap_im; + + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); + + he = idmap_lookup_id(h, id); + if (he) { + memcpy(name, he->ih_name, he->ih_namelen); + ret = he->ih_namelen; + goto out; + } + + memset(im, 0, sizeof(*im)); + im->im_type = h->h_type; + im->im_conv = IDMAP_CONV_IDTONAME; + im->im_id = id; + + memset(&msg, 0, sizeof(msg)); + msg.data = im; + msg.len = sizeof(*im); + + add_wait_queue(&idmap->idmap_wq, &wq); + + if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { + remove_wait_queue(&idmap->idmap_wq, &wq); + goto out; + } + + set_current_state(TASK_UNINTERRUPTIBLE); + mutex_unlock(&idmap->idmap_im_lock); + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&idmap->idmap_wq, &wq); + mutex_lock(&idmap->idmap_im_lock); + + if (im->im_status & IDMAP_STATUS_SUCCESS) { + if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) + goto out; + memcpy(name, im->im_name, len); + ret = len; + } + + out: + memset(im, 0, sizeof(*im)); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); + return ret; +} + +/* RPC pipefs upcall/downcall routines */ +static ssize_t +idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char __user *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + size_t mlen = min(msg->len, buflen); + unsigned long left; + + left = copy_to_user(dst, data, mlen); + if (left == mlen) { + msg->errno = -EFAULT; + return -EFAULT; + } + + mlen -= left; + msg->copied += mlen; + msg->errno = 0; + return mlen; +} + +static ssize_t +idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) +{ + struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); + struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_msg im_in, *im = &idmap->idmap_im; + struct idmap_hashtable *h; + struct idmap_hashent *he = NULL; + size_t namelen_in; + int ret; + + if (mlen != sizeof(im_in)) + return -ENOSPC; + + if (copy_from_user(&im_in, src, mlen) != 0) + return -EFAULT; + + mutex_lock(&idmap->idmap_im_lock); + + ret = mlen; + im->im_status = im_in.im_status; + /* If we got an error, terminate now, and wake up pending upcalls */ + if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { + wake_up(&idmap->idmap_wq); + goto out; + } + + /* Sanity checking of strings */ + ret = -EINVAL; + namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) + goto out; + + switch (im_in.im_type) { + case IDMAP_TYPE_USER: + h = &idmap->idmap_user_hash; + break; + case IDMAP_TYPE_GROUP: + h = &idmap->idmap_group_hash; + break; + default: + goto out; + } + + switch (im_in.im_conv) { + case IDMAP_CONV_IDTONAME: + /* Did we match the current upcall? */ + if (im->im_conv == IDMAP_CONV_IDTONAME + && im->im_type == im_in.im_type + && im->im_id == im_in.im_id) { + /* Yes: copy string, including the terminating '\0' */ + memcpy(im->im_name, im_in.im_name, namelen_in); + im->im_name[namelen_in] = '\0'; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_id(h, im_in.im_id); + break; + case IDMAP_CONV_NAMETOID: + /* Did we match the current upcall? */ + if (im->im_conv == IDMAP_CONV_NAMETOID + && im->im_type == im_in.im_type + && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in + && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { + im->im_id = im_in.im_id; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_name(h, im_in.im_name, namelen_in); + break; + default: + goto out; + } + + /* If the entry is valid, also copy it to the cache */ + if (he != NULL) + idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); + ret = mlen; +out: + mutex_unlock(&idmap->idmap_im_lock); + return ret; +} + +static void +idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + struct idmap_msg *im = msg->data; + struct idmap *idmap = container_of(im, struct idmap, idmap_im); + + if (msg->errno >= 0) + return; + mutex_lock(&idmap->idmap_im_lock); + im->im_status = IDMAP_STATUS_LOOKUPFAIL; + wake_up(&idmap->idmap_wq); + mutex_unlock(&idmap->idmap_im_lock); +} + +/* + * Fowler/Noll/Vo hash + * http://www.isthe.com/chongo/tech/comp/fnv/ + */ + +#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ +#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ + +static unsigned int fnvhash32(const void *buf, size_t buflen) +{ + const unsigned char *p, *end = (const unsigned char *)buf + buflen; + unsigned int hash = FNV_1_32; + + for (p = buf; p < end; p++) { + hash *= FNV_P_32; + hash ^= (unsigned int)*p; + } + + return hash; +} + +int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) +{ + struct idmap *idmap = clp->cl_idmap; + + return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); +} + +int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) +{ + struct idmap *idmap = clp->cl_idmap; + + return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); +} + +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) +{ + struct idmap *idmap = clp->cl_idmap; + + return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); +} +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) +{ + struct idmap *idmap = clp->cl_idmap; + + return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); +} + diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c new file mode 100644 index 00000000000..5f59a2df21a --- /dev/null +++ b/fs/nfs/inode.c @@ -0,0 +1,1574 @@ +/* + * linux/fs/nfs/inode.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs inode and superblock handling functions + * + * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some + * experimental NFS changes. Modularisation taken straight from SYS5 fs. + * + * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. + * J.S.Peatfield@damtp.cam.ac.uk + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/stats.h> +#include <linux/sunrpc/metrics.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include <linux/lockd/bind.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/nfs_idmap.h> +#include <linux/vfs.h> +#include <linux/inet.h> +#include <linux/nfs_xdr.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" +#include "fscache.h" +#include "dns_resolve.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 + +/* Default is to see 64-bit inode numbers */ +static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; + +static void nfs_invalidate_inode(struct inode *); +static int nfs_update_inode(struct inode *, struct nfs_fattr *); + +static struct kmem_cache * nfs_inode_cachep; + +static inline unsigned long +nfs_fattr_to_ino_t(struct nfs_fattr *fattr) +{ + return nfs_fileid_to_ino_t(fattr->fileid); +} + +/** + * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks + * @word: long word containing the bit lock + */ +int nfs_wait_bit_killable(void *word) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + +/** + * nfs_compat_user_ino64 - returns the user-visible inode number + * @fileid: 64-bit fileid + * + * This function returns a 32-bit inode number if the boot parameter + * nfs.enable_ino64 is zero. + */ +u64 nfs_compat_user_ino64(u64 fileid) +{ + int ino; + + if (enable_ino64) + return fileid; + ino = fileid; + if (sizeof(ino) < sizeof(fileid)) + ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8; + return ino; +} + +int nfs_write_inode(struct inode *inode, int sync) +{ + int ret; + + if (sync) { + ret = filemap_fdatawait(inode->i_mapping); + if (ret == 0) + ret = nfs_commit_inode(inode, FLUSH_SYNC); + } else + ret = nfs_commit_inode(inode, 0); + if (ret >= 0) + return 0; + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; +} + +void nfs_clear_inode(struct inode *inode) +{ + /* + * The following should never happen... + */ + BUG_ON(nfs_have_writebacks(inode)); + BUG_ON(!list_empty(&NFS_I(inode)->open_files)); + nfs_zap_acl_cache(inode); + nfs_access_zap_cache(inode); + nfs_fscache_release_inode_cookie(inode); +} + +/** + * nfs_sync_mapping - helper to flush all mmapped dirty data to disk + */ +int nfs_sync_mapping(struct address_space *mapping) +{ + int ret; + + if (mapping->nrpages == 0) + return 0; + unmap_mapping_range(mapping, 0, 0, 0); + ret = filemap_write_and_wait(mapping); + if (ret != 0) + goto out; + ret = nfs_wb_all(mapping->host); +out: + return ret; +} + +/* + * Invalidate the local caches + */ +static void nfs_zap_caches_locked(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int mode = inode->i_mode; + + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; + + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + else + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; +} + +void nfs_zap_caches(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_zap_caches_locked(inode); + spin_unlock(&inode->i_lock); +} + +void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) +{ + if (mapping->nrpages != 0) { + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); + } +} + +void nfs_zap_acl_cache(struct inode *inode) +{ + void (*clear_acl_cache)(struct inode *); + + clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; + if (clear_acl_cache != NULL) + clear_acl_cache(inode); + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); +} + +void nfs_invalidate_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); +} + +/* + * Invalidate, but do not unhash, the inode. + * NB: must be called with inode->i_lock held! + */ +static void nfs_invalidate_inode(struct inode *inode) +{ + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + nfs_zap_caches_locked(inode); +} + +struct nfs_find_desc { + struct nfs_fh *fh; + struct nfs_fattr *fattr; +}; + +/* + * In NFSv3 we can have 64bit inode numbers. In order to support + * this, and re-exported directories (also seen in NFSv2) + * we are forced to allow 2 different inodes to have the same + * i_ino. + */ +static int +nfs_find_actor(struct inode *inode, void *opaque) +{ + struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_fh *fh = desc->fh; + struct nfs_fattr *fattr = desc->fattr; + + if (NFS_FILEID(inode) != fattr->fileid) + return 0; + if (nfs_compare_fh(NFS_FH(inode), fh)) + return 0; + if (is_bad_inode(inode) || NFS_STALE(inode)) + return 0; + return 1; +} + +static int +nfs_init_locked(struct inode *inode, void *opaque) +{ + struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_fattr *fattr = desc->fattr; + + set_nfs_fileid(inode, fattr->fileid); + nfs_copy_fh(NFS_FH(inode), desc->fh); + return 0; +} + +/* Don't use READDIRPLUS on directories that we believe are too large */ +#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) + +/* + * This is our front-end to iget that looks up inodes by file handle + * instead of inode number. + */ +struct inode * +nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct nfs_find_desc desc = { + .fh = fh, + .fattr = fattr + }; + struct inode *inode = ERR_PTR(-ENOENT); + unsigned long hash; + + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) + goto out_no_inode; + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) + goto out_no_inode; + + hash = nfs_fattr_to_ino_t(fattr); + + inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc); + if (inode == NULL) { + inode = ERR_PTR(-ENOMEM); + goto out_no_inode; + } + + if (inode->i_state & I_NEW) { + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long now = jiffies; + + /* We set i_ino for the few things that still rely on it, + * such as stat(2) */ + inode->i_ino = hash; + + /* We can't support update_atime(), since the server will reset it */ + inode->i_flags |= S_NOATIME|S_NOCMTIME; + inode->i_mode = fattr->mode; + if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 + && nfs_server_capable(inode, NFS_CAP_MODE)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; + /* Why so? Because we want revalidate for devices/FIFOs, and + * that's precisely what we have in nfs_file_inode_operations. + */ + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &nfs_file_operations; + inode->i_data.a_ops = &nfs_file_aops; + inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; + inode->i_fop = &nfs_dir_operations; + if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) + && fattr->size <= NFS_LIMIT_READDIRPLUS) + set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); + /* Deal with crossing mountpoints */ + if ((fattr->valid & NFS_ATTR_FATTR_FSID) + && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { + if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) + inode->i_op = &nfs_referral_inode_operations; + else + inode->i_op = &nfs_mountpoint_inode_operations; + inode->i_fop = NULL; + set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags); + } + } else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else + init_special_inode(inode, inode->i_mode, fattr->rdev); + + memset(&inode->i_atime, 0, sizeof(inode->i_atime)); + memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); + memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); + nfsi->change_attr = 0; + inode->i_size = 0; + inode->i_nlink = 0; + inode->i_uid = -2; + inode->i_gid = -2; + inode->i_blocks = 0; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + + nfsi->read_cache_jiffies = fattr->time_start; + nfsi->attr_gencount = fattr->gencount; + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + inode->i_atime = fattr->atime; + else if (nfs_server_capable(inode, NFS_CAP_ATIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + if (fattr->valid & NFS_ATTR_FATTR_MTIME) + inode->i_mtime = fattr->mtime; + else if (nfs_server_capable(inode, NFS_CAP_MTIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA; + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + else if (nfs_server_capable(inode, NFS_CAP_CTIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; + if (fattr->valid & NFS_ATTR_FATTR_CHANGE) + nfsi->change_attr = fattr->change_attr; + else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA; + if (fattr->valid & NFS_ATTR_FATTR_SIZE) + inode->i_size = nfs_size_to_loff_t(fattr->size); + else + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_REVAL_PAGECACHE; + if (fattr->valid & NFS_ATTR_FATTR_NLINK) + inode->i_nlink = fattr->nlink; + else if (nfs_server_capable(inode, NFS_CAP_NLINK)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + if (fattr->valid & NFS_ATTR_FATTR_OWNER) + inode->i_uid = fattr->uid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; + if (fattr->valid & NFS_ATTR_FATTR_GROUP) + inode->i_gid = fattr->gid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + inode->i_blocks = fattr->du.nfs2.blocks; + if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { + /* + * report the blocks in 512byte units + */ + inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); + } + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = now; + nfsi->access_cache = RB_ROOT; + + nfs_fscache_init_inode_cookie(inode); + + unlock_new_inode(inode); + } else + nfs_refresh_inode(inode, fattr); + dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + atomic_read(&inode->i_count)); + +out: + return inode; + +out_no_inode: + dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); + goto out; +} + +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) + +int +nfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct nfs_fattr fattr; + int error; + + nfs_inc_stats(inode, NFSIOS_VFSSETATTR); + + /* skip mode change if it's just for clearing setuid/setgid */ + if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) + attr->ia_valid &= ~ATTR_MODE; + + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) + attr->ia_valid &= ~ATTR_SIZE; + } + + /* Optimization: if the end result is no change, don't RPC */ + attr->ia_valid &= NFS_VALID_ATTRS; + if ((attr->ia_valid & ~ATTR_FILE) == 0) + return 0; + + /* Write all dirty data */ + if (S_ISREG(inode->i_mode)) { + filemap_write_and_wait(inode->i_mapping); + nfs_wb_all(inode); + } + /* + * Return any delegations if we're going to change ACLs + */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + nfs_inode_return_delegation(inode); + error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); + if (error == 0) + nfs_refresh_inode(inode, &fattr); + return error; +} + +/** + * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating + * + * This is a copy of the common vmtruncate, but with the locking + * corrected to take into account the fact that NFS requires + * inode->i_size to be updated under the inode->i_lock. + */ +static int nfs_vmtruncate(struct inode * inode, loff_t offset) +{ + loff_t oldsize; + int err; + + err = inode_newsize_ok(inode, offset); + if (err) + goto out; + + spin_lock(&inode->i_lock); + oldsize = inode->i_size; + i_size_write(inode, offset); + spin_unlock(&inode->i_lock); + + truncate_pagecache(inode, oldsize, offset); +out: + return err; +} + +/** + * nfs_setattr_update_inode - Update inode metadata after a setattr call. + * @inode: pointer to struct inode + * @attr: pointer to struct iattr + * + * Note: we do this in the *proc.c in order to ensure that + * it works for things like exclusive creates too. + */ +void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) +{ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + spin_lock(&inode->i_lock); + if ((attr->ia_valid & ATTR_MODE) != 0) { + int mode = attr->ia_mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; + inode->i_mode = mode; + } + if ((attr->ia_valid & ATTR_UID) != 0) + inode->i_uid = attr->ia_uid; + if ((attr->ia_valid & ATTR_GID) != 0) + inode->i_gid = attr->ia_gid; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); + } + if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); + nfs_vmtruncate(inode, attr->ia_size); + } +} + +int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; + int err; + + /* + * Flush out writes to the server in order to update c/mtime. + * + * Hold the i_mutex to suspend application writes temporarily; + * this prevents long-running writing applications from blocking + * nfs_wb_nocommit. + */ + if (S_ISREG(inode->i_mode)) { + mutex_lock(&inode->i_mutex); + nfs_wb_nocommit(inode); + mutex_unlock(&inode->i_mutex); + } + + /* + * We may force a getattr if the user cares about atime. + * + * Note that we only have to check the vfsmount flags here: + * - NFS always sets S_NOATIME by so checking it would give a + * bogus result + * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is + * no point in checking those. + */ + if ((mnt->mnt_flags & MNT_NOATIME) || + ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) + need_atime = 0; + + if (need_atime) + err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!err) { + generic_fillattr(inode, stat); + stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + } + return err; +} + +/** + * nfs_close_context - Common close_context() routine NFSv2/v3 + * @ctx: pointer to context + * @is_sync: is this a synchronous close + * + * always ensure that the attributes are up to date if we're mounted + * with close-to-open semantics + */ +void nfs_close_context(struct nfs_open_context *ctx, int is_sync) +{ + struct inode *inode; + struct nfs_server *server; + + if (!(ctx->mode & FMODE_WRITE)) + return; + if (!is_sync) + return; + inode = ctx->path.dentry->d_inode; + if (!list_empty(&NFS_I(inode)->open_files)) + return; + server = NFS_SERVER(inode); + if (server->flags & NFS_MOUNT_NOCTO) + return; + nfs_revalidate_inode(server, inode); +} + +static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) +{ + struct nfs_open_context *ctx; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx != NULL) { + ctx->path.dentry = dget(dentry); + ctx->path.mnt = mntget(mnt); + ctx->cred = get_rpccred(cred); + ctx->state = NULL; + ctx->lockowner = current->files; + ctx->flags = 0; + ctx->error = 0; + ctx->dir_cookie = 0; + atomic_set(&ctx->count, 1); + } + return ctx; +} + +struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) +{ + if (ctx != NULL) + atomic_inc(&ctx->count); + return ctx; +} + +static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) +{ + struct inode *inode = ctx->path.dentry->d_inode; + + if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) + return; + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + NFS_PROTO(inode)->close_context(ctx, is_sync); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); + path_put(&ctx->path); + kfree(ctx); +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + __put_nfs_open_context(ctx, 0); +} + +static void put_nfs_open_context_sync(struct nfs_open_context *ctx) +{ + __put_nfs_open_context(ctx, 1); +} + +/* + * Ensure that mmap has a recent RPC credential for use when writing out + * shared pages + */ +static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + filp->private_data = get_nfs_open_context(ctx); + spin_lock(&inode->i_lock); + list_add(&ctx->list, &nfsi->open_files); + spin_unlock(&inode->i_lock); +} + +/* + * Given an inode, search for an open context with the desired characteristics + */ +struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *pos, *ctx = NULL; + + spin_lock(&inode->i_lock); + list_for_each_entry(pos, &nfsi->open_files, list) { + if (cred != NULL && pos->cred != cred) + continue; + if ((pos->mode & mode) == mode) { + ctx = get_nfs_open_context(pos); + break; + } + } + spin_unlock(&inode->i_lock); + return ctx; +} + +static void nfs_file_clear_open_context(struct file *filp) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct nfs_open_context *ctx = nfs_file_open_context(filp); + + if (ctx) { + filp->private_data = NULL; + spin_lock(&inode->i_lock); + list_move_tail(&ctx->list, &NFS_I(inode)->open_files); + spin_unlock(&inode->i_lock); + put_nfs_open_context_sync(ctx); + } +} + +/* + * These allocate and release file read/write context information. + */ +int nfs_open(struct inode *inode, struct file *filp) +{ + struct nfs_open_context *ctx; + struct rpc_cred *cred; + + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return PTR_ERR(cred); + ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); + put_rpccred(cred); + if (ctx == NULL) + return -ENOMEM; + ctx->mode = filp->f_mode; + nfs_file_set_open_context(filp, ctx); + put_nfs_open_context(ctx); + nfs_fscache_set_inode_cookie(inode, filp); + return 0; +} + +int nfs_release(struct inode *inode, struct file *filp) +{ + nfs_file_clear_open_context(filp); + return 0; +} + +/* + * This function is called whenever some part of NFS notices that + * the cached attributes have to be refreshed. + */ +int +__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +{ + int status = -ESTALE; + struct nfs_fattr fattr; + struct nfs_inode *nfsi = NFS_I(inode); + + dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", + inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + + if (is_bad_inode(inode)) + goto out; + if (NFS_STALE(inode)) + goto out; + + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); + if (status != 0) { + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), status); + if (status == -ESTALE) { + nfs_zap_caches(inode); + if (!S_ISDIR(inode->i_mode)) + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + } + goto out; + } + + status = nfs_refresh_inode(inode, &fattr); + if (status) { + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), status); + goto out; + } + + if (nfsi->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); + + dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + + out: + return status; +} + +int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfs_have_delegated_attributes(inode)) + return 0; + return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); +} + +/** + * nfs_revalidate_inode - Revalidate the inode attributes + * @server - pointer to nfs_server struct + * @inode - pointer to inode struct + * + * Updates inode attribute information by retrieving the data from the server. + */ +int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) + && !nfs_attribute_timeout(inode)) + return NFS_STALE(inode) ? -ESTALE : 0; + return __nfs_revalidate_inode(server, inode); +} + +static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (mapping->nrpages != 0) { + int ret = invalidate_inode_pages2(mapping); + if (ret < 0) + return ret; + } + spin_lock(&inode->i_lock); + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + spin_unlock(&inode->i_lock); + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); + nfs_fscache_reset_inode_cookie(inode); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + return 0; +} + +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + int ret = 0; + + mutex_lock(&inode->i_mutex); + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) { + ret = nfs_sync_mapping(mapping); + if (ret == 0) + ret = nfs_invalidate_mapping_nolock(inode, mapping); + } + mutex_unlock(&inode->i_mutex); + return ret; +} + +/** + * nfs_revalidate_mapping_nolock - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret = 0; + + if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) + || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { + ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret < 0) + goto out; + } + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + ret = nfs_invalidate_mapping_nolock(inode, mapping); +out: + return ret; +} + +/** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + * + * This version of the function will take the inode->i_mutex and attempt to + * flush out all dirty data if it needs to invalidate the page cache. + */ +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret = 0; + + if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) + || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { + ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret < 0) + goto out; + } + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + ret = nfs_invalidate_mapping(inode, mapping); +out: + return ret; +} + +static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) + && (fattr->valid & NFS_ATTR_FATTR_CHANGE) + && nfsi->change_attr == fattr->pre_change_attr) { + nfsi->change_attr = fattr->change_attr; + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + } + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) + && (fattr->valid & NFS_ATTR_FATTR_CTIME) + && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + + if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) + && (fattr->valid & NFS_ATTR_FATTR_MTIME) + && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + } + if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) + && (fattr->valid & NFS_ATTR_FATTR_SIZE) + && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) + && nfsi->npages == 0) + i_size_write(inode, nfs_size_to_loff_t(fattr->size)); +} + +/** + * nfs_check_inode_attributes - verify consistency of the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * Verifies the attribute cache. If we have just changed the attributes, + * so that fattr carries weak cache consistency data, then it may + * also update the ctime/mtime/change_attribute. + */ +static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + loff_t cur_size, new_isize; + unsigned long invalid = 0; + + + /* Has the inode gone and changed behind our back? */ + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) + return -EIO; + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + return -EIO; + + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && + nfsi->change_attr != fattr->change_attr) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + + /* Verify a few of the more important attributes */ + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + + if (fattr->valid & NFS_ATTR_FATTR_SIZE) { + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + if (cur_size != new_isize && nfsi->npages == 0) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + } + + /* Have any file permissions changed? */ + if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + + /* Has the link count changed? */ + if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) + invalid |= NFS_INO_INVALID_ATTR; + + if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime)) + invalid |= NFS_INO_INVALID_ATIME; + + if (invalid != 0) + nfsi->cache_validity |= invalid; + + nfsi->read_cache_jiffies = fattr->time_start; + return 0; +} + +static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +{ + if (!(fattr->valid & NFS_ATTR_FATTR_CTIME)) + return 0; + return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; +} + +static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +{ + if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + return 0; + return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); +} + +static atomic_long_t nfs_attr_generation_counter; + +static unsigned long nfs_read_attr_generation_counter(void) +{ + return atomic_long_read(&nfs_attr_generation_counter); +} + +unsigned long nfs_inc_attr_generation_counter(void) +{ + return atomic_long_inc_return(&nfs_attr_generation_counter); +} + +void nfs_fattr_init(struct nfs_fattr *fattr) +{ + fattr->valid = 0; + fattr->time_start = jiffies; + fattr->gencount = nfs_inc_attr_generation_counter(); +} + +/** + * nfs_inode_attrs_need_update - check if the inode attributes need updating + * @inode - pointer to inode + * @fattr - attributes + * + * Attempt to divine whether or not an RPC call reply carrying stale + * attributes got scheduled after another call carrying updated ones. + * + * To do so, the function first assumes that a more recent ctime means + * that the attributes in fattr are newer, however it also attempt to + * catch the case where ctime either didn't change, or went backwards + * (if someone reset the clock on the server) by looking at whether + * or not this RPC call was started after the inode was last updated. + * Note also the check for wraparound of 'attr_gencount' + * + * The function returns 'true' if it thinks the attributes in 'fattr' are + * more recent than the ones cached in the inode. + * + */ +static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +{ + const struct nfs_inode *nfsi = NFS_I(inode); + + return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || + nfs_ctime_need_update(inode, fattr) || + nfs_size_need_update(inode, fattr) || + ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); +} + +static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +{ + if (nfs_inode_attrs_need_update(inode, fattr)) + return nfs_update_inode(inode, fattr); + return nfs_check_inode_attributes(inode, fattr); +} + +/** + * nfs_refresh_inode - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * Check that an RPC call that returned attributes has not overlapped with + * other recent updates of the inode metadata, then decide whether it is + * safe to do a full update of the inode attributes, or whether just to + * call nfs_check_inode_attributes. + */ +int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + int status; + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + spin_lock(&inode->i_lock); + status = nfs_refresh_inode_locked(inode, fattr); + spin_unlock(&inode->i_lock); + + return status; +} + +static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + return nfs_refresh_inode_locked(inode, fattr); +} + +/** + * nfs_post_op_update_inode - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. + * + * NB: if the server didn't return any post op attributes, this + * function will force the retrieval of attributes before the next + * NFS request. Thus it should be used only for operations that + * are expected to change one or more attributes, to avoid + * unnecessary NFS requests and trips through nfs_update_inode(). + */ +int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + int status; + + spin_lock(&inode->i_lock); + status = nfs_post_op_update_inode_locked(inode, fattr); + spin_unlock(&inode->i_lock); + return status; +} + +/** + * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. Fake up + * weak cache consistency data, if none exist. + * + * This function is mainly designed to be used by the ->write_done() functions. + */ +int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +{ + int status; + + spin_lock(&inode->i_lock); + /* Don't do a WCC update if these attributes are already stale */ + if ((fattr->valid & NFS_ATTR_FATTR) == 0 || + !nfs_inode_attrs_need_update(inode, fattr)) { + fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE + | NFS_ATTR_FATTR_PRESIZE + | NFS_ATTR_FATTR_PREMTIME + | NFS_ATTR_FATTR_PRECTIME); + goto out_noforce; + } + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { + fattr->pre_change_attr = NFS_I(inode)->change_attr; + fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; + } + if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { + memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); + fattr->valid |= NFS_ATTR_FATTR_PRECTIME; + } + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { + memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); + fattr->valid |= NFS_ATTR_FATTR_PREMTIME; + } + if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) { + fattr->pre_size = i_size_read(inode); + fattr->valid |= NFS_ATTR_FATTR_PRESIZE; + } +out_noforce: + status = nfs_post_op_update_inode_locked(inode, fattr); + spin_unlock(&inode->i_lock); + return status; +} + +/* + * Many nfs protocol calls return the new file attributes after + * an operation. Here we update the inode to reflect the state + * of the server's inode. + * + * This is a bit tricky because we have to make sure all dirty pages + * have been sent off to the server before calling invalidate_inode_pages. + * To make sure no other process adds more write requests while we try + * our best to flush them, we make them sleep during the attribute refresh. + * + * A very similar scenario holds for the dir cache. + */ +static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_server *server; + struct nfs_inode *nfsi = NFS_I(inode); + loff_t cur_isize, new_isize; + unsigned long invalid = 0; + unsigned long now = jiffies; + unsigned long save_cache_validity; + + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + __func__, inode->i_sb->s_id, inode->i_ino, + atomic_read(&inode->i_count), fattr->valid); + + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) + goto out_fileid; + + /* + * Make sure the inode's type hasn't changed. + */ + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + goto out_changed; + + server = NFS_SERVER(inode); + /* Update the fsid? */ + if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && + !nfs_fsid_equal(&server->fsid, &fattr->fsid) && + !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) + server->fsid = fattr->fsid; + + /* + * Update the read time so we don't revalidate too often. + */ + nfsi->read_cache_jiffies = fattr->time_start; + + save_cache_validity = nfsi->cache_validity; + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED + | NFS_INO_REVAL_PAGECACHE); + + /* Do atomic weak cache consistency updates */ + nfs_wcc_update_inode(inode, fattr); + + /* More cache consistency checks */ + if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { + if (nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + if (S_ISDIR(inode->i_mode)) + nfs_force_lookup_revalidate(inode); + nfsi->change_attr = fattr->change_attr; + } + } else if (server->caps & NFS_CAP_CHANGE_ATTR) + invalid |= save_cache_validity; + + if (fattr->valid & NFS_ATTR_FATTR_MTIME) { + /* NFSv2/v3: Check if the mtime agrees */ + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) + nfs_force_lookup_revalidate(inode); + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + } + } else if (server->caps & NFS_CAP_MTIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_CTIME) { + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + /* and probably clear data for a directory too as utimes can cause + * havoc with our cache. + */ + if (S_ISDIR(inode->i_mode)) { + invalid |= NFS_INO_INVALID_DATA; + nfs_force_lookup_revalidate(inode); + } + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + } + } else if (server->caps & NFS_CAP_CTIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + + /* Check if our cached file size is stale */ + if (fattr->valid & NFS_ATTR_FATTR_SIZE) { + new_isize = nfs_size_to_loff_t(fattr->size); + cur_isize = i_size_read(inode); + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes, or has + * the file grown beyond our last write? */ + if (nfsi->npages == 0 || new_isize > cur_isize) { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + dprintk("NFS: isize change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + } + } else + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); + + + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + else if (server->caps & NFS_CAP_ATIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_MODE) { + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { + umode_t newmode = inode->i_mode & S_IFMT; + newmode |= fattr->mode & S_IALLUGO; + inode->i_mode = newmode; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + } + } else if (server->caps & NFS_CAP_MODE) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_OWNER) { + if (inode->i_uid != fattr->uid) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + inode->i_uid = fattr->uid; + } + } else if (server->caps & NFS_CAP_OWNER) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_GROUP) { + if (inode->i_gid != fattr->gid) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + inode->i_gid = fattr->gid; + } + } else if (server->caps & NFS_CAP_OWNER_GROUP) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_NLINK) { + if (inode->i_nlink != fattr->nlink) { + invalid |= NFS_INO_INVALID_ATTR; + if (S_ISDIR(inode->i_mode)) + invalid |= NFS_INO_INVALID_DATA; + inode->i_nlink = fattr->nlink; + } + } else if (server->caps & NFS_CAP_NLINK) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { + /* + * report the blocks in 512byte units + */ + inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); + } + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + inode->i_blocks = fattr->du.nfs2.blocks; + + /* Update attrtimeo value if we're out of the unstable period */ + if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = now; + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); + } else { + if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { + if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = now; + } + } + invalid &= ~NFS_INO_INVALID_ATTR; + /* Don't invalidate the data if we were to blame */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) + || S_ISLNK(inode->i_mode))) + invalid &= ~NFS_INO_INVALID_DATA; + if (!nfs_have_delegation(inode, FMODE_READ) || + (save_cache_validity & NFS_INO_REVAL_FORCED)) + nfsi->cache_validity |= invalid; + + return 0; + out_changed: + /* + * Big trouble! The inode has become a different object. + */ + printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + __func__, inode->i_ino, inode->i_mode, fattr->mode); + out_err: + /* + * No need to worry about unhashing the dentry, as the + * lookup validation will know that the inode is bad. + * (But we fall through to invalidate the caches.) + */ + nfs_invalidate_inode(inode); + return -ESTALE; + + out_fileid: + printk(KERN_ERR "NFS: server %s error: fileid changed\n" + "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, + (long long)nfsi->fileid, (long long)fattr->fileid); + goto out_err; +} + + +#ifdef CONFIG_NFS_V4 + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +void nfs4_clear_inode(struct inode *inode) +{ + /* If we are holding a delegation, return it! */ + nfs_inode_return_delegation_noreclaim(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} +#endif + +struct inode *nfs_alloc_inode(struct super_block *sb) +{ + struct nfs_inode *nfsi; + nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); + if (!nfsi) + return NULL; + nfsi->flags = 0UL; + nfsi->cache_validity = 0UL; +#ifdef CONFIG_NFS_V3_ACL + nfsi->acl_access = ERR_PTR(-EAGAIN); + nfsi->acl_default = ERR_PTR(-EAGAIN); +#endif +#ifdef CONFIG_NFS_V4 + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ + return &nfsi->vfs_inode; +} + +void nfs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); +} + +static inline void nfs4_init_once(struct nfs_inode *nfsi) +{ +#ifdef CONFIG_NFS_V4 + INIT_LIST_HEAD(&nfsi->open_states); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + init_rwsem(&nfsi->rwsem); +#endif +} + +static void init_once(void *foo) +{ + struct nfs_inode *nfsi = (struct nfs_inode *) foo; + + inode_init_once(&nfsi->vfs_inode); + INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); + INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + nfsi->npages = 0; + atomic_set(&nfsi->silly_count, 1); + INIT_HLIST_HEAD(&nfsi->silly_list); + init_waitqueue_head(&nfsi->waitqueue); + nfs4_init_once(nfsi); +} + +static int __init nfs_init_inodecache(void) +{ + nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", + sizeof(struct nfs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once); + if (nfs_inode_cachep == NULL) + return -ENOMEM; + + return 0; +} + +static void nfs_destroy_inodecache(void) +{ + kmem_cache_destroy(nfs_inode_cachep); +} + +struct workqueue_struct *nfsiod_workqueue; + +/* + * start up the nfsiod workqueue + */ +static int nfsiod_start(void) +{ + struct workqueue_struct *wq; + dprintk("RPC: creating workqueue nfsiod\n"); + wq = create_singlethread_workqueue("nfsiod"); + if (wq == NULL) + return -ENOMEM; + nfsiod_workqueue = wq; + return 0; +} + +/* + * Destroy the nfsiod workqueue + */ +static void nfsiod_stop(void) +{ + struct workqueue_struct *wq; + + wq = nfsiod_workqueue; + if (wq == NULL) + return; + nfsiod_workqueue = NULL; + destroy_workqueue(wq); +} + +/* + * Initialize NFS + */ +static int __init init_nfs_fs(void) +{ + int err; + + err = nfs_dns_resolver_init(); + if (err < 0) + goto out8; + + err = nfs_fscache_register(); + if (err < 0) + goto out7; + + err = nfsiod_start(); + if (err) + goto out6; + + err = nfs_fs_proc_init(); + if (err) + goto out5; + + err = nfs_init_nfspagecache(); + if (err) + goto out4; + + err = nfs_init_inodecache(); + if (err) + goto out3; + + err = nfs_init_readpagecache(); + if (err) + goto out2; + + err = nfs_init_writepagecache(); + if (err) + goto out1; + + err = nfs_init_directcache(); + if (err) + goto out0; + +#ifdef CONFIG_PROC_FS + rpc_proc_register(&nfs_rpcstat); +#endif + if ((err = register_nfs_fs()) != 0) + goto out; + return 0; +out: +#ifdef CONFIG_PROC_FS + rpc_proc_unregister("nfs"); +#endif + nfs_destroy_directcache(); +out0: + nfs_destroy_writepagecache(); +out1: + nfs_destroy_readpagecache(); +out2: + nfs_destroy_inodecache(); +out3: + nfs_destroy_nfspagecache(); +out4: + nfs_fs_proc_exit(); +out5: + nfsiod_stop(); +out6: + nfs_fscache_unregister(); +out7: + nfs_dns_resolver_destroy(); +out8: + return err; +} + +static void __exit exit_nfs_fs(void) +{ + nfs_destroy_directcache(); + nfs_destroy_writepagecache(); + nfs_destroy_readpagecache(); + nfs_destroy_inodecache(); + nfs_destroy_nfspagecache(); + nfs_fscache_unregister(); + nfs_dns_resolver_destroy(); +#ifdef CONFIG_PROC_FS + rpc_proc_unregister("nfs"); +#endif + unregister_nfs_fs(); + nfs_fs_proc_exit(); + nfsiod_stop(); +} + +/* Not quite true; I just maintain it */ +MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +MODULE_LICENSE("GPL"); +module_param(enable_ino64, bool, 0644); + +module_init(init_nfs_fs) +module_exit(exit_nfs_fs) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h new file mode 100644 index 00000000000..29e464d23b3 --- /dev/null +++ b/fs/nfs/internal.h @@ -0,0 +1,380 @@ +/* + * NFS internal definitions + */ + +#include "nfs4_fs.h" +#include <linux/mount.h> +#include <linux/security.h> + +#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) + +struct nfs_string; + +/* Maximum number of readahead requests + * FIXME: this should really be a sysctl so that users may tune it to suit + * their needs. People that do NFS over a slow network, might for + * instance want to reduce it to something closer to 1 for improved + * interactive response. + */ +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) + +/* + * Determine if sessions are in use. + */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (clp->cl_session) + return 1; +#endif /* CONFIG_NFS_V4_1 */ + return 0; +} + +static inline int nfs4_has_persistent_session(const struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp)) + return (clp->cl_session->flags & SESSION4_PERSIST); +#endif /* CONFIG_NFS_V4_1 */ + return 0; +} + +struct nfs_clone_mount { + const struct super_block *sb; + const struct dentry *dentry; + struct nfs_fh *fh; + struct nfs_fattr *fattr; + char *hostname; + char *mnt_path; + struct sockaddr *addr; + size_t addrlen; + rpc_authflavor_t authflavor; +}; + +/* + * Note: RFC 1813 doesn't limit the number of auth flavors that + * a server can return, so make something up. + */ +#define NFS_MAX_SECFLAVORS (12) + +/* + * Value used if the user did not specify a port value. + */ +#define NFS_UNSPEC_PORT (-1) + +/* + * In-kernel mount arguments + */ +struct nfs_parsed_mount_data { + int flags; + int rsize, wsize; + int timeo, retrans; + int acregmin, acregmax, + acdirmin, acdirmax; + int namlen; + unsigned int options; + unsigned int bsize; + unsigned int auth_flavor_len; + rpc_authflavor_t auth_flavors[1]; + char *client_address; + unsigned int version; + unsigned int minorversion; + char *fscache_uniq; + + struct { + struct sockaddr_storage address; + size_t addrlen; + char *hostname; + u32 version; + int port; + unsigned short protocol; + } mount_server; + + struct { + struct sockaddr_storage address; + size_t addrlen; + char *hostname; + char *export_path; + int port; + unsigned short protocol; + } nfs_server; + + struct security_mnt_opts lsm_opts; +}; + +/* mount_clnt.c */ +struct nfs_mount_request { + struct sockaddr *sap; + size_t salen; + char *hostname; + char *dirpath; + u32 version; + unsigned short protocol; + struct nfs_fh *fh; + int noresvport; + unsigned int *auth_flav_len; + rpc_authflavor_t *auth_flavs; +}; + +extern int nfs_mount(struct nfs_mount_request *info); +extern void nfs_umount(const struct nfs_mount_request *info); + +/* client.c */ +extern struct rpc_program nfs_program; + +extern void nfs_put_client(struct nfs_client *); +extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32); +extern struct nfs_client *nfs_find_client_next(struct nfs_client *); +extern struct nfs_server *nfs_create_server( + const struct nfs_parsed_mount_data *, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_server( + const struct nfs_parsed_mount_data *, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, + struct nfs_fh *); +extern void nfs_free_server(struct nfs_server *server); +extern struct nfs_server *nfs_clone_server(struct nfs_server *, + struct nfs_fh *, + struct nfs_fattr *); +extern void nfs_mark_client_ready(struct nfs_client *clp, int state); +extern int nfs4_check_client_ready(struct nfs_client *clp); +#ifdef CONFIG_PROC_FS +extern int __init nfs_fs_proc_init(void); +extern void nfs_fs_proc_exit(void); +#else +static inline int nfs_fs_proc_init(void) +{ + return 0; +} +static inline void nfs_fs_proc_exit(void) +{ +} +#endif + +/* nfs4namespace.c */ +#ifdef CONFIG_NFS_V4 +extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); +#else +static inline +struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) +{ + return ERR_PTR(-ENOENT); +} +#endif + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; +extern struct svc_version nfs4_callback_version4; + +/* pagelist.c */ +extern int __init nfs_init_nfspagecache(void); +extern void nfs_destroy_nfspagecache(void); +extern int __init nfs_init_readpagecache(void); +extern void nfs_destroy_readpagecache(void); +extern int __init nfs_init_writepagecache(void); +extern void nfs_destroy_writepagecache(void); + +extern int __init nfs_init_directcache(void); +extern void nfs_destroy_directcache(void); + +/* nfs2xdr.c */ +extern int nfs_stat_to_errno(int); +extern struct rpc_procinfo nfs_procedures[]; +extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); + +/* nfs3xdr.c */ +extern struct rpc_procinfo nfs3_procedures[]; +extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); + +/* nfs4xdr.c */ +#ifdef CONFIG_NFS_V4 +extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); +#endif +#ifdef CONFIG_NFS_V4_1 +extern const u32 nfs41_maxread_overhead; +extern const u32 nfs41_maxwrite_overhead; +#endif + +/* nfs4proc.c */ +#ifdef CONFIG_NFS_V4 +extern struct rpc_procinfo nfs4_procedures[]; +#endif + +/* proc.c */ +void nfs_close_context(struct nfs_open_context *ctx, int is_sync); + +/* dir.c */ +extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); + +/* inode.c */ +extern struct workqueue_struct *nfsiod_workqueue; +extern struct inode *nfs_alloc_inode(struct super_block *sb); +extern void nfs_destroy_inode(struct inode *); +extern int nfs_write_inode(struct inode *,int); +extern void nfs_clear_inode(struct inode *); +#ifdef CONFIG_NFS_V4 +extern void nfs4_clear_inode(struct inode *); +#endif +void nfs_zap_acl_cache(struct inode *inode); +extern int nfs_wait_bit_killable(void *word); + +/* super.c */ +extern struct file_system_type nfs_xdev_fs_type; +#ifdef CONFIG_NFS_V4 +extern struct file_system_type nfs4_xdev_fs_type; +extern struct file_system_type nfs4_referral_fs_type; +#endif + +extern struct rpc_stat nfs_rpcstat; + +extern int __init register_nfs_fs(void); +extern void __exit unregister_nfs_fs(void); +extern void nfs_sb_active(struct super_block *sb); +extern void nfs_sb_deactive(struct super_block *sb); + +/* namespace.c */ +extern char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, + char *buffer, ssize_t buflen); + +/* getroot.c */ +extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); +#ifdef CONFIG_NFS_V4 +extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); + +extern int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path); +#endif + +/* read.c */ +extern void nfs_read_prepare(struct rpc_task *task, void *calldata); + +/* write.c */ +extern void nfs_write_prepare(struct rpc_task *task, void *calldata); +#ifdef CONFIG_MIGRATION +extern int nfs_migrate_page(struct address_space *, + struct page *, struct page *); +#else +#define nfs_migrate_page NULL +#endif + +/* nfs4proc.c */ +extern int _nfs4_call_sync(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply); +extern int _nfs4_call_sync_session(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply); + +/* + * Determine the device name as a string + */ +static inline char *nfs_devname(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, + dentry, buffer, buflen); +} + +/* + * Determine the actual block size (and log2 thereof) + */ +static inline +unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) +{ + /* make sure blocksize is a power of two */ + if ((bsize & (bsize - 1)) || nrbitsp) { + unsigned char nrbits; + + for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + ; + bsize = 1 << nrbits; + if (nrbitsp) + *nrbitsp = nrbits; + } + + return bsize; +} + +/* + * Calculate the number of 512byte blocks used. + */ +static inline blkcnt_t nfs_calc_block_size(u64 tsize) +{ + blkcnt_t used = (tsize + 511) >> 9; + return (used > ULONG_MAX) ? ULONG_MAX : used; +} + +/* + * Compute and set NFS server blocksize + */ +static inline +unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) +{ + if (bsize < NFS_MIN_FILE_IO_SIZE) + bsize = NFS_DEF_FILE_IO_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_SIZE) + bsize = NFS_MAX_FILE_IO_SIZE; + + return nfs_block_bits(bsize, nrbitsp); +} + +/* + * Determine the maximum file size for a superblock + */ +static inline +void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) +{ + sb->s_maxbytes = (loff_t)maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) + sb->s_maxbytes = MAX_LFS_FILESIZE; +} + +/* + * Determine the number of bytes of data the page contains + */ +static inline +unsigned int nfs_page_length(struct page *page) +{ + loff_t i_size = i_size_read(page->mapping->host); + + if (i_size > 0) { + pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + if (page->index < end_index) + return PAGE_CACHE_SIZE; + if (page->index == end_index) + return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1; + } + return 0; +} + +/* + * Determine the number of pages in an array of length 'len' and + * with a base offset of 'base' + */ +static inline +unsigned int nfs_page_array_len(unsigned int base, size_t len) +{ + return ((unsigned long)len + (unsigned long)base + + PAGE_SIZE - 1) >> PAGE_SHIFT; +} + +/* + * Helper for restarting RPC calls in the possible presence of NFSv4.1 + * sessions. + */ +static inline void nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) + rpc_restart_call_prepare(task); + else + rpc_restart_call(task); +} diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h new file mode 100644 index 00000000000..46d779abafd --- /dev/null +++ b/fs/nfs/iostat.h @@ -0,0 +1,71 @@ +/* + * linux/fs/nfs/iostat.h + * + * Declarations for NFS client per-mount statistics + * + * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com> + * + */ + +#ifndef _NFS_IOSTAT +#define _NFS_IOSTAT + +#include <linux/percpu.h> +#include <linux/cache.h> +#include <linux/nfs_iostat.h> + +struct nfs_iostats { + unsigned long long bytes[__NFSIOS_BYTESMAX]; +#ifdef CONFIG_NFS_FSCACHE + unsigned long long fscache[__NFSIOS_FSCACHEMAX]; +#endif + unsigned long events[__NFSIOS_COUNTSMAX]; +} ____cacheline_aligned; + +static inline void nfs_inc_server_stats(const struct nfs_server *server, + enum nfs_stat_eventcounters stat) +{ + this_cpu_inc(server->io_stats->events[stat]); +} + +static inline void nfs_inc_stats(const struct inode *inode, + enum nfs_stat_eventcounters stat) +{ + nfs_inc_server_stats(NFS_SERVER(inode), stat); +} + +static inline void nfs_add_server_stats(const struct nfs_server *server, + enum nfs_stat_bytecounters stat, + unsigned long addend) +{ + this_cpu_add(server->io_stats->bytes[stat], addend); +} + +static inline void nfs_add_stats(const struct inode *inode, + enum nfs_stat_bytecounters stat, + unsigned long addend) +{ + nfs_add_server_stats(NFS_SERVER(inode), stat, addend); +} + +#ifdef CONFIG_NFS_FSCACHE +static inline void nfs_add_fscache_stats(struct inode *inode, + enum nfs_stat_fscachecounters stat, + unsigned long addend) +{ + this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); +} +#endif + +static inline struct nfs_iostats *nfs_alloc_iostats(void) +{ + return alloc_percpu(struct nfs_iostats); +} + +static inline void nfs_free_iostats(struct nfs_iostats *stats) +{ + if (stats != NULL) + free_percpu(stats); +} + +#endif /* _NFS_IOSTAT */ diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c new file mode 100644 index 00000000000..59047f8d7d7 --- /dev/null +++ b/fs/nfs/mount_clnt.c @@ -0,0 +1,531 @@ +/* + * In-kernel MOUNT protocol client + * + * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/uio.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/sched.h> +#include <linux/nfs_fs.h> +#include "internal.h" + +#ifdef RPC_DEBUG +# define NFSDBG_FACILITY NFSDBG_MOUNT +#endif + +/* + * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4 + */ +#define MNTPATHLEN (1024) + +/* + * XDR data type sizes + */ +#define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN)) +#define MNT_status_sz (1) +#define MNT_fhs_status_sz (1) +#define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE) +#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE)) +#define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS) + +/* + * XDR argument and result sizes + */ +#define MNT_enc_dirpath_sz encode_dirpath_sz +#define MNT_dec_mountres_sz (MNT_status_sz + MNT_fhandle_sz) +#define MNT_dec_mountres3_sz (MNT_status_sz + MNT_fhandle_sz + \ + MNT_authflav3_sz) + +/* + * Defined by RFC 1094, section A.5 + */ +enum { + MOUNTPROC_NULL = 0, + MOUNTPROC_MNT = 1, + MOUNTPROC_DUMP = 2, + MOUNTPROC_UMNT = 3, + MOUNTPROC_UMNTALL = 4, + MOUNTPROC_EXPORT = 5, +}; + +/* + * Defined by RFC 1813, section 5.2 + */ +enum { + MOUNTPROC3_NULL = 0, + MOUNTPROC3_MNT = 1, + MOUNTPROC3_DUMP = 2, + MOUNTPROC3_UMNT = 3, + MOUNTPROC3_UMNTALL = 4, + MOUNTPROC3_EXPORT = 5, +}; + +static struct rpc_program mnt_program; + +/* + * Defined by OpenGroup XNFS Version 3W, chapter 8 + */ +enum mountstat { + MNT_OK = 0, + MNT_EPERM = 1, + MNT_ENOENT = 2, + MNT_EACCES = 13, + MNT_EINVAL = 22, +}; + +static struct { + u32 status; + int errno; +} mnt_errtbl[] = { + { .status = MNT_OK, .errno = 0, }, + { .status = MNT_EPERM, .errno = -EPERM, }, + { .status = MNT_ENOENT, .errno = -ENOENT, }, + { .status = MNT_EACCES, .errno = -EACCES, }, + { .status = MNT_EINVAL, .errno = -EINVAL, }, +}; + +/* + * Defined by RFC 1813, section 5.1.5 + */ +enum mountstat3 { + MNT3_OK = 0, /* no error */ + MNT3ERR_PERM = 1, /* Not owner */ + MNT3ERR_NOENT = 2, /* No such file or directory */ + MNT3ERR_IO = 5, /* I/O error */ + MNT3ERR_ACCES = 13, /* Permission denied */ + MNT3ERR_NOTDIR = 20, /* Not a directory */ + MNT3ERR_INVAL = 22, /* Invalid argument */ + MNT3ERR_NAMETOOLONG = 63, /* Filename too long */ + MNT3ERR_NOTSUPP = 10004, /* Operation not supported */ + MNT3ERR_SERVERFAULT = 10006, /* A failure on the server */ +}; + +static struct { + u32 status; + int errno; +} mnt3_errtbl[] = { + { .status = MNT3_OK, .errno = 0, }, + { .status = MNT3ERR_PERM, .errno = -EPERM, }, + { .status = MNT3ERR_NOENT, .errno = -ENOENT, }, + { .status = MNT3ERR_IO, .errno = -EIO, }, + { .status = MNT3ERR_ACCES, .errno = -EACCES, }, + { .status = MNT3ERR_NOTDIR, .errno = -ENOTDIR, }, + { .status = MNT3ERR_INVAL, .errno = -EINVAL, }, + { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, }, + { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, }, + { .status = MNT3ERR_SERVERFAULT, .errno = -EREMOTEIO, }, +}; + +struct mountres { + int errno; + struct nfs_fh *fh; + unsigned int *auth_count; + rpc_authflavor_t *auth_flavors; +}; + +struct mnt_fhstatus { + u32 status; + struct nfs_fh *fh; +}; + +/** + * nfs_mount - Obtain an NFS file handle for the given host and path + * @info: pointer to mount request arguments + * + * Uses default timeout parameters specified by underlying transport. + */ +int nfs_mount(struct nfs_mount_request *info) +{ + struct mountres result = { + .fh = info->fh, + .auth_count = info->auth_flav_len, + .auth_flavors = info->auth_flavs, + }; + struct rpc_message msg = { + .rpc_argp = info->dirpath, + .rpc_resp = &result, + }; + struct rpc_create_args args = { + .protocol = info->protocol, + .address = info->sap, + .addrsize = info->salen, + .servername = info->hostname, + .program = &mnt_program, + .version = info->version, + .authflavor = RPC_AUTH_UNIX, + }; + struct rpc_clnt *mnt_clnt; + int status; + + dprintk("NFS: sending MNT request for %s:%s\n", + (info->hostname ? info->hostname : "server"), + info->dirpath); + + if (info->noresvport) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + + mnt_clnt = rpc_create(&args); + if (IS_ERR(mnt_clnt)) + goto out_clnt_err; + + if (info->version == NFS_MNT3_VERSION) + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; + else + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; + + status = rpc_call_sync(mnt_clnt, &msg, 0); + rpc_shutdown_client(mnt_clnt); + + if (status < 0) + goto out_call_err; + if (result.errno != 0) + goto out_mnt_err; + + dprintk("NFS: MNT request succeeded\n"); + status = 0; + +out: + return status; + +out_clnt_err: + status = PTR_ERR(mnt_clnt); + dprintk("NFS: failed to create MNT RPC client, status=%d\n", status); + goto out; + +out_call_err: + dprintk("NFS: MNT request failed, status=%d\n", status); + goto out; + +out_mnt_err: + dprintk("NFS: MNT server returned result %d\n", result.errno); + status = result.errno; + goto out; +} + +/** + * nfs_umount - Notify a server that we have unmounted this export + * @info: pointer to umount request arguments + * + * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always + * use UDP. + */ +void nfs_umount(const struct nfs_mount_request *info) +{ + static const struct rpc_timeout nfs_umnt_timeout = { + .to_initval = 1 * HZ, + .to_maxval = 3 * HZ, + .to_retries = 2, + }; + struct rpc_create_args args = { + .protocol = IPPROTO_UDP, + .address = info->sap, + .addrsize = info->salen, + .timeout = &nfs_umnt_timeout, + .servername = info->hostname, + .program = &mnt_program, + .version = info->version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_NOPING, + }; + struct mountres result; + struct rpc_message msg = { + .rpc_argp = info->dirpath, + .rpc_resp = &result, + }; + struct rpc_clnt *clnt; + int status; + + if (info->noresvport) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + + clnt = rpc_create(&args); + if (unlikely(IS_ERR(clnt))) + goto out_clnt_err; + + dprintk("NFS: sending UMNT request for %s:%s\n", + (info->hostname ? info->hostname : "server"), info->dirpath); + + if (info->version == NFS_MNT3_VERSION) + msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT]; + else + msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT]; + + status = rpc_call_sync(clnt, &msg, 0); + rpc_shutdown_client(clnt); + + if (unlikely(status < 0)) + goto out_call_err; + + return; + +out_clnt_err: + dprintk("NFS: failed to create UMNT RPC client, status=%ld\n", + PTR_ERR(clnt)); + return; + +out_call_err: + dprintk("NFS: UMNT request failed, status=%d\n", status); +} + +/* + * XDR encode/decode functions for MOUNT + */ + +static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname) +{ + const u32 pathname_len = strlen(pathname); + __be32 *p; + + if (unlikely(pathname_len > MNTPATHLEN)) + return -EIO; + + p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len); + if (unlikely(p == NULL)) + return -EIO; + xdr_encode_opaque(p, pathname, pathname_len); + + return 0; +} + +static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p, + const char *dirpath) +{ + struct xdr_stream xdr; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + return encode_mntdirpath(&xdr, dirpath); +} + +/* + * RFC 1094: "A non-zero status indicates some sort of error. In this + * case, the status is a UNIX error number." This can be problematic + * if the server and client use different errno values for the same + * error. + * + * However, the OpenGroup XNFS spec provides a simple mapping that is + * independent of local errno values on the server and the client. + */ +static int decode_status(struct xdr_stream *xdr, struct mountres *res) +{ + unsigned int i; + u32 status; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(status)); + if (unlikely(p == NULL)) + return -EIO; + status = ntohl(*p); + + for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) { + if (mnt_errtbl[i].status == status) { + res->errno = mnt_errtbl[i].errno; + return 0; + } + } + + dprintk("NFS: unrecognized MNT status code: %u\n", status); + res->errno = -EACCES; + return 0; +} + +static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res) +{ + struct nfs_fh *fh = res->fh; + __be32 *p; + + p = xdr_inline_decode(xdr, NFS2_FHSIZE); + if (unlikely(p == NULL)) + return -EIO; + + fh->size = NFS2_FHSIZE; + memcpy(fh->data, p, NFS2_FHSIZE); + return 0; +} + +static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p, + struct mountres *res) +{ + struct xdr_stream xdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + status = decode_status(&xdr, res); + if (unlikely(status != 0 || res->errno != 0)) + return status; + return decode_fhandle(&xdr, res); +} + +static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) +{ + unsigned int i; + u32 status; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(status)); + if (unlikely(p == NULL)) + return -EIO; + status = ntohl(*p); + + for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) { + if (mnt3_errtbl[i].status == status) { + res->errno = mnt3_errtbl[i].errno; + return 0; + } + } + + dprintk("NFS: unrecognized MNT3 status code: %u\n", status); + res->errno = -EACCES; + return 0; +} + +static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res) +{ + struct nfs_fh *fh = res->fh; + u32 size; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(size)); + if (unlikely(p == NULL)) + return -EIO; + + size = ntohl(*p++); + if (size > NFS3_FHSIZE || size == 0) + return -EIO; + + p = xdr_inline_decode(xdr, size); + if (unlikely(p == NULL)) + return -EIO; + + fh->size = size; + memcpy(fh->data, p, size); + return 0; +} + +static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res) +{ + rpc_authflavor_t *flavors = res->auth_flavors; + unsigned int *count = res->auth_count; + u32 entries, i; + __be32 *p; + + if (*count == 0) + return 0; + + p = xdr_inline_decode(xdr, sizeof(entries)); + if (unlikely(p == NULL)) + return -EIO; + entries = ntohl(*p); + dprintk("NFS: received %u auth flavors\n", entries); + if (entries > NFS_MAX_SECFLAVORS) + entries = NFS_MAX_SECFLAVORS; + + p = xdr_inline_decode(xdr, sizeof(u32) * entries); + if (unlikely(p == NULL)) + return -EIO; + + if (entries > *count) + entries = *count; + + for (i = 0; i < entries; i++) { + flavors[i] = ntohl(*p++); + dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); + } + *count = i; + + return 0; +} + +static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p, + struct mountres *res) +{ + struct xdr_stream xdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + status = decode_fhs_status(&xdr, res); + if (unlikely(status != 0 || res->errno != 0)) + return status; + status = decode_fhandle3(&xdr, res); + if (unlikely(status != 0)) { + res->errno = -EBADHANDLE; + return 0; + } + return decode_auth_flavors(&xdr, res); +} + +static struct rpc_procinfo mnt_procedures[] = { + [MOUNTPROC_MNT] = { + .p_proc = MOUNTPROC_MNT, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_decode = (kxdrproc_t)mnt_dec_mountres, + .p_arglen = MNT_enc_dirpath_sz, + .p_replen = MNT_dec_mountres_sz, + .p_statidx = MOUNTPROC_MNT, + .p_name = "MOUNT", + }, + [MOUNTPROC_UMNT] = { + .p_proc = MOUNTPROC_UMNT, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_arglen = MNT_enc_dirpath_sz, + .p_statidx = MOUNTPROC_UMNT, + .p_name = "UMOUNT", + }, +}; + +static struct rpc_procinfo mnt3_procedures[] = { + [MOUNTPROC3_MNT] = { + .p_proc = MOUNTPROC3_MNT, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_decode = (kxdrproc_t)mnt_dec_mountres3, + .p_arglen = MNT_enc_dirpath_sz, + .p_replen = MNT_dec_mountres3_sz, + .p_statidx = MOUNTPROC3_MNT, + .p_name = "MOUNT", + }, + [MOUNTPROC3_UMNT] = { + .p_proc = MOUNTPROC3_UMNT, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_arglen = MNT_enc_dirpath_sz, + .p_statidx = MOUNTPROC3_UMNT, + .p_name = "UMOUNT", + }, +}; + + +static struct rpc_version mnt_version1 = { + .number = 1, + .nrprocs = 2, + .procs = mnt_procedures, +}; + +static struct rpc_version mnt_version3 = { + .number = 3, + .nrprocs = 2, + .procs = mnt3_procedures, +}; + +static struct rpc_version *mnt_version[] = { + NULL, + &mnt_version1, + NULL, + &mnt_version3, +}; + +static struct rpc_stat mnt_stats; + +static struct rpc_program mnt_program = { + .name = "mount", + .number = NFS_MNT_PROGRAM, + .nrvers = ARRAY_SIZE(mnt_version), + .version = mnt_version, + .stats = &mnt_stats, +}; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c new file mode 100644 index 00000000000..40c76678289 --- /dev/null +++ b/fs/nfs/namespace.c @@ -0,0 +1,257 @@ +/* + * linux/fs/nfs/namespace.c + * + * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> + * - Modified by David Howells <dhowells@redhat.com> + * + * NFS namespace + */ + +#include <linux/dcache.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/nfs_fs.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/vfs.h> +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +static void nfs_expire_automounts(struct work_struct *work); + +static LIST_HEAD(nfs_automount_list); +static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); +int nfs_mountpoint_expiry_timeout = 500 * HZ; + +static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + struct nfs_fh *fh, + struct nfs_fattr *fattr); + +/* + * nfs_path - reconstruct the path given an arbitrary dentry + * @base - arbitrary string to prepend to the path + * @droot - pointer to root dentry for mountpoint + * @dentry - pointer to dentry + * @buffer - result buffer + * @buflen - length of buffer + * + * Helper function for constructing the path from the + * root dentry to an arbitrary hashed dentry. + * + * This is mainly for use in figuring out the path on the + * server side when automounting on top of an existing partition. + */ +char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + char *end = buffer+buflen; + int namelen; + + *--end = '\0'; + buflen--; + spin_lock(&dcache_lock); + while (!IS_ROOT(dentry) && dentry != droot) { + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + goto Elong_unlock; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + dentry = dentry->d_parent; + } + spin_unlock(&dcache_lock); + if (*end != '/') { + if (--buflen < 0) + goto Elong; + *--end = '/'; + } + namelen = strlen(base); + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + buflen -= namelen; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, base, namelen); + return end; +Elong_unlock: + spin_unlock(&dcache_lock); +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +/* + * nfs_follow_mountpoint - handle crossing a mountpoint on the server + * @dentry - dentry of mountpoint + * @nd - nameidata info + * + * When we encounter a mountpoint on the server, we want to set up + * a mountpoint on the client too, to prevent inode numbers from + * colliding, and to allow "df" to work properly. + * On NFSv4, we also want to allow for the fact that different + * filesystems may be migrated to different servers in a failover + * situation, and that different filesystems may want to use + * different security flavours. + */ +static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +{ + struct vfsmount *mnt; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct dentry *parent; + struct nfs_fh fh; + struct nfs_fattr fattr; + int err; + + dprintk("--> nfs_follow_mountpoint()\n"); + + err = -ESTALE; + if (IS_ROOT(dentry)) + goto out_err; + + dprintk("%s: enter\n", __func__); + dput(nd->path.dentry); + nd->path.dentry = dget(dentry); + + /* Look it up again */ + parent = dget_parent(nd->path.dentry); + err = server->nfs_client->rpc_ops->lookup(parent->d_inode, + &nd->path.dentry->d_name, + &fh, &fattr); + dput(parent); + if (err != 0) + goto out_err; + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) + mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry); + else + mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, &fh, + &fattr); + err = PTR_ERR(mnt); + if (IS_ERR(mnt)) + goto out_err; + + mntget(mnt); + err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, + &nfs_automount_list); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; + } + path_put(&nd->path); + nd->path.mnt = mnt; + nd->path.dentry = dget(mnt->mnt_root); + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +out: + dprintk("%s: done, returned %d\n", __func__, err); + + dprintk("<-- nfs_follow_mountpoint() = %d\n", err); + return ERR_PTR(err); +out_err: + path_put(&nd->path); + goto out; +out_follow: + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path)) + ; + err = 0; + goto out; +} + +const struct inode_operations nfs_mountpoint_inode_operations = { + .follow_link = nfs_follow_mountpoint, + .getattr = nfs_getattr, +}; + +const struct inode_operations nfs_referral_inode_operations = { + .follow_link = nfs_follow_mountpoint, +}; + +static void nfs_expire_automounts(struct work_struct *work) +{ + struct list_head *list = &nfs_automount_list; + + mark_mounts_for_expiry(list); + if (!list_empty(list)) + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +} + +void nfs_release_automount_timer(void) +{ + if (list_empty(&nfs_automount_list)) + cancel_delayed_work(&nfs_automount_task); +} + +/* + * Clone a mountpoint of the appropriate type + */ +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, + const char *devname, + struct nfs_clone_mount *mountdata) +{ +#ifdef CONFIG_NFS_V4 + struct vfsmount *mnt = ERR_PTR(-EINVAL); + switch (server->nfs_client->rpc_ops->version) { + case 2: + case 3: + mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); + break; + case 4: + mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); + } + return mnt; +#else + return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); +#endif +} + +/** + * nfs_do_submount - set up mountpoint when crossing a filesystem boundary + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fh - filehandle for new root dentry + * @fattr - attributes for new root inode + * + */ +static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .fh = fh, + .fattr = fattr, + }; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); + char *page = (char *) __get_free_page(GFP_USER); + char *devname; + + dprintk("--> nfs_do_submount()\n"); + + dprintk("%s: submounting on %s/%s\n", __func__, + dentry->d_parent->d_name.name, + dentry->d_name.name); + if (page == NULL) + goto out; + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + mnt = (struct vfsmount *)devname; + if (IS_ERR(devname)) + goto free_page; + mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); +free_page: + free_page((unsigned long)page); +out: + dprintk("%s: done\n", __func__); + + dprintk("<-- nfs_do_submount() = %p\n", mnt); + return mnt; +} diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c new file mode 100644 index 00000000000..7bc2da8efd4 --- /dev/null +++ b/fs/nfs/nfs2xdr.c @@ -0,0 +1,758 @@ +/* + * linux/fs/nfs/nfs2xdr.c + * + * XDR functions to encode/decode NFS RPC arguments and results. + * + * Copyright (C) 1992, 1993, 1994 Rick Sladkey + * Copyright (C) 1996 Olaf Kirch + * 04 Aug 1998 Ion Badulescu <ionut@cs.columbia.edu> + * FIFO's need special handling in NFSv2 + */ + +#include <linux/param.h> +#include <linux/time.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/proc_fs.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs_fs.h> +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_XDR + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +/* + * Declare the space requirements for NFS arguments and replies as + * number of 32bit-words + */ +#define NFS_fhandle_sz (8) +#define NFS_sattr_sz (8) +#define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2)) +#define NFS_path_sz (1+(NFS2_MAXPATHLEN>>2)) +#define NFS_fattr_sz (17) +#define NFS_info_sz (5) +#define NFS_entry_sz (NFS_filename_sz+3) + +#define NFS_diropargs_sz (NFS_fhandle_sz+NFS_filename_sz) +#define NFS_removeargs_sz (NFS_fhandle_sz+NFS_filename_sz) +#define NFS_sattrargs_sz (NFS_fhandle_sz+NFS_sattr_sz) +#define NFS_readlinkargs_sz (NFS_fhandle_sz) +#define NFS_readargs_sz (NFS_fhandle_sz+3) +#define NFS_writeargs_sz (NFS_fhandle_sz+4) +#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz) +#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz) +#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz) +#define NFS_symlinkargs_sz (NFS_diropargs_sz+1+NFS_sattr_sz) +#define NFS_readdirargs_sz (NFS_fhandle_sz+2) + +#define NFS_attrstat_sz (1+NFS_fattr_sz) +#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz) +#define NFS_readlinkres_sz (2) +#define NFS_readres_sz (1+NFS_fattr_sz+1) +#define NFS_writeres_sz (NFS_attrstat_sz) +#define NFS_stat_sz (1) +#define NFS_readdirres_sz (1) +#define NFS_statfsres_sz (1+NFS_info_sz) + +/* + * Common NFS XDR functions as inlines + */ +static inline __be32 * +xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fhandle) +{ + memcpy(p, fhandle->data, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); +} + +static inline __be32 * +xdr_decode_fhandle(__be32 *p, struct nfs_fh *fhandle) +{ + /* NFSv2 handles have a fixed length */ + fhandle->size = NFS2_FHSIZE; + memcpy(fhandle->data, p, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); +} + +static inline __be32* +xdr_encode_time(__be32 *p, struct timespec *timep) +{ + *p++ = htonl(timep->tv_sec); + /* Convert nanoseconds into microseconds */ + *p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0); + return p; +} + +static inline __be32* +xdr_encode_current_server_time(__be32 *p, struct timespec *timep) +{ + /* + * Passing the invalid value useconds=1000000 is a + * Sun convention for "set to current server time". + * It's needed to make permissions checks for the + * "touch" program across v2 mounts to Solaris and + * Irix boxes work correctly. See description of + * sattr in section 6.1 of "NFS Illustrated" by + * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5 + */ + *p++ = htonl(timep->tv_sec); + *p++ = htonl(1000000); + return p; +} + +static inline __be32* +xdr_decode_time(__be32 *p, struct timespec *timep) +{ + timep->tv_sec = ntohl(*p++); + /* Convert microseconds into nanoseconds */ + timep->tv_nsec = ntohl(*p++) * 1000; + return p; +} + +static __be32 * +xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) +{ + u32 rdev, type; + type = ntohl(*p++); + fattr->mode = ntohl(*p++); + fattr->nlink = ntohl(*p++); + fattr->uid = ntohl(*p++); + fattr->gid = ntohl(*p++); + fattr->size = ntohl(*p++); + fattr->du.nfs2.blocksize = ntohl(*p++); + rdev = ntohl(*p++); + fattr->du.nfs2.blocks = ntohl(*p++); + fattr->fsid.major = ntohl(*p++); + fattr->fsid.minor = 0; + fattr->fileid = ntohl(*p++); + p = xdr_decode_time(p, &fattr->atime); + p = xdr_decode_time(p, &fattr->mtime); + p = xdr_decode_time(p, &fattr->ctime); + fattr->valid |= NFS_ATTR_FATTR_V2; + fattr->rdev = new_decode_dev(rdev); + if (type == NFCHR && rdev == NFS2_FIFO_DEV) { + fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; + fattr->rdev = 0; + } + return p; +} + +static inline __be32 * +xdr_encode_sattr(__be32 *p, struct iattr *attr) +{ + const __be32 not_set = __constant_htonl(0xFFFFFFFF); + + *p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set; + *p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set; + *p++ = (attr->ia_valid & ATTR_GID) ? htonl(attr->ia_gid) : not_set; + *p++ = (attr->ia_valid & ATTR_SIZE) ? htonl(attr->ia_size) : not_set; + + if (attr->ia_valid & ATTR_ATIME_SET) { + p = xdr_encode_time(p, &attr->ia_atime); + } else if (attr->ia_valid & ATTR_ATIME) { + p = xdr_encode_current_server_time(p, &attr->ia_atime); + } else { + *p++ = not_set; + *p++ = not_set; + } + + if (attr->ia_valid & ATTR_MTIME_SET) { + p = xdr_encode_time(p, &attr->ia_mtime); + } else if (attr->ia_valid & ATTR_MTIME) { + p = xdr_encode_current_server_time(p, &attr->ia_mtime); + } else { + *p++ = not_set; + *p++ = not_set; + } + return p; +} + +/* + * NFS encode functions + */ +/* + * Encode file handle argument + * GETATTR, READLINK, STATFS + */ +static int +nfs_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh) +{ + p = xdr_encode_fhandle(p, fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SETATTR arguments + */ +static int +nfs_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs_sattrargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode directory ops argument + * LOOKUP, RMDIR + */ +static int +nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode REMOVE argument + */ +static int +nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name.name, args->name.len); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Arguments to a READ call. Since we read data directly into the page + * cache, we also set up the reply iovec here so that iov[1] points + * exactly to the page we want to fetch. + */ +static int +nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; + unsigned int replen; + u32 offset = (u32)args->offset; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(offset); + *p++ = htonl(count); + *p++ = htonl(count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, count); + req->rq_rcv_buf.flags |= XDRBUF_READ; + return 0; +} + +/* + * Decode READ reply + */ +static int +nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) +{ + struct kvec *iov = req->rq_rcv_buf.head; + size_t hdrlen; + u32 count, recvd; + int status; + + if ((status = ntohl(*p++))) + return nfs_stat_to_errno(status); + p = xdr_decode_fattr(p, res->fattr); + + count = ntohl(*p++); + res->eof = 0; + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + dprintk("NFS: READ reply header overflowed:" + "length %Zu > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READ header is short. iovec will be shifted.\n"); + xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); + } + + recvd = req->rq_rcv_buf.len - hdrlen; + if (count > recvd) { + dprintk("NFS: server cheating in read reply: " + "count %u > recvd %u\n", count, recvd); + count = recvd; + } + + dprintk("RPC: readres OK count %u\n", count); + if (count < res->count) + res->count = count; + + return count; +} + + +/* + * Write arguments. Splice the buffer to be written into the iovec. + */ +static int +nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +{ + struct xdr_buf *sndbuf = &req->rq_snd_buf; + u32 offset = (u32)args->offset; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(offset); + *p++ = htonl(offset); + *p++ = htonl(count); + *p++ = htonl(count); + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + + /* Copy the page array */ + xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); + sndbuf->flags |= XDRBUF_WRITE; + return 0; +} + +/* + * Encode create arguments + * CREATE, MKDIR + */ +static int +nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode RENAME arguments + */ +static int +nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode LINK arguments + */ +static int +nfs_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs_linkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SYMLINK arguments + */ +static int +nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *args) +{ + struct xdr_buf *sndbuf = &req->rq_snd_buf; + size_t pad; + + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); + *p++ = htonl(args->pathlen); + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + + xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen); + + /* + * xdr_encode_pages may have added a few bytes to ensure the + * pathname ends on a 4-byte boundary. Start encoding the + * attributes after the pad bytes. + */ + pad = sndbuf->tail->iov_len; + if (pad > 0) + p++; + p = xdr_encode_sattr(p, args->sattr); + sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad; + return 0; +} + +/* + * Encode arguments to readdir call + */ +static int +nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) +{ + struct rpc_task *task = req->rq_task; + struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth; + unsigned int replen; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->cookie); + *p++ = htonl(count); /* see above */ + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); + return 0; +} + +/* + * Decode the result of a readdir call. + * We're not really decoding anymore, we just leave the buffer untouched + * and only check that it is syntactically correct. + * The real decoding happens in nfs_decode_entry below, called directly + * from nfs_readdir for each entry. + */ +static int +nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + struct page **page; + size_t hdrlen; + unsigned int pglen, recvd; + u32 len; + int status, nr = 0; + __be32 *end, *entry, *kaddr; + + if ((status = ntohl(*p++))) + return nfs_stat_to_errno(status); + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + dprintk("NFS: READDIR reply header overflowed:" + "length %Zu > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); + } + + pglen = rcvbuf->page_len; + recvd = rcvbuf->len - hdrlen; + if (pglen > recvd) + pglen = recvd; + page = rcvbuf->pages; + kaddr = p = kmap_atomic(*page, KM_USER0); + end = (__be32 *)((char *)p + pglen); + entry = p; + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { + if (p + 2 > end) + goto short_pkt; + p++; /* fileid */ + len = ntohl(*p++); + p += XDR_QUADLEN(len) + 1; /* name plus cookie */ + if (len > NFS2_MAXNAMLEN) { + dprintk("NFS: giant filename in readdir (len 0x%x)!\n", + len); + goto err_unmap; + } + if (p + 2 > end) + goto short_pkt; + entry = p; + } + + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + out: + kunmap_atomic(kaddr, KM_USER0); + return nr; + short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ + entry[0] = entry[1] = 0; + if (!nr) + nr = -errno_NFSERR_IO; + goto out; +err_unmap: + nr = -errno_NFSERR_IO; + goto out; +} + +__be32 * +nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +{ + if (!*p++) { + if (!*p) + return ERR_PTR(-EAGAIN); + entry->eof = 1; + return ERR_PTR(-EBADCOOKIE); + } + + entry->ino = ntohl(*p++); + entry->len = ntohl(*p++); + entry->name = (const char *) p; + p += XDR_QUADLEN(entry->len); + entry->prev_cookie = entry->cookie; + entry->cookie = ntohl(*p++); + entry->eof = !p[0] && p[1]; + + return p; +} + +/* + * NFS XDR decode functions + */ +/* + * Decode simple status reply + */ +static int +nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy) +{ + int status; + + if ((status = ntohl(*p++)) != 0) + status = nfs_stat_to_errno(status); + return status; +} + +/* + * Decode attrstat reply + * GETATTR, SETATTR, WRITE + */ +static int +nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +{ + int status; + + if ((status = ntohl(*p++))) + return nfs_stat_to_errno(status); + xdr_decode_fattr(p, fattr); + return 0; +} + +/* + * Decode diropres reply + * LOOKUP, CREATE, MKDIR + */ +static int +nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res) +{ + int status; + + if ((status = ntohl(*p++))) + return nfs_stat_to_errno(status); + p = xdr_decode_fhandle(p, res->fh); + xdr_decode_fattr(p, res->fattr); + return 0; +} + +/* + * Encode READLINK args + */ +static int +nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); + return 0; +} + +/* + * Decode READLINK reply + */ +static int +nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + size_t hdrlen; + u32 len, recvd; + char *kaddr; + int status; + + if ((status = ntohl(*p++))) + return nfs_stat_to_errno(status); + /* Convert length of symlink */ + len = ntohl(*p++); + if (len >= rcvbuf->page_len) { + dprintk("nfs: server returned giant symlink!\n"); + return -ENAMETOOLONG; + } + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + dprintk("NFS: READLINK reply header overflowed:" + "length %Zu > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); + } + recvd = req->rq_rcv_buf.len - hdrlen; + if (recvd < len) { + dprintk("NFS: server cheating in readlink reply: " + "count %u > recvd %u\n", len, recvd); + return -EIO; + } + + /* NULL terminate the string we got */ + kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); + kaddr[len+rcvbuf->page_base] = '\0'; + kunmap_atomic(kaddr, KM_USER0); + return 0; +} + +/* + * Decode WRITE reply + */ +static int +nfs_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) +{ + res->verf->committed = NFS_FILE_SYNC; + return nfs_xdr_attrstat(req, p, res->fattr); +} + +/* + * Decode STATFS reply + */ +static int +nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res) +{ + int status; + + if ((status = ntohl(*p++))) + return nfs_stat_to_errno(status); + + res->tsize = ntohl(*p++); + res->bsize = ntohl(*p++); + res->blocks = ntohl(*p++); + res->bfree = ntohl(*p++); + res->bavail = ntohl(*p++); + return 0; +} + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. + */ +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, -EPERM }, + { NFSERR_NOENT, -ENOENT }, + { NFSERR_IO, -errno_NFSERR_IO}, + { NFSERR_NXIO, -ENXIO }, +/* { NFSERR_EAGAIN, -EAGAIN }, */ + { NFSERR_ACCES, -EACCES }, + { NFSERR_EXIST, -EEXIST }, + { NFSERR_XDEV, -EXDEV }, + { NFSERR_NODEV, -ENODEV }, + { NFSERR_NOTDIR, -ENOTDIR }, + { NFSERR_ISDIR, -EISDIR }, + { NFSERR_INVAL, -EINVAL }, + { NFSERR_FBIG, -EFBIG }, + { NFSERR_NOSPC, -ENOSPC }, + { NFSERR_ROFS, -EROFS }, + { NFSERR_MLINK, -EMLINK }, + { NFSERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFSERR_NOTEMPTY, -ENOTEMPTY }, + { NFSERR_DQUOT, -EDQUOT }, + { NFSERR_STALE, -ESTALE }, + { NFSERR_REMOTE, -EREMOTE }, +#ifdef EWFLUSH + { NFSERR_WFLUSH, -EWFLUSH }, +#endif + { NFSERR_BADHANDLE, -EBADHANDLE }, + { NFSERR_NOT_SYNC, -ENOTSYNC }, + { NFSERR_BAD_COOKIE, -EBADCOOKIE }, + { NFSERR_NOTSUPP, -ENOTSUPP }, + { NFSERR_TOOSMALL, -ETOOSMALL }, + { NFSERR_SERVERFAULT, -EREMOTEIO }, + { NFSERR_BADTYPE, -EBADTYPE }, + { NFSERR_JUKEBOX, -EJUKEBOX }, + { -1, -EIO } +}; + +/* + * Convert an NFS error code to a local one. + * This one is used jointly by NFSv2 and NFSv3. + */ +int +nfs_stat_to_errno(int stat) +{ + int i; + + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + return nfs_errtbl[i].errno; +} + +#define PROC(proc, argtype, restype, timer) \ +[NFSPROC_##proc] = { \ + .p_proc = NFSPROC_##proc, \ + .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ + .p_arglen = NFS_##argtype##_sz, \ + .p_replen = NFS_##restype##_sz, \ + .p_timer = timer, \ + .p_statidx = NFSPROC_##proc, \ + .p_name = #proc, \ + } +struct rpc_procinfo nfs_procedures[] = { + PROC(GETATTR, fhandle, attrstat, 1), + PROC(SETATTR, sattrargs, attrstat, 0), + PROC(LOOKUP, diropargs, diropres, 2), + PROC(READLINK, readlinkargs, readlinkres, 3), + PROC(READ, readargs, readres, 3), + PROC(WRITE, writeargs, writeres, 4), + PROC(CREATE, createargs, diropres, 0), + PROC(REMOVE, removeargs, stat, 0), + PROC(RENAME, renameargs, stat, 0), + PROC(LINK, linkargs, stat, 0), + PROC(SYMLINK, symlinkargs, stat, 0), + PROC(MKDIR, createargs, diropres, 0), + PROC(RMDIR, diropargs, stat, 0), + PROC(READDIR, readdirargs, readdirres, 3), + PROC(STATFS, fhandle, statfsres, 0), +}; + +struct rpc_version nfs_version2 = { + .number = 2, + .nrprocs = ARRAY_SIZE(nfs_procedures), + .procs = nfs_procedures +}; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c new file mode 100644 index 00000000000..bac60515a4b --- /dev/null +++ b/fs/nfs/nfs3acl.c @@ -0,0 +1,434 @@ +#include <linux/fs.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> +#include <linux/posix_acl_xattr.h> +#include <linux/nfsacl.h> + +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PROC + +ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int pos=0, len=0; + +# define output(s) do { \ + if (pos + sizeof(s) <= size) { \ + memcpy(buffer + pos, s, sizeof(s)); \ + pos += sizeof(s); \ + } \ + len += sizeof(s); \ + } while(0) + + acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_access"); + posix_acl_release(acl); + } + + if (S_ISDIR(inode->i_mode)) { + acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_default"); + posix_acl_release(acl); + } + } + +# undef output + + if (!buffer || len <= size) + return len; + return -ERANGE; +} + +ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error = 0; + + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = nfs3_proc_getacl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + if (type == ACL_TYPE_ACCESS && acl->a_count == 0) + error = -ENODATA; + else + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + } else + error = -ENODATA; + + return error; +} + +int nfs3_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error; + + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + error = nfs3_proc_setacl(inode, type, acl); + posix_acl_release(acl); + + return error; +} + +int nfs3_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + int type; + + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + return nfs3_proc_setacl(inode, type, NULL); +} + +static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) +{ + if (!IS_ERR(nfsi->acl_access)) { + posix_acl_release(nfsi->acl_access); + nfsi->acl_access = ERR_PTR(-EAGAIN); + } + if (!IS_ERR(nfsi->acl_default)) { + posix_acl_release(nfsi->acl_default); + nfsi->acl_default = ERR_PTR(-EAGAIN); + } +} + +void nfs3_forget_cached_acls(struct inode *inode) +{ + dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, + inode->i_ino); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct posix_acl *acl = ERR_PTR(-EINVAL); + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = nfsi->acl_access; + break; + + case ACL_TYPE_DEFAULT: + acl = nfsi->acl_default; + break; + + default: + goto out; + } + if (IS_ERR(acl)) + acl = ERR_PTR(-EAGAIN); + else + acl = posix_acl_dup(acl); +out: + spin_unlock(&inode->i_lock); + dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, + inode->i_ino, type, acl); + return acl; +} + +static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id, + inode->i_ino, acl, dfacl); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + if (!IS_ERR(acl)) + nfsi->acl_access = posix_acl_dup(acl); + if (!IS_ERR(dfacl)) + nfsi->acl_default = posix_acl_dup(dfacl); + spin_unlock(&inode->i_lock); +} + +struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_getaclargs args = { + .fh = NFS_FH(inode), + /* The xdr layer may allocate pages here. */ + .pages = pages, + }; + struct nfs3_getaclres res = { + .fattr = &fattr, + }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &res, + }; + struct posix_acl *acl; + int status, count; + + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + return ERR_PTR(-EOPNOTSUPP); + + status = nfs_revalidate_inode(server, inode); + if (status < 0) + return ERR_PTR(status); + acl = nfs3_get_cached_acl(inode, type); + if (acl != ERR_PTR(-EAGAIN)) + return acl; + acl = NULL; + + /* + * Only get the access acl when explicitly requested: We don't + * need it for access decisions, and only some applications use + * it. Applications which request the access acl first are not + * penalized from this optimization. + */ + if (type == ACL_TYPE_ACCESS) + args.mask |= NFS_ACLCNT|NFS_ACL; + if (S_ISDIR(inode->i_mode)) + args.mask |= NFS_DFACLCNT|NFS_DFACL; + if (args.mask == 0) + return NULL; + + dprintk("NFS call getacl\n"); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; + nfs_fattr_init(&fattr); + status = rpc_call_sync(server->client_acl, &msg, 0); + dprintk("NFS reply getacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. */ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL extension not supported; disabling\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + default: + goto getout; + } + if ((args.mask & res.mask) != args.mask) { + status = -EIO; + goto getout; + } + + if (res.acl_access != NULL) { + if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) { + posix_acl_release(res.acl_access); + res.acl_access = NULL; + } + } + nfs3_cache_acls(inode, + (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL), + (res.mask & NFS_DFACL) ? res.acl_default : ERR_PTR(-EINVAL)); + + switch(type) { + case ACL_TYPE_ACCESS: + acl = res.acl_access; + res.acl_access = NULL; + break; + + case ACL_TYPE_DEFAULT: + acl = res.acl_default; + res.acl_default = NULL; + } + +getout: + posix_acl_release(res.acl_access); + posix_acl_release(res.acl_default); + + if (status != 0) { + posix_acl_release(acl); + acl = ERR_PTR(status); + } + return acl; +} + +static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES]; + struct nfs3_setaclargs args = { + .inode = inode, + .mask = NFS_ACL, + .acl_access = acl, + .pages = pages, + }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &fattr, + }; + int status; + + status = -EOPNOTSUPP; + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + goto out; + + /* We are doing this here, because XDR marshalling can only + return -ENOMEM. */ + status = -ENOSPC; + if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (S_ISDIR(inode->i_mode)) { + args.mask |= NFS_DFACL; + args.acl_default = dfacl; + args.len = nfsacl_size(acl, dfacl); + } else + args.len = nfsacl_size(acl, NULL); + + if (args.len > NFS_ACL_INLINE_BUFSIZE) { + unsigned int npages = 1 + ((args.len - 1) >> PAGE_SHIFT); + + status = -ENOMEM; + do { + args.pages[args.npages] = alloc_page(GFP_KERNEL); + if (args.pages[args.npages] == NULL) + goto out_freepages; + args.npages++; + } while (args.npages < npages); + } + + dprintk("NFS call setacl\n"); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; + nfs_fattr_init(&fattr); + status = rpc_call_sync(server->client_acl, &msg, 0); + nfs_access_zap_cache(inode); + nfs_zap_acl_cache(inode); + dprintk("NFS reply setacl: %d\n", status); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + nfs3_cache_acls(inode, acl, dfacl); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL SETACL RPC not supported" + "(will not retry)\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + } +out_freepages: + while (args.npages != 0) { + args.npages--; + __free_page(args.pages[args.npages]); + } +out: + return status; +} + +int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *alloc = NULL, *dfacl = NULL; + int status; + + if (S_ISDIR(inode->i_mode)) { + switch(type) { + case ACL_TYPE_ACCESS: + alloc = dfacl = nfs3_proc_getacl(inode, + ACL_TYPE_DEFAULT); + if (IS_ERR(alloc)) + goto fail; + break; + + case ACL_TYPE_DEFAULT: + dfacl = acl; + alloc = acl = nfs3_proc_getacl(inode, + ACL_TYPE_ACCESS); + if (IS_ERR(alloc)) + goto fail; + break; + + default: + return -EINVAL; + } + } else if (type != ACL_TYPE_ACCESS) + return -EINVAL; + + if (acl == NULL) { + alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(alloc)) + goto fail; + } + status = nfs3_proc_setacls(inode, acl, dfacl); + posix_acl_release(alloc); + return status; + +fail: + return PTR_ERR(alloc); +} + +int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode) +{ + struct posix_acl *dfacl, *acl; + int error = 0; + + dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(dfacl)) { + error = PTR_ERR(dfacl); + return (error == -EOPNOTSUPP) ? 0 : error; + } + if (!dfacl) + return 0; + acl = posix_acl_clone(dfacl, GFP_KERNEL); + error = -ENOMEM; + if (!acl) + goto out_release_dfacl; + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out_release_acl; + error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? + dfacl : NULL); +out_release_acl: + posix_acl_release(acl); +out_release_dfacl: + posix_acl_release(dfacl); + return error; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c new file mode 100644 index 00000000000..3f8881d1a05 --- /dev/null +++ b/fs/nfs/nfs3proc.c @@ -0,0 +1,836 @@ +/* + * linux/fs/nfs/nfs3proc.c + * + * Client-side NFSv3 procedures stubs. + * + * Copyright (C) 1997, Olaf Kirch + */ + +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/lockd/bind.h> +#include <linux/nfs_mount.h> + +#include "iostat.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PROC + +/* A wrapper to handle the EJUKEBOX error message */ +static int +nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +{ + int res; + do { + res = rpc_call_sync(clnt, msg, flags); + if (res != -EJUKEBOX) + break; + schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + res = -ERESTARTSYS; + } while (!fatal_signal_pending(current)); + return res; +} + +#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags) + +static int +nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) +{ + if (task->tk_status != -EJUKEBOX) + return 0; + nfs_inc_stats(inode, NFSIOS_DELAY); + task->tk_status = 0; + rpc_restart_call(task); + rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); + return 1; +} + +static int +do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; + int status; + + dprintk("%s: call fsinfo\n", __func__); + nfs_fattr_init(info->fattr); + status = rpc_call_sync(client, &msg, 0); + dprintk("%s: reply fsinfo: %d\n", __func__, status); + if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_resp = info->fattr; + status = rpc_call_sync(client, &msg, 0); + dprintk("%s: reply getattr: %d\n", __func__, status); + } + return status; +} + +/* + * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb + */ +static int +nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + status = do_proc_get_root(server->client, fhandle, info); + if (status && server->nfs_client->cl_rpcclient != server->client) + status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info); + return status; +} + +/* + * One function for each procedure in the NFS protocol. + */ +static int +nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; + int status; + + dprintk("NFS call getattr\n"); + nfs_fattr_init(fattr); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply getattr: %d\n", status); + return status; +} + +static int +nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) +{ + struct inode *inode = dentry->d_inode; + struct nfs3_sattrargs arg = { + .fh = NFS_FH(inode), + .sattr = sattr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; + int status; + + dprintk("NFS call setattr\n"); + if (sattr->ia_valid & ATTR_FILE) + msg.rpc_cred = nfs_file_cred(sattr->ia_file); + nfs_fattr_init(fattr); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); + dprintk("NFS reply setattr: %d\n", status); + return status; +} + +static int +nfs3_proc_lookup(struct inode *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs_fattr dir_attr; + struct nfs3_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + struct nfs3_diropres res = { + .dir_attr = &dir_attr, + .fh = fhandle, + .fattr = fattr + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + dprintk("NFS call lookup %s\n", name->name); + nfs_fattr_init(&dir_attr); + nfs_fattr_init(fattr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_refresh_inode(dir, &dir_attr); + if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_argp = fhandle; + msg.rpc_resp = fattr; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + } + dprintk("NFS reply lookup: %d\n", status); + return status; +} + +static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) +{ + struct nfs_fattr fattr; + struct nfs3_accessargs arg = { + .fh = NFS_FH(inode), + }; + struct nfs3_accessres res = { + .fattr = &fattr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = entry->cred, + }; + int mode = entry->mask; + int status; + + dprintk("NFS call access\n"); + + if (mode & MAY_READ) + arg.access |= NFS3_ACCESS_READ; + if (S_ISDIR(inode->i_mode)) { + if (mode & MAY_WRITE) + arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE; + if (mode & MAY_EXEC) + arg.access |= NFS3_ACCESS_LOOKUP; + } else { + if (mode & MAY_WRITE) + arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND; + if (mode & MAY_EXEC) + arg.access |= NFS3_ACCESS_EXECUTE; + } + nfs_fattr_init(&fattr); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_refresh_inode(inode, &fattr); + if (status == 0) { + entry->mask = 0; + if (res.access & NFS3_ACCESS_READ) + entry->mask |= MAY_READ; + if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; + } + dprintk("NFS reply access: %d\n", status); + return status; +} + +static int nfs3_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) +{ + struct nfs_fattr fattr; + struct nfs3_readlinkargs args = { + .fh = NFS_FH(inode), + .pgbase = pgbase, + .pglen = pglen, + .pages = &page + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK], + .rpc_argp = &args, + .rpc_resp = &fattr, + }; + int status; + + dprintk("NFS call readlink\n"); + nfs_fattr_init(&fattr); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_refresh_inode(inode, &fattr); + dprintk("NFS reply readlink: %d\n", status); + return status; +} + +struct nfs3_createdata { + struct rpc_message msg; + union { + struct nfs3_createargs create; + struct nfs3_mkdirargs mkdir; + struct nfs3_symlinkargs symlink; + struct nfs3_mknodargs mknod; + } arg; + struct nfs3_diropres res; + struct nfs_fh fh; + struct nfs_fattr fattr; + struct nfs_fattr dir_attr; +}; + +static struct nfs3_createdata *nfs3_alloc_createdata(void) +{ + struct nfs3_createdata *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data != NULL) { + data->msg.rpc_argp = &data->arg; + data->msg.rpc_resp = &data->res; + data->res.fh = &data->fh; + data->res.fattr = &data->fattr; + data->res.dir_attr = &data->dir_attr; + nfs_fattr_init(data->res.fattr); + nfs_fattr_init(data->res.dir_attr); + } + return data; +} + +static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data) +{ + int status; + + status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); + nfs_post_op_update_inode(dir, data->res.dir_attr); + if (status == 0) + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + return status; +} + +static void nfs3_free_createdata(struct nfs3_createdata *data) +{ + kfree(data); +} + +/* + * Create a regular file. + */ +static int +nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + int flags, struct nameidata *nd) +{ + struct nfs3_createdata *data; + mode_t mode = sattr->ia_mode; + int status = -ENOMEM; + + dprintk("NFS call create %s\n", dentry->d_name.name); + + data = nfs3_alloc_createdata(); + if (data == NULL) + goto out; + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_CREATE]; + data->arg.create.fh = NFS_FH(dir); + data->arg.create.name = dentry->d_name.name; + data->arg.create.len = dentry->d_name.len; + data->arg.create.sattr = sattr; + + data->arg.create.createmode = NFS3_CREATE_UNCHECKED; + if (flags & O_EXCL) { + data->arg.create.createmode = NFS3_CREATE_EXCLUSIVE; + data->arg.create.verifier[0] = jiffies; + data->arg.create.verifier[1] = current->pid; + } + + sattr->ia_mode &= ~current_umask(); + + for (;;) { + status = nfs3_do_create(dir, dentry, data); + + if (status != -ENOTSUPP) + break; + /* If the server doesn't support the exclusive creation + * semantics, try again with simple 'guarded' mode. */ + switch (data->arg.create.createmode) { + case NFS3_CREATE_EXCLUSIVE: + data->arg.create.createmode = NFS3_CREATE_GUARDED; + break; + + case NFS3_CREATE_GUARDED: + data->arg.create.createmode = NFS3_CREATE_UNCHECKED; + break; + + case NFS3_CREATE_UNCHECKED: + goto out; + } + nfs_fattr_init(data->res.dir_attr); + nfs_fattr_init(data->res.fattr); + } + + if (status != 0) + goto out; + + /* When we created the file with exclusive semantics, make + * sure we set the attributes afterwards. */ + if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) { + dprintk("NFS call setattr (post-create)\n"); + + if (!(sattr->ia_valid & ATTR_ATIME_SET)) + sattr->ia_valid |= ATTR_ATIME; + if (!(sattr->ia_valid & ATTR_MTIME_SET)) + sattr->ia_valid |= ATTR_MTIME; + + /* Note: we could use a guarded setattr here, but I'm + * not sure this buys us anything (and I'd have + * to revamp the NFSv3 XDR code) */ + status = nfs3_proc_setattr(dentry, data->res.fattr, sattr); + nfs_post_op_update_inode(dentry->d_inode, data->res.fattr); + dprintk("NFS reply setattr (post-create): %d\n", status); + if (status != 0) + goto out; + } + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: + nfs3_free_createdata(data); + dprintk("NFS reply create: %d\n", status); + return status; +} + +static int +nfs3_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs_removeargs arg = { + .fh = NFS_FH(dir), + .name.len = name->len, + .name.name = name->name, + }; + struct nfs_removeres res; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + dprintk("NFS call remove %s\n", name->name); + nfs_fattr_init(&res.dir_attr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_post_op_update_inode(dir, &res.dir_attr); + dprintk("NFS reply remove: %d\n", status); + return status; +} + +static void +nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +{ + msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; +} + +static int +nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) +{ + struct nfs_removeres *res; + if (nfs3_async_handle_jukebox(task, dir)) + return 0; + res = task->tk_msg.rpc_resp; + nfs_post_op_update_inode(dir, &res->dir_attr); + return 1; +} + +static int +nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs_fattr old_dir_attr, new_dir_attr; + struct nfs3_renameargs arg = { + .fromfh = NFS_FH(old_dir), + .fromname = old_name->name, + .fromlen = old_name->len, + .tofh = NFS_FH(new_dir), + .toname = new_name->name, + .tolen = new_name->len + }; + struct nfs3_renameres res = { + .fromattr = &old_dir_attr, + .toattr = &new_dir_attr + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); + nfs_fattr_init(&old_dir_attr); + nfs_fattr_init(&new_dir_attr); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); + nfs_post_op_update_inode(old_dir, &old_dir_attr); + nfs_post_op_update_inode(new_dir, &new_dir_attr); + dprintk("NFS reply rename: %d\n", status); + return status; +} + +static int +nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs_fattr dir_attr, fattr; + struct nfs3_linkargs arg = { + .fromfh = NFS_FH(inode), + .tofh = NFS_FH(dir), + .toname = name->name, + .tolen = name->len + }; + struct nfs3_linkres res = { + .dir_attr = &dir_attr, + .fattr = &fattr + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + dprintk("NFS call link %s\n", name->name); + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_post_op_update_inode(dir, &dir_attr); + nfs_post_op_update_inode(inode, &fattr); + dprintk("NFS reply link: %d\n", status); + return status; +} + +static int +nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, + unsigned int len, struct iattr *sattr) +{ + struct nfs3_createdata *data; + int status = -ENOMEM; + + if (len > NFS3_MAXPATHLEN) + return -ENAMETOOLONG; + + dprintk("NFS call symlink %s\n", dentry->d_name.name); + + data = nfs3_alloc_createdata(); + if (data == NULL) + goto out; + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK]; + data->arg.symlink.fromfh = NFS_FH(dir); + data->arg.symlink.fromname = dentry->d_name.name; + data->arg.symlink.fromlen = dentry->d_name.len; + data->arg.symlink.pages = &page; + data->arg.symlink.pathlen = len; + data->arg.symlink.sattr = sattr; + + status = nfs3_do_create(dir, dentry, data); + + nfs3_free_createdata(data); +out: + dprintk("NFS reply symlink: %d\n", status); + return status; +} + +static int +nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) +{ + struct nfs3_createdata *data; + int mode = sattr->ia_mode; + int status = -ENOMEM; + + dprintk("NFS call mkdir %s\n", dentry->d_name.name); + + sattr->ia_mode &= ~current_umask(); + + data = nfs3_alloc_createdata(); + if (data == NULL) + goto out; + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR]; + data->arg.mkdir.fh = NFS_FH(dir); + data->arg.mkdir.name = dentry->d_name.name; + data->arg.mkdir.len = dentry->d_name.len; + data->arg.mkdir.sattr = sattr; + + status = nfs3_do_create(dir, dentry, data); + if (status != 0) + goto out; + + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: + nfs3_free_createdata(data); + dprintk("NFS reply mkdir: %d\n", status); + return status; +} + +static int +nfs3_proc_rmdir(struct inode *dir, struct qstr *name) +{ + struct nfs_fattr dir_attr; + struct nfs3_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR], + .rpc_argp = &arg, + .rpc_resp = &dir_attr, + }; + int status; + + dprintk("NFS call rmdir %s\n", name->name); + nfs_fattr_init(&dir_attr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_post_op_update_inode(dir, &dir_attr); + dprintk("NFS reply rmdir: %d\n", status); + return status; +} + +/* + * The READDIR implementation is somewhat hackish - we pass the user buffer + * to the encode function, which installs it in the receive iovec. + * The decode function itself doesn't perform any decoding, it just makes + * sure the reply is syntactically correct. + * + * Also note that this implementation handles both plain readdir and + * readdirplus. + */ +static int +nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) +{ + struct inode *dir = dentry->d_inode; + struct nfs_fattr dir_attr; + __be32 *verf = NFS_COOKIEVERF(dir); + struct nfs3_readdirargs arg = { + .fh = NFS_FH(dir), + .cookie = cookie, + .verf = {verf[0], verf[1]}, + .plus = plus, + .count = count, + .pages = &page + }; + struct nfs3_readdirres res = { + .dir_attr = &dir_attr, + .verf = verf, + .plus = plus + }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = cred + }; + int status; + + if (plus) + msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS]; + + dprintk("NFS call readdir%s %d\n", + plus? "plus" : "", (unsigned int) cookie); + + nfs_fattr_init(&dir_attr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + nfs_invalidate_atime(dir); + + nfs_refresh_inode(dir, &dir_attr); + dprintk("NFS reply readdir: %d\n", status); + return status; +} + +static int +nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + dev_t rdev) +{ + struct nfs3_createdata *data; + mode_t mode = sattr->ia_mode; + int status = -ENOMEM; + + dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, + MAJOR(rdev), MINOR(rdev)); + + sattr->ia_mode &= ~current_umask(); + + data = nfs3_alloc_createdata(); + if (data == NULL) + goto out; + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD]; + data->arg.mknod.fh = NFS_FH(dir); + data->arg.mknod.name = dentry->d_name.name; + data->arg.mknod.len = dentry->d_name.len; + data->arg.mknod.sattr = sattr; + data->arg.mknod.rdev = rdev; + + switch (sattr->ia_mode & S_IFMT) { + case S_IFBLK: + data->arg.mknod.type = NF3BLK; + break; + case S_IFCHR: + data->arg.mknod.type = NF3CHR; + break; + case S_IFIFO: + data->arg.mknod.type = NF3FIFO; + break; + case S_IFSOCK: + data->arg.mknod.type = NF3SOCK; + break; + default: + status = -EINVAL; + goto out; + } + + status = nfs3_do_create(dir, dentry, data); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: + nfs3_free_createdata(data); + dprintk("NFS reply mknod: %d\n", status); + return status; +} + +static int +nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsstat *stat) +{ + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT], + .rpc_argp = fhandle, + .rpc_resp = stat, + }; + int status; + + dprintk("NFS call fsstat\n"); + nfs_fattr_init(stat->fattr); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply statfs: %d\n", status); + return status; +} + +static int +do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; + int status; + + dprintk("NFS call fsinfo\n"); + nfs_fattr_init(info->fattr); + status = rpc_call_sync(client, &msg, 0); + dprintk("NFS reply fsinfo: %d\n", status); + return status; +} + +/* + * Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via + * nfs_create_server + */ +static int +nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + status = do_proc_fsinfo(server->client, fhandle, info); + if (status && server->nfs_client->cl_rpcclient != server->client) + status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info); + return status; +} + +static int +nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF], + .rpc_argp = fhandle, + .rpc_resp = info, + }; + int status; + + dprintk("NFS call pathconf\n"); + nfs_fattr_init(info->fattr); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply pathconf: %d\n", status); + return status; +} + +static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) +{ + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; + + nfs_invalidate_atime(data->inode); + nfs_refresh_inode(data->inode, &data->fattr); + return 0; +} + +static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +{ + msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; +} + +static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) +{ + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; + if (task->tk_status >= 0) + nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); + return 0; +} + +static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +{ + msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; +} + +static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) +{ + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; + nfs_refresh_inode(data->inode, data->res.fattr); + return 0; +} + +static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +{ + msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; +} + +static int +nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + + return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); +} + +const struct nfs_rpc_ops nfs_v3_clientops = { + .version = 3, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs3_dir_inode_operations, + .file_inode_ops = &nfs3_file_inode_operations, + .getroot = nfs3_proc_get_root, + .getattr = nfs3_proc_getattr, + .setattr = nfs3_proc_setattr, + .lookup = nfs3_proc_lookup, + .access = nfs3_proc_access, + .readlink = nfs3_proc_readlink, + .create = nfs3_proc_create, + .remove = nfs3_proc_remove, + .unlink_setup = nfs3_proc_unlink_setup, + .unlink_done = nfs3_proc_unlink_done, + .rename = nfs3_proc_rename, + .link = nfs3_proc_link, + .symlink = nfs3_proc_symlink, + .mkdir = nfs3_proc_mkdir, + .rmdir = nfs3_proc_rmdir, + .readdir = nfs3_proc_readdir, + .mknod = nfs3_proc_mknod, + .statfs = nfs3_proc_statfs, + .fsinfo = nfs3_proc_fsinfo, + .pathconf = nfs3_proc_pathconf, + .decode_dirent = nfs3_decode_dirent, + .read_setup = nfs3_proc_read_setup, + .read_done = nfs3_read_done, + .write_setup = nfs3_proc_write_setup, + .write_done = nfs3_write_done, + .commit_setup = nfs3_proc_commit_setup, + .commit_done = nfs3_commit_done, + .lock = nfs3_proc_lock, + .clear_acl_cache = nfs3_forget_cached_acls, + .close_context = nfs_close_context, +}; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c new file mode 100644 index 00000000000..5fe5492fbd2 --- /dev/null +++ b/fs/nfs/nfs3xdr.c @@ -0,0 +1,1209 @@ +/* + * linux/fs/nfs/nfs3xdr.c + * + * XDR functions to encode/decode NFSv3 RPC arguments and results. + * + * Copyright (C) 1996, 1997 Olaf Kirch + */ + +#include <linux/param.h> +#include <linux/time.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/proc_fs.h> +#include <linux/kdev_t.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> +#include <linux/nfsacl.h> +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_XDR + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +/* + * Declare the space requirements for NFS arguments and replies as + * number of 32bit-words + */ +#define NFS3_fhandle_sz (1+16) +#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */ +#define NFS3_sattr_sz (15) +#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2)) +#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2)) +#define NFS3_fattr_sz (21) +#define NFS3_wcc_attr_sz (6) +#define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz) +#define NFS3_post_op_attr_sz (1+NFS3_fattr_sz) +#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz) +#define NFS3_fsstat_sz +#define NFS3_fsinfo_sz +#define NFS3_pathconf_sz +#define NFS3_entry_sz (NFS3_filename_sz+3) + +#define NFS3_sattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3) +#define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz) +#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz) +#define NFS3_accessargs_sz (NFS3_fh_sz+1) +#define NFS3_readlinkargs_sz (NFS3_fh_sz) +#define NFS3_readargs_sz (NFS3_fh_sz+3) +#define NFS3_writeargs_sz (NFS3_fh_sz+5) +#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) +#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) +#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz) +#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) +#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) +#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) +#define NFS3_readdirargs_sz (NFS3_fh_sz+2) +#define NFS3_commitargs_sz (NFS3_fh_sz+3) + +#define NFS3_attrstat_sz (1+NFS3_fattr_sz) +#define NFS3_wccstat_sz (1+NFS3_wcc_data_sz) +#define NFS3_removeres_sz (NFS3_wccstat_sz) +#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) +#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1) +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3) +#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) +#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) +#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2) +#define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13) +#define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12) +#define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) +#define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) + +#define ACL3_getaclargs_sz (NFS3_fh_sz+1) +#define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \ + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) +#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \ + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) +#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) + +/* + * Map file type to S_IFMT bits + */ +static const umode_t nfs_type2fmt[] = { + [NF3BAD] = 0, + [NF3REG] = S_IFREG, + [NF3DIR] = S_IFDIR, + [NF3BLK] = S_IFBLK, + [NF3CHR] = S_IFCHR, + [NF3LNK] = S_IFLNK, + [NF3SOCK] = S_IFSOCK, + [NF3FIFO] = S_IFIFO, +}; + +/* + * Common NFS XDR functions as inlines + */ +static inline __be32 * +xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fh) +{ + return xdr_encode_array(p, fh->data, fh->size); +} + +static inline __be32 * +xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh) +{ + if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) { + memcpy(fh->data, p, fh->size); + return p + XDR_QUADLEN(fh->size); + } + return NULL; +} + +/* + * Encode/decode time. + */ +static inline __be32 * +xdr_encode_time3(__be32 *p, struct timespec *timep) +{ + *p++ = htonl(timep->tv_sec); + *p++ = htonl(timep->tv_nsec); + return p; +} + +static inline __be32 * +xdr_decode_time3(__be32 *p, struct timespec *timep) +{ + timep->tv_sec = ntohl(*p++); + timep->tv_nsec = ntohl(*p++); + return p; +} + +static __be32 * +xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) +{ + unsigned int type, major, minor; + umode_t fmode; + + type = ntohl(*p++); + if (type > NF3FIFO) + type = NF3NON; + fmode = nfs_type2fmt[type]; + fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; + fattr->nlink = ntohl(*p++); + fattr->uid = ntohl(*p++); + fattr->gid = ntohl(*p++); + p = xdr_decode_hyper(p, &fattr->size); + p = xdr_decode_hyper(p, &fattr->du.nfs3.used); + + /* Turn remote device info into Linux-specific dev_t */ + major = ntohl(*p++); + minor = ntohl(*p++); + fattr->rdev = MKDEV(major, minor); + if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) + fattr->rdev = 0; + + p = xdr_decode_hyper(p, &fattr->fsid.major); + fattr->fsid.minor = 0; + p = xdr_decode_hyper(p, &fattr->fileid); + p = xdr_decode_time3(p, &fattr->atime); + p = xdr_decode_time3(p, &fattr->mtime); + p = xdr_decode_time3(p, &fattr->ctime); + + /* Update the mode bits */ + fattr->valid |= NFS_ATTR_FATTR_V3; + return p; +} + +static inline __be32 * +xdr_encode_sattr(__be32 *p, struct iattr *attr) +{ + if (attr->ia_valid & ATTR_MODE) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_mode & S_IALLUGO); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_UID) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_uid); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_GID) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_gid); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_SIZE) { + *p++ = xdr_one; + p = xdr_encode_hyper(p, (__u64) attr->ia_size); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_ATIME_SET) { + *p++ = xdr_two; + p = xdr_encode_time3(p, &attr->ia_atime); + } else if (attr->ia_valid & ATTR_ATIME) { + *p++ = xdr_one; + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_MTIME_SET) { + *p++ = xdr_two; + p = xdr_encode_time3(p, &attr->ia_mtime); + } else if (attr->ia_valid & ATTR_MTIME) { + *p++ = xdr_one; + } else { + *p++ = xdr_zero; + } + return p; +} + +static inline __be32 * +xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr) +{ + p = xdr_decode_hyper(p, &fattr->pre_size); + p = xdr_decode_time3(p, &fattr->pre_mtime); + p = xdr_decode_time3(p, &fattr->pre_ctime); + fattr->valid |= NFS_ATTR_FATTR_PRESIZE + | NFS_ATTR_FATTR_PREMTIME + | NFS_ATTR_FATTR_PRECTIME; + return p; +} + +static inline __be32 * +xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr) +{ + if (*p++) + p = xdr_decode_fattr(p, fattr); + return p; +} + +static inline __be32 * +xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) +{ + if (*p++) + return xdr_decode_wcc_attr(p, fattr); + return p; +} + + +static inline __be32 * +xdr_decode_wcc_data(__be32 *p, struct nfs_fattr *fattr) +{ + p = xdr_decode_pre_op_attr(p, fattr); + return xdr_decode_post_op_attr(p, fattr); +} + +/* + * NFS encode functions + */ + +/* + * Encode file handle argument + */ +static int +nfs3_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh) +{ + p = xdr_encode_fhandle(p, fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SETATTR arguments + */ +static int +nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_sattr(p, args->sattr); + *p++ = htonl(args->guard); + if (args->guard) + p = xdr_encode_time3(p, &args->guardtime); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode directory ops argument + */ +static int +nfs3_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs3_diropargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode REMOVE argument + */ +static int +nfs3_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name.name, args->name.len); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode access() argument + */ +static int +nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->access); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Arguments to a READ call. Since we read data directly into the page + * cache, we also set up the reply iovec here so that iov[1] points + * exactly to the page we want to fetch. + */ +static int +nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; + unsigned int replen; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->offset); + *p++ = htonl(count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, count); + req->rq_rcv_buf.flags |= XDRBUF_READ; + return 0; +} + +/* + * Write arguments. Splice the buffer to be written into the iovec. + */ +static int +nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +{ + struct xdr_buf *sndbuf = &req->rq_snd_buf; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->offset); + *p++ = htonl(count); + *p++ = htonl(args->stable); + *p++ = htonl(count); + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + + /* Copy the page array */ + xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); + sndbuf->flags |= XDRBUF_WRITE; + return 0; +} + +/* + * Encode CREATE arguments + */ +static int +nfs3_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs3_createargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + + *p++ = htonl(args->createmode); + if (args->createmode == NFS3_CREATE_EXCLUSIVE) { + *p++ = args->verifier[0]; + *p++ = args->verifier[1]; + } else + p = xdr_encode_sattr(p, args->sattr); + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode MKDIR arguments + */ +static int +nfs3_xdr_mkdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mkdirargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode SYMLINK arguments + */ +static int +nfs3_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_symlinkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_sattr(p, args->sattr); + *p++ = htonl(args->pathlen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Copy the page */ + xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen); + return 0; +} + +/* + * Encode MKNOD arguments + */ +static int +nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + *p++ = htonl(args->type); + p = xdr_encode_sattr(p, args->sattr); + if (args->type == NF3CHR || args->type == NF3BLK) { + *p++ = htonl(MAJOR(args->rdev)); + *p++ = htonl(MINOR(args->rdev)); + } + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode RENAME arguments + */ +static int +nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode LINK arguments + */ +static int +nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args) +{ + p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode arguments to readdir call + */ +static int +nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; + unsigned int replen; + u32 count = args->count; + + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->cookie); + *p++ = args->verf[0]; + *p++ = args->verf[1]; + if (args->plus) { + /* readdirplus: need dircount + buffer size. + * We just make sure we make dircount big enough */ + *p++ = htonl(count >> 3); + } + *p++ = htonl(count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); + return 0; +} + +/* + * Decode the result of a readdir call. + * We just check for syntactical correctness. + */ +static int +nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + struct page **page; + size_t hdrlen; + u32 len, recvd, pglen; + int status, nr = 0; + __be32 *entry, *end, *kaddr; + + status = ntohl(*p++); + /* Decode post_op_attrs */ + p = xdr_decode_post_op_attr(p, res->dir_attr); + if (status) + return nfs_stat_to_errno(status); + /* Decode verifier cookie */ + if (res->verf) { + res->verf[0] = *p++; + res->verf[1] = *p++; + } else { + p += 2; + } + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + dprintk("NFS: READDIR reply header overflowed:" + "length %Zu > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); + } + + pglen = rcvbuf->page_len; + recvd = rcvbuf->len - hdrlen; + if (pglen > recvd) + pglen = recvd; + page = rcvbuf->pages; + kaddr = p = kmap_atomic(*page, KM_USER0); + end = (__be32 *)((char *)p + pglen); + entry = p; + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { + if (p + 3 > end) + goto short_pkt; + p += 2; /* inode # */ + len = ntohl(*p++); /* string length */ + p += XDR_QUADLEN(len) + 2; /* name + cookie */ + if (len > NFS3_MAXNAMLEN) { + dprintk("NFS: giant filename in readdir (len 0x%x)!\n", + len); + goto err_unmap; + } + + if (res->plus) { + /* post_op_attr */ + if (p + 2 > end) + goto short_pkt; + if (*p++) { + p += 21; + if (p + 1 > end) + goto short_pkt; + } + /* post_op_fh3 */ + if (*p++) { + if (p + 1 > end) + goto short_pkt; + len = ntohl(*p++); + if (len > NFS3_FHSIZE) { + dprintk("NFS: giant filehandle in " + "readdir (len 0x%x)!\n", len); + goto err_unmap; + } + p += XDR_QUADLEN(len); + } + } + + if (p + 2 > end) + goto short_pkt; + entry = p; + } + + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + out: + kunmap_atomic(kaddr, KM_USER0); + return nr; + short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ + entry[0] = entry[1] = 0; + if (!nr) + nr = -errno_NFSERR_IO; + goto out; +err_unmap: + nr = -errno_NFSERR_IO; + goto out; +} + +__be32 * +nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +{ + struct nfs_entry old = *entry; + + if (!*p++) { + if (!*p) + return ERR_PTR(-EAGAIN); + entry->eof = 1; + return ERR_PTR(-EBADCOOKIE); + } + + p = xdr_decode_hyper(p, &entry->ino); + entry->len = ntohl(*p++); + entry->name = (const char *) p; + p += XDR_QUADLEN(entry->len); + entry->prev_cookie = entry->cookie; + p = xdr_decode_hyper(p, &entry->cookie); + + if (plus) { + entry->fattr->valid = 0; + p = xdr_decode_post_op_attr(p, entry->fattr); + /* In fact, a post_op_fh3: */ + if (*p++) { + p = xdr_decode_fhandle(p, entry->fh); + /* Ugh -- server reply was truncated */ + if (p == NULL) { + dprintk("NFS: FH truncated\n"); + *entry = old; + return ERR_PTR(-EAGAIN); + } + } else + memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); + } + + entry->eof = !p[0] && p[1]; + return p; +} + +/* + * Encode COMMIT arguments + */ +static int +nfs3_xdr_commitargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->offset); + *p++ = htonl(args->count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +#ifdef CONFIG_NFS_V3_ACL +/* + * Encode GETACL arguments + */ +static int +nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, + struct nfs3_getaclargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->mask); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + if (args->mask & (NFS_ACL | NFS_DFACL)) { + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + + ACL3_getaclres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, + NFSACL_MAXPAGES << PAGE_SHIFT); + } + return 0; +} + +/* + * Encode SETACL arguments + */ +static int +nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p, + struct nfs3_setaclargs *args) +{ + struct xdr_buf *buf = &req->rq_snd_buf; + unsigned int base; + int err; + + p = xdr_encode_fhandle(p, NFS_FH(args->inode)); + *p++ = htonl(args->mask); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + base = req->rq_slen; + + if (args->npages != 0) + xdr_encode_pages(buf, args->pages, 0, args->len); + else + req->rq_slen = xdr_adjust_iovec(req->rq_svec, + p + XDR_QUADLEN(args->len)); + + err = nfsacl_encode(buf, base, args->inode, + (args->mask & NFS_ACL) ? + args->acl_access : NULL, 1, 0); + if (err > 0) + err = nfsacl_encode(buf, base + err, args->inode, + (args->mask & NFS_DFACL) ? + args->acl_default : NULL, 1, + NFS_ACL_DEFAULT); + return (err > 0) ? 0 : err; +} +#endif /* CONFIG_NFS_V3_ACL */ + +/* + * NFS XDR decode functions + */ + +/* + * Decode attrstat reply. + */ +static int +nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +{ + int status; + + if ((status = ntohl(*p++))) + return nfs_stat_to_errno(status); + xdr_decode_fattr(p, fattr); + return 0; +} + +/* + * Decode status+wcc_data reply + * SATTR, REMOVE, RMDIR + */ +static int +nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +{ + int status; + + if ((status = ntohl(*p++))) + status = nfs_stat_to_errno(status); + xdr_decode_wcc_data(p, fattr); + return status; +} + +static int +nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res) +{ + return nfs3_xdr_wccstat(req, p, &res->dir_attr); +} + +/* + * Decode LOOKUP reply + */ +static int +nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) +{ + int status; + + if ((status = ntohl(*p++))) { + status = nfs_stat_to_errno(status); + } else { + if (!(p = xdr_decode_fhandle(p, res->fh))) + return -errno_NFSERR_IO; + p = xdr_decode_post_op_attr(p, res->fattr); + } + xdr_decode_post_op_attr(p, res->dir_attr); + return status; +} + +/* + * Decode ACCESS reply + */ +static int +nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) +{ + int status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status) + return nfs_stat_to_errno(status); + res->access = ntohl(*p++); + return 0; +} + +static int +nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); + return 0; +} + +/* + * Decode READLINK reply + */ +static int +nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + size_t hdrlen; + u32 len, recvd; + char *kaddr; + int status; + + status = ntohl(*p++); + p = xdr_decode_post_op_attr(p, fattr); + + if (status != 0) + return nfs_stat_to_errno(status); + + /* Convert length of symlink */ + len = ntohl(*p++); + if (len >= rcvbuf->page_len) { + dprintk("nfs: server returned giant symlink!\n"); + return -ENAMETOOLONG; + } + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + dprintk("NFS: READLINK reply header overflowed:" + "length %Zu > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READLINK header is short. " + "iovec will be shifted.\n"); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); + } + recvd = req->rq_rcv_buf.len - hdrlen; + if (recvd < len) { + dprintk("NFS: server cheating in readlink reply: " + "count %u > recvd %u\n", len, recvd); + return -EIO; + } + + /* NULL terminate the string we got */ + kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0); + kaddr[len+rcvbuf->page_base] = '\0'; + kunmap_atomic(kaddr, KM_USER0); + return 0; +} + +/* + * Decode READ reply + */ +static int +nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) +{ + struct kvec *iov = req->rq_rcv_buf.head; + size_t hdrlen; + u32 count, ocount, recvd; + int status; + + status = ntohl(*p++); + p = xdr_decode_post_op_attr(p, res->fattr); + + if (status != 0) + return nfs_stat_to_errno(status); + + /* Decode reply count and EOF flag. NFSv3 is somewhat redundant + * in that it puts the count both in the res struct and in the + * opaque data count. */ + count = ntohl(*p++); + res->eof = ntohl(*p++); + ocount = ntohl(*p++); + + if (ocount != count) { + dprintk("NFS: READ count doesn't match RPC opaque count.\n"); + return -errno_NFSERR_IO; + } + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len < hdrlen) { + dprintk("NFS: READ reply header overflowed:" + "length %Zu > %Zu\n", hdrlen, iov->iov_len); + return -errno_NFSERR_IO; + } else if (iov->iov_len != hdrlen) { + dprintk("NFS: READ header is short. iovec will be shifted.\n"); + xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); + } + + recvd = req->rq_rcv_buf.len - hdrlen; + if (count > recvd) { + dprintk("NFS: server cheating in read reply: " + "count %u > recvd %u\n", count, recvd); + count = recvd; + res->eof = 0; + } + + if (count < res->count) + res->count = count; + + return count; +} + +/* + * Decode WRITE response + */ +static int +nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) +{ + int status; + + status = ntohl(*p++); + p = xdr_decode_wcc_data(p, res->fattr); + + if (status != 0) + return nfs_stat_to_errno(status); + + res->count = ntohl(*p++); + res->verf->committed = (enum nfs3_stable_how)ntohl(*p++); + res->verf->verifier[0] = *p++; + res->verf->verifier[1] = *p++; + + return res->count; +} + +/* + * Decode a CREATE response + */ +static int +nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) +{ + int status; + + status = ntohl(*p++); + if (status == 0) { + if (*p++) { + if (!(p = xdr_decode_fhandle(p, res->fh))) + return -errno_NFSERR_IO; + p = xdr_decode_post_op_attr(p, res->fattr); + } else { + memset(res->fh, 0, sizeof(*res->fh)); + /* Do decode post_op_attr but set it to NULL */ + p = xdr_decode_post_op_attr(p, res->fattr); + res->fattr->valid = 0; + } + } else { + status = nfs_stat_to_errno(status); + } + p = xdr_decode_wcc_data(p, res->dir_attr); + return status; +} + +/* + * Decode RENAME reply + */ +static int +nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) +{ + int status; + + if ((status = ntohl(*p++)) != 0) + status = nfs_stat_to_errno(status); + p = xdr_decode_wcc_data(p, res->fromattr); + p = xdr_decode_wcc_data(p, res->toattr); + return status; +} + +/* + * Decode LINK reply + */ +static int +nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res) +{ + int status; + + if ((status = ntohl(*p++)) != 0) + status = nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + p = xdr_decode_wcc_data(p, res->dir_attr); + return status; +} + +/* + * Decode FSSTAT reply + */ +static int +nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res) +{ + int status; + + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status != 0) + return nfs_stat_to_errno(status); + + p = xdr_decode_hyper(p, &res->tbytes); + p = xdr_decode_hyper(p, &res->fbytes); + p = xdr_decode_hyper(p, &res->abytes); + p = xdr_decode_hyper(p, &res->tfiles); + p = xdr_decode_hyper(p, &res->ffiles); + p = xdr_decode_hyper(p, &res->afiles); + + /* ignore invarsec */ + return 0; +} + +/* + * Decode FSINFO reply + */ +static int +nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res) +{ + int status; + + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status != 0) + return nfs_stat_to_errno(status); + + res->rtmax = ntohl(*p++); + res->rtpref = ntohl(*p++); + res->rtmult = ntohl(*p++); + res->wtmax = ntohl(*p++); + res->wtpref = ntohl(*p++); + res->wtmult = ntohl(*p++); + res->dtpref = ntohl(*p++); + p = xdr_decode_hyper(p, &res->maxfilesize); + + /* ignore time_delta and properties */ + res->lease_time = 0; + return 0; +} + +/* + * Decode PATHCONF reply + */ +static int +nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res) +{ + int status; + + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status != 0) + return nfs_stat_to_errno(status); + res->max_link = ntohl(*p++); + res->max_namelen = ntohl(*p++); + + /* ignore remaining fields */ + return 0; +} + +/* + * Decode COMMIT reply + */ +static int +nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) +{ + int status; + + status = ntohl(*p++); + p = xdr_decode_wcc_data(p, res->fattr); + if (status != 0) + return nfs_stat_to_errno(status); + + res->verf->verifier[0] = *p++; + res->verf->verifier[1] = *p++; + return 0; +} + +#ifdef CONFIG_NFS_V3_ACL +/* + * Decode GETACL reply + */ +static int +nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p, + struct nfs3_getaclres *res) +{ + struct xdr_buf *buf = &req->rq_rcv_buf; + int status = ntohl(*p++); + struct posix_acl **acl; + unsigned int *aclcnt; + int err, base; + + if (status != 0) + return nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + res->mask = ntohl(*p++); + if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + return -EINVAL; + base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; + + acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; + aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; + err = nfsacl_decode(buf, base, aclcnt, acl); + + acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; + aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL; + if (err > 0) + err = nfsacl_decode(buf, base + err, aclcnt, acl); + return (err > 0) ? 0 : err; +} + +/* + * Decode setacl reply. + */ +static int +nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +{ + int status = ntohl(*p++); + + if (status) + return nfs_stat_to_errno(status); + xdr_decode_post_op_attr(p, fattr); + return 0; +} +#endif /* CONFIG_NFS_V3_ACL */ + +#define PROC(proc, argtype, restype, timer) \ +[NFS3PROC_##proc] = { \ + .p_proc = NFS3PROC_##proc, \ + .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ + .p_arglen = NFS3_##argtype##_sz, \ + .p_replen = NFS3_##restype##_sz, \ + .p_timer = timer, \ + .p_statidx = NFS3PROC_##proc, \ + .p_name = #proc, \ + } + +struct rpc_procinfo nfs3_procedures[] = { + PROC(GETATTR, fhandle, attrstat, 1), + PROC(SETATTR, sattrargs, wccstat, 0), + PROC(LOOKUP, diropargs, lookupres, 2), + PROC(ACCESS, accessargs, accessres, 1), + PROC(READLINK, readlinkargs, readlinkres, 3), + PROC(READ, readargs, readres, 3), + PROC(WRITE, writeargs, writeres, 4), + PROC(CREATE, createargs, createres, 0), + PROC(MKDIR, mkdirargs, createres, 0), + PROC(SYMLINK, symlinkargs, createres, 0), + PROC(MKNOD, mknodargs, createres, 0), + PROC(REMOVE, removeargs, removeres, 0), + PROC(RMDIR, diropargs, wccstat, 0), + PROC(RENAME, renameargs, renameres, 0), + PROC(LINK, linkargs, linkres, 0), + PROC(READDIR, readdirargs, readdirres, 3), + PROC(READDIRPLUS, readdirargs, readdirres, 3), + PROC(FSSTAT, fhandle, fsstatres, 0), + PROC(FSINFO, fhandle, fsinfores, 0), + PROC(PATHCONF, fhandle, pathconfres, 0), + PROC(COMMIT, commitargs, commitres, 5), +}; + +struct rpc_version nfs_version3 = { + .number = 3, + .nrprocs = ARRAY_SIZE(nfs3_procedures), + .procs = nfs3_procedures +}; + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_procinfo nfs3_acl_procedures[] = { + [ACLPROC3_GETACL] = { + .p_proc = ACLPROC3_GETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, + .p_arglen = ACL3_getaclargs_sz, + .p_replen = ACL3_getaclres_sz, + .p_timer = 1, + .p_name = "GETACL", + }, + [ACLPROC3_SETACL] = { + .p_proc = ACLPROC3_SETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, + .p_arglen = ACL3_setaclargs_sz, + .p_replen = ACL3_setaclres_sz, + .p_timer = 0, + .p_name = "SETACL", + }, +}; + +struct rpc_version nfsacl_version3 = { + .number = 3, + .nrprocs = sizeof(nfs3_acl_procedures)/ + sizeof(nfs3_acl_procedures[0]), + .procs = nfs3_acl_procedures, +}; +#endif /* CONFIG_NFS_V3_ACL */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h new file mode 100644 index 00000000000..0c6fda33d66 --- /dev/null +++ b/fs/nfs/nfs4_fs.h @@ -0,0 +1,312 @@ +/* + * linux/fs/nfs/nfs4_fs.h + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4-specific filesystem definitions and declarations + */ + +#ifndef __LINUX_FS_NFS_NFS4_FS_H +#define __LINUX_FS_NFS_NFS4_FS_H + +#ifdef CONFIG_NFS_V4 + +struct idmap; + +/* + * In a seqid-mutating op, this macro controls which error return + * values trigger incrementation of the seqid. + * + * from rfc 3010: + * The client MUST monotonically increment the sequence number for the + * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE + * operations. This is true even in the event that the previous + * operation that used the sequence number received an error. The only + * exception to this rule is if the previous operation received one of + * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, + * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, + * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. + * + */ +#define seqid_mutating_err(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_BAD_XDR) && \ + ((err) != NFSERR_RESOURCE) && \ + ((err) != NFSERR_NOFILEHANDLE)) + +enum nfs4_client_state { + NFS4CLNT_MANAGER_RUNNING = 0, + NFS4CLNT_CHECK_LEASE, + NFS4CLNT_LEASE_EXPIRED, + NFS4CLNT_RECLAIM_REBOOT, + NFS4CLNT_RECLAIM_NOGRACE, + NFS4CLNT_DELEGRETURN, + NFS4CLNT_SESSION_RESET, + NFS4CLNT_SESSION_DRAINING, +}; + +/* + * struct rpc_sequence ensures that RPC calls are sent in the exact + * order that they appear on the list. + */ +struct rpc_sequence { + struct rpc_wait_queue wait; /* RPC call delay queue */ + spinlock_t lock; /* Protects the list */ + struct list_head list; /* Defines sequence of RPC calls */ +}; + +#define NFS_SEQID_CONFIRMED 1 +struct nfs_seqid_counter { + struct rpc_sequence *sequence; + int flags; + u32 counter; +}; + +struct nfs_seqid { + struct nfs_seqid_counter *sequence; + struct list_head list; +}; + +static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) +{ + if (seqid_mutating_err(-status)) + seqid->flags |= NFS_SEQID_CONFIRMED; +} + +struct nfs_unique_id { + struct rb_node rb_node; + __u64 id; +}; + +/* + * NFS4 state_owners and lock_owners are simply labels for ordered + * sequences of RPC calls. Their sole purpose is to provide once-only + * semantics by allowing the server to identify replayed requests. + */ +struct nfs4_state_owner { + struct nfs_unique_id so_owner_id; + struct nfs_client *so_client; + struct nfs_server *so_server; + struct rb_node so_client_node; + + struct rpc_cred *so_cred; /* Associated cred */ + + spinlock_t so_lock; + atomic_t so_count; + unsigned long so_flags; + struct list_head so_states; + struct list_head so_delegations; + struct nfs_seqid_counter so_seqid; + struct rpc_sequence so_sequence; +}; + +enum { + NFS_OWNER_RECLAIM_REBOOT, + NFS_OWNER_RECLAIM_NOGRACE +}; + +#define NFS_LOCK_NEW 0 +#define NFS_LOCK_RECLAIM 1 +#define NFS_LOCK_EXPIRED 2 + +/* + * struct nfs4_state maintains the client-side state for a given + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). + * + * OPEN: + * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, + * we need to know how many files are open for reading or writing on a + * given inode. This information too is stored here. + * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) + */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + struct nfs4_state * ls_state; /* Pointer to open state */ + fl_owner_t ls_owner; /* POSIX lock owner */ +#define NFS_LOCK_INITIALIZED 1 + int ls_flags; + struct nfs_seqid_counter ls_seqid; + struct rpc_sequence ls_sequence; + struct nfs_unique_id ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, + NFS_DELEGATED_STATE, /* Current stateid is delegation */ + NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ + NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ + NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ + NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ + NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ + NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ +}; + +struct nfs4_state { + struct list_head open_states; /* List of states for the same state_owner */ + struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ + + struct nfs4_state_owner *owner; /* Pointer to the open owner */ + struct inode *inode; /* Pointer to the inode */ + + unsigned long flags; /* Do we hold any locks? */ + spinlock_t state_lock; /* Protects the lock_states list */ + + seqlock_t seqlock; /* Protects the stateid/open_stateid */ + nfs4_stateid stateid; /* Current stateid: may be delegation */ + nfs4_stateid open_stateid; /* OPEN stateid */ + + /* The following 3 fields are protected by owner->so_lock */ + unsigned int n_rdonly; /* Number of read-only references */ + unsigned int n_wronly; /* Number of write-only references */ + unsigned int n_rdwr; /* Number of read/write references */ + fmode_t state; /* State on the server (R,W, or RW) */ + atomic_t count; +}; + + +struct nfs4_exception { + long timeout; + int retry; + struct nfs4_state *state; +}; + +struct nfs4_state_recovery_ops { + int owner_flag_bit; + int state_flag_bit; + int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); + int (*recover_lock)(struct nfs4_state *, struct file_lock *); + int (*establish_clid)(struct nfs_client *, struct rpc_cred *); + struct rpc_cred * (*get_clid_cred)(struct nfs_client *); + int (*reclaim_complete)(struct nfs_client *); +}; + +struct nfs4_state_maintenance_ops { + int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); + struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); + int (*renew_lease)(struct nfs_client *, struct rpc_cred *); +}; + +extern const struct dentry_operations nfs4_dentry_operations; +extern const struct inode_operations nfs4_dir_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); + + +/* nfs4proc.c */ +extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); +extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); +extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); +extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); +extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); +extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); +extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); +extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); +extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, + struct nfs4_fs_locations *fs_locations, struct page *page); + +extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[]; +extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[]; +#if defined(CONFIG_NFS_V4_1) +extern int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task); +extern void nfs4_destroy_session(struct nfs4_session *session); +extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); +extern int nfs4_proc_create_session(struct nfs_client *); +extern int nfs4_proc_destroy_session(struct nfs4_session *); +extern int nfs4_init_session(struct nfs_server *server); +extern int nfs4_proc_get_lease_time(struct nfs_client *clp, + struct nfs_fsinfo *fsinfo); +#else /* CONFIG_NFS_v4_1 */ +static inline int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task) +{ + return 0; +} + +static inline int nfs4_init_session(struct nfs_server *server) +{ + return 0; +} +#endif /* CONFIG_NFS_V4_1 */ + +extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[]; + +extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[2]; +extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[2]; +extern const u32 nfs4_fs_locations_bitmap[2]; + +/* nfs4renewd.c */ +extern void nfs4_schedule_state_renewal(struct nfs_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs_client *); +extern void nfs4_renew_state(struct work_struct *); + +/* nfs4state.c */ +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); +struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); +#if defined(CONFIG_NFS_V4_1) +struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); +struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); +#endif /* CONFIG_NFS_V4_1 */ + +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); +extern void nfs4_put_open_state(struct nfs4_state *); +extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); +extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); +extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); +extern void nfs4_schedule_state_recovery(struct nfs_client *); +extern void nfs4_schedule_state_manager(struct nfs_client *); +extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); +extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); +extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); +extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); +extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + +extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); +extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); +extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_release_seqid(struct nfs_seqid *seqid); +extern void nfs_free_seqid(struct nfs_seqid *seqid); + +extern const nfs4_stateid zero_stateid; + +/* nfs4xdr.c */ +extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +struct nfs4_mount_data; + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; +extern struct svc_version nfs4_callback_version4; + +#else + +#define nfs4_close_state(a, b, c) do { } while (0) +#define nfs4_close_sync(a, b, c) do { } while (0) + +#endif /* CONFIG_NFS_V4 */ +#endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c new file mode 100644 index 00000000000..fa3408f2011 --- /dev/null +++ b/fs/nfs/nfs4namespace.c @@ -0,0 +1,267 @@ +/* + * linux/fs/nfs/nfs4namespace.c + * + * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> + * - Modified by David Howells <dhowells@redhat.com> + * + * NFSv4 namespace + */ + +#include <linux/dcache.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/nfs_fs.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/vfs.h> +#include <linux/inet.h> +#include "internal.h" +#include "nfs4_fs.h" +#include "dns_resolve.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +/* + * Convert the NFSv4 pathname components into a standard posix path. + * + * Note that the resulting string will be placed at the end of the buffer + */ +static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, + char *buffer, ssize_t buflen) +{ + char *end = buffer + buflen; + int n; + + *--end = '\0'; + buflen--; + + n = pathname->ncomponents; + while (--n >= 0) { + const struct nfs4_string *component = &pathname->components[n]; + buflen -= component->len + 1; + if (buflen < 0) + goto Elong; + end -= component->len; + memcpy(end, component->data, component->len); + *--end = '/'; + } + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +/* + * Determine the mount path as a string + */ +static char *nfs4_path(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + const char *srvpath; + + srvpath = strchr(mnt_parent->mnt_devname, ':'); + if (srvpath) + srvpath++; + else + srvpath = mnt_parent->mnt_devname; + + return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen); +} + +/* + * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we + * believe to be the server path to this dentry + */ +static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + const struct nfs4_fs_locations *locations, + char *page, char *page2) +{ + const char *path, *fs_path; + + path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); + if (IS_ERR(path)) + return PTR_ERR(path); + + fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); + if (IS_ERR(fs_path)) + return PTR_ERR(fs_path); + + if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + dprintk("%s: path %s does not begin with fsroot %s\n", + __func__, path, fs_path); + return -ENOENT; + } + + return 0; +} + +static size_t nfs_parse_server_name(char *string, size_t len, + struct sockaddr *sa, size_t salen) +{ + ssize_t ret; + + ret = rpc_pton(string, len, sa, salen); + if (ret == 0) { + ret = nfs_dns_resolve_name(string, len, sa, salen); + if (ret < 0) + ret = 0; + } + return ret; +} + +static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, + char *page, char *page2, + const struct nfs4_fs_location *location) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + char *mnt_path; + unsigned int maxbuflen; + unsigned int s; + + mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); + if (IS_ERR(mnt_path)) + return ERR_CAST(mnt_path); + mountdata->mnt_path = mnt_path; + maxbuflen = mnt_path - 1 - page2; + + for (s = 0; s < location->nservers; s++) { + const struct nfs4_string *buf = &location->servers[s]; + struct sockaddr_storage addr; + + if (buf->len <= 0 || buf->len >= maxbuflen) + continue; + + if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) + continue; + + mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, + (struct sockaddr *)&addr, sizeof(addr)); + if (mountdata->addrlen == 0) + continue; + + mountdata->addr = (struct sockaddr *)&addr; + rpc_set_port(mountdata->addr, NFS_PORT); + + memcpy(page2, buf->data, buf->len); + page2[buf->len] = '\0'; + mountdata->hostname = page2; + + snprintf(page, PAGE_SIZE, "%s:%s", + mountdata->hostname, + mountdata->mnt_path); + + mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); + if (!IS_ERR(mnt)) + break; + } + return mnt; +} + +/** + * nfs_follow_referral - set up mountpoint when hitting a referral on moved error + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @locations - array of NFSv4 server location information + * + */ +static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + const struct nfs4_fs_locations *locations) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, + }; + char *page = NULL, *page2 = NULL; + int loc, error; + + if (locations == NULL || locations->nlocations <= 0) + goto out; + + dprintk("%s: referral at %s/%s\n", __func__, + dentry->d_parent->d_name.name, dentry->d_name.name); + + page = (char *) __get_free_page(GFP_USER); + if (!page) + goto out; + + page2 = (char *) __get_free_page(GFP_USER); + if (!page2) + goto out; + + /* Ensure fs path is a prefix of current dentry path */ + error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); + if (error < 0) { + mnt = ERR_PTR(error); + goto out; + } + + for (loc = 0; loc < locations->nlocations; loc++) { + const struct nfs4_fs_location *location = &locations->locations[loc]; + + if (location == NULL || location->nservers <= 0 || + location->rootpath.ncomponents == 0) + continue; + + mnt = try_location(&mountdata, page, page2, location); + if (!IS_ERR(mnt)) + break; + } + +out: + free_page((unsigned long) page); + free_page((unsigned long) page2); + dprintk("%s: done\n", __func__); + return mnt; +} + +/* + * nfs_do_refmount - handle crossing a referral on server + * @dentry - dentry of referral + * @nd - nameidata info + * + */ +struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) +{ + struct vfsmount *mnt = ERR_PTR(-ENOMEM); + struct dentry *parent; + struct nfs4_fs_locations *fs_locations = NULL; + struct page *page; + int err; + + /* BUG_ON(IS_ROOT(dentry)); */ + dprintk("%s: enter\n", __func__); + + page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out; + + fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (fs_locations == NULL) + goto out_free; + + /* Get locations */ + mnt = ERR_PTR(-ENOENT); + + parent = dget_parent(dentry); + dprintk("%s: getting locations for %s/%s\n", + __func__, parent->d_name.name, dentry->d_name.name); + + err = nfs4_proc_fs_locations(parent->d_inode, &dentry->d_name, fs_locations, page); + dput(parent); + if (err != 0 || + fs_locations->nlocations <= 0 || + fs_locations->fs_path.ncomponents <= 0) + goto out_free; + + mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); +out_free: + __free_page(page); + kfree(fs_locations); +out: + dprintk("%s: done\n", __func__); + return mnt; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c new file mode 100644 index 00000000000..ecf66020475 --- /dev/null +++ b/fs/nfs/nfs4proc.c @@ -0,0 +1,5328 @@ +/* + * fs/nfs/nfs4proc.c + * + * Client-side procedure declarations for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/namei.h> +#include <linux/mount.h> +#include <linux/module.h> +#include <linux/sunrpc/bc_xprt.h> + +#include "nfs4_fs.h" +#include "delegation.h" +#include "internal.h" +#include "iostat.h" +#include "callback.h" + +#define NFSDBG_FACILITY NFSDBG_PROC + +#define NFS4_POLL_RETRY_MIN (HZ/10) +#define NFS4_POLL_RETRY_MAX (15*HZ) + +#define NFS4_MAX_LOOP_ON_RECOVER (10) + +struct nfs4_opendata; +static int _nfs4_proc_open(struct nfs4_opendata *data); +static int _nfs4_recover_proc_open(struct nfs4_opendata *data); +static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); +static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); +static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); + +/* Prevent leaks of NFSv4 errors into userland */ +static int nfs4_map_errors(int err) +{ + if (err >= -1000) + return err; + switch (err) { + case -NFS4ERR_RESOURCE: + return -EREMOTEIO; + default: + dprintk("%s could not handle NFSv4 error %d\n", + __func__, -err); + break; + } + return -EIO; +} + +/* + * This is our standard bitmap for GETATTR requests. + */ +const u32 nfs4_fattr_bitmap[2] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY +}; + +const u32 nfs4_statfs_bitmap[2] = { + FATTR4_WORD0_FILES_AVAIL + | FATTR4_WORD0_FILES_FREE + | FATTR4_WORD0_FILES_TOTAL, + FATTR4_WORD1_SPACE_AVAIL + | FATTR4_WORD1_SPACE_FREE + | FATTR4_WORD1_SPACE_TOTAL +}; + +const u32 nfs4_pathconf_bitmap[2] = { + FATTR4_WORD0_MAXLINK + | FATTR4_WORD0_MAXNAME, + 0 +}; + +const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE + | FATTR4_WORD0_MAXREAD + | FATTR4_WORD0_MAXWRITE + | FATTR4_WORD0_LEASE_TIME, + 0 +}; + +const u32 nfs4_fs_locations_bitmap[2] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID + | FATTR4_WORD0_FS_LOCATIONS, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY + | FATTR4_WORD1_MOUNTED_ON_FILEID +}; + +static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, + struct nfs4_readdir_arg *readdir) +{ + __be32 *start, *p; + + BUG_ON(readdir->count < 80); + if (cookie > 2) { + readdir->cookie = cookie; + memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); + return; + } + + readdir->cookie = 0; + memset(&readdir->verifier, 0, sizeof(readdir->verifier)); + if (cookie == 2) + return; + + /* + * NFSv4 servers do not return entries for '.' and '..' + * Therefore, we fake these entries here. We let '.' + * have cookie 0 and '..' have cookie 1. Note that + * when talking to the server, we always send cookie 0 + * instead of 1 or 2. + */ + start = p = kmap_atomic(*readdir->pages, KM_USER0); + + if (cookie == 0) { + *p++ = xdr_one; /* next */ + *p++ = xdr_zero; /* cookie, first word */ + *p++ = xdr_one; /* cookie, second word */ + *p++ = xdr_one; /* entry len */ + memcpy(p, ".\0\0\0", 4); /* entry */ + p++; + *p++ = xdr_one; /* bitmap length */ + *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ + *p++ = htonl(8); /* attribute buffer length */ + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode)); + } + + *p++ = xdr_one; /* next */ + *p++ = xdr_zero; /* cookie, first word */ + *p++ = xdr_two; /* cookie, second word */ + *p++ = xdr_two; /* entry len */ + memcpy(p, "..\0\0", 4); /* entry */ + p++; + *p++ = xdr_one; /* bitmap length */ + *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ + *p++ = htonl(8); /* attribute buffer length */ + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); + + readdir->pgbase = (char *)p - (char *)start; + readdir->count -= readdir->pgbase; + kunmap_atomic(start, KM_USER0); +} + +static int nfs4_wait_clnt_recover(struct nfs_client *clp) +{ + int res; + + might_sleep(); + + res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, + nfs_wait_bit_killable, TASK_KILLABLE); + return res; +} + +static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) +{ + int res = 0; + + might_sleep(); + + if (*timeout <= 0) + *timeout = NFS4_POLL_RETRY_MIN; + if (*timeout > NFS4_POLL_RETRY_MAX) + *timeout = NFS4_POLL_RETRY_MAX; + schedule_timeout_killable(*timeout); + if (fatal_signal_pending(current)) + res = -ERESTARTSYS; + *timeout <<= 1; + return res; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state *state = exception->state; + int ret = errorcode; + + exception->retry = 0; + switch(errorcode) { + case 0: + return 0; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + if (state == NULL) + break; + nfs4_state_mark_reclaim_nograce(clp, state); + goto do_state_recovery; + case -NFS4ERR_STALE_STATEID: + if (state == NULL) + break; + nfs4_state_mark_reclaim_reboot(clp, state); + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_EXPIRED: + goto do_state_recovery; +#if defined(CONFIG_NFS_V4_1) + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR: %d Reset session\n", __func__, + errorcode); + nfs4_schedule_state_recovery(clp); + exception->retry = 1; + break; +#endif /* defined(CONFIG_NFS_V4_1) */ + case -NFS4ERR_FILE_OPEN: + if (exception->timeout > HZ) { + /* We have retried a decent amount, time to + * fail + */ + ret = -EBUSY; + break; + } + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + ret = nfs4_delay(server->client, &exception->timeout); + if (ret != 0) + break; + case -NFS4ERR_OLD_STATEID: + exception->retry = 1; + } + /* We failed to handle the error */ + return nfs4_map_errors(ret); +do_state_recovery: + nfs4_schedule_state_recovery(clp); + ret = nfs4_wait_clnt_recover(clp); + if (ret == 0) + exception->retry = 1; + return ret; +} + + +static void renew_lease(const struct nfs_server *server, unsigned long timestamp) +{ + struct nfs_client *clp = server->nfs_client; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); +} + +#if defined(CONFIG_NFS_V4_1) + +/* + * nfs4_free_slot - free a slot and efficiently update slot table. + * + * freeing a slot is trivially done by clearing its respective bit + * in the bitmap. + * If the freed slotid equals highest_used_slotid we want to update it + * so that the server would be able to size down the slot table if needed, + * otherwise we know that the highest_used_slotid is still in use. + * When updating highest_used_slotid there may be "holes" in the bitmap + * so we need to scan down from highest_used_slotid to 0 looking for the now + * highest slotid in use. + * If none found, highest_used_slotid is set to -1. + * + * Must be called while holding tbl->slot_tbl_lock + */ +static void +nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) +{ + int slotid = free_slotid; + + /* clear used bit in bitmap */ + __clear_bit(slotid, tbl->used_slots); + + /* update highest_used_slotid when it is freed */ + if (slotid == tbl->highest_used_slotid) { + slotid = find_last_bit(tbl->used_slots, tbl->max_slots); + if (slotid < tbl->max_slots) + tbl->highest_used_slotid = slotid; + else + tbl->highest_used_slotid = -1; + } + dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, + free_slotid, tbl->highest_used_slotid); +} + +/* + * Signal state manager thread if session is drained + */ +static void nfs41_check_drain_session_complete(struct nfs4_session *ses) +{ + struct rpc_task *task; + + if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) { + task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); + if (task) + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + return; + } + + if (ses->fc_slot_table.highest_used_slotid != -1) + return; + + dprintk("%s COMPLETE: Session Drained\n", __func__); + complete(&ses->complete); +} + +static void nfs41_sequence_free_slot(const struct nfs_client *clp, + struct nfs4_sequence_res *res) +{ + struct nfs4_slot_table *tbl; + + tbl = &clp->cl_session->fc_slot_table; + if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { + /* just wake up the next guy waiting since + * we may have not consumed a slot after all */ + dprintk("%s: No slot\n", __func__); + return; + } + + spin_lock(&tbl->slot_tbl_lock); + nfs4_free_slot(tbl, res->sr_slotid); + nfs41_check_drain_session_complete(clp->cl_session); + spin_unlock(&tbl->slot_tbl_lock); + res->sr_slotid = NFS4_MAX_SLOT_TABLE; +} + +static void nfs41_sequence_done(struct nfs_client *clp, + struct nfs4_sequence_res *res, + int rpc_status) +{ + unsigned long timestamp; + struct nfs4_slot_table *tbl; + struct nfs4_slot *slot; + + /* + * sr_status remains 1 if an RPC level error occurred. The server + * may or may not have processed the sequence operation.. + * Proceed as if the server received and processed the sequence + * operation. + */ + if (res->sr_status == 1) + res->sr_status = NFS_OK; + + /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ + if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) + goto out; + + /* Check the SEQUENCE operation status */ + if (res->sr_status == 0) { + tbl = &clp->cl_session->fc_slot_table; + slot = tbl->slots + res->sr_slotid; + /* Update the slot's sequence and clientid lease timer */ + ++slot->seq_nr; + timestamp = res->sr_renewal_time; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal, timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); + /* Check sequence flags */ + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + } +out: + /* The session may be reset by one of the error handlers. */ + dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); + nfs41_sequence_free_slot(clp, res); +} + +/* + * nfs4_find_slot - efficiently look for a free slot + * + * nfs4_find_slot looks for an unset bit in the used_slots bitmap. + * If found, we mark the slot as used, update the highest_used_slotid, + * and respectively set up the sequence operation args. + * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. + * + * Note: must be called with under the slot_tbl_lock. + */ +static u8 +nfs4_find_slot(struct nfs4_slot_table *tbl) +{ + int slotid; + u8 ret_id = NFS4_MAX_SLOT_TABLE; + BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE); + + dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, + tbl->max_slots); + slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); + if (slotid >= tbl->max_slots) + goto out; + __set_bit(slotid, tbl->used_slots); + if (slotid > tbl->highest_used_slotid) + tbl->highest_used_slotid = slotid; + ret_id = slotid; +out: + dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id); + return ret_id; +} + +static int nfs41_setup_sequence(struct nfs4_session *session, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply, + struct rpc_task *task) +{ + struct nfs4_slot *slot; + struct nfs4_slot_table *tbl; + u8 slotid; + + dprintk("--> %s\n", __func__); + /* slot already allocated? */ + if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) + return 0; + + memset(res, 0, sizeof(*res)); + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + tbl = &session->fc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); + if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) && + !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { + /* + * The state manager will wait until the slot table is empty. + * Schedule the reset thread + */ + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s Schedule Session Reset\n", __func__); + return -EAGAIN; + } + + if (!rpc_queue_empty(&tbl->slot_tbl_waitq) && + !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s enforce FIFO order\n", __func__); + return -EAGAIN; + } + + slotid = nfs4_find_slot(tbl); + if (slotid == NFS4_MAX_SLOT_TABLE) { + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("<-- %s: no free slots\n", __func__); + return -EAGAIN; + } + spin_unlock(&tbl->slot_tbl_lock); + + rpc_task_set_priority(task, RPC_PRIORITY_NORMAL); + slot = tbl->slots + slotid; + args->sa_session = session; + args->sa_slotid = slotid; + args->sa_cache_this = cache_reply; + + dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); + + res->sr_session = session; + res->sr_slotid = slotid; + res->sr_renewal_time = jiffies; + /* + * sr_status is only set in decode_sequence, and so will remain + * set to 1 if an rpc level failure occurs. + */ + res->sr_status = 1; + return 0; +} + +int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply, + struct rpc_task *task) +{ + int ret = 0; + + dprintk("--> %s clp %p session %p sr_slotid %d\n", + __func__, clp, clp->cl_session, res->sr_slotid); + + if (!nfs4_has_session(clp)) + goto out; + ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply, + task); + if (ret && ret != -EAGAIN) { + /* terminate rpc task */ + task->tk_status = ret; + task->tk_action = NULL; + } +out: + dprintk("<-- %s status=%d\n", __func__, ret); + return ret; +} + +struct nfs41_call_sync_data { + struct nfs_client *clp; + struct nfs4_sequence_args *seq_args; + struct nfs4_sequence_res *seq_res; + int cache_reply; +}; + +static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs41_call_sync_data *data = calldata; + + dprintk("--> %s data->clp->cl_session %p\n", __func__, + data->clp->cl_session); + if (nfs4_setup_sequence(data->clp, data->seq_args, + data->seq_res, data->cache_reply, task)) + return; + rpc_call_start(task); +} + +static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata) +{ + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + nfs41_call_sync_prepare(task, calldata); +} + +static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) +{ + struct nfs41_call_sync_data *data = calldata; + + nfs41_sequence_done(data->clp, data->seq_res, task->tk_status); +} + +struct rpc_call_ops nfs41_call_sync_ops = { + .rpc_call_prepare = nfs41_call_sync_prepare, + .rpc_call_done = nfs41_call_sync_done, +}; + +struct rpc_call_ops nfs41_call_priv_sync_ops = { + .rpc_call_prepare = nfs41_call_priv_sync_prepare, + .rpc_call_done = nfs41_call_sync_done, +}; + +static int nfs4_call_sync_sequence(struct nfs_client *clp, + struct rpc_clnt *clnt, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply, + int privileged) +{ + int ret; + struct rpc_task *task; + struct nfs41_call_sync_data data = { + .clp = clp, + .seq_args = args, + .seq_res = res, + .cache_reply = cache_reply, + }; + struct rpc_task_setup task_setup = { + .rpc_client = clnt, + .rpc_message = msg, + .callback_ops = &nfs41_call_sync_ops, + .callback_data = &data + }; + + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + if (privileged) + task_setup.callback_ops = &nfs41_call_priv_sync_ops; + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + ret = PTR_ERR(task); + else { + ret = task->tk_status; + rpc_put_task(task); + } + return ret; +} + +int _nfs4_call_sync_session(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) +{ + return nfs4_call_sync_sequence(server->nfs_client, server->client, + msg, args, res, cache_reply, 0); +} + +#endif /* CONFIG_NFS_V4_1 */ + +int _nfs4_call_sync(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) +{ + args->sa_session = res->sr_session = NULL; + return rpc_call_sync(server->client, msg, 0); +} + +#define nfs4_call_sync(server, msg, args, res, cache_reply) \ + (server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \ + &(res)->seq_res, (cache_reply)) + +static void nfs4_sequence_done(const struct nfs_server *server, + struct nfs4_sequence_res *res, int rpc_status) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(server->nfs_client)) + nfs41_sequence_done(server->nfs_client, res, rpc_status); +#endif /* CONFIG_NFS_V4_1 */ +} + +static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) +{ + struct nfs_inode *nfsi = NFS_I(dir); + + spin_lock(&dir->i_lock); + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; + if (!cinfo->atomic || cinfo->before != nfsi->change_attr) + nfs_force_lookup_revalidate(dir); + nfsi->change_attr = cinfo->after; + spin_unlock(&dir->i_lock); +} + +struct nfs4_opendata { + struct kref kref; + struct nfs_openargs o_arg; + struct nfs_openres o_res; + struct nfs_open_confirmargs c_arg; + struct nfs_open_confirmres c_res; + struct nfs_fattr f_attr; + struct nfs_fattr dir_attr; + struct path path; + struct dentry *dir; + struct nfs4_state_owner *owner; + struct nfs4_state *state; + struct iattr attrs; + unsigned long timestamp; + unsigned int rpc_done : 1; + int rpc_status; + int cancelled; +}; + + +static void nfs4_init_opendata_res(struct nfs4_opendata *p) +{ + p->o_res.f_attr = &p->f_attr; + p->o_res.dir_attr = &p->dir_attr; + p->o_res.seqid = p->o_arg.seqid; + p->c_res.seqid = p->c_arg.seqid; + p->o_res.server = p->o_arg.server; + nfs_fattr_init(&p->f_attr); + nfs_fattr_init(&p->dir_attr); + p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; +} + +static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, + struct nfs4_state_owner *sp, fmode_t fmode, int flags, + const struct iattr *attrs) +{ + struct dentry *parent = dget_parent(path->dentry); + struct inode *dir = parent->d_inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_opendata *p; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + goto err; + p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (p->o_arg.seqid == NULL) + goto err_free; + p->path.mnt = mntget(path->mnt); + p->path.dentry = dget(path->dentry); + p->dir = parent; + p->owner = sp; + atomic_inc(&sp->so_count); + p->o_arg.fh = NFS_FH(dir); + p->o_arg.open_flags = flags; + p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); + p->o_arg.clientid = server->nfs_client->cl_clientid; + p->o_arg.id = sp->so_owner_id.id; + p->o_arg.name = &p->path.dentry->d_name; + p->o_arg.server = server; + p->o_arg.bitmask = server->attr_bitmask; + p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; + if (flags & O_EXCL) { + if (nfs4_has_persistent_session(server->nfs_client)) { + /* GUARDED */ + p->o_arg.u.attrs = &p->attrs; + memcpy(&p->attrs, attrs, sizeof(p->attrs)); + } else { /* EXCLUSIVE4_1 */ + u32 *s = (u32 *) p->o_arg.u.verifier.data; + s[0] = jiffies; + s[1] = current->pid; + } + } else if (flags & O_CREAT) { + p->o_arg.u.attrs = &p->attrs; + memcpy(&p->attrs, attrs, sizeof(p->attrs)); + } + p->c_arg.fh = &p->o_res.fh; + p->c_arg.stateid = &p->o_res.stateid; + p->c_arg.seqid = p->o_arg.seqid; + nfs4_init_opendata_res(p); + kref_init(&p->kref); + return p; +err_free: + kfree(p); +err: + dput(parent); + return NULL; +} + +static void nfs4_opendata_free(struct kref *kref) +{ + struct nfs4_opendata *p = container_of(kref, + struct nfs4_opendata, kref); + + nfs_free_seqid(p->o_arg.seqid); + if (p->state != NULL) + nfs4_put_open_state(p->state); + nfs4_put_state_owner(p->owner); + dput(p->dir); + path_put(&p->path); + kfree(p); +} + +static void nfs4_opendata_put(struct nfs4_opendata *p) +{ + if (p != NULL) + kref_put(&p->kref, nfs4_opendata_free); +} + +static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) +{ + int ret; + + ret = rpc_wait_for_completion_task(task); + return ret; +} + +static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) +{ + int ret = 0; + + if (open_mode & O_EXCL) + goto out; + switch (mode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ: + ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 + && state->n_rdonly != 0; + break; + case FMODE_WRITE: + ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 + && state->n_wronly != 0; + break; + case FMODE_READ|FMODE_WRITE: + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 + && state->n_rdwr != 0; + } +out: + return ret; +} + +static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) +{ + if ((delegation->type & fmode) != fmode) + return 0; + if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) + return 0; + nfs_mark_delegation_referenced(delegation); + return 1; +} + +static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) +{ + switch (fmode) { + case FMODE_WRITE: + state->n_wronly++; + break; + case FMODE_READ: + state->n_rdonly++; + break; + case FMODE_READ|FMODE_WRITE: + state->n_rdwr++; + } + nfs4_state_set_mode_locked(state, state->state | fmode); +} + +static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +{ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); + memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); + switch (fmode) { + case FMODE_READ: + set_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_WRITE: + set_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case FMODE_READ|FMODE_WRITE: + set_bit(NFS_O_RDWR_STATE, &state->flags); + } +} + +static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +{ + write_seqlock(&state->seqlock); + nfs_set_open_stateid_locked(state, stateid, fmode); + write_sequnlock(&state->seqlock); +} + +static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) +{ + /* + * Protect the call to nfs4_state_set_mode_locked and + * serialise the stateid update + */ + write_seqlock(&state->seqlock); + if (deleg_stateid != NULL) { + memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + } + if (open_stateid != NULL) + nfs_set_open_stateid_locked(state, open_stateid, fmode); + write_sequnlock(&state->seqlock); + spin_lock(&state->owner->so_lock); + update_open_stateflags(state, fmode); + spin_unlock(&state->owner->so_lock); +} + +static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_delegation *deleg_cur; + int ret = 0; + + fmode &= (FMODE_READ|FMODE_WRITE); + + rcu_read_lock(); + deleg_cur = rcu_dereference(nfsi->delegation); + if (deleg_cur == NULL) + goto no_delegation; + + spin_lock(&deleg_cur->lock); + if (nfsi->delegation != deleg_cur || + (deleg_cur->type & fmode) != fmode) + goto no_delegation_unlock; + + if (delegation == NULL) + delegation = &deleg_cur->stateid; + else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) + goto no_delegation_unlock; + + nfs_mark_delegation_referenced(deleg_cur); + __update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode); + ret = 1; +no_delegation_unlock: + spin_unlock(&deleg_cur->lock); +no_delegation: + rcu_read_unlock(); + + if (!ret && open_stateid != NULL) { + __update_open_stateid(state, open_stateid, NULL, fmode); + ret = 1; + } + + return ret; +} + + +static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL || (delegation->type & fmode) == fmode) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + nfs_inode_return_delegation(inode); +} + +static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) +{ + struct nfs4_state *state = opendata->state; + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_delegation *delegation; + int open_mode = opendata->o_arg.open_flags & O_EXCL; + fmode_t fmode = opendata->o_arg.fmode; + nfs4_stateid stateid; + int ret = -EAGAIN; + + for (;;) { + if (can_open_cached(state, fmode, open_mode)) { + spin_lock(&state->owner->so_lock); + if (can_open_cached(state, fmode, open_mode)) { + update_open_stateflags(state, fmode); + spin_unlock(&state->owner->so_lock); + goto out_return_state; + } + spin_unlock(&state->owner->so_lock); + } + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + if (delegation == NULL || + !can_open_delegated(delegation, fmode)) { + rcu_read_unlock(); + break; + } + /* Save the delegation */ + memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); + rcu_read_unlock(); + ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); + if (ret != 0) + goto out; + ret = -EAGAIN; + + /* Try to update the stateid using the delegation */ + if (update_open_stateid(state, NULL, &stateid, fmode)) + goto out_return_state; + } +out: + return ERR_PTR(ret); +out_return_state: + atomic_inc(&state->count); + return state; +} + +static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +{ + struct inode *inode; + struct nfs4_state *state = NULL; + struct nfs_delegation *delegation; + int ret; + + if (!data->rpc_done) { + state = nfs4_try_open_cached(data); + goto out; + } + + ret = -EAGAIN; + if (!(data->f_attr.valid & NFS_ATTR_FATTR)) + goto err; + inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); + ret = PTR_ERR(inode); + if (IS_ERR(inode)) + goto err; + ret = -ENOMEM; + state = nfs4_get_open_state(inode, data->owner); + if (state == NULL) + goto err_put_inode; + if (data->o_res.delegation_type != 0) { + int delegation_flags = 0; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation) + delegation_flags = delegation->flags; + rcu_read_unlock(); + if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) + nfs_inode_set_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + else + nfs_inode_reclaim_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + } + + update_open_stateid(state, &data->o_res.stateid, NULL, + data->o_arg.fmode); + iput(inode); +out: + return state; +err_put_inode: + iput(inode); +err: + return ERR_PTR(ret); +} + +static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_open_context *ctx; + + spin_lock(&state->inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + get_nfs_open_context(ctx); + spin_unlock(&state->inode->i_lock); + return ctx; + } + spin_unlock(&state->inode->i_lock); + return ERR_PTR(-ENOENT); +} + +static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state) +{ + struct nfs4_opendata *opendata; + + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL); + if (opendata == NULL) + return ERR_PTR(-ENOMEM); + opendata->state = state; + atomic_inc(&state->count); + return opendata; +} + +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res) +{ + struct nfs4_state *newstate; + int ret; + + opendata->o_arg.open_flags = 0; + opendata->o_arg.fmode = fmode; + memset(&opendata->o_res, 0, sizeof(opendata->o_res)); + memset(&opendata->c_res, 0, sizeof(opendata->c_res)); + nfs4_init_opendata_res(opendata); + ret = _nfs4_recover_proc_open(opendata); + if (ret != 0) + return ret; + newstate = nfs4_opendata_to_nfs4_state(opendata); + if (IS_ERR(newstate)) + return PTR_ERR(newstate); + nfs4_close_state(&opendata->path, newstate, fmode); + *res = newstate; + return 0; +} + +static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) +{ + struct nfs4_state *newstate; + int ret; + + /* memory barrier prior to reading state->n_* */ + clear_bit(NFS_DELEGATED_STATE, &state->flags); + smp_rmb(); + if (state->n_rdwr != 0) { + ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); + if (ret != 0) + return ret; + if (newstate != state) + return -ESTALE; + } + if (state->n_wronly != 0) { + ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); + if (ret != 0) + return ret; + if (newstate != state) + return -ESTALE; + } + if (state->n_rdonly != 0) { + ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); + if (ret != 0) + return ret; + if (newstate != state) + return -ESTALE; + } + /* + * We may have performed cached opens for all three recoveries. + * Check if we need to update the current stateid. + */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && + memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { + write_seqlock(&state->seqlock); + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); + write_sequnlock(&state->seqlock); + } + return 0; +} + +/* + * OPEN_RECLAIM: + * reclaim state on the server after a reboot. + */ +static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) +{ + struct nfs_delegation *delegation; + struct nfs4_opendata *opendata; + fmode_t delegation_type = 0; + int status; + + opendata = nfs4_open_recoverdata_alloc(ctx, state); + if (IS_ERR(opendata)) + return PTR_ERR(opendata); + opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; + opendata->o_arg.fh = NFS_FH(state->inode); + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(state->inode)->delegation); + if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) + delegation_type = delegation->type; + rcu_read_unlock(); + opendata->o_arg.u.delegation_type = delegation_type; + status = nfs4_open_recover(opendata, state); + nfs4_opendata_put(opendata); + return status; +} + +static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_do_open_reclaim(ctx, state); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_open_context *ctx; + int ret; + + ctx = nfs4_state_find_open_context(state); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + ret = nfs4_do_open_reclaim(ctx, state); + put_nfs_open_context(ctx); + return ret; +} + +static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +{ + struct nfs4_opendata *opendata; + int ret; + + opendata = nfs4_open_recoverdata_alloc(ctx, state); + if (IS_ERR(opendata)) + return PTR_ERR(opendata); + opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; + memcpy(opendata->o_arg.u.delegation.data, stateid->data, + sizeof(opendata->o_arg.u.delegation.data)); + ret = nfs4_open_recover(opendata, state); + nfs4_opendata_put(opendata); + return ret; +} + +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +{ + struct nfs4_exception exception = { }; + struct nfs_server *server = NFS_SERVER(state->inode); + int err; + do { + err = _nfs4_open_delegation_recall(ctx, state, stateid); + switch (err) { + case 0: + case -ENOENT: + case -ESTALE: + goto out; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + nfs4_schedule_state_recovery( + server->nfs_client); + goto out; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + /* Don't recall a delegation if it was lost */ + nfs4_schedule_state_recovery(server->nfs_client); + goto out; + case -ERESTARTSYS: + /* + * The show must go on: exit, but mark the + * stateid as needing recovery. + */ + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + case -ENOMEM: + err = 0; + goto out; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); +out: + return err; +} + +static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_opendata *data = calldata; + + data->rpc_status = task->tk_status; + if (RPC_ASSASSINATED(task)) + return; + if (data->rpc_status == 0) { + memcpy(data->o_res.stateid.data, data->c_res.stateid.data, + sizeof(data->o_res.stateid.data)); + nfs_confirm_seqid(&data->owner->so_seqid, 0); + renew_lease(data->o_res.server, data->timestamp); + data->rpc_done = 1; + } +} + +static void nfs4_open_confirm_release(void *calldata) +{ + struct nfs4_opendata *data = calldata; + struct nfs4_state *state = NULL; + + /* If this request hasn't been cancelled, do nothing */ + if (data->cancelled == 0) + goto out_free; + /* In case of error, no cleanup! */ + if (!data->rpc_done) + goto out_free; + state = nfs4_opendata_to_nfs4_state(data); + if (!IS_ERR(state)) + nfs4_close_state(&data->path, state, data->o_arg.fmode); +out_free: + nfs4_opendata_put(data); +} + +static const struct rpc_call_ops nfs4_open_confirm_ops = { + .rpc_call_done = nfs4_open_confirm_done, + .rpc_release = nfs4_open_confirm_release, +}; + +/* + * Note: On error, nfs4_proc_open_confirm will free the struct nfs4_opendata + */ +static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) +{ + struct nfs_server *server = NFS_SERVER(data->dir->d_inode); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &data->c_arg, + .rpc_resp = &data->c_res, + .rpc_cred = data->owner->so_cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_open_confirm_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + int status; + + kref_get(&data->kref); + data->rpc_done = 0; + data->rpc_status = 0; + data->timestamp = jiffies; + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) { + data->cancelled = 1; + smp_wmb(); + } else + status = data->rpc_status; + rpc_put_task(task); + return status; +} + +static void nfs4_open_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_opendata *data = calldata; + struct nfs4_state_owner *sp = data->owner; + + if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) + return; + /* + * Check if we still need to send an OPEN call, or if we can use + * a delegation instead. + */ + if (data->state != NULL) { + struct nfs_delegation *delegation; + + if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags)) + goto out_no_action; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); + if (delegation != NULL && + test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) { + rcu_read_unlock(); + goto out_no_action; + } + rcu_read_unlock(); + } + /* Update sequence id. */ + data->o_arg.id = sp->so_owner_id.id; + data->o_arg.clientid = sp->so_client->cl_clientid; + if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; + nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); + } + data->timestamp = jiffies; + if (nfs4_setup_sequence(data->o_arg.server->nfs_client, + &data->o_arg.seq_args, + &data->o_res.seq_res, 1, task)) + return; + rpc_call_start(task); + return; +out_no_action: + task->tk_action = NULL; + +} + +static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata) +{ + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + nfs4_open_prepare(task, calldata); +} + +static void nfs4_open_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_opendata *data = calldata; + + data->rpc_status = task->tk_status; + + nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res, + task->tk_status); + + if (RPC_ASSASSINATED(task)) + return; + if (task->tk_status == 0) { + switch (data->o_res.f_attr->mode & S_IFMT) { + case S_IFREG: + break; + case S_IFLNK: + data->rpc_status = -ELOOP; + break; + case S_IFDIR: + data->rpc_status = -EISDIR; + break; + default: + data->rpc_status = -ENOTDIR; + } + renew_lease(data->o_res.server, data->timestamp); + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) + nfs_confirm_seqid(&data->owner->so_seqid, 0); + } + data->rpc_done = 1; +} + +static void nfs4_open_release(void *calldata) +{ + struct nfs4_opendata *data = calldata; + struct nfs4_state *state = NULL; + + /* If this request hasn't been cancelled, do nothing */ + if (data->cancelled == 0) + goto out_free; + /* In case of error, no cleanup! */ + if (data->rpc_status != 0 || !data->rpc_done) + goto out_free; + /* In case we need an open_confirm, no cleanup! */ + if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) + goto out_free; + state = nfs4_opendata_to_nfs4_state(data); + if (!IS_ERR(state)) + nfs4_close_state(&data->path, state, data->o_arg.fmode); +out_free: + nfs4_opendata_put(data); +} + +static const struct rpc_call_ops nfs4_open_ops = { + .rpc_call_prepare = nfs4_open_prepare, + .rpc_call_done = nfs4_open_done, + .rpc_release = nfs4_open_release, +}; + +static const struct rpc_call_ops nfs4_recover_open_ops = { + .rpc_call_prepare = nfs4_recover_open_prepare, + .rpc_call_done = nfs4_open_done, + .rpc_release = nfs4_open_release, +}; + +static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) +{ + struct inode *dir = data->dir->d_inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_openargs *o_arg = &data->o_arg; + struct nfs_openres *o_res = &data->o_res; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], + .rpc_argp = o_arg, + .rpc_resp = o_res, + .rpc_cred = data->owner->so_cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_open_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + int status; + + kref_get(&data->kref); + data->rpc_done = 0; + data->rpc_status = 0; + data->cancelled = 0; + if (isrecover) + task_setup_data.callback_ops = &nfs4_recover_open_ops; + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) { + data->cancelled = 1; + smp_wmb(); + } else + status = data->rpc_status; + rpc_put_task(task); + + return status; +} + +static int _nfs4_recover_proc_open(struct nfs4_opendata *data) +{ + struct inode *dir = data->dir->d_inode; + struct nfs_openres *o_res = &data->o_res; + int status; + + status = nfs4_run_open_task(data, 1); + if (status != 0 || !data->rpc_done) + return status; + + nfs_refresh_inode(dir, o_res->dir_attr); + + if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(data); + if (status != 0) + return status; + } + + return status; +} + +/* + * Note: On error, nfs4_proc_open will free the struct nfs4_opendata + */ +static int _nfs4_proc_open(struct nfs4_opendata *data) +{ + struct inode *dir = data->dir->d_inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_openargs *o_arg = &data->o_arg; + struct nfs_openres *o_res = &data->o_res; + int status; + + status = nfs4_run_open_task(data, 0); + if (status != 0 || !data->rpc_done) + return status; + + if (o_arg->open_flags & O_CREAT) { + update_changeattr(dir, &o_res->cinfo); + nfs_post_op_update_inode(dir, o_res->dir_attr); + } else + nfs_refresh_inode(dir, o_res->dir_attr); + if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) + server->caps &= ~NFS_CAP_POSIX_LOCK; + if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(data); + if (status != 0) + return status; + } + if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) + _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr); + return 0; +} + +static int nfs4_recover_expired_lease(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + unsigned int loop; + int ret; + + for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { + ret = nfs4_wait_clnt_recover(clp); + if (ret != 0) + break; + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && + !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) + break; + nfs4_schedule_state_recovery(clp); + ret = -EIO; + } + return ret; +} + +/* + * OPEN_EXPIRED: + * reclaim state on the server after a network partition. + * Assumes caller holds the appropriate lock + */ +static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) +{ + struct nfs4_opendata *opendata; + int ret; + + opendata = nfs4_open_recoverdata_alloc(ctx, state); + if (IS_ERR(opendata)) + return PTR_ERR(opendata); + ret = nfs4_open_recover(opendata, state); + if (ret == -ESTALE) + d_drop(ctx->path.dentry); + nfs4_opendata_put(opendata); + return ret; +} + +static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_open_expired(ctx, state); + switch (err) { + default: + goto out; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + nfs4_handle_exception(server, err, &exception); + err = 0; + } + } while (exception.retry); +out: + return err; +} + +static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_open_context *ctx; + int ret; + + ctx = nfs4_state_find_open_context(state); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + ret = nfs4_do_open_expired(ctx, state); + put_nfs_open_context(ctx); + return ret; +} + +/* + * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* + * fields corresponding to attributes that were used to store the verifier. + * Make sure we clobber those fields in the later setattr call + */ +static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr) +{ + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) && + !(sattr->ia_valid & ATTR_ATIME_SET)) + sattr->ia_valid |= ATTR_ATIME; + + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) && + !(sattr->ia_valid & ATTR_MTIME_SET)) + sattr->ia_valid |= ATTR_MTIME; +} + +/* + * Returns a referenced nfs4_state + */ +static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +{ + struct nfs4_state_owner *sp; + struct nfs4_state *state = NULL; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_opendata *opendata; + int status; + + /* Protect against reboot recovery conflicts */ + status = -ENOMEM; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); + goto out_err; + } + status = nfs4_recover_expired_lease(server); + if (status != 0) + goto err_put_state_owner; + if (path->dentry->d_inode != NULL) + nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); + status = -ENOMEM; + opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr); + if (opendata == NULL) + goto err_put_state_owner; + + if (path->dentry->d_inode != NULL) + opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); + + status = _nfs4_proc_open(opendata); + if (status != 0) + goto err_opendata_put; + + if (opendata->o_arg.open_flags & O_EXCL) + nfs4_exclusive_attrset(opendata, sattr); + + state = nfs4_opendata_to_nfs4_state(opendata); + status = PTR_ERR(state); + if (IS_ERR(state)) + goto err_opendata_put; + if (server->caps & NFS_CAP_POSIX_LOCK) + set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + nfs4_opendata_put(opendata); + nfs4_put_state_owner(sp); + *res = state; + return 0; +err_opendata_put: + nfs4_opendata_put(opendata); +err_put_state_owner: + nfs4_put_state_owner(sp); +out_err: + *res = NULL; + return status; +} + + +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) +{ + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int status; + + do { + status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res); + if (status == 0) + break; + /* NOTE: BAD_SEQID means the server and client disagree about the + * book-keeping w.r.t. state-changing operations + * (OPEN/CLOSE/LOCK/LOCKU...) + * It is actually a sign of a bug on the client or on the server. + * + * If we receive a BAD_SEQID error in the particular case of + * doing an OPEN, we assume that nfs_increment_open_seqid() will + * have unhashed the old state_owner for us, and that we can + * therefore safely retry using a new one. We should still warn + * the user though... + */ + if (status == -NFS4ERR_BAD_SEQID) { + printk(KERN_WARNING "NFS: v4 server %s " + " returned a bad sequence-id error!\n", + NFS_SERVER(dir)->nfs_client->cl_hostname); + exception.retry = 1; + continue; + } + /* + * BAD_STATEID on OPEN means that the server cancelled our + * state before it received the OPEN_CONFIRM. + * Recover by retrying the request as per the discussion + * on Page 181 of RFC3530. + */ + if (status == -NFS4ERR_BAD_STATEID) { + exception.retry = 1; + continue; + } + if (status == -EAGAIN) { + /* We must have found a delegation */ + exception.retry = 1; + continue; + } + res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + status, &exception)); + } while (exception.retry); + return res; +} + +static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, + struct nfs_fattr *fattr, struct iattr *sattr, + struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_setattrargs arg = { + .fh = NFS_FH(inode), + .iap = sattr, + .server = server, + .bitmask = server->attr_bitmask, + }; + struct nfs_setattrres res = { + .fattr = fattr, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = cred, + }; + unsigned long timestamp = jiffies; + int status; + + nfs_fattr_init(fattr); + + if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { + /* Use that stateid */ + } else if (state != NULL) { + nfs4_copy_stateid(&arg.stateid, state, current->files); + } else + memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); + + status = nfs4_call_sync(server, &msg, &arg, &res, 1); + if (status == 0 && state != NULL) + renew_lease(server, timestamp); + return status; +} + +static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, + struct nfs_fattr *fattr, struct iattr *sattr, + struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_do_setattr(inode, cred, fattr, sattr, state), + &exception); + } while (exception.retry); + return err; +} + +struct nfs4_closedata { + struct path path; + struct inode *inode; + struct nfs4_state *state; + struct nfs_closeargs arg; + struct nfs_closeres res; + struct nfs_fattr fattr; + unsigned long timestamp; +}; + +static void nfs4_free_closedata(void *data) +{ + struct nfs4_closedata *calldata = data; + struct nfs4_state_owner *sp = calldata->state->owner; + + nfs4_put_open_state(calldata->state); + nfs_free_seqid(calldata->arg.seqid); + nfs4_put_state_owner(sp); + path_put(&calldata->path); + kfree(calldata); +} + +static void nfs4_close_clear_stateid_flags(struct nfs4_state *state, + fmode_t fmode) +{ + spin_lock(&state->owner->so_lock); + if (!(fmode & FMODE_READ)) + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + if (!(fmode & FMODE_WRITE)) + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); + spin_unlock(&state->owner->so_lock); +} + +static void nfs4_close_done(struct rpc_task *task, void *data) +{ + struct nfs4_closedata *calldata = data; + struct nfs4_state *state = calldata->state; + struct nfs_server *server = NFS_SERVER(calldata->inode); + + nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status); + if (RPC_ASSASSINATED(task)) + return; + /* hmm. we are done with the inode, and in the process of freeing + * the state_owner. we keep this around to process errors + */ + switch (task->tk_status) { + case 0: + nfs_set_open_stateid(state, &calldata->res.stateid, 0); + renew_lease(server, calldata->timestamp); + nfs4_close_clear_stateid_flags(state, + calldata->arg.fmode); + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + if (calldata->arg.fmode == 0) + break; + default: + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) + rpc_restart_call_prepare(task); + } + nfs_release_seqid(calldata->arg.seqid); + nfs_refresh_inode(calldata->inode, calldata->res.fattr); +} + +static void nfs4_close_prepare(struct rpc_task *task, void *data) +{ + struct nfs4_closedata *calldata = data; + struct nfs4_state *state = calldata->state; + int call_close = 0; + + if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) + return; + + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; + calldata->arg.fmode = FMODE_READ|FMODE_WRITE; + spin_lock(&state->owner->so_lock); + /* Calculate the change in open mode */ + if (state->n_rdwr == 0) { + if (state->n_rdonly == 0) { + call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + calldata->arg.fmode &= ~FMODE_READ; + } + if (state->n_wronly == 0) { + call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + calldata->arg.fmode &= ~FMODE_WRITE; + } + } + spin_unlock(&state->owner->so_lock); + + if (!call_close) { + /* Note: exit _without_ calling nfs4_close_done */ + task->tk_action = NULL; + return; + } + + if (calldata->arg.fmode == 0) + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; + + nfs_fattr_init(calldata->res.fattr); + calldata->timestamp = jiffies; + if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client, + &calldata->arg.seq_args, &calldata->res.seq_res, + 1, task)) + return; + rpc_call_start(task); +} + +static const struct rpc_call_ops nfs4_close_ops = { + .rpc_call_prepare = nfs4_close_prepare, + .rpc_call_done = nfs4_close_done, + .rpc_release = nfs4_free_closedata, +}; + +/* + * It is possible for data to be read/written from a mem-mapped file + * after the sys_close call (which hits the vfs layer as a flush). + * This means that we can't safely call nfsv4 close on a file until + * the inode is cleared. This in turn means that we are not good + * NFSv4 citizens - we do not indicate to the server to update the file's + * share state even when we are done with one of the three share + * stateid's in the inode. + * + * NOTE: Caller must be holding the sp->so_owner semaphore! + */ +int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_closedata *calldata; + struct nfs4_state_owner *sp = state->owner; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], + .rpc_cred = state->owner->so_cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_close_ops, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + int status = -ENOMEM; + + calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + goto out; + calldata->inode = state->inode; + calldata->state = state; + calldata->arg.fh = NFS_FH(state->inode); + calldata->arg.stateid = &state->open_stateid; + /* Serialization for the sequence id */ + calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); + if (calldata->arg.seqid == NULL) + goto out_free_calldata; + calldata->arg.fmode = 0; + calldata->arg.bitmask = server->cache_consistency_bitmask; + calldata->res.fattr = &calldata->fattr; + calldata->res.seqid = calldata->arg.seqid; + calldata->res.server = server; + calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + calldata->path.mnt = mntget(path->mnt); + calldata->path.dentry = dget(path->dentry); + + msg.rpc_argp = &calldata->arg, + msg.rpc_resp = &calldata->res, + task_setup_data.callback_data = calldata; + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = 0; + if (wait) + status = rpc_wait_for_completion_task(task); + rpc_put_task(task); + return status; +out_free_calldata: + kfree(calldata); +out: + nfs4_put_open_state(state); + nfs4_put_state_owner(sp); + return status; +} + +static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) +{ + struct file *filp; + int ret; + + /* If the open_intent is for execute, we have an extra check to make */ + if (fmode & FMODE_EXEC) { + ret = nfs_may_open(state->inode, + state->owner->so_cred, + nd->intent.open.flags); + if (ret < 0) + goto out_close; + } + filp = lookup_instantiate_filp(nd, path->dentry, NULL); + if (!IS_ERR(filp)) { + struct nfs_open_context *ctx; + ctx = nfs_file_open_context(filp); + ctx->state = state; + return 0; + } + ret = PTR_ERR(filp); +out_close: + nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE)); + return ret; +} + +struct dentry * +nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct path path = { + .mnt = nd->path.mnt, + .dentry = dentry, + }; + struct dentry *parent; + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; + struct dentry *res; + fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current_umask(); + } else { + attr.ia_valid = 0; + BUG_ON(nd->intent.open.flags & O_CREAT); + } + + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return (struct dentry *)cred; + parent = dentry->d_parent; + /* Protect against concurrent sillydeletes */ + nfs_block_sillyrename(parent); + state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) { + d_add(dentry, NULL); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } + nfs_unblock_sillyrename(parent); + return (struct dentry *)state; + } + res = d_add_unique(dentry, igrab(state->inode)); + if (res != NULL) + path.dentry = res; + nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); + nfs_unblock_sillyrename(parent); + nfs4_intent_set_file(nd, &path, state, fmode); + return res; +} + +int +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) +{ + struct path path = { + .mnt = nd->path.mnt, + .dentry = dentry, + }; + struct rpc_cred *cred; + struct nfs4_state *state; + fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE); + + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return PTR_ERR(cred); + state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred); + put_rpccred(cred); + if (IS_ERR(state)) { + switch (PTR_ERR(state)) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + lookup_instantiate_filp(nd, (struct dentry *)state, NULL); + return 1; + default: + goto out_drop; + } + } + if (state->inode == dentry->d_inode) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + nfs4_intent_set_file(nd, &path, state, fmode); + return 1; + } + nfs4_close_sync(&path, state, fmode); +out_drop: + d_drop(dentry); + return 0; +} + +static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) +{ + if (ctx->state == NULL) + return; + if (is_sync) + nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); + else + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); +} + +static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +{ + struct nfs4_server_caps_arg args = { + .fhandle = fhandle, + }; + struct nfs4_server_caps_res res = {}; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + status = nfs4_call_sync(server, &msg, &args, &res, 0); + if (status == 0) { + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); + server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| + NFS_CAP_SYMLINKS|NFS_CAP_FILEID| + NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| + NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| + NFS_CAP_CTIME|NFS_CAP_MTIME); + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) + server->caps |= NFS_CAP_ACLS; + if (res.has_links != 0) + server->caps |= NFS_CAP_HARDLINKS; + if (res.has_symlinks != 0) + server->caps |= NFS_CAP_SYMLINKS; + if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID) + server->caps |= NFS_CAP_FILEID; + if (res.attr_bitmask[1] & FATTR4_WORD1_MODE) + server->caps |= NFS_CAP_MODE; + if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS) + server->caps |= NFS_CAP_NLINK; + if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER) + server->caps |= NFS_CAP_OWNER; + if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP) + server->caps |= NFS_CAP_OWNER_GROUP; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS) + server->caps |= NFS_CAP_ATIME; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA) + server->caps |= NFS_CAP_CTIME; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) + server->caps |= NFS_CAP_MTIME; + + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); + server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; + server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + server->acl_bitmask = res.acl_bitmask; + } + + return status; +} + +int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs4_lookup_root_arg args = { + .bitmask = nfs4_fattr_bitmap, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = info->fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP_ROOT], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + nfs_fattr_init(info->fattr); + return nfs4_call_sync(server, &msg, &args, &res, 0); +} + +static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_lookup_root(server, fhandle, info), + &exception); + } while (exception.retry); + return err; +} + +/* + * get the file handle for the "/" directory on the server + */ +static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + status = nfs4_lookup_root(server, fhandle, info); + if (status == 0) + status = nfs4_server_capabilities(server, fhandle); + if (status == 0) + status = nfs4_do_fsinfo(server, fhandle, info); + return nfs4_map_errors(status); +} + +/* + * Get locations and (maybe) other attributes of a referral. + * Note that we'll actually follow the referral later when + * we detect fsid mismatch in inode revalidation + */ +static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle) +{ + int status = -ENOMEM; + struct page *page = NULL; + struct nfs4_fs_locations *locations = NULL; + + page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out; + locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (locations == NULL) + goto out; + + status = nfs4_proc_fs_locations(dir, name, locations, page); + if (status != 0) + goto out; + /* Make sure server returned a different fsid for the referral */ + if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { + dprintk("%s: server did not return a different fsid for a referral at %s\n", __func__, name->name); + status = -EIO; + goto out; + } + + memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); + fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL; + if (!fattr->mode) + fattr->mode = S_IFDIR; + memset(fhandle, 0, sizeof(struct nfs_fh)); +out: + if (page) + __free_page(page); + if (locations) + kfree(locations); + return status; +} + +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_getattr_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct nfs4_getattr_res res = { + .fattr = fattr, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + nfs_fattr_init(fattr); + return nfs4_call_sync(server, &msg, &args, &res, 0); +} + +static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_getattr(server, fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +/* + * The file is not closed if it is opened due to the a request to change + * the size of the file. The open call will not be needed once the + * VFS layer lookup-intents are implemented. + * + * Close is called when the inode is destroyed. + * If we haven't opened the file for O_WRONLY, we + * need to in the size_change case to obtain a stateid. + * + * Got race? + * Because OPEN is always done by name in nfsv4, it is + * possible that we opened a different file by the same + * name. We can recognize this race condition, but we + * can't do anything about it besides returning an error. + * + * This will be fixed with VFS changes (lookup-intent). + */ +static int +nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) +{ + struct inode *inode = dentry->d_inode; + struct rpc_cred *cred = NULL; + struct nfs4_state *state = NULL; + int status; + + nfs_fattr_init(fattr); + + /* Search for an existing open(O_WRITE) file */ + if (sattr->ia_valid & ATTR_FILE) { + struct nfs_open_context *ctx; + + ctx = nfs_file_open_context(sattr->ia_file); + if (ctx) { + cred = ctx->cred; + state = ctx->state; + } + } + + status = nfs4_do_setattr(inode, cred, fattr, sattr, state); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); + return status; +} + +static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *dirfh, + const struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + struct nfs4_lookup_arg args = { + .bitmask = server->attr_bitmask, + .dir_fh = dirfh, + .name = name, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + nfs_fattr_init(fattr); + + dprintk("NFS call lookupfh %s\n", name->name); + status = nfs4_call_sync(server, &msg, &args, &res, 0); + dprintk("NFS reply lookupfh: %d\n", status); + return status; +} + +static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); + /* FIXME: !!!! */ + if (err == -NFS4ERR_MOVED) { + err = -EREMOTE; + break; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + int status; + + dprintk("NFS call lookup %s\n", name->name); + status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); + if (status == -NFS4ERR_MOVED) + status = nfs4_get_referral(dir, name, fattr, fhandle); + dprintk("NFS reply lookup: %d\n", status); + return status; +} + +static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_lookup(dir, name, fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct nfs4_accessargs args = { + .fh = NFS_FH(inode), + .bitmask = server->attr_bitmask, + }; + struct nfs4_accessres res = { + .server = server, + .fattr = &fattr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = entry->cred, + }; + int mode = entry->mask; + int status; + + /* + * Determine which access bits we want to ask for... + */ + if (mode & MAY_READ) + args.access |= NFS4_ACCESS_READ; + if (S_ISDIR(inode->i_mode)) { + if (mode & MAY_WRITE) + args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE; + if (mode & MAY_EXEC) + args.access |= NFS4_ACCESS_LOOKUP; + } else { + if (mode & MAY_WRITE) + args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND; + if (mode & MAY_EXEC) + args.access |= NFS4_ACCESS_EXECUTE; + } + nfs_fattr_init(&fattr); + status = nfs4_call_sync(server, &msg, &args, &res, 0); + if (!status) { + entry->mask = 0; + if (res.access & NFS4_ACCESS_READ) + entry->mask |= MAY_READ; + if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; + nfs_refresh_inode(inode, &fattr); + } + return status; +} + +static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_access(inode, entry), + &exception); + } while (exception.retry); + return err; +} + +/* + * TODO: For the time being, we don't try to get any attributes + * along with any of the zero-copy operations READ, READDIR, + * READLINK, WRITE. + * + * In the case of the first three, we want to put the GETATTR + * after the read-type operation -- this is because it is hard + * to predict the length of a GETATTR response in v4, and thus + * align the READ data correctly. This means that the GETATTR + * may end up partially falling into the page cache, and we should + * shift it into the 'tail' of the xdr_buf before processing. + * To do this efficiently, we need to know the total length + * of data received, which doesn't seem to be available outside + * of the RPC layer. + * + * In the case of WRITE, we also want to put the GETATTR after + * the operation -- in this case because we want to make sure + * we get the post-operation mtime and size. This means that + * we can't use xdr_encode_pages() as written: we need a variant + * of it which would leave room in the 'tail' iovec. + * + * Both of these changes to the XDR layer would in fact be quite + * minor, but I decided to leave them for a subsequent patch. + */ +static int _nfs4_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) +{ + struct nfs4_readlink args = { + .fh = NFS_FH(inode), + .pgbase = pgbase, + .pglen = pglen, + .pages = &page, + }; + struct nfs4_readlink_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); +} + +static int nfs4_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_readlink(inode, page, pgbase, pglen), + &exception); + } while (exception.retry); + return err; +} + +/* + * Got race? + * We will need to arrange for the VFS layer to provide an atomic open. + * Until then, this create/open method is prone to inefficiency and race + * conditions due to the lookup, create, and open VFS calls from sys_open() + * placed on the wire. + * + * Given the above sorry state of affairs, I'm simply sending an OPEN. + * The file will be opened again in the subsequent VFS open call + * (nfs4_proc_file_open). + * + * The open for read will just hang around to be used by any process that + * opens the file O_RDONLY. This will all be resolved with the VFS changes. + */ + +static int +nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + int flags, struct nameidata *nd) +{ + struct path path = { + .mnt = nd->path.mnt, + .dentry = dentry, + }; + struct nfs4_state *state; + struct rpc_cred *cred; + fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); + int status = 0; + + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) { + status = PTR_ERR(cred); + goto out; + } + state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); + d_drop(dentry); + if (IS_ERR(state)) { + status = PTR_ERR(state); + goto out_putcred; + } + d_add(dentry, igrab(state->inode)); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + if (flags & O_EXCL) { + struct nfs_fattr fattr; + status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state); + if (status == 0) + nfs_setattr_update_inode(state->inode, sattr); + nfs_post_op_update_inode(state->inode, &fattr); + } + if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) + status = nfs4_intent_set_file(nd, &path, state, fmode); + else + nfs4_close_sync(&path, state, fmode); +out_putcred: + put_rpccred(cred); +out: + return status; +} + +static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_removeargs args = { + .fh = NFS_FH(dir), + .name.len = name->len, + .name.name = name->name, + .bitmask = server->attr_bitmask, + }; + struct nfs_removeres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + nfs_fattr_init(&res.dir_attr); + status = nfs4_call_sync(server, &msg, &args, &res, 1); + if (status == 0) { + update_changeattr(dir, &res.cinfo); + nfs_post_op_update_inode(dir, &res.dir_attr); + } + return status; +} + +static int nfs4_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_remove(dir, name), + &exception); + } while (exception.retry); + return err; +} + +static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_removeargs *args = msg->rpc_argp; + struct nfs_removeres *res = msg->rpc_resp; + + args->bitmask = server->cache_consistency_bitmask; + res->server = server; + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; +} + +static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) +{ + struct nfs_removeres *res = task->tk_msg.rpc_resp; + + nfs4_sequence_done(res->server, &res->seq_res, task->tk_status); + if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) + return 0; + update_changeattr(dir, &res->cinfo); + nfs_post_op_update_inode(dir, &res->dir_attr); + return 1; +} + +static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs_server *server = NFS_SERVER(old_dir); + struct nfs4_rename_arg arg = { + .old_dir = NFS_FH(old_dir), + .new_dir = NFS_FH(new_dir), + .old_name = old_name, + .new_name = new_name, + .bitmask = server->attr_bitmask, + }; + struct nfs_fattr old_fattr, new_fattr; + struct nfs4_rename_res res = { + .server = server, + .old_fattr = &old_fattr, + .new_fattr = &new_fattr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + nfs_fattr_init(res.old_fattr); + nfs_fattr_init(res.new_fattr); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); + + if (!status) { + update_changeattr(old_dir, &res.old_cinfo); + nfs_post_op_update_inode(old_dir, res.old_fattr); + update_changeattr(new_dir, &res.new_cinfo); + nfs_post_op_update_inode(new_dir, res.new_fattr); + } + return status; +} + +static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(old_dir), + _nfs4_proc_rename(old_dir, old_name, + new_dir, new_name), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_link_arg arg = { + .fh = NFS_FH(inode), + .dir_fh = NFS_FH(dir), + .name = name, + .bitmask = server->attr_bitmask, + }; + struct nfs_fattr fattr, dir_attr; + struct nfs4_link_res res = { + .server = server, + .fattr = &fattr, + .dir_attr = &dir_attr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + nfs_fattr_init(res.fattr); + nfs_fattr_init(res.dir_attr); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); + if (!status) { + update_changeattr(dir, &res.cinfo); + nfs_post_op_update_inode(dir, res.dir_attr); + nfs_post_op_update_inode(inode, res.fattr); + } + + return status; +} + +static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_link(inode, dir, name), + &exception); + } while (exception.retry); + return err; +} + +struct nfs4_createdata { + struct rpc_message msg; + struct nfs4_create_arg arg; + struct nfs4_create_res res; + struct nfs_fh fh; + struct nfs_fattr fattr; + struct nfs_fattr dir_fattr; +}; + +static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, + struct qstr *name, struct iattr *sattr, u32 ftype) +{ + struct nfs4_createdata *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data != NULL) { + struct nfs_server *server = NFS_SERVER(dir); + + data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; + data->msg.rpc_argp = &data->arg; + data->msg.rpc_resp = &data->res; + data->arg.dir_fh = NFS_FH(dir); + data->arg.server = server; + data->arg.name = name; + data->arg.attrs = sattr; + data->arg.ftype = ftype; + data->arg.bitmask = server->attr_bitmask; + data->res.server = server; + data->res.fh = &data->fh; + data->res.fattr = &data->fattr; + data->res.dir_fattr = &data->dir_fattr; + nfs_fattr_init(data->res.fattr); + nfs_fattr_init(data->res.dir_fattr); + } + return data; +} + +static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) +{ + int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, + &data->arg, &data->res, 1); + if (status == 0) { + update_changeattr(dir, &data->res.dir_cinfo); + nfs_post_op_update_inode(dir, data->res.dir_fattr); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + } + return status; +} + +static void nfs4_free_createdata(struct nfs4_createdata *data) +{ + kfree(data); +} + +static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, + struct page *page, unsigned int len, struct iattr *sattr) +{ + struct nfs4_createdata *data; + int status = -ENAMETOOLONG; + + if (len > NFS4_MAXPATHLEN) + goto out; + + status = -ENOMEM; + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4LNK); + if (data == NULL) + goto out; + + data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; + data->arg.u.symlink.pages = &page; + data->arg.u.symlink.len = len; + + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: + return status; +} + +static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, + struct page *page, unsigned int len, struct iattr *sattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_symlink(dir, dentry, page, + len, sattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr) +{ + struct nfs4_createdata *data; + int status = -ENOMEM; + + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); + if (data == NULL) + goto out; + + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: + return status; +} + +static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_mkdir(dir, dentry, sattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) +{ + struct inode *dir = dentry->d_inode; + struct nfs4_readdir_arg args = { + .fh = NFS_FH(dir), + .pages = &page, + .pgbase = 0, + .count = count, + .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, + }; + struct nfs4_readdir_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + int status; + + dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__, + dentry->d_parent->d_name.name, + dentry->d_name.name, + (unsigned long long)cookie); + nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + res.pgbase = args.pgbase; + status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); + if (status == 0) + memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + + nfs_invalidate_atime(dir); + + dprintk("%s: returns %d\n", __func__, status); + return status; +} + +static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), + _nfs4_proc_readdir(dentry, cred, cookie, + page, count, plus), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, dev_t rdev) +{ + struct nfs4_createdata *data; + int mode = sattr->ia_mode; + int status = -ENOMEM; + + BUG_ON(!(sattr->ia_valid & ATTR_MODE)); + BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); + + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK); + if (data == NULL) + goto out; + + if (S_ISFIFO(mode)) + data->arg.ftype = NF4FIFO; + else if (S_ISBLK(mode)) { + data->arg.ftype = NF4BLK; + data->arg.u.device.specdata1 = MAJOR(rdev); + data->arg.u.device.specdata2 = MINOR(rdev); + } + else if (S_ISCHR(mode)) { + data->arg.ftype = NF4CHR; + data->arg.u.device.specdata1 = MAJOR(rdev); + data->arg.u.device.specdata2 = MINOR(rdev); + } + + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: + return status; +} + +static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, dev_t rdev) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_mknod(dir, dentry, sattr, rdev), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsstat *fsstat) +{ + struct nfs4_statfs_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct nfs4_statfs_res res = { + .fsstat = fsstat, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + nfs_fattr_init(fsstat->fattr); + return nfs4_call_sync(server, &msg, &args, &res, 0); +} + +static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_statfs(server, fhandle, fsstat), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *fsinfo) +{ + struct nfs4_fsinfo_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct nfs4_fsinfo_res res = { + .fsinfo = fsinfo, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + return nfs4_call_sync(server, &msg, &args, &res, 0); +} + +static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_do_fsinfo(server, fhandle, fsinfo), + &exception); + } while (exception.retry); + return err; +} + +static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) +{ + nfs_fattr_init(fsinfo->fattr); + return nfs4_do_fsinfo(server, fhandle, fsinfo); +} + +static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *pathconf) +{ + struct nfs4_pathconf_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct nfs4_pathconf_res res = { + .pathconf = pathconf, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + /* None of the pathconf attributes are mandatory to implement */ + if ((args.bitmask[0] & nfs4_pathconf_bitmap[0]) == 0) { + memset(pathconf, 0, sizeof(*pathconf)); + return 0; + } + + nfs_fattr_init(pathconf->fattr); + return nfs4_call_sync(server, &msg, &args, &res, 0); +} + +static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *pathconf) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_proc_pathconf(server, fhandle, pathconf), + &exception); + } while (exception.retry); + return err; +} + +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +{ + struct nfs_server *server = NFS_SERVER(data->inode); + + dprintk("--> %s\n", __func__); + + nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); + + if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { + nfs_restart_rpc(task, server->nfs_client); + return -EAGAIN; + } + + nfs_invalidate_atime(data->inode); + if (task->tk_status > 0) + renew_lease(server, data->timestamp); + return 0; +} + +static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +{ + data->timestamp = jiffies; + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; +} + +static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) +{ + struct inode *inode = data->inode; + + nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, + task->tk_status); + + if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { + nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); + return -EAGAIN; + } + if (task->tk_status >= 0) { + renew_lease(NFS_SERVER(inode), data->timestamp); + nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + } + return 0; +} + +static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +{ + struct nfs_server *server = NFS_SERVER(data->inode); + + data->args.bitmask = server->cache_consistency_bitmask; + data->res.server = server; + data->timestamp = jiffies; + + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; +} + +static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) +{ + struct inode *inode = data->inode; + + nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, + task->tk_status); + if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { + nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); + return -EAGAIN; + } + nfs_refresh_inode(inode, data->res.fattr); + return 0; +} + +static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +{ + struct nfs_server *server = NFS_SERVER(data->inode); + + data->args.bitmask = server->cache_consistency_bitmask; + data->res.server = server; + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; +} + +/* + * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special + * standalone procedure for queueing an asynchronous RENEW. + */ +static void nfs4_renew_done(struct rpc_task *task, void *data) +{ + struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp; + unsigned long timestamp = (unsigned long)data; + + if (task->tk_status < 0) { + /* Unless we're shutting down, schedule state recovery! */ + if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) + nfs4_schedule_state_recovery(clp); + return; + } + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); +} + +static const struct rpc_call_ops nfs4_renew_ops = { + .rpc_call_done = nfs4_renew_done, +}; + +int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = cred, + }; + + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + &nfs4_renew_ops, (void *)jiffies); +} + +int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = cred, + }; + unsigned long now = jiffies; + int status; + + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status < 0) + return status; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,now)) + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + return 0; +} + +static inline int nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} + +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on + * the stack. + */ +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) + +static void buf_to_pages(const void *buf, size_t buflen, + struct page **pages, unsigned int *pgbase) +{ + const void *p = buf; + + *pgbase = offset_in_page(buf); + p -= *pgbase; + while (p < buf + buflen) { + *(pages++) = virt_to_page(p); + p += PAGE_CACHE_SIZE; + } +} + +struct nfs4_cached_acl { + int cached; + size_t len; + char data[0]; +}; + +static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + kfree(nfsi->nfs4_acl); + nfsi->nfs4_acl = acl; + spin_unlock(&inode->i_lock); +} + +static void nfs4_zap_acl_attr(struct inode *inode) +{ + nfs4_set_cached_acl(inode, NULL); +} + +static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_cached_acl *acl; + int ret = -ENOENT; + + spin_lock(&inode->i_lock); + acl = nfsi->nfs4_acl; + if (acl == NULL) + goto out; + if (buf == NULL) /* user is just asking for length */ + goto out_len; + if (acl->cached == 0) + goto out; + ret = -ERANGE; /* see getxattr(2) man page */ + if (acl->len > buflen) + goto out; + memcpy(buf, acl->data, acl->len); +out_len: + ret = acl->len; +out: + spin_unlock(&inode->i_lock); + return ret; +} + +static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) +{ + struct nfs4_cached_acl *acl; + + if (buf && acl_len <= PAGE_SIZE) { + acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 1; + memcpy(acl->data, buf, acl_len); + } else { + acl = kmalloc(sizeof(*acl), GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 0; + } + acl->len = acl_len; +out: + nfs4_set_cached_acl(inode, acl); +} + +static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_getaclargs args = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct nfs_getaclres res = { + .acl_len = buflen, + }; + void *resp_buf; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = &args, + .rpc_resp = &res, + }; + struct page *localpage = NULL; + int ret; + + if (buflen < PAGE_SIZE) { + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. */ + localpage = alloc_page(GFP_KERNEL); + resp_buf = page_address(localpage); + if (localpage == NULL) + return -ENOMEM; + args.acl_pages[0] = localpage; + args.acl_pgbase = 0; + args.acl_len = PAGE_SIZE; + } else { + resp_buf = buf; + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + } + ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); + if (ret) + goto out_free; + if (res.acl_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, res.acl_len); + else + nfs4_write_cached_acl(inode, resp_buf, res.acl_len); + if (buf) { + ret = -ERANGE; + if (res.acl_len > buflen) + goto out_free; + if (localpage) + memcpy(buf, resp_buf, res.acl_len); + } + ret = res.acl_len; +out_free: + if (localpage) + __free_page(localpage); + return ret; +} + +static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + ssize_t ret; + do { + ret = __nfs4_get_acl_uncached(inode, buf, buflen); + if (ret >= 0) + break; + ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception); + } while (exception.retry); + return ret; +} + +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + ret = nfs_revalidate_inode(server, inode); + if (ret < 0) + return ret; + ret = nfs4_read_cached_acl(inode, buf, buflen); + if (ret != -ENOENT) + return ret; + return nfs4_get_acl_uncached(inode, buf, buflen); +} + +static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct nfs_setaclres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + nfs_inode_return_delegation(inode); + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + ret = nfs4_call_sync(server, &msg, &arg, &res, 1); + nfs_access_zap_cache(inode); + nfs_zap_acl_cache(inode); + return ret; +} + +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + __nfs4_proc_set_acl(inode, buf, buflen), + &exception); + } while (exception.retry); + return err; +} + +static int +_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state) +{ + if (!clp || task->tk_status >= 0) + return 0; + switch(task->tk_status) { + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + if (state == NULL) + break; + nfs4_state_mark_reclaim_nograce(clp, state); + goto do_state_recovery; + case -NFS4ERR_STALE_STATEID: + if (state == NULL) + break; + nfs4_state_mark_reclaim_reboot(clp, state); + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_EXPIRED: + goto do_state_recovery; +#if defined(CONFIG_NFS_V4_1) + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR %d, Reset session\n", __func__, + task->tk_status); + nfs4_schedule_state_recovery(clp); + task->tk_status = 0; + return -EAGAIN; +#endif /* CONFIG_NFS_V4_1 */ + case -NFS4ERR_DELAY: + if (server) + nfs_inc_server_stats(server, NFSIOS_DELAY); + case -NFS4ERR_GRACE: + rpc_delay(task, NFS4_POLL_RETRY_MAX); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_OLD_STATEID: + task->tk_status = 0; + return -EAGAIN; + } + task->tk_status = nfs4_map_errors(task->tk_status); + return 0; +do_state_recovery: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + task->tk_status = 0; + return -EAGAIN; +} + +static int +nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) +{ + return _nfs4_async_handle_error(task, server, server->nfs_client, state); +} + +int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) +{ + nfs4_verifier sc_verifier; + struct nfs4_setclientid setclientid = { + .sc_verifier = &sc_verifier, + .sc_prog = program, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], + .rpc_argp = &setclientid, + .rpc_resp = clp, + .rpc_cred = cred, + }; + __be32 *p; + int loop = 0; + int status; + + p = (__be32*)sc_verifier.data; + *p++ = htonl((u32)clp->cl_boot_time.tv_sec); + *p = htonl((u32)clp->cl_boot_time.tv_nsec); + + for(;;) { + setclientid.sc_name_len = scnprintf(setclientid.sc_name, + sizeof(setclientid.sc_name), "%s/%s %s %s %u", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_PROTO), + clp->cl_rpcclient->cl_auth->au_ops->au_name, + clp->cl_id_uniquifier); + setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, + sizeof(setclientid.sc_netid), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_NETID)); + setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, + sizeof(setclientid.sc_uaddr), "%s.%u.%u", + clp->cl_ipaddr, port >> 8, port & 255); + + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status != -NFS4ERR_CLID_INUSE) + break; + if (signalled()) + break; + if (loop++ & 1) + ssleep(clp->cl_lease_time + 1); + else + if (++clp->cl_id_uniquifier == 0) + break; + } + return status; +} + +static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) +{ + struct nfs_fsinfo fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], + .rpc_argp = clp, + .rpc_resp = &fsinfo, + .rpc_cred = cred, + }; + unsigned long now; + int status; + + now = jiffies; + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status == 0) { + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + } + return status; +} + +int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) +{ + long timeout = 0; + int err; + do { + err = _nfs4_proc_setclientid_confirm(clp, cred); + switch (err) { + case 0: + return err; + case -NFS4ERR_RESOURCE: + /* The IBM lawyers misread another document! */ + case -NFS4ERR_DELAY: + err = nfs4_delay(clp->cl_rpcclient, &timeout); + } + } while (err == 0); + return err; +} + +struct nfs4_delegreturndata { + struct nfs4_delegreturnargs args; + struct nfs4_delegreturnres res; + struct nfs_fh fh; + nfs4_stateid stateid; + unsigned long timestamp; + struct nfs_fattr fattr; + int rpc_status; +}; + +static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_delegreturndata *data = calldata; + + nfs4_sequence_done(data->res.server, &data->res.seq_res, + task->tk_status); + + switch (task->tk_status) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + case 0: + renew_lease(data->res.server, data->timestamp); + break; + default: + if (nfs4_async_handle_error(task, data->res.server, NULL) == + -EAGAIN) { + nfs_restart_rpc(task, data->res.server->nfs_client); + return; + } + } + data->rpc_status = task->tk_status; +} + +static void nfs4_delegreturn_release(void *calldata) +{ + kfree(calldata); +} + +#if defined(CONFIG_NFS_V4_1) +static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) +{ + struct nfs4_delegreturndata *d_data; + + d_data = (struct nfs4_delegreturndata *)data; + + if (nfs4_setup_sequence(d_data->res.server->nfs_client, + &d_data->args.seq_args, + &d_data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + +static const struct rpc_call_ops nfs4_delegreturn_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs4_delegreturn_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs4_delegreturn_done, + .rpc_release = nfs4_delegreturn_release, +}; + +static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) +{ + struct nfs4_delegreturndata *data; + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], + .rpc_cred = cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_delegreturn_ops, + .flags = RPC_TASK_ASYNC, + }; + int status = 0; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + data->args.fhandle = &data->fh; + data->args.stateid = &data->stateid; + data->args.bitmask = server->attr_bitmask; + nfs_copy_fh(&data->fh, NFS_FH(inode)); + memcpy(&data->stateid, stateid, sizeof(data->stateid)); + data->res.fattr = &data->fattr; + data->res.server = server; + data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + nfs_fattr_init(data->res.fattr); + data->timestamp = jiffies; + data->rpc_status = 0; + + task_setup_data.callback_data = data; + msg.rpc_argp = &data->args, + msg.rpc_resp = &data->res, + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + if (!issync) + goto out; + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) + goto out; + status = data->rpc_status; + if (status != 0) + goto out; + nfs_refresh_inode(inode, &data->fattr); +out: + rpc_put_task(task); + return status; +} + +int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); + switch (err) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + case 0: + return 0; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +/* + * sleep, with exponential backoff, and retry the LOCK operation. + */ +static unsigned long +nfs4_set_lock_task_retry(unsigned long timeout) +{ + schedule_timeout_killable(timeout); + timeout <<= 1; + if (timeout > NFS4_LOCK_MAXTIMEOUT) + return NFS4_LOCK_MAXTIMEOUT; + return timeout; +} + +static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; + struct nfs_lockt_args arg = { + .fh = NFS_FH(inode), + .fl = request, + }; + struct nfs_lockt_res res = { + .denied = request, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs4_lock_state *lsp; + int status; + + arg.lock_owner.clientid = clp->cl_clientid; + status = nfs4_set_lock_state(state, request); + if (status != 0) + goto out; + lsp = request->fl_u.nfs4_fl.owner; + arg.lock_owner.id = lsp->ls_id.id; + status = nfs4_call_sync(server, &msg, &arg, &res, 1); + switch (status) { + case 0: + request->fl_type = F_UNLCK; + break; + case -NFS4ERR_DENIED: + status = 0; + } + request->fl_ops->fl_release_private(request); +out: + return status; +} + +static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(NFS_SERVER(state->inode), + _nfs4_proc_getlk(state, cmd, request), + &exception); + } while (exception.retry); + return err; +} + +static int do_vfs_lock(struct file *file, struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_file_wait(file, fl); + break; + case FL_FLOCK: + res = flock_lock_file_wait(file, fl); + break; + default: + BUG(); + } + return res; +} + +struct nfs4_unlockdata { + struct nfs_locku_args arg; + struct nfs_locku_res res; + struct nfs4_lock_state *lsp; + struct nfs_open_context *ctx; + struct file_lock fl; + const struct nfs_server *server; + unsigned long timestamp; +}; + +static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, + struct nfs_open_context *ctx, + struct nfs4_lock_state *lsp, + struct nfs_seqid *seqid) +{ + struct nfs4_unlockdata *p; + struct inode *inode = lsp->ls_state->inode; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return NULL; + p->arg.fh = NFS_FH(inode); + p->arg.fl = &p->fl; + p->arg.seqid = seqid; + p->res.seqid = seqid; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + p->arg.stateid = &lsp->ls_stateid; + p->lsp = lsp; + atomic_inc(&lsp->ls_count); + /* Ensure we don't close file until we're done freeing locks! */ + p->ctx = get_nfs_open_context(ctx); + memcpy(&p->fl, fl, sizeof(p->fl)); + p->server = NFS_SERVER(inode); + return p; +} + +static void nfs4_locku_release_calldata(void *data) +{ + struct nfs4_unlockdata *calldata = data; + nfs_free_seqid(calldata->arg.seqid); + nfs4_put_lock_state(calldata->lsp); + put_nfs_open_context(calldata->ctx); + kfree(calldata); +} + +static void nfs4_locku_done(struct rpc_task *task, void *data) +{ + struct nfs4_unlockdata *calldata = data; + + nfs4_sequence_done(calldata->server, &calldata->res.seq_res, + task->tk_status); + if (RPC_ASSASSINATED(task)) + return; + switch (task->tk_status) { + case 0: + memcpy(calldata->lsp->ls_stateid.data, + calldata->res.stateid.data, + sizeof(calldata->lsp->ls_stateid.data)); + renew_lease(calldata->server, calldata->timestamp); + break; + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + break; + default: + if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) + nfs_restart_rpc(task, + calldata->server->nfs_client); + } +} + +static void nfs4_locku_prepare(struct rpc_task *task, void *data) +{ + struct nfs4_unlockdata *calldata = data; + + if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) + return; + if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { + /* Note: exit _without_ running nfs4_locku_done */ + task->tk_action = NULL; + return; + } + calldata->timestamp = jiffies; + if (nfs4_setup_sequence(calldata->server->nfs_client, + &calldata->arg.seq_args, + &calldata->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} + +static const struct rpc_call_ops nfs4_locku_ops = { + .rpc_call_prepare = nfs4_locku_prepare, + .rpc_call_done = nfs4_locku_done, + .rpc_release = nfs4_locku_release_calldata, +}; + +static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, + struct nfs_open_context *ctx, + struct nfs4_lock_state *lsp, + struct nfs_seqid *seqid) +{ + struct nfs4_unlockdata *data; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_cred = ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(lsp->ls_state->inode), + .rpc_message = &msg, + .callback_ops = &nfs4_locku_ops, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + + /* Ensure this is an unlock - when canceling a lock, the + * canceled lock is passed in, and it won't be an unlock. + */ + fl->fl_type = F_UNLCK; + + data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); + if (data == NULL) { + nfs_free_seqid(seqid); + return ERR_PTR(-ENOMEM); + } + + msg.rpc_argp = &data->arg, + msg.rpc_resp = &data->res, + task_setup_data.callback_data = data; + return rpc_run_task(&task_setup_data); +} + +static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_seqid *seqid; + struct nfs4_lock_state *lsp; + struct rpc_task *task; + int status = 0; + unsigned char fl_flags = request->fl_flags; + + status = nfs4_set_lock_state(state, request); + /* Unlock _before_ we do the RPC call */ + request->fl_flags |= FL_EXISTS; + down_read(&nfsi->rwsem); + if (do_vfs_lock(request->fl_file, request) == -ENOENT) { + up_read(&nfsi->rwsem); + goto out; + } + up_read(&nfsi->rwsem); + if (status != 0) + goto out; + /* Is this a delegated lock? */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; + lsp = request->fl_u.nfs4_fl.owner; + seqid = nfs_alloc_seqid(&lsp->ls_seqid); + status = -ENOMEM; + if (seqid == NULL) + goto out; + task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); + status = PTR_ERR(task); + if (IS_ERR(task)) + goto out; + status = nfs4_wait_for_completion_rpc_task(task); + rpc_put_task(task); +out: + request->fl_flags = fl_flags; + return status; +} + +struct nfs4_lockdata { + struct nfs_lock_args arg; + struct nfs_lock_res res; + struct nfs4_lock_state *lsp; + struct nfs_open_context *ctx; + struct file_lock fl; + unsigned long timestamp; + int rpc_status; + int cancelled; + struct nfs_server *server; +}; + +static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, + struct nfs_open_context *ctx, struct nfs4_lock_state *lsp) +{ + struct nfs4_lockdata *p; + struct inode *inode = lsp->ls_state->inode; + struct nfs_server *server = NFS_SERVER(inode); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return NULL; + + p->arg.fh = NFS_FH(inode); + p->arg.fl = &p->fl; + p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid); + if (p->arg.open_seqid == NULL) + goto out_free; + p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (p->arg.lock_seqid == NULL) + goto out_free_seqid; + p->arg.lock_stateid = &lsp->ls_stateid; + p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; + p->arg.lock_owner.id = lsp->ls_id.id; + p->res.lock_seqid = p->arg.lock_seqid; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + p->lsp = lsp; + p->server = server; + atomic_inc(&lsp->ls_count); + p->ctx = get_nfs_open_context(ctx); + memcpy(&p->fl, fl, sizeof(p->fl)); + return p; +out_free_seqid: + nfs_free_seqid(p->arg.open_seqid); +out_free: + kfree(p); + return NULL; +} + +static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_lockdata *data = calldata; + struct nfs4_state *state = data->lsp->ls_state; + + dprintk("%s: begin!\n", __func__); + if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) + return; + /* Do we need to do an open_to_lock_owner? */ + if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { + if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) + return; + data->arg.open_stateid = &state->stateid; + data->arg.new_lock_owner = 1; + data->res.open_seqid = data->arg.open_seqid; + } else + data->arg.new_lock_owner = 0; + data->timestamp = jiffies; + if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); + dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); +} + +static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) +{ + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + nfs4_lock_prepare(task, calldata); +} + +static void nfs4_lock_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_lockdata *data = calldata; + + dprintk("%s: begin!\n", __func__); + + nfs4_sequence_done(data->server, &data->res.seq_res, + task->tk_status); + + data->rpc_status = task->tk_status; + if (RPC_ASSASSINATED(task)) + goto out; + if (data->arg.new_lock_owner != 0) { + if (data->rpc_status == 0) + nfs_confirm_seqid(&data->lsp->ls_seqid, 0); + else + goto out; + } + if (data->rpc_status == 0) { + memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, + sizeof(data->lsp->ls_stateid.data)); + data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; + renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); + } +out: + dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); +} + +static void nfs4_lock_release(void *calldata) +{ + struct nfs4_lockdata *data = calldata; + + dprintk("%s: begin!\n", __func__); + nfs_free_seqid(data->arg.open_seqid); + if (data->cancelled != 0) { + struct rpc_task *task; + task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, + data->arg.lock_seqid); + if (!IS_ERR(task)) + rpc_put_task(task); + dprintk("%s: cancelling lock!\n", __func__); + } else + nfs_free_seqid(data->arg.lock_seqid); + nfs4_put_lock_state(data->lsp); + put_nfs_open_context(data->ctx); + kfree(data); + dprintk("%s: done!\n", __func__); +} + +static const struct rpc_call_ops nfs4_lock_ops = { + .rpc_call_prepare = nfs4_lock_prepare, + .rpc_call_done = nfs4_lock_done, + .rpc_release = nfs4_lock_release, +}; + +static const struct rpc_call_ops nfs4_recover_lock_ops = { + .rpc_call_prepare = nfs4_recover_lock_prepare, + .rpc_call_done = nfs4_lock_done, + .rpc_release = nfs4_lock_release, +}; + +static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state *state = lsp->ls_state; + + switch (error) { + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_nograce(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + break; + case -NFS4ERR_STALE_STATEID: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_reboot(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + }; +} + +static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type) +{ + struct nfs4_lockdata *data; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_cred = state->owner->so_cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(state->inode), + .rpc_message = &msg, + .callback_ops = &nfs4_lock_ops, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + int ret; + + dprintk("%s: begin!\n", __func__); + data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), + fl->fl_u.nfs4_fl.owner); + if (data == NULL) + return -ENOMEM; + if (IS_SETLKW(cmd)) + data->arg.block = 1; + if (recovery_type > NFS_LOCK_NEW) { + if (recovery_type == NFS_LOCK_RECLAIM) + data->arg.reclaim = NFS_LOCK_RECLAIM; + task_setup_data.callback_ops = &nfs4_recover_lock_ops; + } + msg.rpc_argp = &data->arg, + msg.rpc_resp = &data->res, + task_setup_data.callback_data = data; + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + ret = nfs4_wait_for_completion_rpc_task(task); + if (ret == 0) { + ret = data->rpc_status; + if (ret) + nfs4_handle_setlk_error(data->server, data->lsp, + data->arg.new_lock_owner, ret); + } else + data->cancelled = 1; + rpc_put_task(task); + dprintk("%s: done, ret = %d!\n", __func__, ret); + return ret; +} + +static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + /* Cache the lock if possible... */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) + return 0; + err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + err = nfs4_set_lock_state(state, request); + if (err != 0) + return err; + do { + if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) + return 0; + err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED); + switch (err) { + default: + goto out; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + nfs4_handle_exception(server, err, &exception); + err = 0; + } + } while (exception.retry); +out: + return err; +} + +static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + unsigned char fl_flags = request->fl_flags; + int status = -ENOLCK; + + if ((fl_flags & FL_POSIX) && + !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) + goto out; + /* Is this a delegated open? */ + status = nfs4_set_lock_state(state, request); + if (status != 0) + goto out; + request->fl_flags |= FL_ACCESS; + status = do_vfs_lock(request->fl_file, request); + if (status < 0) + goto out; + down_read(&nfsi->rwsem); + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + /* Yes: cache locks! */ + /* ...but avoid races with delegation recall... */ + request->fl_flags = fl_flags & ~FL_SLEEP; + status = do_vfs_lock(request->fl_file, request); + goto out_unlock; + } + status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); + if (status != 0) + goto out_unlock; + /* Note: we always want to sleep here! */ + request->fl_flags = fl_flags | FL_SLEEP; + if (do_vfs_lock(request->fl_file, request) < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); +out_unlock: + up_read(&nfsi->rwsem); +out: + request->fl_flags = fl_flags; + return status; +} + +static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_proc_setlk(state, cmd, request); + if (err == -NFS4ERR_DENIED) + err = -EAGAIN; + err = nfs4_handle_exception(NFS_SERVER(state->inode), + err, &exception); + } while (exception.retry); + return err; +} + +static int +nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) +{ + struct nfs_open_context *ctx; + struct nfs4_state *state; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + int status; + + /* verify open state */ + ctx = nfs_file_open_context(filp); + state = ctx->state; + + if (request->fl_start < 0 || request->fl_end < 0) + return -EINVAL; + + if (IS_GETLK(cmd)) { + if (state != NULL) + return nfs4_proc_getlk(state, F_GETLK, request); + return 0; + } + + if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) + return -EINVAL; + + if (request->fl_type == F_UNLCK) { + if (state != NULL) + return nfs4_proc_unlck(state, cmd, request); + return 0; + } + + if (state == NULL) + return -ENOLCK; + do { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + timeout = nfs4_set_lock_task_retry(timeout); + status = -ERESTARTSYS; + if (signalled()) + break; + } while(status < 0); + return status; +} + +int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + err = nfs4_set_lock_state(state, fl); + if (err != 0) + goto out; + do { + err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + switch (err) { + default: + printk(KERN_ERR "%s: unhandled error %d.\n", + __func__, err); + case 0: + case -ESTALE: + goto out; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + nfs4_schedule_state_recovery(server->nfs_client); + goto out; + case -ERESTARTSYS: + /* + * The show must go on: exit, but mark the + * stateid as needing recovery. + */ + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + err = 0; + goto out; + case -ENOMEM: + case -NFS4ERR_DENIED: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + err = 0; + goto out; + case -NFS4ERR_DELAY: + break; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); +out: + return err; +} + +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + +int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_set_acl(inode, buf, buflen); +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g. user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. */ +ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); +} + +ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + + if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode))) + return 0; + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; +} + +static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) +{ + if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) && + (fattr->valid & NFS_ATTR_FATTR_FSID) && + (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) + return; + + fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | + NFS_ATTR_FATTR_NLINK; + fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; + fattr->nlink = 2; +} + +int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, + struct nfs4_fs_locations *fs_locations, struct page *page) +{ + struct nfs_server *server = NFS_SERVER(dir); + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + [1] = FATTR4_WORD1_MOUNTED_ON_FILEID, + }; + struct nfs4_fs_locations_arg args = { + .dir_fh = NFS_FH(dir), + .name = name, + .page = page, + .bitmask = bitmask, + }; + struct nfs4_fs_locations_res res = { + .fs_locations = fs_locations, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + dprintk("%s: start\n", __func__); + nfs_fattr_init(&fs_locations->fattr); + fs_locations->server = server; + fs_locations->nlocations = 0; + status = nfs4_call_sync(server, &msg, &args, &res, 0); + nfs_fixup_referral_attributes(&fs_locations->fattr); + dprintk("%s: returned status = %d\n", __func__, status); + return status; +} + +#ifdef CONFIG_NFS_V4_1 +/* + * nfs4_proc_exchange_id() + * + * Since the clientid has expired, all compounds using sessions + * associated with the stale clientid will be returning + * NFS4ERR_BADSESSION in the sequence operation, and will therefore + * be in some phase of session reset. + */ +int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +{ + nfs4_verifier verifier; + struct nfs41_exchange_id_args args = { + .client = clp, + .flags = clp->cl_exchange_flags, + }; + struct nfs41_exchange_id_res res = { + .client = clp, + }; + int status; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + __be32 *p; + + dprintk("--> %s\n", __func__); + BUG_ON(clp == NULL); + + /* Remove server-only flags */ + args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R; + + p = (u32 *)verifier.data; + *p++ = htonl((u32)clp->cl_boot_time.tv_sec); + *p = htonl((u32)clp->cl_boot_time.tv_nsec); + args.verifier = &verifier; + + while (1) { + args.id_len = scnprintf(args.id, sizeof(args.id), + "%s/%s %u", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + clp->cl_id_uniquifier); + + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + + if (status != NFS4ERR_CLID_INUSE) + break; + + if (signalled()) + break; + + if (++clp->cl_id_uniquifier == 0) + break; + } + + dprintk("<-- %s status= %d\n", __func__, status); + return status; +} + +struct nfs4_get_lease_time_data { + struct nfs4_get_lease_time_args *args; + struct nfs4_get_lease_time_res *res; + struct nfs_client *clp; +}; + +static void nfs4_get_lease_time_prepare(struct rpc_task *task, + void *calldata) +{ + int ret; + struct nfs4_get_lease_time_data *data = + (struct nfs4_get_lease_time_data *)calldata; + + dprintk("--> %s\n", __func__); + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + /* just setup sequence, do not trigger session recovery + since we're invoked within one */ + ret = nfs41_setup_sequence(data->clp->cl_session, + &data->args->la_seq_args, + &data->res->lr_seq_res, 0, task); + + BUG_ON(ret == -EAGAIN); + rpc_call_start(task); + dprintk("<-- %s\n", __func__); +} + +/* + * Called from nfs4_state_manager thread for session setup, so don't recover + * from sequence operation or clientid errors. + */ +static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_get_lease_time_data *data = + (struct nfs4_get_lease_time_data *)calldata; + + dprintk("--> %s\n", __func__); + nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); + rpc_delay(task, NFS4_POLL_RETRY_MIN); + task->tk_status = 0; + nfs_restart_rpc(task, data->clp); + return; + } + dprintk("<-- %s\n", __func__); +} + +struct rpc_call_ops nfs4_get_lease_time_ops = { + .rpc_call_prepare = nfs4_get_lease_time_prepare, + .rpc_call_done = nfs4_get_lease_time_done, +}; + +int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) +{ + struct rpc_task *task; + struct nfs4_get_lease_time_args args; + struct nfs4_get_lease_time_res res = { + .lr_fsinfo = fsinfo, + }; + struct nfs4_get_lease_time_data data = { + .args = &args, + .res = &res, + .clp = clp, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GET_LEASE_TIME], + .rpc_argp = &args, + .rpc_resp = &res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_get_lease_time_ops, + .callback_data = &data + }; + int status; + + res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + dprintk("--> %s\n", __func__); + task = rpc_run_task(&task_setup); + + if (IS_ERR(task)) + status = PTR_ERR(task); + else { + status = task->tk_status; + rpc_put_task(task); + } + dprintk("<-- %s return %d\n", __func__, status); + + return status; +} + +/* + * Reset a slot table + */ +static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots, + int old_max_slots, int ivalue) +{ + int i; + int ret = 0; + + dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl); + + /* + * Until we have dynamic slot table adjustment, insist + * upon the same slot table size + */ + if (max_slots != old_max_slots) { + dprintk("%s reset slot table does't match old\n", + __func__); + ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */ + goto out; + } + spin_lock(&tbl->slot_tbl_lock); + for (i = 0; i < max_slots; ++i) + tbl->slots[i].seq_nr = ivalue; + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + tbl, tbl->slots, tbl->max_slots); +out: + dprintk("<-- %s: return %d\n", __func__, ret); + return ret; +} + +/* + * Reset the forechannel and backchannel slot tables + */ +static int nfs4_reset_slot_tables(struct nfs4_session *session) +{ + int status; + + status = nfs4_reset_slot_table(&session->fc_slot_table, + session->fc_attrs.max_reqs, + session->fc_slot_table.max_slots, + 1); + if (status) + return status; + + status = nfs4_reset_slot_table(&session->bc_slot_table, + session->bc_attrs.max_reqs, + session->bc_slot_table.max_slots, + 0); + return status; +} + +/* Destroy the slot table */ +static void nfs4_destroy_slot_tables(struct nfs4_session *session) +{ + if (session->fc_slot_table.slots != NULL) { + kfree(session->fc_slot_table.slots); + session->fc_slot_table.slots = NULL; + } + if (session->bc_slot_table.slots != NULL) { + kfree(session->bc_slot_table.slots); + session->bc_slot_table.slots = NULL; + } + return; +} + +/* + * Initialize slot table + */ +static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, + int max_slots, int ivalue) +{ + struct nfs4_slot *slot; + int ret = -ENOMEM; + + BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE); + + dprintk("--> %s: max_reqs=%u\n", __func__, max_slots); + + slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL); + if (!slot) + goto out; + ret = 0; + + spin_lock(&tbl->slot_tbl_lock); + tbl->max_slots = max_slots; + tbl->slots = slot; + tbl->highest_used_slotid = -1; /* no slot is currently used */ + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + tbl, tbl->slots, tbl->max_slots); +out: + dprintk("<-- %s: return %d\n", __func__, ret); + return ret; +} + +/* + * Initialize the forechannel and backchannel tables + */ +static int nfs4_init_slot_tables(struct nfs4_session *session) +{ + struct nfs4_slot_table *tbl; + int status = 0; + + tbl = &session->fc_slot_table; + if (tbl->slots == NULL) { + status = nfs4_init_slot_table(tbl, + session->fc_attrs.max_reqs, 1); + if (status) + return status; + } + + tbl = &session->bc_slot_table; + if (tbl->slots == NULL) { + status = nfs4_init_slot_table(tbl, + session->bc_attrs.max_reqs, 0); + if (status) + nfs4_destroy_slot_tables(session); + } + + return status; +} + +struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) +{ + struct nfs4_session *session; + struct nfs4_slot_table *tbl; + + session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL); + if (!session) + return NULL; + + /* + * The create session reply races with the server back + * channel probe. Mark the client NFS_CS_SESSION_INITING + * so that the client back channel can find the + * nfs_client struct + */ + clp->cl_cons_state = NFS_CS_SESSION_INITING; + init_completion(&session->complete); + + tbl = &session->fc_slot_table; + tbl->highest_used_slotid = -1; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); + + tbl = &session->bc_slot_table; + tbl->highest_used_slotid = -1; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); + + session->clp = clp; + return session; +} + +void nfs4_destroy_session(struct nfs4_session *session) +{ + nfs4_proc_destroy_session(session); + dprintk("%s Destroy backchannel for xprt %p\n", + __func__, session->clp->cl_rpcclient->cl_xprt); + xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, + NFS41_BC_MIN_CALLBACKS); + nfs4_destroy_slot_tables(session); + kfree(session); +} + +/* + * Initialize the values to be used by the client in CREATE_SESSION + * If nfs4_init_session set the fore channel request and response sizes, + * use them. + * + * Set the back channel max_resp_sz_cached to zero to force the client to + * always set csa_cachethis to FALSE because the current implementation + * of the back channel DRC only supports caching the CB_SEQUENCE operation. + */ +static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) +{ + struct nfs4_session *session = args->client->cl_session; + unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz, + mxresp_sz = session->fc_attrs.max_resp_sz; + + if (mxrqst_sz == 0) + mxrqst_sz = NFS_MAX_FILE_IO_SIZE; + if (mxresp_sz == 0) + mxresp_sz = NFS_MAX_FILE_IO_SIZE; + /* Fore channel attributes */ + args->fc_attrs.headerpadsz = 0; + args->fc_attrs.max_rqst_sz = mxrqst_sz; + args->fc_attrs.max_resp_sz = mxresp_sz; + args->fc_attrs.max_resp_sz_cached = mxresp_sz; + args->fc_attrs.max_ops = NFS4_MAX_OPS; + args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; + + dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " + "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", + __func__, + args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz, + args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops, + args->fc_attrs.max_reqs); + + /* Back channel attributes */ + args->bc_attrs.headerpadsz = 0; + args->bc_attrs.max_rqst_sz = PAGE_SIZE; + args->bc_attrs.max_resp_sz = PAGE_SIZE; + args->bc_attrs.max_resp_sz_cached = 0; + args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; + args->bc_attrs.max_reqs = 1; + + dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " + "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", + __func__, + args->bc_attrs.max_rqst_sz, args->bc_attrs.max_resp_sz, + args->bc_attrs.max_resp_sz_cached, args->bc_attrs.max_ops, + args->bc_attrs.max_reqs); +} + +static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) +{ + if (rcvd <= sent) + return 0; + printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " + "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); + return -EINVAL; +} + +#define _verify_fore_channel_attr(_name_) \ + _verify_channel_attr("fore", #_name_, \ + args->fc_attrs._name_, \ + session->fc_attrs._name_) + +#define _verify_back_channel_attr(_name_) \ + _verify_channel_attr("back", #_name_, \ + args->bc_attrs._name_, \ + session->bc_attrs._name_) + +/* + * The server is not allowed to increase the fore channel header pad size, + * maximum response size, or maximum number of operations. + * + * The back channel attributes are only negotiatied down: We send what the + * (back channel) server insists upon. + */ +static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, + struct nfs4_session *session) +{ + int ret = 0; + + ret |= _verify_fore_channel_attr(headerpadsz); + ret |= _verify_fore_channel_attr(max_resp_sz); + ret |= _verify_fore_channel_attr(max_ops); + + ret |= _verify_back_channel_attr(headerpadsz); + ret |= _verify_back_channel_attr(max_rqst_sz); + ret |= _verify_back_channel_attr(max_resp_sz); + ret |= _verify_back_channel_attr(max_resp_sz_cached); + ret |= _verify_back_channel_attr(max_ops); + ret |= _verify_back_channel_attr(max_reqs); + + return ret; +} + +static int _nfs4_proc_create_session(struct nfs_client *clp) +{ + struct nfs4_session *session = clp->cl_session; + struct nfs41_create_session_args args = { + .client = clp, + .cb_program = NFS4_CALLBACK, + }; + struct nfs41_create_session_res res = { + .client = clp, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + nfs4_init_channel_attrs(&args); + args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); + + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); + + if (!status) + /* Verify the session's negotiated channel_attrs values */ + status = nfs4_verify_channel_attrs(&args, session); + if (!status) { + /* Increment the clientid slot sequence id */ + clp->cl_seqid++; + } + + return status; +} + +/* + * Issues a CREATE_SESSION operation to the server. + * It is the responsibility of the caller to verify the session is + * expired before calling this routine. + */ +int nfs4_proc_create_session(struct nfs_client *clp) +{ + int status; + unsigned *ptr; + struct nfs4_session *session = clp->cl_session; + + dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); + + status = _nfs4_proc_create_session(clp); + if (status) + goto out; + + /* Init and reset the fore channel */ + status = nfs4_init_slot_tables(session); + dprintk("slot table initialization returned %d\n", status); + if (status) + goto out; + status = nfs4_reset_slot_tables(session); + dprintk("slot table reset returned %d\n", status); + if (status) + goto out; + + ptr = (unsigned *)&session->sess_id.data[0]; + dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, + clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); +out: + dprintk("<-- %s\n", __func__); + return status; +} + +/* + * Issue the over-the-wire RPC DESTROY_SESSION. + * The caller must serialize access to this routine. + */ +int nfs4_proc_destroy_session(struct nfs4_session *session) +{ + int status = 0; + struct rpc_message msg; + + dprintk("--> nfs4_proc_destroy_session\n"); + + /* session is still being setup */ + if (session->clp->cl_cons_state != NFS_CS_READY) + return status; + + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION]; + msg.rpc_argp = session; + msg.rpc_resp = NULL; + msg.rpc_cred = NULL; + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); + + if (status) + printk(KERN_WARNING + "Got error %d from the server on DESTROY_SESSION. " + "Session has been destroyed regardless...\n", status); + + dprintk("<-- nfs4_proc_destroy_session\n"); + return status; +} + +int nfs4_init_session(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_session *session; + unsigned int rsize, wsize; + int ret; + + if (!nfs4_has_session(clp)) + return 0; + + rsize = server->rsize; + if (rsize == 0) + rsize = NFS_MAX_FILE_IO_SIZE; + wsize = server->wsize; + if (wsize == 0) + wsize = NFS_MAX_FILE_IO_SIZE; + + session = clp->cl_session; + session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; + session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; + + ret = nfs4_recover_expired_lease(server); + if (!ret) + ret = nfs4_check_client_ready(clp); + return ret; +} + +/* + * Renew the cl_session lease. + */ +static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) +{ + struct nfs4_sequence_args args; + struct nfs4_sequence_res res; + + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + + args.sa_cache_this = 0; + + return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args, + &res, args.sa_cache_this, 1); +} + +void nfs41_sequence_call_done(struct rpc_task *task, void *data) +{ + struct nfs_client *clp = (struct nfs_client *)data; + + nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status); + + if (task->tk_status < 0) { + dprintk("%s ERROR %d\n", __func__, task->tk_status); + + if (_nfs4_async_handle_error(task, NULL, clp, NULL) + == -EAGAIN) { + nfs_restart_rpc(task, clp); + return; + } + } + dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); + + kfree(task->tk_msg.rpc_argp); + kfree(task->tk_msg.rpc_resp); + + dprintk("<-- %s\n", __func__); +} + +static void nfs41_sequence_prepare(struct rpc_task *task, void *data) +{ + struct nfs_client *clp; + struct nfs4_sequence_args *args; + struct nfs4_sequence_res *res; + + clp = (struct nfs_client *)data; + args = task->tk_msg.rpc_argp; + res = task->tk_msg.rpc_resp; + + if (nfs4_setup_sequence(clp, args, res, 0, task)) + return; + rpc_call_start(task); +} + +static const struct rpc_call_ops nfs41_sequence_ops = { + .rpc_call_done = nfs41_sequence_call_done, + .rpc_call_prepare = nfs41_sequence_prepare, +}; + +static int nfs41_proc_async_sequence(struct nfs_client *clp, + struct rpc_cred *cred) +{ + struct nfs4_sequence_args *args; + struct nfs4_sequence_res *res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], + .rpc_cred = cred, + }; + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + kfree(args); + return -ENOMEM; + } + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + msg.rpc_argp = args; + msg.rpc_resp = res; + + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + &nfs41_sequence_ops, (void *)clp); +} + +struct nfs4_reclaim_complete_data { + struct nfs_client *clp; + struct nfs41_reclaim_complete_args arg; + struct nfs41_reclaim_complete_res res; +}; + +static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) +{ + struct nfs4_reclaim_complete_data *calldata = data; + + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, + &calldata->res.seq_res, 0, task)) + return; + + rpc_call_start(task); +} + +static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) +{ + struct nfs4_reclaim_complete_data *calldata = data; + struct nfs_client *clp = calldata->clp; + struct nfs4_sequence_res *res = &calldata->res.seq_res; + + dprintk("--> %s\n", __func__); + nfs41_sequence_done(clp, res, task->tk_status); + switch (task->tk_status) { + case 0: + case -NFS4ERR_COMPLETE_ALREADY: + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + /* + * Handle the session error, but do not retry the operation, as + * we have no way of telling whether the clientid had to be + * reset before we got our reply. If reset, a new wave of + * reclaim operations will follow, containing their own reclaim + * complete. We don't want our retry to get on the way of + * recovery by incorrectly indicating to the server that we're + * done reclaiming state since the process had to be restarted. + */ + _nfs4_async_handle_error(task, NULL, clp, NULL); + break; + default: + if (_nfs4_async_handle_error( + task, NULL, clp, NULL) == -EAGAIN) { + rpc_restart_call_prepare(task); + return; + } + } + + dprintk("<-- %s\n", __func__); +} + +static void nfs4_free_reclaim_complete_data(void *data) +{ + struct nfs4_reclaim_complete_data *calldata = data; + + kfree(calldata); +} + +static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = { + .rpc_call_prepare = nfs4_reclaim_complete_prepare, + .rpc_call_done = nfs4_reclaim_complete_done, + .rpc_release = nfs4_free_reclaim_complete_data, +}; + +/* + * Issue a global reclaim complete. + */ +static int nfs41_proc_reclaim_complete(struct nfs_client *clp) +{ + struct nfs4_reclaim_complete_data *calldata; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE], + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_reclaim_complete_call_ops, + .flags = RPC_TASK_ASYNC, + }; + int status = -ENOMEM; + + dprintk("--> %s\n", __func__); + calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + goto out; + calldata->clp = clp; + calldata->arg.one_fs = 0; + calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + + msg.rpc_argp = &calldata->arg; + msg.rpc_resp = &calldata->res; + task_setup_data.callback_data = calldata; + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + status = PTR_ERR(task); + rpc_put_task(task); +out: + dprintk("<-- %s status=%d\n", __func__, status); + return status; +} +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, + .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, + .recover_open = nfs4_open_reclaim, + .recover_lock = nfs4_lock_reclaim, + .establish_clid = nfs4_init_clientid, + .get_clid_cred = nfs4_get_setclientid_cred, +}; + +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, + .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, + .recover_open = nfs4_open_reclaim, + .recover_lock = nfs4_lock_reclaim, + .establish_clid = nfs41_init_clientid, + .get_clid_cred = nfs4_get_exchange_id_cred, + .reclaim_complete = nfs41_proc_reclaim_complete, +}; +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, + .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, + .recover_open = nfs4_open_expired, + .recover_lock = nfs4_lock_expired, + .establish_clid = nfs4_init_clientid, + .get_clid_cred = nfs4_get_setclientid_cred, +}; + +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, + .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, + .recover_open = nfs4_open_expired, + .recover_lock = nfs4_lock_expired, + .establish_clid = nfs41_init_clientid, + .get_clid_cred = nfs4_get_exchange_id_cred, +}; +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { + .sched_state_renewal = nfs4_proc_async_renew, + .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, + .renew_lease = nfs4_proc_renew, +}; + +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { + .sched_state_renewal = nfs41_proc_async_sequence, + .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, + .renew_lease = nfs4_proc_sequence, +}; +#endif + +/* + * Per minor version reboot and network partition recovery ops + */ + +struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = { + &nfs40_reboot_recovery_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_reboot_recovery_ops, +#endif +}; + +struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = { + &nfs40_nograce_recovery_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_nograce_recovery_ops, +#endif +}; + +struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = { + &nfs40_state_renewal_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_state_renewal_ops, +#endif +}; + +static const struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + +const struct nfs_rpc_ops nfs_v4_clientops = { + .version = 4, /* protocol version */ + .dentry_ops = &nfs4_dentry_operations, + .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, + .getroot = nfs4_proc_get_root, + .getattr = nfs4_proc_getattr, + .setattr = nfs4_proc_setattr, + .lookupfh = nfs4_proc_lookupfh, + .lookup = nfs4_proc_lookup, + .access = nfs4_proc_access, + .readlink = nfs4_proc_readlink, + .create = nfs4_proc_create, + .remove = nfs4_proc_remove, + .unlink_setup = nfs4_proc_unlink_setup, + .unlink_done = nfs4_proc_unlink_done, + .rename = nfs4_proc_rename, + .link = nfs4_proc_link, + .symlink = nfs4_proc_symlink, + .mkdir = nfs4_proc_mkdir, + .rmdir = nfs4_proc_remove, + .readdir = nfs4_proc_readdir, + .mknod = nfs4_proc_mknod, + .statfs = nfs4_proc_statfs, + .fsinfo = nfs4_proc_fsinfo, + .pathconf = nfs4_proc_pathconf, + .set_capabilities = nfs4_server_capabilities, + .decode_dirent = nfs4_decode_dirent, + .read_setup = nfs4_proc_read_setup, + .read_done = nfs4_read_done, + .write_setup = nfs4_proc_write_setup, + .write_done = nfs4_write_done, + .commit_setup = nfs4_proc_commit_setup, + .commit_done = nfs4_commit_done, + .lock = nfs4_proc_lock, + .clear_acl_cache = nfs4_zap_acl_attr, + .close_context = nfs4_close_context, +}; + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c new file mode 100644 index 00000000000..0156c01c212 --- /dev/null +++ b/fs/nfs/nfs4renewd.c @@ -0,0 +1,139 @@ +/* + * fs/nfs/nfs4renewd.c + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Implementation of the NFSv4 "renew daemon", which wakes up periodically to + * send a RENEW, to keep state alive on the server. The daemon is implemented + * as an rpc_task, not a real kernel thread, so it always runs in rpciod's + * context. There is one renewd per nfs_server. + * + * TODO: If the send queue gets backlogged (e.g., if the server goes down), + * we will keep filling the queue with periodic RENEW requests. We need a + * mechanism for ensuring that if renewd successfully sends off a request, + * then it only wakes up when the request is finished. Maybe use the + * child task framework of the RPC layer? + */ + +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/clnt.h> + +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_PROC + +void +nfs4_renew_state(struct work_struct *work) +{ + struct nfs4_state_maintenance_ops *ops; + struct nfs_client *clp = + container_of(work, struct nfs_client, cl_renewd.work); + struct rpc_cred *cred; + long lease, timeout; + unsigned long last, now; + + ops = nfs4_state_renewal_ops[clp->cl_minorversion]; + dprintk("%s: start\n", __func__); + /* Are there any active superblocks? */ + if (list_empty(&clp->cl_superblocks)) + goto out; + spin_lock(&clp->cl_lock); + lease = clp->cl_lease_time; + last = clp->cl_last_renewal; + now = jiffies; + timeout = (2 * lease) / 3 + (long)last - (long)now; + /* Are we close to a lease timeout? */ + if (time_after(now, last + lease/3)) { + cred = ops->get_state_renewal_cred_locked(clp); + spin_unlock(&clp->cl_lock); + if (cred == NULL) { + if (list_empty(&clp->cl_delegations)) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + goto out; + } + nfs_expire_all_delegations(clp); + } else { + /* Queue an asynchronous RENEW. */ + ops->sched_state_renewal(clp, cred); + put_rpccred(cred); + } + timeout = (2 * lease) / 3; + spin_lock(&clp->cl_lock); + } else + dprintk("%s: failed to call renewd. Reason: lease not expired \n", + __func__); + if (timeout < 5 * HZ) /* safeguard */ + timeout = 5 * HZ; + dprintk("%s: requeueing work. Lease period = %ld\n", + __func__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); + nfs_expire_unreferenced_delegations(clp); +out: + dprintk("%s: done\n", __func__); +} + +void +nfs4_schedule_state_renewal(struct nfs_client *clp) +{ + long timeout; + + spin_lock(&clp->cl_lock); + timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal + - (long)jiffies; + if (timeout < 5 * HZ) + timeout = 5 * HZ; + dprintk("%s: requeueing work. Lease period = %ld\n", + __func__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + set_bit(NFS_CS_RENEWD, &clp->cl_res_state); + spin_unlock(&clp->cl_lock); +} + +void +nfs4_kill_renewd(struct nfs_client *clp) +{ + cancel_delayed_work_sync(&clp->cl_renewd); +} + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c new file mode 100644 index 00000000000..c1e2733f4fa --- /dev/null +++ b/fs/nfs/nfs4state.c @@ -0,0 +1,1431 @@ +/* + * fs/nfs/nfs4state.c + * + * Client-side XDR for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Implementation of the NFSv4 state model. For the time being, + * this is minimal, but will be made much more complex in a + * subsequent patch. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/smp_lock.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_idmap.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/workqueue.h> +#include <linux/bitops.h> + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "internal.h" + +#define OPENOWNER_POOL_SIZE 8 + +const nfs4_stateid zero_stateid; + +static LIST_HEAD(nfs4_clientid_list); + +int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) +{ + unsigned short port; + int status; + + port = nfs_callback_tcpport; + if (clp->cl_addr.ss_family == AF_INET6) + port = nfs_callback_tcpport6; + + status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred); + if (status == 0) + status = nfs4_proc_setclientid_confirm(clp, cred); + if (status == 0) + nfs4_schedule_state_renewal(clp); + return status; +} + +struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) +{ + struct rpc_cred *cred = NULL; + + if (clp->cl_machine_cred != NULL) + cred = get_rpccred(clp->cl_machine_cred); + return cred; +} + +static void nfs4_clear_machine_cred(struct nfs_client *clp) +{ + struct rpc_cred *cred; + + spin_lock(&clp->cl_lock); + cred = clp->cl_machine_cred; + clp->cl_machine_cred = NULL; + spin_unlock(&clp->cl_lock); + if (cred != NULL) + put_rpccred(cred); +} + +struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) +{ + struct nfs4_state_owner *sp; + struct rb_node *pos; + struct rpc_cred *cred = NULL; + + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + if (list_empty(&sp->so_states)) + continue; + cred = get_rpccred(sp->so_cred); + break; + } + return cred; +} + +#if defined(CONFIG_NFS_V4_1) + +static int nfs41_setup_state_renewal(struct nfs_client *clp) +{ + int status; + struct nfs_fsinfo fsinfo; + + status = nfs4_proc_get_lease_time(clp, &fsinfo); + if (status == 0) { + /* Update lease time and schedule renewal */ + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = jiffies; + spin_unlock(&clp->cl_lock); + + nfs4_schedule_state_renewal(clp); + } + + return status; +} + +static void nfs4_end_drain_session(struct nfs_client *clp) +{ + struct nfs4_session *ses = clp->cl_session; + int max_slots; + + if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) { + spin_lock(&ses->fc_slot_table.slot_tbl_lock); + max_slots = ses->fc_slot_table.max_slots; + while (max_slots--) { + struct rpc_task *task; + + task = rpc_wake_up_next(&ses->fc_slot_table. + slot_tbl_waitq); + if (!task) + break; + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + } + spin_unlock(&ses->fc_slot_table.slot_tbl_lock); + } +} + +static int nfs4_begin_drain_session(struct nfs_client *clp) +{ + struct nfs4_session *ses = clp->cl_session; + struct nfs4_slot_table *tbl = &ses->fc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); + set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); + if (tbl->highest_used_slotid != -1) { + INIT_COMPLETION(ses->complete); + spin_unlock(&tbl->slot_tbl_lock); + return wait_for_completion_interruptible(&ses->complete); + } + spin_unlock(&tbl->slot_tbl_lock); + return 0; +} + +int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) +{ + int status; + + nfs4_begin_drain_session(clp); + status = nfs4_proc_exchange_id(clp, cred); + if (status != 0) + goto out; + status = nfs4_proc_create_session(clp); + if (status != 0) + goto out; + nfs41_setup_state_renewal(clp); + nfs_mark_client_ready(clp, NFS_CS_READY); +out: + return status; +} + +struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) +{ + struct rpc_cred *cred; + + spin_lock(&clp->cl_lock); + cred = nfs4_get_machine_cred_locked(clp); + spin_unlock(&clp->cl_lock); + return cred; +} + +#endif /* CONFIG_NFS_V4_1 */ + +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) +{ + struct nfs4_state_owner *sp; + struct rb_node *pos; + struct rpc_cred *cred; + + spin_lock(&clp->cl_lock); + cred = nfs4_get_machine_cred_locked(clp); + if (cred != NULL) + goto out; + pos = rb_first(&clp->cl_state_owners); + if (pos != NULL) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + cred = get_rpccred(sp->so_cred); + } +out: + spin_unlock(&clp->cl_lock); + return cred; +} + +static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new, + __u64 minval, int maxbits) +{ + struct rb_node **p, *parent; + struct nfs_unique_id *pos; + __u64 mask = ~0ULL; + + if (maxbits < 64) + mask = (1ULL << maxbits) - 1ULL; + + /* Ensure distribution is more or less flat */ + get_random_bytes(&new->id, sizeof(new->id)); + new->id &= mask; + if (new->id < minval) + new->id += minval; +retry: + p = &root->rb_node; + parent = NULL; + + while (*p != NULL) { + parent = *p; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + + if (new->id < pos->id) + p = &(*p)->rb_left; + else if (new->id > pos->id) + p = &(*p)->rb_right; + else + goto id_exists; + } + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, root); + return; +id_exists: + for (;;) { + new->id++; + if (new->id < minval || (new->id & mask) != new->id) { + new->id = minval; + break; + } + parent = rb_next(parent); + if (parent == NULL) + break; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + if (new->id < pos->id) + break; + } + goto retry; +} + +static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) +{ + rb_erase(&id->rb_node, root); +} + +static struct nfs4_state_owner * +nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred) +{ + struct nfs_client *clp = server->nfs_client; + struct rb_node **p = &clp->cl_state_owners.rb_node, + *parent = NULL; + struct nfs4_state_owner *sp, *res = NULL; + + while (*p != NULL) { + parent = *p; + sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + + if (server < sp->so_server) { + p = &parent->rb_left; + continue; + } + if (server > sp->so_server) { + p = &parent->rb_right; + continue; + } + if (cred < sp->so_cred) + p = &parent->rb_left; + else if (cred > sp->so_cred) + p = &parent->rb_right; + else { + atomic_inc(&sp->so_count); + res = sp; + break; + } + } + return res; +} + +static struct nfs4_state_owner * +nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new) +{ + struct rb_node **p = &clp->cl_state_owners.rb_node, + *parent = NULL; + struct nfs4_state_owner *sp; + + while (*p != NULL) { + parent = *p; + sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + + if (new->so_server < sp->so_server) { + p = &parent->rb_left; + continue; + } + if (new->so_server > sp->so_server) { + p = &parent->rb_right; + continue; + } + if (new->so_cred < sp->so_cred) + p = &parent->rb_left; + else if (new->so_cred > sp->so_cred) + p = &parent->rb_right; + else { + atomic_inc(&sp->so_count); + return sp; + } + } + nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64); + rb_link_node(&new->so_client_node, parent, p); + rb_insert_color(&new->so_client_node, &clp->cl_state_owners); + return new; +} + +static void +nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp) +{ + if (!RB_EMPTY_NODE(&sp->so_client_node)) + rb_erase(&sp->so_client_node, &clp->cl_state_owners); + nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id); +} + +/* + * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to + * create a new state_owner. + * + */ +static struct nfs4_state_owner * +nfs4_alloc_state_owner(void) +{ + struct nfs4_state_owner *sp; + + sp = kzalloc(sizeof(*sp),GFP_KERNEL); + if (!sp) + return NULL; + spin_lock_init(&sp->so_lock); + INIT_LIST_HEAD(&sp->so_states); + INIT_LIST_HEAD(&sp->so_delegations); + rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); + sp->so_seqid.sequence = &sp->so_sequence; + spin_lock_init(&sp->so_sequence.lock); + INIT_LIST_HEAD(&sp->so_sequence.list); + atomic_set(&sp->so_count, 1); + return sp; +} + +static void +nfs4_drop_state_owner(struct nfs4_state_owner *sp) +{ + if (!RB_EMPTY_NODE(&sp->so_client_node)) { + struct nfs_client *clp = sp->so_client; + + spin_lock(&clp->cl_lock); + rb_erase(&sp->so_client_node, &clp->cl_state_owners); + RB_CLEAR_NODE(&sp->so_client_node); + spin_unlock(&clp->cl_lock); + } +} + +struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state_owner *sp, *new; + + spin_lock(&clp->cl_lock); + sp = nfs4_find_state_owner(server, cred); + spin_unlock(&clp->cl_lock); + if (sp != NULL) + return sp; + new = nfs4_alloc_state_owner(); + if (new == NULL) + return NULL; + new->so_client = clp; + new->so_server = server; + new->so_cred = cred; + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner(clp, new); + spin_unlock(&clp->cl_lock); + if (sp == new) + get_rpccred(cred); + else { + rpc_destroy_wait_queue(&new->so_sequence.wait); + kfree(new); + } + return sp; +} + +void nfs4_put_state_owner(struct nfs4_state_owner *sp) +{ + struct nfs_client *clp = sp->so_client; + struct rpc_cred *cred = sp->so_cred; + + if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) + return; + nfs4_remove_state_owner(clp, sp); + spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&sp->so_sequence.wait); + put_rpccred(cred); + kfree(sp); +} + +static struct nfs4_state * +nfs4_alloc_open_state(void) +{ + struct nfs4_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + atomic_set(&state->count, 1); + INIT_LIST_HEAD(&state->lock_states); + spin_lock_init(&state->state_lock); + seqlock_init(&state->seqlock); + return state; +} + +void +nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode) +{ + if (state->state == fmode) + return; + /* NB! List reordering - see the reclaim code for why. */ + if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) { + if (fmode & FMODE_WRITE) + list_move(&state->open_states, &state->owner->so_states); + else + list_move_tail(&state->open_states, &state->owner->so_states); + } + state->state = fmode; +} + +static struct nfs4_state * +__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_state *state; + + list_for_each_entry(state, &nfsi->open_states, inode_states) { + if (state->owner != owner) + continue; + if (atomic_inc_not_zero(&state->count)) + return state; + } + return NULL; +} + +static void +nfs4_free_open_state(struct nfs4_state *state) +{ + kfree(state); +} + +struct nfs4_state * +nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) +{ + struct nfs4_state *state, *new; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + state = __nfs4_find_state_byowner(inode, owner); + spin_unlock(&inode->i_lock); + if (state) + goto out; + new = nfs4_alloc_open_state(); + spin_lock(&owner->so_lock); + spin_lock(&inode->i_lock); + state = __nfs4_find_state_byowner(inode, owner); + if (state == NULL && new != NULL) { + state = new; + state->owner = owner; + atomic_inc(&owner->so_count); + list_add(&state->inode_states, &nfsi->open_states); + state->inode = igrab(inode); + spin_unlock(&inode->i_lock); + /* Note: The reclaim code dictates that we add stateless + * and read-only stateids to the end of the list */ + list_add_tail(&state->open_states, &owner->so_states); + spin_unlock(&owner->so_lock); + } else { + spin_unlock(&inode->i_lock); + spin_unlock(&owner->so_lock); + if (new) + nfs4_free_open_state(new); + } +out: + return state; +} + +void nfs4_put_open_state(struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs4_state_owner *owner = state->owner; + + if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) + return; + spin_lock(&inode->i_lock); + list_del(&state->inode_states); + list_del(&state->open_states); + spin_unlock(&inode->i_lock); + spin_unlock(&owner->so_lock); + iput(inode); + nfs4_free_open_state(state); + nfs4_put_state_owner(owner); +} + +/* + * Close the current file. + */ +static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait) +{ + struct nfs4_state_owner *owner = state->owner; + int call_close = 0; + fmode_t newstate; + + atomic_inc(&owner->so_count); + /* Protect against nfs4_find_state() */ + spin_lock(&owner->so_lock); + switch (fmode & (FMODE_READ | FMODE_WRITE)) { + case FMODE_READ: + state->n_rdonly--; + break; + case FMODE_WRITE: + state->n_wronly--; + break; + case FMODE_READ|FMODE_WRITE: + state->n_rdwr--; + } + newstate = FMODE_READ|FMODE_WRITE; + if (state->n_rdwr == 0) { + if (state->n_rdonly == 0) { + newstate &= ~FMODE_READ; + call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (state->n_wronly == 0) { + newstate &= ~FMODE_WRITE; + call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (newstate == 0) + clear_bit(NFS_DELEGATED_STATE, &state->flags); + } + nfs4_state_set_mode_locked(state, newstate); + spin_unlock(&owner->so_lock); + + if (!call_close) { + nfs4_put_open_state(state); + nfs4_put_state_owner(owner); + } else + nfs4_do_close(path, state, wait); +} + +void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) +{ + __nfs4_close(path, state, fmode, 0); +} + +void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) +{ + __nfs4_close(path, state, fmode, 1); +} + +/* + * Search the state->lock_states for an existing lock_owner + * that is compatible with current->files + */ +static struct nfs4_lock_state * +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *pos; + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (pos->ls_owner != fl_owner) + continue; + atomic_inc(&pos->ls_count); + return pos; + } + return NULL; +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. + * + */ +static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + struct nfs_client *clp = state->owner->so_client; + + lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); + if (lsp == NULL) + return NULL; + rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); + spin_lock_init(&lsp->ls_sequence.lock); + INIT_LIST_HEAD(&lsp->ls_sequence.list); + lsp->ls_seqid.sequence = &lsp->ls_sequence; + atomic_set(&lsp->ls_count, 1); + lsp->ls_state = state; + lsp->ls_owner = fl_owner; + spin_lock(&clp->cl_lock); + nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); + spin_unlock(&clp->cl_lock); + INIT_LIST_HEAD(&lsp->ls_locks); + return lsp; +} + +static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) +{ + struct nfs_client *clp = lsp->ls_state->owner->so_client; + + spin_lock(&clp->cl_lock); + nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); + spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&lsp->ls_sequence.wait); + kfree(lsp); +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. + * + */ +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) +{ + struct nfs4_lock_state *lsp, *new = NULL; + + for(;;) { + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, owner); + if (lsp != NULL) + break; + if (new != NULL) { + list_add(&new->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + lsp = new; + new = NULL; + break; + } + spin_unlock(&state->state_lock); + new = nfs4_alloc_lock_state(state, owner); + if (new == NULL) + return NULL; + } + spin_unlock(&state->state_lock); + if (new != NULL) + nfs4_free_lock_state(new); + return lsp; +} + +/* + * Release reference to lock_state, and free it if we see that + * it is no longer in use + */ +void nfs4_put_lock_state(struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state; + + if (lsp == NULL) + return; + state = lsp->ls_state; + if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + return; + list_del(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + spin_unlock(&state->state_lock); + nfs4_free_lock_state(lsp); +} + +static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) +{ + struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; + + dst->fl_u.nfs4_fl.owner = lsp; + atomic_inc(&lsp->ls_count); +} + +static void nfs4_fl_release_lock(struct file_lock *fl) +{ + nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); +} + +static const struct file_lock_operations nfs4_fl_lock_ops = { + .fl_copy_lock = nfs4_fl_copy_lock, + .fl_release_private = nfs4_fl_release_lock, +}; + +int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) +{ + struct nfs4_lock_state *lsp; + + if (fl->fl_ops != NULL) + return 0; + lsp = nfs4_get_lock_state(state, fl->fl_owner); + if (lsp == NULL) + return -ENOMEM; + fl->fl_u.nfs4_fl.owner = lsp; + fl->fl_ops = &nfs4_fl_lock_ops; + return 0; +} + +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. + */ +void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + int seq; + + do { + seq = read_seqbegin(&state->seqlock); + memcpy(dst, &state->stateid, sizeof(*dst)); + } while (read_seqretry(&state->seqlock, seq)); + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) + return; + + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + spin_unlock(&state->state_lock); + nfs4_put_lock_state(lsp); +} + +struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) +{ + struct nfs_seqid *new; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new != NULL) { + new->sequence = counter; + INIT_LIST_HEAD(&new->list); + } + return new; +} + +void nfs_release_seqid(struct nfs_seqid *seqid) +{ + if (!list_empty(&seqid->list)) { + struct rpc_sequence *sequence = seqid->sequence->sequence; + + spin_lock(&sequence->lock); + list_del_init(&seqid->list); + spin_unlock(&sequence->lock); + rpc_wake_up(&sequence->wait); + } +} + +void nfs_free_seqid(struct nfs_seqid *seqid) +{ + nfs_release_seqid(seqid); + kfree(seqid); +} + +/* + * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) +{ + BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); + switch (status) { + case 0: + break; + case -NFS4ERR_BAD_SEQID: + if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) + return; + printk(KERN_WARNING "NFS: v4 server returned a bad" + " sequence-id error on an" + " unconfirmed sequence %p!\n", + seqid->sequence); + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_BADXDR: + case -NFS4ERR_RESOURCE: + case -NFS4ERR_NOFILEHANDLE: + /* Non-seqid mutating errors */ + return; + }; + /* + * Note: no locking needed as we are guaranteed to be first + * on the sequence list + */ + seqid->sequence->counter++; +} + +void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) +{ + struct nfs4_state_owner *sp = container_of(seqid->sequence, + struct nfs4_state_owner, so_seqid); + struct nfs_server *server = sp->so_server; + + if (status == -NFS4ERR_BAD_SEQID) + nfs4_drop_state_owner(sp); + if (!nfs4_has_session(server->nfs_client)) + nfs_increment_seqid(status, seqid); +} + +/* + * Increment the seqid if the LOCK/LOCKU succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) +{ + nfs_increment_seqid(status, seqid); +} + +int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) +{ + struct rpc_sequence *sequence = seqid->sequence->sequence; + int status = 0; + + spin_lock(&sequence->lock); + if (list_empty(&seqid->list)) + list_add_tail(&seqid->list, &sequence->list); + if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) + goto unlock; + rpc_sleep_on(&sequence->wait, task, NULL); + status = -EAGAIN; +unlock: + spin_unlock(&sequence->lock); + return status; +} + +static int nfs4_run_state_manager(void *); + +static void nfs4_clear_state_manager_bit(struct nfs_client *clp) +{ + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING); + rpc_wake_up(&clp->cl_rpcwaitq); +} + +/* + * Schedule the nfs_client asynchronous state management routine + */ +void nfs4_schedule_state_manager(struct nfs_client *clp) +{ + struct task_struct *task; + + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; + __module_get(THIS_MODULE); + atomic_inc(&clp->cl_count); + task = kthread_run(nfs4_run_state_manager, clp, "%s-manager", + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)); + if (!IS_ERR(task)) + return; + nfs4_clear_state_manager_bit(clp); + nfs_put_client(clp); + module_put(THIS_MODULE); +} + +/* + * Schedule a state recovery attempt + */ +void nfs4_schedule_state_recovery(struct nfs_client *clp) +{ + if (!clp) + return; + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + nfs4_schedule_state_manager(clp); +} + +int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) +{ + + set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); + /* Don't recover state that expired before the reboot */ + if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) { + clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); + return 0; + } + set_bit(NFS_OWNER_RECLAIM_REBOOT, &state->owner->so_flags); + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + return 1; +} + +int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) +{ + set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); + clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); + set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags); + set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); + return 1; +} + +static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) +{ + struct inode *inode = state->inode; + struct nfs_inode *nfsi = NFS_I(inode); + struct file_lock *fl; + int status = 0; + + if (inode->i_flock == NULL) + return 0; + + /* Guard against delegation returns and new lock/unlock calls */ + down_write(&nfsi->rwsem); + /* Protect inode->i_flock using the BKL */ + lock_kernel(); + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) + continue; + if (nfs_file_open_context(fl->fl_file)->state != state) + continue; + unlock_kernel(); + status = ops->recover_lock(state, fl); + switch (status) { + case 0: + break; + case -ESTALE: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + goto out; + default: + printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", + __func__, status); + case -ENOMEM: + case -NFS4ERR_DENIED: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + status = 0; + } + lock_kernel(); + } + unlock_kernel(); +out: + up_write(&nfsi->rwsem); + return status; +} + +static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops) +{ + struct nfs4_state *state; + struct nfs4_lock_state *lock; + int status = 0; + + /* Note: we rely on the sp->so_states list being ordered + * so that we always reclaim open(O_RDWR) and/or open(O_WRITE) + * states first. + * This is needed to ensure that the server won't give us any + * read delegations that we have to return if, say, we are + * recovering after a network partition or a reboot from a + * server that doesn't support a grace period. + */ +restart: + spin_lock(&sp->so_lock); + list_for_each_entry(state, &sp->so_states, open_states) { + if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) + continue; + if (state->state == 0) + continue; + atomic_inc(&state->count); + spin_unlock(&sp->so_lock); + status = ops->recover_open(sp, state); + if (status >= 0) { + status = nfs4_reclaim_locks(state, ops); + if (status >= 0) { + list_for_each_entry(lock, &state->lock_states, ls_locks) { + if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) + printk("%s: Lock reclaim failed!\n", + __func__); + } + nfs4_put_open_state(state); + goto restart; + } + } + switch (status) { + default: + printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", + __func__, status); + case -ENOENT: + case -ENOMEM: + case -ESTALE: + /* + * Open state on this file cannot be recovered + * All we can do is revert to using the zero stateid. + */ + memset(state->stateid.data, 0, + sizeof(state->stateid.data)); + /* Mark the file as being 'closed' */ + state->state = 0; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + nfs4_state_mark_reclaim_nograce(sp->so_client, state); + break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + nfs4_state_mark_reclaim_nograce(sp->so_client, state); + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + goto out_err; + } + nfs4_put_open_state(state); + goto restart; + } + spin_unlock(&sp->so_lock); + return 0; +out_err: + nfs4_put_open_state(state); + return status; +} + +static void nfs4_clear_open_state(struct nfs4_state *state) +{ + struct nfs4_lock_state *lock; + + clear_bit(NFS_DELEGATED_STATE, &state->flags); + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); + list_for_each_entry(lock, &state->lock_states, ls_locks) { + lock->ls_seqid.flags = 0; + lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + } +} + +static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state)) +{ + struct nfs4_state_owner *sp; + struct rb_node *pos; + struct nfs4_state *state; + + /* Reset all sequence ids to zero */ + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + sp->so_seqid.flags = 0; + spin_lock(&sp->so_lock); + list_for_each_entry(state, &sp->so_states, open_states) { + if (mark_reclaim(clp, state)) + nfs4_clear_open_state(state); + } + spin_unlock(&sp->so_lock); + } +} + +static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) +{ + /* Mark all delegations for reclaim */ + nfs_delegation_mark_reclaim(clp); + nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); +} + +static void nfs4_reclaim_complete(struct nfs_client *clp, + const struct nfs4_state_recovery_ops *ops) +{ + /* Notify the server we're done reclaiming our state */ + if (ops->reclaim_complete) + (void)ops->reclaim_complete(clp); +} + +static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) +{ + struct nfs4_state_owner *sp; + struct rb_node *pos; + struct nfs4_state *state; + + if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) + return; + + nfs4_reclaim_complete(clp, + nfs4_reboot_recovery_ops[clp->cl_minorversion]); + + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + spin_lock(&sp->so_lock); + list_for_each_entry(state, &sp->so_states, open_states) { + if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags)) + continue; + nfs4_state_mark_reclaim_nograce(clp, state); + } + spin_unlock(&sp->so_lock); + } + + nfs_delegation_reap_unclaimed(clp); +} + +static void nfs_delegation_clear_all(struct nfs_client *clp) +{ + nfs_delegation_mark_reclaim(clp); + nfs_delegation_reap_unclaimed(clp); +} + +static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) +{ + nfs_delegation_clear_all(clp); + nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); +} + +static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) +{ + switch (error) { + case -NFS4ERR_CB_PATH_DOWN: + nfs_handle_cb_pathdown(clp); + return 0; + case -NFS4ERR_NO_GRACE: + nfs4_state_end_reclaim_reboot(clp); + return 0; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_end_reclaim_reboot(clp); + nfs4_state_start_reclaim_reboot(clp); + break; + case -NFS4ERR_EXPIRED: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + /* Zero session reset errors */ + return 0; + } + return error; +} + +static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) +{ + struct rb_node *pos; + int status = 0; + +restart: + spin_lock(&clp->cl_lock); + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags)) + continue; + atomic_inc(&sp->so_count); + spin_unlock(&clp->cl_lock); + status = nfs4_reclaim_open_state(sp, ops); + if (status < 0) { + set_bit(ops->owner_flag_bit, &sp->so_flags); + nfs4_put_state_owner(sp); + return nfs4_recovery_handle_error(clp, status); + } + nfs4_put_state_owner(sp); + goto restart; + } + spin_unlock(&clp->cl_lock); + return status; +} + +static int nfs4_check_lease(struct nfs_client *clp) +{ + struct rpc_cred *cred; + struct nfs4_state_maintenance_ops *ops = + nfs4_state_renewal_ops[clp->cl_minorversion]; + int status = -NFS4ERR_EXPIRED; + + /* Is the client already known to have an expired lease? */ + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + return 0; + spin_lock(&clp->cl_lock); + cred = ops->get_state_renewal_cred_locked(clp); + spin_unlock(&clp->cl_lock); + if (cred == NULL) { + cred = nfs4_get_setclientid_cred(clp); + if (cred == NULL) + goto out; + } + status = ops->renew_lease(clp, cred); + put_rpccred(cred); +out: + return nfs4_recovery_handle_error(clp, status); +} + +static int nfs4_reclaim_lease(struct nfs_client *clp) +{ + struct rpc_cred *cred; + struct nfs4_state_recovery_ops *ops = + nfs4_reboot_recovery_ops[clp->cl_minorversion]; + int status = -ENOENT; + + cred = ops->get_clid_cred(clp); + if (cred != NULL) { + status = ops->establish_clid(clp, cred); + put_rpccred(cred); + /* Handle case where the user hasn't set up machine creds */ + if (status == -EACCES && cred == clp->cl_machine_cred) { + nfs4_clear_machine_cred(clp); + status = -EAGAIN; + } + if (status == -NFS4ERR_MINOR_VERS_MISMATCH) + status = -EPROTONOSUPPORT; + } + return status; +} + +#ifdef CONFIG_NFS_V4_1 +void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) +{ + if (!flags) + return; + else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_reboot(clp); + nfs4_schedule_state_recovery(clp); + } else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | + SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | + SEQ4_STATUS_ADMIN_STATE_REVOKED | + SEQ4_STATUS_RECALLABLE_STATE_REVOKED | + SEQ4_STATUS_LEASE_MOVED)) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); + nfs4_schedule_state_recovery(clp); + } else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | + SEQ4_STATUS_BACKCHANNEL_FAULT | + SEQ4_STATUS_CB_PATH_DOWN_SESSION)) + nfs_expire_all_delegations(clp); +} + +static int nfs4_reset_session(struct nfs_client *clp) +{ + int status; + + nfs4_begin_drain_session(clp); + status = nfs4_proc_destroy_session(clp->cl_session); + if (status && status != -NFS4ERR_BADSESSION && + status != -NFS4ERR_DEADSESSION) { + status = nfs4_recovery_handle_error(clp, status); + goto out; + } + + memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); + status = nfs4_proc_create_session(clp); + if (status) + status = nfs4_recovery_handle_error(clp, status); + +out: + /* + * Let the state manager reestablish state + */ + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && + status == 0) + nfs41_setup_state_renewal(clp); + + return status; +} + +#else /* CONFIG_NFS_V4_1 */ +static int nfs4_reset_session(struct nfs_client *clp) { return 0; } +static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } +#endif /* CONFIG_NFS_V4_1 */ + +/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors + * on EXCHANGE_ID for v4.1 + */ +static void nfs4_set_lease_expired(struct nfs_client *clp, int status) +{ + if (nfs4_has_session(clp)) { + switch (status) { + case -NFS4ERR_DELAY: + case -NFS4ERR_CLID_INUSE: + case -EAGAIN: + break; + + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + default: + return; + } + } + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); +} + +static void nfs4_state_manager(struct nfs_client *clp) +{ + int status = 0; + + /* Ensure exclusive access to NFSv4 state */ + for(;;) { + if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + /* We're going to have to re-establish a clientid */ + status = nfs4_reclaim_lease(clp); + if (status) { + nfs4_set_lease_expired(clp, status); + if (test_bit(NFS4CLNT_LEASE_EXPIRED, + &clp->cl_state)) + continue; + if (clp->cl_cons_state == + NFS_CS_SESSION_INITING) + nfs_mark_client_ready(clp, status); + goto out_error; + } + clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + } + + if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { + status = nfs4_check_lease(clp); + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + continue; + if (status < 0 && status != -NFS4ERR_CB_PATH_DOWN) + goto out_error; + } + + /* Initialize or reset the session */ + if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) + && nfs4_has_session(clp)) { + status = nfs4_reset_session(clp); + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + continue; + if (status < 0) + goto out_error; + } + + /* First recover reboot state... */ + if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { + status = nfs4_do_reclaim(clp, + nfs4_reboot_recovery_ops[clp->cl_minorversion]); + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || + test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) + continue; + nfs4_state_end_reclaim_reboot(clp); + if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) + continue; + if (status < 0) + goto out_error; + } + + /* Now recover expired state... */ + if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { + status = nfs4_do_reclaim(clp, + nfs4_nograce_recovery_ops[clp->cl_minorversion]); + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || + test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || + test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) + continue; + if (status < 0) + goto out_error; + } + + nfs4_end_drain_session(clp); + if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { + nfs_client_return_marked_delegations(clp); + continue; + } + + nfs4_clear_state_manager_bit(clp); + /* Did we race with an attempt to give us more work? */ + if (clp->cl_state == 0) + break; + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + break; + } + return; +out_error: + printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" + " with error %d\n", clp->cl_hostname, -status); + nfs4_end_drain_session(clp); + nfs4_clear_state_manager_bit(clp); +} + +static int nfs4_run_state_manager(void *ptr) +{ + struct nfs_client *clp = ptr; + + allow_signal(SIGKILL); + nfs4_state_manager(clp); + nfs_put_client(clp); + module_put_and_exit(0); + return 0; +} + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c new file mode 100644 index 00000000000..5cd5184b56d --- /dev/null +++ b/fs/nfs/nfs4xdr.c @@ -0,0 +1,5880 @@ +/* + * fs/nfs/nfs4xdr.c + * + * Client-side XDR for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/param.h> +#include <linux/time.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/proc_fs.h> +#include <linux/kdev_t.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/msg_prot.h> +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_idmap.h> +#include "nfs4_fs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_XDR + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +static int nfs4_stat_to_errno(int); + +/* NFSv4 COMPOUND tags are only wanted for debugging purposes */ +#ifdef DEBUG +#define NFS4_MAXTAGLEN 20 +#else +#define NFS4_MAXTAGLEN 0 +#endif + +/* lock,open owner id: + * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) + */ +#define open_owner_id_maxsz (1 + 4) +#define lock_owner_id_maxsz (1 + 4) +#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) +#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) +#define op_encode_hdr_maxsz (1) +#define op_decode_hdr_maxsz (2) +#define encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE)) +#define decode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE)) +#define encode_putfh_maxsz (op_encode_hdr_maxsz + 1 + \ + (NFS4_FHSIZE >> 2)) +#define decode_putfh_maxsz (op_decode_hdr_maxsz) +#define encode_putrootfh_maxsz (op_encode_hdr_maxsz) +#define decode_putrootfh_maxsz (op_decode_hdr_maxsz) +#define encode_getfh_maxsz (op_encode_hdr_maxsz) +#define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ + ((3+NFS4_FHSIZE) >> 2)) +#define nfs4_fattr_bitmap_maxsz 3 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) +#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) +#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) +#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* This is based on getfattr, which uses the most attributes: */ +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ + 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_value_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) +#define encode_attrs_maxsz (nfs4_fattr_bitmap_maxsz + \ + 1 + 2 + 1 + \ + nfs4_owner_maxsz + \ + nfs4_group_maxsz + \ + 4 + 4) +#define encode_savefh_maxsz (op_encode_hdr_maxsz) +#define decode_savefh_maxsz (op_decode_hdr_maxsz) +#define encode_restorefh_maxsz (op_encode_hdr_maxsz) +#define decode_restorefh_maxsz (op_decode_hdr_maxsz) +#define encode_fsinfo_maxsz (encode_getattr_maxsz) +#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) +#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) +#define decode_renew_maxsz (op_decode_hdr_maxsz) +#define encode_setclientid_maxsz \ + (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ + XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ + 1 /* sc_prog */ + \ + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ + 1) /* sc_cb_ident */ +#define decode_setclientid_maxsz \ + (op_decode_hdr_maxsz + \ + 2 + \ + 1024) /* large value for CLID_INUSE */ +#define encode_setclientid_confirm_maxsz \ + (op_encode_hdr_maxsz + \ + 3 + (NFS4_VERIFIER_SIZE >> 2)) +#define decode_setclientid_confirm_maxsz \ + (op_decode_hdr_maxsz) +#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) +#define decode_lookup_maxsz (op_decode_hdr_maxsz) +#define encode_share_access_maxsz \ + (2) +#define encode_createmode_maxsz (1 + encode_attrs_maxsz + encode_verifier_maxsz) +#define encode_opentype_maxsz (1 + encode_createmode_maxsz) +#define encode_claim_null_maxsz (1 + nfs4_name_maxsz) +#define encode_open_maxsz (op_encode_hdr_maxsz + \ + 2 + encode_share_access_maxsz + 2 + \ + open_owner_id_maxsz + \ + encode_opentype_maxsz + \ + encode_claim_null_maxsz) +#define decode_ace_maxsz (3 + nfs4_owner_maxsz) +#define decode_delegation_maxsz (1 + decode_stateid_maxsz + 1 + \ + decode_ace_maxsz) +#define decode_change_info_maxsz (5) +#define decode_open_maxsz (op_decode_hdr_maxsz + \ + decode_stateid_maxsz + \ + decode_change_info_maxsz + 1 + \ + nfs4_fattr_bitmap_maxsz + \ + decode_delegation_maxsz) +#define encode_open_confirm_maxsz \ + (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + 1) +#define decode_open_confirm_maxsz \ + (op_decode_hdr_maxsz + \ + decode_stateid_maxsz) +#define encode_open_downgrade_maxsz \ + (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + 1 + \ + encode_share_access_maxsz) +#define decode_open_downgrade_maxsz \ + (op_decode_hdr_maxsz + \ + decode_stateid_maxsz) +#define encode_close_maxsz (op_encode_hdr_maxsz + \ + 1 + encode_stateid_maxsz) +#define decode_close_maxsz (op_decode_hdr_maxsz + \ + decode_stateid_maxsz) +#define encode_setattr_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + encode_attrs_maxsz) +#define decode_setattr_maxsz (op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz) +#define encode_read_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + 3) +#define decode_read_maxsz (op_decode_hdr_maxsz + 2) +#define encode_readdir_maxsz (op_encode_hdr_maxsz + \ + 2 + encode_verifier_maxsz + 5) +#define decode_readdir_maxsz (op_decode_hdr_maxsz + \ + decode_verifier_maxsz) +#define encode_readlink_maxsz (op_encode_hdr_maxsz) +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) +#define encode_write_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + 4) +#define decode_write_maxsz (op_decode_hdr_maxsz + \ + 2 + decode_verifier_maxsz) +#define encode_commit_maxsz (op_encode_hdr_maxsz + 3) +#define decode_commit_maxsz (op_decode_hdr_maxsz + \ + decode_verifier_maxsz) +#define encode_remove_maxsz (op_encode_hdr_maxsz + \ + nfs4_name_maxsz) +#define decode_remove_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz) +#define encode_rename_maxsz (op_encode_hdr_maxsz + \ + 2 * nfs4_name_maxsz) +#define decode_rename_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz + \ + decode_change_info_maxsz) +#define encode_link_maxsz (op_encode_hdr_maxsz + \ + nfs4_name_maxsz) +#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) +#define encode_lock_maxsz (op_encode_hdr_maxsz + \ + 7 + \ + 1 + encode_stateid_maxsz + 8) +#define decode_lock_denied_maxsz \ + (8 + decode_lockowner_maxsz) +#define decode_lock_maxsz (op_decode_hdr_maxsz + \ + decode_lock_denied_maxsz) +#define encode_lockt_maxsz (op_encode_hdr_maxsz + 12) +#define decode_lockt_maxsz (op_decode_hdr_maxsz + \ + decode_lock_denied_maxsz) +#define encode_locku_maxsz (op_encode_hdr_maxsz + 3 + \ + encode_stateid_maxsz + \ + 4) +#define decode_locku_maxsz (op_decode_hdr_maxsz + \ + decode_stateid_maxsz) +#define encode_access_maxsz (op_encode_hdr_maxsz + 1) +#define decode_access_maxsz (op_decode_hdr_maxsz + 2) +#define encode_symlink_maxsz (op_encode_hdr_maxsz + \ + 1 + nfs4_name_maxsz + \ + 1 + \ + nfs4_fattr_maxsz) +#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) +#define encode_create_maxsz (op_encode_hdr_maxsz + \ + 1 + 2 + nfs4_name_maxsz + \ + encode_attrs_maxsz) +#define decode_create_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz + \ + nfs4_fattr_bitmap_maxsz) +#define encode_statfs_maxsz (encode_getattr_maxsz) +#define decode_statfs_maxsz (decode_getattr_maxsz) +#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) +#define decode_delegreturn_maxsz (op_decode_hdr_maxsz) +#define encode_getacl_maxsz (encode_getattr_maxsz) +#define decode_getacl_maxsz (op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 1) +#define encode_setacl_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + 3) +#define decode_setacl_maxsz (decode_setattr_maxsz) +#define encode_fs_locations_maxsz \ + (encode_getattr_maxsz) +#define decode_fs_locations_maxsz \ + (0) + +#if defined(CONFIG_NFS_V4_1) +#define NFS4_MAX_MACHINE_NAME_LEN (64) + +#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ + encode_verifier_maxsz + \ + 1 /* co_ownerid.len */ + \ + XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ + 1 /* flags */ + \ + 1 /* spa_how */ + \ + 0 /* SP4_NONE (for now) */ + \ + 1 /* zero implemetation id array */) +#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ + 2 /* eir_clientid */ + \ + 1 /* eir_sequenceid */ + \ + 1 /* eir_flags */ + \ + 1 /* spr_how */ + \ + 0 /* SP4_NONE (for now) */ + \ + 2 /* eir_server_owner.so_minor_id */ + \ + /* eir_server_owner.so_major_id<> */ \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ + /* eir_server_scope<> */ \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ + 1 /* eir_server_impl_id array length */ + \ + 0 /* ignored eir_server_impl_id contents */) +#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) +#define decode_channel_attrs_maxsz (6 + \ + 1 /* ca_rdma_ird.len */ + \ + 1 /* ca_rdma_ird */) +#define encode_create_session_maxsz (op_encode_hdr_maxsz + \ + 2 /* csa_clientid */ + \ + 1 /* csa_sequence */ + \ + 1 /* csa_flags */ + \ + encode_channel_attrs_maxsz + \ + encode_channel_attrs_maxsz + \ + 1 /* csa_cb_program */ + \ + 1 /* csa_sec_parms.len (1) */ + \ + 1 /* cb_secflavor (AUTH_SYS) */ + \ + 1 /* stamp */ + \ + 1 /* machinename.len */ + \ + XDR_QUADLEN(NFS4_MAX_MACHINE_NAME_LEN) + \ + 1 /* uid */ + \ + 1 /* gid */ + \ + 1 /* gids.len (0) */) +#define decode_create_session_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + 1 /* csr_sequence */ + \ + 1 /* csr_flags */ + \ + decode_channel_attrs_maxsz + \ + decode_channel_attrs_maxsz) +#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4) +#define decode_destroy_session_maxsz (op_decode_hdr_maxsz) +#define encode_sequence_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) +#define decode_sequence_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) +#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) +#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) +#else /* CONFIG_NFS_V4_1 */ +#define encode_sequence_maxsz 0 +#define decode_sequence_maxsz 0 +#endif /* CONFIG_NFS_V4_1 */ + +#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ +#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ +#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_read_maxsz) +#define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_read_maxsz) +#define NFS4_enc_readlink_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_readlink_maxsz) +#define NFS4_dec_readlink_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_readlink_maxsz) +#define NFS4_enc_readdir_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_readdir_maxsz) +#define NFS4_dec_readdir_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_readdir_maxsz) +#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_write_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_write_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_commit_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_commit_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_open_maxsz + \ + encode_getfh_maxsz + \ + encode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_open_maxsz + \ + decode_getfh_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_open_confirm_sz \ + (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_open_confirm_maxsz) +#define NFS4_dec_open_confirm_sz \ + (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_open_confirm_maxsz) +#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_open_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_open_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_open_downgrade_sz \ + (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_open_downgrade_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_open_downgrade_sz \ + (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_open_downgrade_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_close_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_close_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_setattr_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_setattr_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_fsinfo_maxsz) +#define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_fsinfo_maxsz) +#define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \ + encode_renew_maxsz) +#define NFS4_dec_renew_sz (compound_decode_hdr_maxsz + \ + decode_renew_maxsz) +#define NFS4_enc_setclientid_sz (compound_encode_hdr_maxsz + \ + encode_setclientid_maxsz) +#define NFS4_dec_setclientid_sz (compound_decode_hdr_maxsz + \ + decode_setclientid_maxsz) +#define NFS4_enc_setclientid_confirm_sz \ + (compound_encode_hdr_maxsz + \ + encode_setclientid_confirm_maxsz + \ + encode_putrootfh_maxsz + \ + encode_fsinfo_maxsz) +#define NFS4_dec_setclientid_confirm_sz \ + (compound_decode_hdr_maxsz + \ + decode_setclientid_confirm_maxsz + \ + decode_putrootfh_maxsz + \ + decode_fsinfo_maxsz) +#define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_lock_maxsz) +#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_lock_maxsz) +#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_lockt_maxsz) +#define NFS4_dec_lockt_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_lockt_maxsz) +#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_locku_maxsz) +#define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_locku_maxsz) +#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_access_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_access_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_lookup_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_lookup_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) +#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putrootfh_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putrootfh_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) +#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_remove_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_remove_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_rename_maxsz + \ + encode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_rename_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_link_maxsz + \ + decode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_link_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_symlink_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_symlink_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) +#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_create_maxsz + \ + encode_getfh_maxsz + \ + encode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_create_maxsz + \ + decode_getfh_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_pathconf_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_statfs_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_statfs_maxsz) +#define NFS4_dec_statfs_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_statfs_maxsz) +#define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_delegreturn_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_delegreturn_maxsz + \ + decode_getattr_maxsz) +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getacl_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getacl_maxsz) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_setacl_maxsz) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_setacl_maxsz) +#define NFS4_enc_fs_locations_sz \ + (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_lookup_maxsz + \ + encode_fs_locations_maxsz) +#define NFS4_dec_fs_locations_sz \ + (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_lookup_maxsz + \ + decode_fs_locations_maxsz) +#if defined(CONFIG_NFS_V4_1) +#define NFS4_enc_exchange_id_sz \ + (compound_encode_hdr_maxsz + \ + encode_exchange_id_maxsz) +#define NFS4_dec_exchange_id_sz \ + (compound_decode_hdr_maxsz + \ + decode_exchange_id_maxsz) +#define NFS4_enc_create_session_sz \ + (compound_encode_hdr_maxsz + \ + encode_create_session_maxsz) +#define NFS4_dec_create_session_sz \ + (compound_decode_hdr_maxsz + \ + decode_create_session_maxsz) +#define NFS4_enc_destroy_session_sz (compound_encode_hdr_maxsz + \ + encode_destroy_session_maxsz) +#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \ + decode_destroy_session_maxsz) +#define NFS4_enc_sequence_sz \ + (compound_decode_hdr_maxsz + \ + encode_sequence_maxsz) +#define NFS4_dec_sequence_sz \ + (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz) +#define NFS4_enc_get_lease_time_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putrootfh_maxsz + \ + encode_fsinfo_maxsz) +#define NFS4_dec_get_lease_time_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putrootfh_maxsz + \ + decode_fsinfo_maxsz) +#define NFS4_enc_reclaim_complete_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_reclaim_complete_maxsz) +#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_reclaim_complete_maxsz) + +const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_encode_hdr_maxsz + + encode_sequence_maxsz + + encode_putfh_maxsz + + encode_getattr_maxsz) * + XDR_UNIT); + +const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_decode_hdr_maxsz + + decode_sequence_maxsz + + decode_putfh_maxsz) * + XDR_UNIT); +#endif /* CONFIG_NFS_V4_1 */ + +static const umode_t nfs_type2fmt[] = { + [NF4BAD] = 0, + [NF4REG] = S_IFREG, + [NF4DIR] = S_IFDIR, + [NF4BLK] = S_IFBLK, + [NF4CHR] = S_IFCHR, + [NF4LNK] = S_IFLNK, + [NF4SOCK] = S_IFSOCK, + [NF4FIFO] = S_IFIFO, + [NF4ATTRDIR] = 0, + [NF4NAMEDATTR] = 0, +}; + +struct compound_hdr { + int32_t status; + uint32_t nops; + __be32 * nops_p; + uint32_t taglen; + char * tag; + uint32_t replen; /* expected reply words */ + u32 minorversion; +}; + +static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) +{ + __be32 *p = xdr_reserve_space(xdr, nbytes); + BUG_ON(!p); + return p; +} + +static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4 + len); + BUG_ON(p == NULL); + xdr_encode_opaque(p, str, len); +} + +static void encode_compound_hdr(struct xdr_stream *xdr, + struct rpc_rqst *req, + struct compound_hdr *hdr) +{ + __be32 *p; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; + + /* initialize running count of expected bytes in reply. + * NOTE: the replied tag SHOULD be the same is the one sent, + * but this is not required as a MUST for the server to do so. */ + hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; + + dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); + BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); + p = reserve_space(xdr, 4 + hdr->taglen + 8); + p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); + *p++ = cpu_to_be32(hdr->minorversion); + hdr->nops_p = p; + *p = cpu_to_be32(hdr->nops); +} + +static void encode_nops(struct compound_hdr *hdr) +{ + BUG_ON(hdr->nops > NFS4_MAX_OPS); + *hdr->nops_p = htonl(hdr->nops); +} + +static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + BUG_ON(p == NULL); + xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); +} + +static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) +{ + char owner_name[IDMAP_NAMESZ]; + char owner_group[IDMAP_NAMESZ]; + int owner_namelen = 0; + int owner_grouplen = 0; + __be32 *p; + __be32 *q; + int len; + uint32_t bmval0 = 0; + uint32_t bmval1 = 0; + + /* + * We reserve enough space to write the entire attribute buffer at once. + * In the worst-case, this would be + * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) + * = 36 bytes, plus any contribution from variable-length fields + * such as owner/group. + */ + len = 16; + + /* Sigh */ + if (iap->ia_valid & ATTR_SIZE) + len += 8; + if (iap->ia_valid & ATTR_MODE) + len += 4; + if (iap->ia_valid & ATTR_UID) { + owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); + if (owner_namelen < 0) { + dprintk("nfs: couldn't resolve uid %d to string\n", + iap->ia_uid); + /* XXX */ + strcpy(owner_name, "nobody"); + owner_namelen = sizeof("nobody") - 1; + /* goto out; */ + } + len += 4 + (XDR_QUADLEN(owner_namelen) << 2); + } + if (iap->ia_valid & ATTR_GID) { + owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); + if (owner_grouplen < 0) { + dprintk("nfs: couldn't resolve gid %d to string\n", + iap->ia_gid); + strcpy(owner_group, "nobody"); + owner_grouplen = sizeof("nobody") - 1; + /* goto out; */ + } + len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); + } + if (iap->ia_valid & ATTR_ATIME_SET) + len += 16; + else if (iap->ia_valid & ATTR_ATIME) + len += 4; + if (iap->ia_valid & ATTR_MTIME_SET) + len += 16; + else if (iap->ia_valid & ATTR_MTIME) + len += 4; + p = reserve_space(xdr, len); + + /* + * We write the bitmap length now, but leave the bitmap and the attribute + * buffer length to be backfilled at the end of this routine. + */ + *p++ = cpu_to_be32(2); + q = p; + p += 3; + + if (iap->ia_valid & ATTR_SIZE) { + bmval0 |= FATTR4_WORD0_SIZE; + p = xdr_encode_hyper(p, iap->ia_size); + } + if (iap->ia_valid & ATTR_MODE) { + bmval1 |= FATTR4_WORD1_MODE; + *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO); + } + if (iap->ia_valid & ATTR_UID) { + bmval1 |= FATTR4_WORD1_OWNER; + p = xdr_encode_opaque(p, owner_name, owner_namelen); + } + if (iap->ia_valid & ATTR_GID) { + bmval1 |= FATTR4_WORD1_OWNER_GROUP; + p = xdr_encode_opaque(p, owner_group, owner_grouplen); + } + if (iap->ia_valid & ATTR_ATIME_SET) { + bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; + *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); + *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); + } + else if (iap->ia_valid & ATTR_ATIME) { + bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; + *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); + } + if (iap->ia_valid & ATTR_MTIME_SET) { + bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; + *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); + *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); + } + else if (iap->ia_valid & ATTR_MTIME) { + bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; + *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); + } + + /* + * Now we backfill the bitmap and the attribute buffer length. + */ + if (len != ((char *)p - (char *)q) + 4) { + printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", + len, ((char *)p - (char *)q) + 4); + BUG(); + } + len = (char *)p - (char *)q - 12; + *q++ = htonl(bmval0); + *q++ = htonl(bmval1); + *q = htonl(len); + +/* out: */ +} + +static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_ACCESS); + *p = cpu_to_be32(access); + hdr->nops++; + hdr->replen += decode_access_maxsz; +} + +static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8+NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_CLOSE); + *p++ = cpu_to_be32(arg->seqid->sequence->counter); + xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_close_maxsz; +} + +static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 16); + *p++ = cpu_to_be32(OP_COMMIT); + p = xdr_encode_hyper(p, args->offset); + *p = cpu_to_be32(args->count); + hdr->nops++; + hdr->replen += decode_commit_maxsz; +} + +static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_CREATE); + *p = cpu_to_be32(create->ftype); + + switch (create->ftype) { + case NF4LNK: + p = reserve_space(xdr, 4); + *p = cpu_to_be32(create->u.symlink.len); + xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); + break; + + case NF4BLK: case NF4CHR: + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(create->u.device.specdata1); + *p = cpu_to_be32(create->u.device.specdata2); + break; + + default: + break; + } + + encode_string(xdr, create->name->len, create->name->name); + hdr->nops++; + hdr->replen += decode_create_maxsz; + + encode_attrs(xdr, create->attrs, create->server); +} + +static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 12); + *p++ = cpu_to_be32(OP_GETATTR); + *p++ = cpu_to_be32(1); + *p = cpu_to_be32(bitmap); + hdr->nops++; + hdr->replen += decode_getattr_maxsz; +} + +static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 16); + *p++ = cpu_to_be32(OP_GETATTR); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bm0); + *p = cpu_to_be32(bm1); + hdr->nops++; + hdr->replen += decode_getattr_maxsz; +} + +static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) +{ + encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], + bitmask[1] & nfs4_fattr_bitmap[1], hdr); +} + +static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) +{ + encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], + bitmask[1] & nfs4_fsinfo_bitmap[1], hdr); +} + +static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) +{ + encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0], + bitmask[1] & nfs4_fs_locations_bitmap[1], hdr); +} + +static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_GETFH); + hdr->nops++; + hdr->replen += decode_getfh_maxsz; +} + +static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8 + name->len); + *p++ = cpu_to_be32(OP_LINK); + xdr_encode_opaque(p, name->name, name->len); + hdr->nops++; + hdr->replen += decode_link_maxsz; +} + +static inline int nfs4_lock_type(struct file_lock *fl, int block) +{ + if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK) + return block ? NFS4_READW_LT : NFS4_READ_LT; + return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT; +} + +static inline uint64_t nfs4_lock_length(struct file_lock *fl) +{ + if (fl->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return fl->fl_end - fl->fl_start + 1; +} + +/* + * opcode,type,reclaim,offset,length,new_lock_owner = 32 + * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 + */ +static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 32); + *p++ = cpu_to_be32(OP_LOCK); + *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); + *p++ = cpu_to_be32(args->reclaim); + p = xdr_encode_hyper(p, args->fl->fl_start); + p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); + *p = cpu_to_be32(args->new_lock_owner); + if (args->new_lock_owner){ + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32); + *p++ = cpu_to_be32(args->open_seqid->sequence->counter); + p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); + p = xdr_encode_hyper(p, args->lock_owner.clientid); + *p++ = cpu_to_be32(16); + p = xdr_encode_opaque_fixed(p, "lock id:", 8); + xdr_encode_hyper(p, args->lock_owner.id); + } + else { + p = reserve_space(xdr, NFS4_STATEID_SIZE+4); + p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(args->lock_seqid->sequence->counter); + } + hdr->nops++; + hdr->replen += decode_lock_maxsz; +} + +static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 52); + *p++ = cpu_to_be32(OP_LOCKT); + *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); + p = xdr_encode_hyper(p, args->fl->fl_start); + p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); + p = xdr_encode_hyper(p, args->lock_owner.clientid); + *p++ = cpu_to_be32(16); + p = xdr_encode_opaque_fixed(p, "lock id:", 8); + xdr_encode_hyper(p, args->lock_owner.id); + hdr->nops++; + hdr->replen += decode_lockt_maxsz; +} + +static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16); + *p++ = cpu_to_be32(OP_LOCKU); + *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); + *p++ = cpu_to_be32(args->seqid->sequence->counter); + p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + p = xdr_encode_hyper(p, args->fl->fl_start); + xdr_encode_hyper(p, nfs4_lock_length(args->fl)); + hdr->nops++; + hdr->replen += decode_locku_maxsz; +} + +static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) +{ + int len = name->len; + __be32 *p; + + p = reserve_space(xdr, 8 + len); + *p++ = cpu_to_be32(OP_LOOKUP); + xdr_encode_opaque(p, name->name, len); + hdr->nops++; + hdr->replen += decode_lookup_maxsz; +} + +static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) +{ + __be32 *p; + + p = reserve_space(xdr, 8); + switch (fmode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ: + *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ); + break; + case FMODE_WRITE: + *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE); + break; + case FMODE_READ|FMODE_WRITE: + *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH); + break; + default: + *p++ = cpu_to_be32(0); + } + *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */ +} + +static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + __be32 *p; + /* + * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, + * owner 4 = 32 + */ + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_OPEN); + *p = cpu_to_be32(arg->seqid->sequence->counter); + encode_share_access(xdr, arg->fmode); + p = reserve_space(xdr, 28); + p = xdr_encode_hyper(p, arg->clientid); + *p++ = cpu_to_be32(16); + p = xdr_encode_opaque_fixed(p, "open id:", 8); + xdr_encode_hyper(p, arg->id); +} + +static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + __be32 *p; + struct nfs_client *clp; + + p = reserve_space(xdr, 4); + switch(arg->open_flags & O_EXCL) { + case 0: + *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); + encode_attrs(xdr, arg->u.attrs, arg->server); + break; + default: + clp = arg->server->nfs_client; + if (clp->cl_minorversion > 0) { + if (nfs4_has_persistent_session(clp)) { + *p = cpu_to_be32(NFS4_CREATE_GUARDED); + encode_attrs(xdr, arg->u.attrs, arg->server); + } else { + struct iattr dummy; + + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); + encode_nfs4_verifier(xdr, &arg->u.verifier); + dummy.ia_valid = 0; + encode_attrs(xdr, &dummy, arg->server); + } + } else { + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); + } + } +} + +static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + switch (arg->open_flags & O_CREAT) { + case 0: + *p = cpu_to_be32(NFS4_OPEN_NOCREATE); + break; + default: + BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); + *p = cpu_to_be32(NFS4_OPEN_CREATE); + encode_createmode(xdr, arg); + } +} + +static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + switch (delegation_type) { + case 0: + *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE); + break; + case FMODE_READ: + *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ); + break; + case FMODE_WRITE|FMODE_READ: + *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE); + break; + default: + BUG(); + } +} + +static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *name) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_NULL); + encode_string(xdr, name->len, name->name); +} + +static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_PREVIOUS); + encode_delegation_type(xdr, type); +} + +static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struct qstr *name, const nfs4_stateid *stateid) +{ + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); + xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); + encode_string(xdr, name->len, name->name); +} + +static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) +{ + encode_openhdr(xdr, arg); + encode_opentype(xdr, arg); + switch (arg->claim) { + case NFS4_OPEN_CLAIM_NULL: + encode_claim_null(xdr, arg->name); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + encode_claim_previous(xdr, arg->u.delegation_type); + break; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); + break; + default: + BUG(); + } + hdr->nops++; + hdr->replen += decode_open_maxsz; +} + +static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); + *p++ = cpu_to_be32(OP_OPEN_CONFIRM); + p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(arg->seqid->sequence->counter); + hdr->nops++; + hdr->replen += decode_open_confirm_maxsz; +} + +static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); + *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE); + p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(arg->seqid->sequence->counter); + encode_share_access(xdr, arg->fmode); + hdr->nops++; + hdr->replen += decode_open_downgrade_maxsz; +} + +static void +encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) +{ + int len = fh->size; + __be32 *p; + + p = reserve_space(xdr, 8 + len); + *p++ = cpu_to_be32(OP_PUTFH); + xdr_encode_opaque(p, fh->data, len); + hdr->nops++; + hdr->replen += decode_putfh_maxsz; +} + +static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_PUTROOTFH); + hdr->nops++; + hdr->replen += decode_putrootfh_maxsz; +} + +static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) +{ + nfs4_stateid stateid; + __be32 *p; + + p = reserve_space(xdr, NFS4_STATEID_SIZE); + if (ctx->state != NULL) { + nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); + xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); + } else + xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); +} + +static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_READ); + + encode_stateid(xdr, args->context); + + p = reserve_space(xdr, 12); + p = xdr_encode_hyper(p, args->offset); + *p = cpu_to_be32(args->count); + hdr->nops++; + hdr->replen += decode_read_maxsz; +} + +static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) +{ + uint32_t attrs[2] = { + FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, + FATTR4_WORD1_MOUNTED_ON_FILEID, + }; + __be32 *p; + + p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); + *p++ = cpu_to_be32(OP_READDIR); + p = xdr_encode_hyper(p, readdir->cookie); + p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); + *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ + *p++ = cpu_to_be32(readdir->count); + *p++ = cpu_to_be32(2); + /* Switch to mounted_on_fileid if the server supports it */ + if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) + attrs[0] &= ~FATTR4_WORD0_FILEID; + else + attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); + *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); + hdr->nops++; + hdr->replen += decode_readdir_maxsz; + dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", + __func__, + (unsigned long long)readdir->cookie, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1], + attrs[0] & readdir->bitmask[0], + attrs[1] & readdir->bitmask[1]); +} + +static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_READLINK); + hdr->nops++; + hdr->replen += decode_readlink_maxsz; +} + +static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8 + name->len); + *p++ = cpu_to_be32(OP_REMOVE); + xdr_encode_opaque(p, name->name, name->len); + hdr->nops++; + hdr->replen += decode_remove_maxsz; +} + +static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_RENAME); + encode_string(xdr, oldname->len, oldname->name); + encode_string(xdr, newname->len, newname->name); + hdr->nops++; + hdr->replen += decode_rename_maxsz; +} + +static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 12); + *p++ = cpu_to_be32(OP_RENEW); + xdr_encode_hyper(p, client_stateid->cl_clientid); + hdr->nops++; + hdr->replen += decode_renew_maxsz; +} + +static void +encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_RESTOREFH); + hdr->nops++; + hdr->replen += decode_restorefh_maxsz; +} + +static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_SETATTR); + xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 2*4); + *p++ = cpu_to_be32(1); + *p = cpu_to_be32(FATTR4_WORD0_ACL); + if (arg->acl_len % 4) + return -EINVAL; + p = reserve_space(xdr, 4); + *p = cpu_to_be32(arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + hdr->nops++; + hdr->replen += decode_setacl_maxsz; + return 0; +} + +static void +encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_SAVEFH); + hdr->nops++; + hdr->replen += decode_savefh_maxsz; +} + +static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_SETATTR); + xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_setattr_maxsz; + encode_attrs(xdr, arg->iap, server); +} + +static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE); + *p++ = cpu_to_be32(OP_SETCLIENTID); + xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); + + encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(setclientid->sc_prog); + encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); + encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(setclientid->sc_cb_ident); + hdr->nops++; + hdr->replen += decode_setclientid_maxsz; +} + +static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); + *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); + p = xdr_encode_hyper(p, client_state->cl_clientid); + xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); + hdr->nops++; + hdr->replen += decode_setclientid_confirm_maxsz; +} + +static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_WRITE); + + encode_stateid(xdr, args->context); + + p = reserve_space(xdr, 16); + p = xdr_encode_hyper(p, args->offset); + *p++ = cpu_to_be32(args->stable); + *p = cpu_to_be32(args->count); + + xdr_write_pages(xdr, args->pages, args->pgbase, args->count); + hdr->nops++; + hdr->replen += decode_write_maxsz; +} + +static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); + + *p++ = cpu_to_be32(OP_DELEGRETURN); + xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_delegreturn_maxsz; +} + +#if defined(CONFIG_NFS_V4_1) +/* NFSv4.1 operations */ +static void encode_exchange_id(struct xdr_stream *xdr, + struct nfs41_exchange_id_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); + *p++ = cpu_to_be32(OP_EXCHANGE_ID); + xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data)); + + encode_string(xdr, args->id_len, args->id); + + p = reserve_space(xdr, 12); + *p++ = cpu_to_be32(args->flags); + *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ + *p = cpu_to_be32(0); /* zero length implementation id array */ + hdr->nops++; + hdr->replen += decode_exchange_id_maxsz; +} + +static void encode_create_session(struct xdr_stream *xdr, + struct nfs41_create_session_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; + uint32_t len; + struct nfs_client *clp = args->client; + + len = scnprintf(machine_name, sizeof(machine_name), "%s", + clp->cl_ipaddr); + + p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); + *p++ = cpu_to_be32(OP_CREATE_SESSION); + p = xdr_encode_hyper(p, clp->cl_ex_clid); + *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ + *p++ = cpu_to_be32(args->flags); /*flags */ + + /* Fore Channel */ + *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ + *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ + *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ + *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ + *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */ + *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */ + *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ + + /* Back Channel */ + *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ + *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */ + *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */ + *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ + *p++ = cpu_to_be32(args->bc_attrs.max_ops); /* max operations */ + *p++ = cpu_to_be32(args->bc_attrs.max_reqs); /* max requests */ + *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ + + *p++ = cpu_to_be32(args->cb_program); /* cb_program */ + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ + + /* authsys_parms rfc1831 */ + *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ + p = xdr_encode_opaque(p, machine_name, len); + *p++ = cpu_to_be32(0); /* UID */ + *p++ = cpu_to_be32(0); /* GID */ + *p = cpu_to_be32(0); /* No more gids */ + hdr->nops++; + hdr->replen += decode_create_session_maxsz; +} + +static void encode_destroy_session(struct xdr_stream *xdr, + struct nfs4_session *session, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(OP_DESTROY_SESSION); + xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + hdr->nops++; + hdr->replen += decode_destroy_session_maxsz; +} + +static void encode_reclaim_complete(struct xdr_stream *xdr, + struct nfs41_reclaim_complete_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE); + *p++ = cpu_to_be32(args->one_fs); + hdr->nops++; + hdr->replen += decode_reclaim_complete_maxsz; +} +#endif /* CONFIG_NFS_V4_1 */ + +static void encode_sequence(struct xdr_stream *xdr, + const struct nfs4_sequence_args *args, + struct compound_hdr *hdr) +{ +#if defined(CONFIG_NFS_V4_1) + struct nfs4_session *session = args->sa_session; + struct nfs4_slot_table *tp; + struct nfs4_slot *slot; + __be32 *p; + + if (!session) + return; + + tp = &session->fc_slot_table; + + WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); + slot = tp->slots + args->sa_slotid; + + p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16); + *p++ = cpu_to_be32(OP_SEQUENCE); + + /* + * Sessionid + seqid + slotid + max slotid + cache_this + */ + dprintk("%s: sessionid=%u:%u:%u:%u seqid=%d slotid=%d " + "max_slotid=%d cache_this=%d\n", + __func__, + ((u32 *)session->sess_id.data)[0], + ((u32 *)session->sess_id.data)[1], + ((u32 *)session->sess_id.data)[2], + ((u32 *)session->sess_id.data)[3], + slot->seq_nr, args->sa_slotid, + tp->highest_used_slotid, args->sa_cache_this); + p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(slot->seq_nr); + *p++ = cpu_to_be32(args->sa_slotid); + *p++ = cpu_to_be32(tp->highest_used_slotid); + *p = cpu_to_be32(args->sa_cache_this); + hdr->nops++; + hdr->replen += decode_sequence_maxsz; +#endif /* CONFIG_NFS_V4_1 */ +} + +/* + * END OF "GENERIC" ENCODE ROUTINES. + */ + +static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) +{ +#if defined(CONFIG_NFS_V4_1) + if (args->sa_session) + return args->sa_session->clp->cl_minorversion; +#endif /* CONFIG_NFS_V4_1 */ + return 0; +} + +/* + * Encode an ACCESS request + */ +static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_access(&xdr, args->access, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode LOOKUP request + */ +static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->dir_fh, &hdr); + encode_lookup(&xdr, args->name, &hdr); + encode_getfh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode LOOKUP_ROOT request + */ +static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putrootfh(&xdr, &hdr); + encode_getfh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode REMOVE request + */ +static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_remove(&xdr, &args->name, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode RENAME request + */ +static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->old_dir, &hdr); + encode_savefh(&xdr, &hdr); + encode_putfh(&xdr, args->new_dir, &hdr); + encode_rename(&xdr, args->old_name, args->new_name, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_restorefh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode LINK request + */ +static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_savefh(&xdr, &hdr); + encode_putfh(&xdr, args->dir_fh, &hdr); + encode_link(&xdr, args->name, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_restorefh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode CREATE request + */ +static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->dir_fh, &hdr); + encode_savefh(&xdr, &hdr); + encode_create(&xdr, args, &hdr); + encode_getfh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_restorefh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode SYMLINK request + */ +static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args) +{ + return nfs4_xdr_enc_create(req, p, args); +} + +/* + * Encode GETATTR request + */ +static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a CLOSE request + */ +static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_close(&xdr, args, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode an OPEN request + */ +static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_savefh(&xdr, &hdr); + encode_open(&xdr, args, &hdr); + encode_getfh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_restorefh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode an OPEN_CONFIRM request + */ +static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 0, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_open_confirm(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode an OPEN request with no attributes. + */ +static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_open(&xdr, args, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode an OPEN_DOWNGRADE request + */ +static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_open_downgrade(&xdr, args, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a LOCK request + */ +static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_lock(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a LOCKT request + */ +static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_lockt(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a LOCKU request + */ +static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_locku(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a READLINK request + */ +static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_readlink(&xdr, args, req, &hdr); + + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, + args->pgbase, args->pglen); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a READDIR request + */ +static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_readdir(&xdr, args, req, &hdr); + + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, + args->pgbase, args->count); + dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", + __func__, hdr.replen << 2, args->pages, + args->pgbase, args->count); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a READ request + */ +static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_read(&xdr, args, &hdr); + + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, + args->pages, args->pgbase, args->count); + req->rq_rcv_buf.flags |= XDRBUF_READ; + encode_nops(&hdr); + return 0; +} + +/* + * Encode an SETATTR request + */ +static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_setattr(&xdr, args, args->server, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, + struct nfs_getaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + uint32_t replen; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; + encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); + + xdr_inline_pages(&req->rq_rcv_buf, replen << 2, + args->acl_pages, args->acl_pgbase, args->acl_len); + encode_nops(&hdr); + return 0; +} + +/* + * Encode a WRITE request + */ +static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_write(&xdr, args, &hdr); + req->rq_snd_buf.flags |= XDRBUF_WRITE; + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a COMMIT request + */ +static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_commit(&xdr, args, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * FSINFO request + */ +static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_fsinfo(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a PATHCONF request + */ +static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], + &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a STATFS request + */ +static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], + args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * GETATTR_BITMAP request + */ +static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, + struct nfs4_server_caps_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fhandle, &hdr); + encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| + FATTR4_WORD0_LINK_SUPPORT| + FATTR4_WORD0_SYMLINK_SUPPORT| + FATTR4_WORD0_ACLSUPPORT, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a RENEW request + */ +static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 0, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_renew(&xdr, clp, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a SETCLIENTID request + */ +static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 0, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_setclientid(&xdr, sc, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a SETCLIENTID_CONFIRM request + */ +static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 0, + }; + const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_setclientid_confirm(&xdr, clp, &hdr); + encode_putrootfh(&xdr, &hdr); + encode_fsinfo(&xdr, lease_bitmap, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * DELEGRETURN request + */ +static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fhandle, &hdr); + encode_delegreturn(&xdr, args->stateid, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode FS_LOCATIONS request + */ +static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + uint32_t replen; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->dir_fh, &hdr); + encode_lookup(&xdr, args->name, &hdr); + replen = hdr.replen; /* get the attribute into args->page */ + encode_fs_locations(&xdr, args->bitmask, &hdr); + + xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page, + 0, PAGE_SIZE); + encode_nops(&hdr); + return 0; +} + +#if defined(CONFIG_NFS_V4_1) +/* + * EXCHANGE_ID request + */ +static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p, + struct nfs41_exchange_id_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = args->client->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_exchange_id(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a CREATE_SESSION request + */ +static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p, + struct nfs41_create_session_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = args->client->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_create_session(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a DESTROY_SESSION request + */ +static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p, + struct nfs4_session *session) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = session->clp->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_destroy_session(&xdr, session, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a SEQUENCE request + */ +static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p, + struct nfs4_sequence_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a GET_LEASE_TIME request + */ +static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p, + struct nfs4_get_lease_time_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), + }; + const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->la_seq_args, &hdr); + encode_putrootfh(&xdr, &hdr); + encode_fsinfo(&xdr, lease_bitmap, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a RECLAIM_COMPLETE request + */ +static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p, + struct nfs41_reclaim_complete_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args) + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_reclaim_complete(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +#endif /* CONFIG_NFS_V4_1 */ + +static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) +{ + dprintk("nfs: %s: prematurely hit end of receive buffer. " + "Remaining buffer length is %tu words.\n", + func, xdr->end - xdr->p); +} + +static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *len = be32_to_cpup(p); + p = xdr_inline_decode(xdr, *len); + if (unlikely(!p)) + goto out_overflow; + *string = (char *)p; + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + hdr->status = be32_to_cpup(p++); + hdr->taglen = be32_to_cpup(p); + + p = xdr_inline_decode(xdr, hdr->taglen + 4); + if (unlikely(!p)) + goto out_overflow; + hdr->tag = (char *)p; + p += XDR_QUADLEN(hdr->taglen); + hdr->nops = be32_to_cpup(p); + if (unlikely(hdr->nops < 1)) + return nfs4_stat_to_errno(hdr->status); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +{ + __be32 *p; + uint32_t opnum; + int32_t nfserr; + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + opnum = be32_to_cpup(p++); + if (opnum != expected) { + dprintk("nfs: Server returned operation" + " %d but we issued a request for %d\n", + opnum, expected); + return -EIO; + } + nfserr = be32_to_cpup(p); + if (nfserr != NFS_OK) + return nfs4_stat_to_errno(nfserr); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +/* Dummy routine */ +static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp) +{ + __be32 *p; + unsigned int strlen; + char *str; + + p = xdr_inline_decode(xdr, 12); + if (likely(p)) + return decode_opaque_inline(xdr, &strlen, &str); + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +{ + uint32_t bmlen; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + bmlen = be32_to_cpup(p); + + bitmap[0] = bitmap[1] = 0; + p = xdr_inline_decode(xdr, (bmlen << 2)); + if (unlikely(!p)) + goto out_overflow; + if (bmlen > 0) { + bitmap[0] = be32_to_cpup(p++); + if (bmlen > 1) + bitmap[1] = be32_to_cpup(p); + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *attrlen = be32_to_cpup(p); + *savep = xdr->p; + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) +{ + if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { + decode_attr_bitmap(xdr, bitmask); + bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; + } else + bitmask[0] = bitmask[1] = 0; + dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]); + return 0; +} + +static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type) +{ + __be32 *p; + int ret = 0; + + *type = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *type = be32_to_cpup(p); + if (*type < NF4REG || *type > NF4NAMEDATTR) { + dprintk("%s: bad type %d\n", __func__, *type); + return -EIO; + } + bitmap[0] &= ~FATTR4_WORD0_TYPE; + ret = NFS_ATTR_FATTR_TYPE; + } + dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) +{ + __be32 *p; + int ret = 0; + + *change = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, change); + bitmap[0] &= ~FATTR4_WORD0_CHANGE; + ret = NFS_ATTR_FATTR_CHANGE; + } + dprintk("%s: change attribute=%Lu\n", __func__, + (unsigned long long)*change); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) +{ + __be32 *p; + int ret = 0; + + *size = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, size); + bitmap[0] &= ~FATTR4_WORD0_SIZE; + ret = NFS_ATTR_FATTR_SIZE; + } + dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + __be32 *p; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *res = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; + } + dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + __be32 *p; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *res = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; + } + dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) +{ + __be32 *p; + int ret = 0; + + fsid->major = 0; + fsid->minor = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FSID)) { + p = xdr_inline_decode(xdr, 16); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_hyper(p, &fsid->major); + xdr_decode_hyper(p, &fsid->minor); + bitmap[0] &= ~FATTR4_WORD0_FSID; + ret = NFS_ATTR_FATTR_FSID; + } + dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__, + (unsigned long long)fsid->major, + (unsigned long long)fsid->minor); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + __be32 *p; + + *res = 60; + if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *res = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; + } + dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + __be32 *p; + + *res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL; + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *res = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; + } + dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) +{ + __be32 *p; + int ret = 0; + + *fileid = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, fileid); + bitmap[0] &= ~FATTR4_WORD0_FILEID; + ret = NFS_ATTR_FATTR_FILEID; + } + dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) +{ + __be32 *p; + int ret = 0; + + *fileid = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, fileid); + bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + ret = NFS_ATTR_FATTR_FILEID; + } + dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + __be32 *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; + } + dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + __be32 *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; + } + dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + __be32 *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; + } + dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) +{ + u32 n; + __be32 *p; + int status = 0; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + n = be32_to_cpup(p); + if (n == 0) + goto root_path; + dprintk("path "); + path->ncomponents = 0; + while (path->ncomponents < n) { + struct nfs4_string *component = &path->components[path->ncomponents]; + status = decode_opaque_inline(xdr, &component->len, &component->data); + if (unlikely(status != 0)) + goto out_eio; + if (path->ncomponents != n) + dprintk("/"); + dprintk("%s", component->data); + if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) + path->ncomponents++; + else { + dprintk("cannot parse %d components in path\n", n); + goto out_eio; + } + } +out: + dprintk("\n"); + return status; +root_path: +/* a root pathname is sent as a zero component4 */ + path->ncomponents = 1; + path->components[0].len=0; + path->components[0].data=NULL; + dprintk("path /\n"); + goto out; +out_eio: + dprintk(" status %d", status); + status = -EIO; + goto out; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) +{ + int n; + __be32 *p; + int status = -EIO; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_FS_LOCATIONS -1U))) + goto out; + status = 0; + if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) + goto out; + dprintk("%s: fsroot ", __func__); + status = decode_pathname(xdr, &res->fs_path); + if (unlikely(status != 0)) + goto out; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + n = be32_to_cpup(p); + if (n <= 0) + goto out_eio; + res->nlocations = 0; + while (res->nlocations < n) { + u32 m; + struct nfs4_fs_location *loc = &res->locations[res->nlocations]; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + m = be32_to_cpup(p); + + loc->nservers = 0; + dprintk("%s: servers ", __func__); + while (loc->nservers < m) { + struct nfs4_string *server = &loc->servers[loc->nservers]; + status = decode_opaque_inline(xdr, &server->len, &server->data); + if (unlikely(status != 0)) + goto out_eio; + dprintk("%s ", server->data); + if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) + loc->nservers++; + else { + unsigned int i; + dprintk("%s: using first %u of %u servers " + "returned for location %u\n", + __func__, + NFS4_FS_LOCATION_MAXSERVERS, + m, res->nlocations); + for (i = loc->nservers; i < m; i++) { + unsigned int len; + char *data; + status = decode_opaque_inline(xdr, &len, &data); + if (unlikely(status != 0)) + goto out_eio; + } + } + } + status = decode_pathname(xdr, &loc->rootpath); + if (unlikely(status != 0)) + goto out_eio; + if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) + res->nlocations++; + } + if (res->nlocations != 0) + status = NFS_ATTR_FATTR_V4_REFERRAL; +out: + dprintk("%s: fs_locations done, error = %d\n", __func__, status); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); +out_eio: + status = -EIO; + goto out; +} + +static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + __be32 *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; + } + dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) +{ + __be32 *p; + int status = 0; + + *maxlink = 1; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *maxlink = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_MAXLINK; + } + dprintk("%s: maxlink=%u\n", __func__, *maxlink); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) +{ + __be32 *p; + int status = 0; + + *maxname = 1024; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *maxname = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_MAXNAME; + } + dprintk("%s: maxname=%u\n", __func__, *maxname); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + __be32 *p; + int status = 0; + + *res = 1024; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXREAD - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) { + uint64_t maxread; + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, &maxread); + if (maxread > 0x7FFFFFFF) + maxread = 0x7FFFFFFF; + *res = (uint32_t)maxread; + bitmap[0] &= ~FATTR4_WORD0_MAXREAD; + } + dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + __be32 *p; + int status = 0; + + *res = 1024; + if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXWRITE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) { + uint64_t maxwrite; + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, &maxwrite); + if (maxwrite > 0x7FFFFFFF) + maxwrite = 0x7FFFFFFF; + *res = (uint32_t)maxwrite; + bitmap[0] &= ~FATTR4_WORD0_MAXWRITE; + } + dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) +{ + uint32_t tmp; + __be32 *p; + int ret = 0; + + *mode = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + tmp = be32_to_cpup(p); + *mode = tmp & ~S_IFMT; + bitmap[1] &= ~FATTR4_WORD1_MODE; + ret = NFS_ATTR_FATTR_MODE; + } + dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) +{ + __be32 *p; + int ret = 0; + + *nlink = 1; + if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *nlink = be32_to_cpup(p); + bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; + ret = NFS_ATTR_FATTR_NLINK; + } + dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs_client *clp, uint32_t *uid, int may_sleep) +{ + uint32_t len; + __be32 *p; + int ret = 0; + + *uid = -2; + if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + goto out_overflow; + if (!may_sleep) { + /* do nothing */ + } else if (len < XDR_MAX_NETOBJ) { + if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) + ret = NFS_ATTR_FATTR_OWNER; + else + dprintk("%s: nfs_map_name_to_uid failed!\n", + __func__); + } else + dprintk("%s: name too long (%u)!\n", + __func__, len); + bitmap[1] &= ~FATTR4_WORD1_OWNER; + } + dprintk("%s: uid=%d\n", __func__, (int)*uid); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs_client *clp, uint32_t *gid, int may_sleep) +{ + uint32_t len; + __be32 *p; + int ret = 0; + + *gid = -2; + if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + goto out_overflow; + if (!may_sleep) { + /* do nothing */ + } else if (len < XDR_MAX_NETOBJ) { + if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) + ret = NFS_ATTR_FATTR_GROUP; + else + dprintk("%s: nfs_map_group_to_gid failed!\n", + __func__); + } else + dprintk("%s: name too long (%u)!\n", + __func__, len); + bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; + } + dprintk("%s: gid=%d\n", __func__, (int)*gid); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) +{ + uint32_t major = 0, minor = 0; + __be32 *p; + int ret = 0; + + *rdev = MKDEV(0,0); + if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) { + dev_t tmp; + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + major = be32_to_cpup(p++); + minor = be32_to_cpup(p); + tmp = MKDEV(major, minor); + if (MAJOR(tmp) == major && MINOR(tmp) == minor) + *rdev = tmp; + bitmap[1] &= ~ FATTR4_WORD1_RAWDEV; + ret = NFS_ATTR_FATTR_RDEV; + } + dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + __be32 *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; + } + dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + __be32 *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; + } + dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) +{ + __be32 *p; + int status = 0; + + *res = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; + } + dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) +{ + __be32 *p; + int ret = 0; + + *used = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, used); + bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; + ret = NFS_ATTR_FATTR_SPACE_USED; + } + dprintk("%s: space used=%Lu\n", __func__, + (unsigned long long)*used); + return ret; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) +{ + __be32 *p; + uint64_t sec; + uint32_t nsec; + + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_hyper(p, &sec); + nsec = be32_to_cpup(p); + time->tv_sec = (time_t)sec; + time->tv_nsec = (long)nsec; + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_ACCESS - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) { + status = decode_attr_time(xdr, time); + if (status == 0) + status = NFS_ATTR_FATTR_ATIME; + bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS; + } + dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec); + return status; +} + +static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_METADATA - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) { + status = decode_attr_time(xdr, time); + if (status == 0) + status = NFS_ATTR_FATTR_CTIME; + bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA; + } + dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec); + return status; +} + +static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_MODIFY - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) { + status = decode_attr_time(xdr, time); + if (status == 0) + status = NFS_ATTR_FATTR_MTIME; + bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY; + } + dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec); + return status; +} + +static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrlen) +{ + unsigned int attrwords = XDR_QUADLEN(attrlen); + unsigned int nwords = xdr->p - savep; + + if (unlikely(attrwords != nwords)) { + dprintk("%s: server returned incorrect attribute length: " + "%u %c %u\n", + __func__, + attrwords << 2, + (attrwords < nwords) ? '<' : '>', + nwords << 2); + return -EIO; + } + return 0; +} + +static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 20); + if (unlikely(!p)) + goto out_overflow; + cinfo->atomic = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &cinfo->before); + xdr_decode_hyper(p, &cinfo->after); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) +{ + __be32 *p; + uint32_t supp, acc; + int status; + + status = decode_op_hdr(xdr, OP_ACCESS); + if (status) + return status; + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + supp = be32_to_cpup(p++); + acc = be32_to_cpup(p); + access->supported = supp; + access->access = acc; + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, len); + if (likely(p)) { + memcpy(buf, p, len); + return 0; + } + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +{ + return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); +} + +static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) +{ + int status; + + status = decode_op_hdr(xdr, OP_CLOSE); + if (status != -EIO) + nfs_increment_open_seqid(status, res->seqid); + if (!status) + status = decode_stateid(xdr, &res->stateid); + return status; +} + +static int decode_verifier(struct xdr_stream *xdr, void *verifier) +{ + return decode_opaque_fixed(xdr, verifier, 8); +} + +static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) +{ + int status; + + status = decode_op_hdr(xdr, OP_COMMIT); + if (!status) + status = decode_verifier(xdr, res->verf->verifier); + return status; +} + +static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) +{ + __be32 *p; + uint32_t bmlen; + int status; + + status = decode_op_hdr(xdr, OP_CREATE); + if (status) + return status; + if ((status = decode_change_info(xdr, cinfo))) + return status; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + bmlen = be32_to_cpup(p); + p = xdr_inline_decode(xdr, bmlen << 2); + if (likely(p)) + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) +{ + __be32 *savep; + uint32_t attrlen, bitmap[2] = {0}; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) + goto xdr_error; + if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) + goto xdr_error; + if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) + goto xdr_error; + if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0) + goto xdr_error; + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + dprintk("%s: xdr returned %d!\n", __func__, -status); + return status; +} + +static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) +{ + __be32 *savep; + uint32_t attrlen, bitmap[2] = {0}; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + + if ((status = decode_attr_files_avail(xdr, bitmap, &fsstat->afiles)) != 0) + goto xdr_error; + if ((status = decode_attr_files_free(xdr, bitmap, &fsstat->ffiles)) != 0) + goto xdr_error; + if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0) + goto xdr_error; + if ((status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes)) != 0) + goto xdr_error; + if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0) + goto xdr_error; + if ((status = decode_attr_space_total(xdr, bitmap, &fsstat->tbytes)) != 0) + goto xdr_error; + + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + dprintk("%s: xdr returned %d!\n", __func__, -status); + return status; +} + +static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) +{ + __be32 *savep; + uint32_t attrlen, bitmap[2] = {0}; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + + if ((status = decode_attr_maxlink(xdr, bitmap, &pathconf->max_link)) != 0) + goto xdr_error; + if ((status = decode_attr_maxname(xdr, bitmap, &pathconf->max_namelen)) != 0) + goto xdr_error; + + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + dprintk("%s: xdr returned %d!\n", __func__, -status); + return status; +} + +static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, + const struct nfs_server *server, int may_sleep) +{ + __be32 *savep; + uint32_t attrlen, + bitmap[2] = {0}, + type; + int status; + umode_t fmode = 0; + uint64_t fileid; + + status = decode_op_hdr(xdr, OP_GETATTR); + if (status < 0) + goto xdr_error; + + status = decode_attr_bitmap(xdr, bitmap); + if (status < 0) + goto xdr_error; + + status = decode_attr_length(xdr, &attrlen, &savep); + if (status < 0) + goto xdr_error; + + + status = decode_attr_type(xdr, bitmap, &type); + if (status < 0) + goto xdr_error; + fattr->mode = 0; + if (status != 0) { + fattr->mode |= nfs_type2fmt[type]; + fattr->valid |= status; + } + + status = decode_attr_change(xdr, bitmap, &fattr->change_attr); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_size(xdr, bitmap, &fattr->size); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_fsid(xdr, bitmap, &fattr->fsid); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, + struct nfs4_fs_locations, + fattr)); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_mode(xdr, bitmap, &fmode); + if (status < 0) + goto xdr_error; + if (status != 0) { + fattr->mode |= fmode; + fattr->valid |= status; + } + + status = decode_attr_nlink(xdr, bitmap, &fattr->nlink); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_owner(xdr, bitmap, server->nfs_client, + &fattr->uid, may_sleep); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_group(xdr, bitmap, server->nfs_client, + &fattr->gid, may_sleep); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_rdev(xdr, bitmap, &fattr->rdev); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_time_access(xdr, bitmap, &fattr->atime); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + + status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid); + if (status < 0) + goto xdr_error; + if (status != 0 && !(fattr->valid & status)) { + fattr->fileid = fileid; + fattr->valid |= status; + } + + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + dprintk("%s: xdr returned %d\n", __func__, -status); + return status; +} + + +static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) +{ + __be32 *savep; + uint32_t attrlen, bitmap[2]; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + + fsinfo->rtmult = fsinfo->wtmult = 512; /* ??? */ + + if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0) + goto xdr_error; + if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0) + goto xdr_error; + if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0) + goto xdr_error; + fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax; + if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) + goto xdr_error; + fsinfo->wtpref = fsinfo->wtmax; + + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + dprintk("%s: xdr returned %d!\n", __func__, -status); + return status; +} + +static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh) +{ + __be32 *p; + uint32_t len; + int status; + + /* Zero handle first to allow comparisons */ + memset(fh, 0, sizeof(*fh)); + + status = decode_op_hdr(xdr, OP_GETFH); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + if (len > NFS4_FHSIZE) + return -EIO; + fh->size = len; + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + goto out_overflow; + memcpy(fh->data, p, len); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_LINK); + if (status) + return status; + return decode_change_info(xdr, cinfo); +} + +/* + * We create the owner, so we know a proper owner.id length is 4. + */ +static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) +{ + uint64_t offset, length, clientid; + __be32 *p; + uint32_t namelen, type; + + p = xdr_inline_decode(xdr, 32); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_hyper(p, &offset); + p = xdr_decode_hyper(p, &length); + type = be32_to_cpup(p++); + if (fl != NULL) { + fl->fl_start = (loff_t)offset; + fl->fl_end = fl->fl_start + (loff_t)length - 1; + if (length == ~(uint64_t)0) + fl->fl_end = OFFSET_MAX; + fl->fl_type = F_WRLCK; + if (type & 1) + fl->fl_type = F_RDLCK; + fl->fl_pid = 0; + } + p = xdr_decode_hyper(p, &clientid); + namelen = be32_to_cpup(p); + p = xdr_inline_decode(xdr, namelen); + if (likely(p)) + return -NFS4ERR_DENIED; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) +{ + int status; + + status = decode_op_hdr(xdr, OP_LOCK); + if (status == -EIO) + goto out; + if (status == 0) { + status = decode_stateid(xdr, &res->stateid); + if (unlikely(status)) + goto out; + } else if (status == -NFS4ERR_DENIED) + status = decode_lock_denied(xdr, NULL); + if (res->open_seqid != NULL) + nfs_increment_open_seqid(status, res->open_seqid); + nfs_increment_lock_seqid(status, res->lock_seqid); +out: + return status; +} + +static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res) +{ + int status; + status = decode_op_hdr(xdr, OP_LOCKT); + if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, res->denied); + return status; +} + +static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) +{ + int status; + + status = decode_op_hdr(xdr, OP_LOCKU); + if (status != -EIO) + nfs_increment_lock_seqid(status, res->seqid); + if (status == 0) + status = decode_stateid(xdr, &res->stateid); + return status; +} + +static int decode_lookup(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_LOOKUP); +} + +/* This is too sick! */ +static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) +{ + __be32 *p; + uint32_t limit_type, nblocks, blocksize; + + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + goto out_overflow; + limit_type = be32_to_cpup(p++); + switch (limit_type) { + case 1: + xdr_decode_hyper(p, maxsize); + break; + case 2: + nblocks = be32_to_cpup(p++); + blocksize = be32_to_cpup(p); + *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +{ + __be32 *p; + uint32_t delegation_type; + int status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + delegation_type = be32_to_cpup(p); + if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { + res->delegation_type = 0; + return 0; + } + status = decode_stateid(xdr, &res->delegation); + if (unlikely(status)) + return status; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->do_recall = be32_to_cpup(p); + + switch (delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + res->delegation_type = FMODE_READ; + break; + case NFS4_OPEN_DELEGATE_WRITE: + res->delegation_type = FMODE_WRITE|FMODE_READ; + if (decode_space_limit(xdr, &res->maxsize) < 0) + return -EIO; + } + return decode_ace(xdr, NULL, res->server->nfs_client); +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) +{ + __be32 *p; + uint32_t savewords, bmlen, i; + int status; + + status = decode_op_hdr(xdr, OP_OPEN); + if (status != -EIO) + nfs_increment_open_seqid(status, res->seqid); + if (!status) + status = decode_stateid(xdr, &res->stateid); + if (unlikely(status)) + return status; + + decode_change_info(xdr, &res->cinfo); + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + res->rflags = be32_to_cpup(p++); + bmlen = be32_to_cpup(p); + if (bmlen > 10) + goto xdr_error; + + p = xdr_inline_decode(xdr, bmlen << 2); + if (unlikely(!p)) + goto out_overflow; + savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); + for (i = 0; i < savewords; ++i) + res->attrset[i] = be32_to_cpup(p++); + for (; i < NFS4_BITMAP_SIZE; i++) + res->attrset[i] = 0; + + return decode_delegation(xdr, res); +xdr_error: + dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); + return -EIO; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) +{ + int status; + + status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); + if (status != -EIO) + nfs_increment_open_seqid(status, res->seqid); + if (!status) + status = decode_stateid(xdr, &res->stateid); + return status; +} + +static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) +{ + int status; + + status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); + if (status != -EIO) + nfs_increment_open_seqid(status, res->seqid); + if (!status) + status = decode_stateid(xdr, &res->stateid); + return status; +} + +static int decode_putfh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_PUTFH); +} + +static int decode_putrootfh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_PUTROOTFH); +} + +static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) +{ + struct kvec *iov = req->rq_rcv_buf.head; + __be32 *p; + uint32_t count, eof, recvd, hdrlen; + int status; + + status = decode_op_hdr(xdr, OP_READ); + if (status) + return status; + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + eof = be32_to_cpup(p++); + count = be32_to_cpup(p); + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (count > recvd) { + dprintk("NFS: server cheating in read reply: " + "count %u > recvd %u\n", count, recvd); + count = recvd; + eof = 0; + } + xdr_read_pages(xdr, count); + res->eof = eof; + res->count = count; + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct page *page = *rcvbuf->pages; + struct kvec *iov = rcvbuf->head; + size_t hdrlen; + u32 recvd, pglen = rcvbuf->page_len; + __be32 *end, *entry, *p, *kaddr; + unsigned int nr = 0; + int status; + + status = decode_op_hdr(xdr, OP_READDIR); + if (!status) + status = decode_verifier(xdr, readdir->verifier.data); + if (unlikely(status)) + return status; + dprintk("%s: verifier = %08x:%08x\n", + __func__, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1]); + + + hdrlen = (char *) xdr->p - (char *) iov->iov_base; + recvd = rcvbuf->len - hdrlen; + if (pglen > recvd) + pglen = recvd; + xdr_read_pages(xdr, pglen); + + BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); + kaddr = p = kmap_atomic(page, KM_USER0); + end = p + ((pglen + readdir->pgbase) >> 2); + entry = p; + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { + u32 len, attrlen, xlen; + if (end - p < 3) + goto short_pkt; + dprintk("cookie = %Lu, ", *((unsigned long long *)p)); + p += 2; /* cookie */ + len = ntohl(*p++); /* filename length */ + if (len > NFS4_MAXNAMLEN) { + dprintk("NFS: giant filename in readdir (len 0x%x)\n", + len); + goto err_unmap; + } + xlen = XDR_QUADLEN(len); + if (end - p < xlen + 1) + goto short_pkt; + dprintk("filename = %*s\n", len, (char *)p); + p += xlen; + len = ntohl(*p++); /* bitmap length */ + if (end - p < len + 1) + goto short_pkt; + p += len; + attrlen = XDR_QUADLEN(ntohl(*p++)); + if (end - p < attrlen + 2) + goto short_pkt; + p += attrlen; /* attributes */ + entry = p; + } + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } +out: + kunmap_atomic(kaddr, KM_USER0); + return 0; +short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ + dprintk("%s: short packet at entry %d\n", __func__, nr); + entry[0] = entry[1] = 0; + if (nr) + goto out; +err_unmap: + kunmap_atomic(kaddr, KM_USER0); + return -errno_NFSERR_IO; +} + +static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) +{ + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct kvec *iov = rcvbuf->head; + size_t hdrlen; + u32 len, recvd; + __be32 *p; + char *kaddr; + int status; + + status = decode_op_hdr(xdr, OP_READLINK); + if (status) + return status; + + /* Convert length of symlink */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + if (len >= rcvbuf->page_len || len <= 0) { + dprintk("nfs: server returned giant symlink!\n"); + return -ENAMETOOLONG; + } + hdrlen = (char *) xdr->p - (char *) iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (recvd < len) { + dprintk("NFS: server cheating in readlink reply: " + "count %u > recvd %u\n", len, recvd); + return -EIO; + } + xdr_read_pages(xdr, len); + /* + * The XDR encode routine has set things up so that + * the link text will be copied directly into the + * buffer. We just have to do overflow-checking, + * and and null-terminate the text (the VFS expects + * null-termination). + */ + kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); + kaddr[len+rcvbuf->page_base] = '\0'; + kunmap_atomic(kaddr, KM_USER0); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_REMOVE); + if (status) + goto out; + status = decode_change_info(xdr, cinfo); +out: + return status; +} + +static int decode_rename(struct xdr_stream *xdr, struct nfs4_change_info *old_cinfo, + struct nfs4_change_info *new_cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_RENAME); + if (status) + goto out; + if ((status = decode_change_info(xdr, old_cinfo))) + goto out; + status = decode_change_info(xdr, new_cinfo); +out: + return status; +} + +static int decode_renew(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_RENEW); +} + +static int +decode_restorefh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_RESTOREFH); +} + +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, + size_t *acl_len) +{ + __be32 *savep; + uint32_t attrlen, + bitmap[2] = {0}; + struct kvec *iov = req->rq_rcv_buf.head; + int status; + + *acl_len = 0; + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto out; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto out; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto out; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + size_t hdrlen; + u32 recvd; + + /* We ignore &savep and don't do consistency checks on + * the attr length. Let userspace figure it out.... */ + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (attrlen > recvd) { + dprintk("NFS: server cheating in getattr" + " acl reply: attrlen %u > recvd %u\n", + attrlen, recvd); + return -EINVAL; + } + xdr_read_pages(xdr, attrlen); + *acl_len = attrlen; + } else + status = -EOPNOTSUPP; + +out: + return status; +} + +static int +decode_savefh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_SAVEFH); +} + +static int decode_setattr(struct xdr_stream *xdr) +{ + __be32 *p; + uint32_t bmlen; + int status; + + status = decode_op_hdr(xdr, OP_SETATTR); + if (status) + return status; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + bmlen = be32_to_cpup(p); + p = xdr_inline_decode(xdr, bmlen << 2); + if (likely(p)) + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) +{ + __be32 *p; + uint32_t opnum; + int32_t nfserr; + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + opnum = be32_to_cpup(p++); + if (opnum != OP_SETCLIENTID) { + dprintk("nfs: decode_setclientid: Server returned operation" + " %d\n", opnum); + return -EIO; + } + nfserr = be32_to_cpup(p); + if (nfserr == NFS_OK) { + p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_hyper(p, &clp->cl_clientid); + memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE); + } else if (nfserr == NFSERR_CLID_INUSE) { + uint32_t len; + + /* skip netid string */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + goto out_overflow; + + /* skip uaddr string */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + goto out_overflow; + return -NFSERR_CLID_INUSE; + } else + return nfs4_stat_to_errno(nfserr); + + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_setclientid_confirm(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); +} + +static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_WRITE); + if (status) + return status; + + p = xdr_inline_decode(xdr, 16); + if (unlikely(!p)) + goto out_overflow; + res->count = be32_to_cpup(p++); + res->verf->committed = be32_to_cpup(p++); + memcpy(res->verf->verifier, p, 8); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_delegreturn(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_DELEGRETURN); +} + +#if defined(CONFIG_NFS_V4_1) +static int decode_exchange_id(struct xdr_stream *xdr, + struct nfs41_exchange_id_res *res) +{ + __be32 *p; + uint32_t dummy; + char *dummy_str; + int status; + struct nfs_client *clp = res->client; + + status = decode_op_hdr(xdr, OP_EXCHANGE_ID); + if (status) + return status; + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, &clp->cl_ex_clid); + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + goto out_overflow; + clp->cl_seqid = be32_to_cpup(p++); + clp->cl_exchange_flags = be32_to_cpup(p++); + + /* We ask for SP4_NONE */ + dummy = be32_to_cpup(p); + if (dummy != SP4_NONE) + return -EIO; + + /* Throw away minor_id */ + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + + /* Throw away Major id */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + + /* Throw away server_scope */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + + /* Throw away Implementation id array */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_chan_attrs(struct xdr_stream *xdr, + struct nfs4_channel_attrs *attrs) +{ + __be32 *p; + u32 nr_attrs; + + p = xdr_inline_decode(xdr, 28); + if (unlikely(!p)) + goto out_overflow; + attrs->headerpadsz = be32_to_cpup(p++); + attrs->max_rqst_sz = be32_to_cpup(p++); + attrs->max_resp_sz = be32_to_cpup(p++); + attrs->max_resp_sz_cached = be32_to_cpup(p++); + attrs->max_ops = be32_to_cpup(p++); + attrs->max_reqs = be32_to_cpup(p++); + nr_attrs = be32_to_cpup(p); + if (unlikely(nr_attrs > 1)) { + printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", + __func__, nr_attrs); + return -EINVAL; + } + if (nr_attrs == 1) { + p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */ + if (unlikely(!p)) + goto out_overflow; + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) +{ + return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN); +} + +static int decode_create_session(struct xdr_stream *xdr, + struct nfs41_create_session_res *res) +{ + __be32 *p; + int status; + struct nfs_client *clp = res->client; + struct nfs4_session *session = clp->cl_session; + + status = decode_op_hdr(xdr, OP_CREATE_SESSION); + if (!status) + status = decode_sessionid(xdr, &session->sess_id); + if (unlikely(status)) + return status; + + /* seqid, flags */ + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + clp->cl_seqid = be32_to_cpup(p++); + session->flags = be32_to_cpup(p); + + /* Channel attributes */ + status = decode_chan_attrs(xdr, &session->fc_attrs); + if (!status) + status = decode_chan_attrs(xdr, &session->bc_attrs); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) +{ + return decode_op_hdr(xdr, OP_DESTROY_SESSION); +} + +static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy) +{ + return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE); +} +#endif /* CONFIG_NFS_V4_1 */ + +static int decode_sequence(struct xdr_stream *xdr, + struct nfs4_sequence_res *res, + struct rpc_rqst *rqstp) +{ +#if defined(CONFIG_NFS_V4_1) + struct nfs4_slot *slot; + struct nfs4_sessionid id; + u32 dummy; + int status; + __be32 *p; + + if (!res->sr_session) + return 0; + + status = decode_op_hdr(xdr, OP_SEQUENCE); + if (!status) + status = decode_sessionid(xdr, &id); + if (unlikely(status)) + goto out_err; + + /* + * If the server returns different values for sessionID, slotID or + * sequence number, the server is looney tunes. + */ + status = -EREMOTEIO; + + if (memcmp(id.data, res->sr_session->sess_id.data, + NFS4_MAX_SESSIONID_LEN)) { + dprintk("%s Invalid session id\n", __func__); + goto out_err; + } + + p = xdr_inline_decode(xdr, 20); + if (unlikely(!p)) + goto out_overflow; + + /* seqid */ + slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid]; + dummy = be32_to_cpup(p++); + if (dummy != slot->seq_nr) { + dprintk("%s Invalid sequence number\n", __func__); + goto out_err; + } + /* slot id */ + dummy = be32_to_cpup(p++); + if (dummy != res->sr_slotid) { + dprintk("%s Invalid slot id\n", __func__); + goto out_err; + } + /* highest slot id - currently not processed */ + dummy = be32_to_cpup(p++); + /* target highest slot id - currently not processed */ + dummy = be32_to_cpup(p++); + /* result flags */ + res->sr_status_flags = be32_to_cpup(p); + status = 0; +out_err: + res->sr_status = status; + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + status = -EIO; + goto out_err; +#else /* CONFIG_NFS_V4_1 */ + return 0; +#endif /* CONFIG_NFS_V4_1 */ +} + +/* + * END OF "GENERIC" DECODE ROUTINES. + */ + +/* + * Decode OPEN_DOWNGRADE response + */ +static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_downgrade(&xdr, res); + if (status != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode ACCESS response + */ +static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status != 0) + goto out; + status = decode_access(&xdr, res); + if (status != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode LOOKUP response + */ +static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_lookup(&xdr)) != 0) + goto out; + if ((status = decode_getfh(&xdr, res->fh)) != 0) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server + ,!RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode LOOKUP_ROOT response + */ +static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + if ((status = decode_putrootfh(&xdr)) != 0) + goto out; + if ((status = decode_getfh(&xdr, res->fh)) == 0) + status = decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode REMOVE response + */ +static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_removeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_remove(&xdr, &res->cinfo)) != 0) + goto out; + decode_getfattr(&xdr, &res->dir_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode RENAME response + */ +static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_savefh(&xdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) + goto out; + /* Current FH is target directory */ + if (decode_getfattr(&xdr, res->new_fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + goto out; + if ((status = decode_restorefh(&xdr)) != 0) + goto out; + decode_getfattr(&xdr, res->old_fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode LINK response + */ +static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_savefh(&xdr)) != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_link(&xdr, &res->cinfo)) != 0) + goto out; + /* + * Note order: OP_LINK leaves the directory as the current + * filehandle. + */ + if (decode_getfattr(&xdr, res->dir_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + goto out; + if ((status = decode_restorefh(&xdr)) != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode CREATE response + */ +static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_savefh(&xdr)) != 0) + goto out; + if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0) + goto out; + if ((status = decode_getfh(&xdr, res->fh)) != 0) + goto out; + if (decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + goto out; + if ((status = decode_restorefh(&xdr)) != 0) + goto out; + decode_getfattr(&xdr, res->dir_fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode SYMLINK response + */ +static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res) +{ + return nfs4_xdr_dec_create(rqstp, p, res); +} + +/* + * Decode GETATTR response + */ +static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + status = encode_setacl(&xdr, args, &hdr); + encode_nops(&hdr); + return status; +} + +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, + struct nfs_setaclres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, + struct nfs_getaclres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, rqstp, &res->acl_len); + +out: + return status; +} + +/* + * Decode CLOSE response + */ +static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_close(&xdr, res); + if (status != 0) + goto out; + /* + * Note: Server may do delete on close for this file + * in which case the getattr call will fail with + * an ESTALE error. Shouldn't be a problem, + * though, since fattr->valid will remain unset. + */ + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode OPEN response + */ +static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_savefh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); + if (status) + goto out; + if (decode_getfh(&xdr, &res->fh) != 0) + goto out; + if (decode_getfattr(&xdr, res->f_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + goto out; + if (decode_restorefh(&xdr) != 0) + goto out; + decode_getfattr(&xdr, res->dir_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode OPEN_CONFIRM response + */ +static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_confirm(&xdr, res); +out: + return status; +} + +/* + * Decode OPEN response + */ +static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); + if (status) + goto out; + decode_getfattr(&xdr, res->f_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode SETATTR response + */ +static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr); + if (status) + goto out; + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode LOCK response + */ +static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lock(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKT response + */ +static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lockt(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKU response + */ +static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_locku(&xdr, res); +out: + return status; +} + +/* + * Decode READLINK response + */ +static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, + struct nfs4_readlink_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_readlink(&xdr, rqstp); +out: + return status; +} + +/* + * Decode READDIR response + */ +static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_readdir(&xdr, rqstp, res); +out: + return status; +} + +/* + * Decode Read response + */ +static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_read(&xdr, rqstp, res); + if (!status) + status = res->count; +out: + return status; +} + +/* + * Decode WRITE response + */ +static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_write(&xdr, res); + if (status) + goto out; + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); + if (!status) + status = res->count; +out: + return status; +} + +/* + * Decode COMMIT response + */ +static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_commit(&xdr, res); + if (status) + goto out; + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode FSINFO response + */ +static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, + struct nfs4_fsinfo_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, res->fsinfo); + return status; +} + +/* + * Decode PATHCONF response + */ +static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, + struct nfs4_pathconf_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_pathconf(&xdr, res->pathconf); + return status; +} + +/* + * Decode STATFS response + */ +static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, + struct nfs4_statfs_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_statfs(&xdr, res->fsstat); + return status; +} + +/* + * Decode GETATTR_BITMAP response + */ +static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, req); + if (status) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + status = decode_server_caps(&xdr, res); +out: + return status; +} + +/* + * Decode RENEW response + */ +static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_renew(&xdr); + return status; +} + +/* + * Decode SETCLIENTID response + */ +static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, + struct nfs_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid(&xdr, clp); + return status; +} + +/* + * Decode SETCLIENTID_CONFIRM response + */ +static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid_confirm(&xdr); + if (!status) + status = decode_putrootfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + return status; +} + +/* + * Decode DELEGRETURN response + */ +static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status != 0) + goto out; + status = decode_delegreturn(&xdr); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); +out: + return status; +} + +/* + * Decode FS_LOCATIONS response + */ +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, + struct nfs4_fs_locations_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, req); + if (status) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_lookup(&xdr)) != 0) + goto out; + xdr_enter_page(&xdr, PAGE_SIZE); + status = decode_getfattr(&xdr, &res->fs_locations->fattr, + res->fs_locations->server, + !RPC_IS_ASYNC(req->rq_task)); +out: + return status; +} + +#if defined(CONFIG_NFS_V4_1) +/* + * Decode EXCHANGE_ID response + */ +static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, + void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_exchange_id(&xdr, res); + return status; +} + +/* + * Decode CREATE_SESSION response + */ +static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs41_create_session_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_create_session(&xdr, res); + return status; +} + +/* + * Decode DESTROY_SESSION response + */ +static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, + void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_destroy_session(&xdr, dummy); + return status; +} + +/* + * Decode SEQUENCE response + */ +static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs4_sequence_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, res, rqstp); + return status; +} + +/* + * Decode GET_LEASE_TIME response + */ +static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs4_get_lease_time_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, &res->lr_seq_res, rqstp); + if (!status) + status = decode_putrootfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, res->lr_fsinfo); + return status; +} + +/* + * Decode RECLAIM_COMPLETE response + */ +static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs41_reclaim_complete_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (!status) + status = decode_reclaim_complete(&xdr, (void *)NULL); + return status; +} +#endif /* CONFIG_NFS_V4_1 */ + +__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +{ + uint32_t bitmap[2] = {0}; + uint32_t len; + + if (!*p++) { + if (!*p) + return ERR_PTR(-EAGAIN); + entry->eof = 1; + return ERR_PTR(-EBADCOOKIE); + } + + entry->prev_cookie = entry->cookie; + p = xdr_decode_hyper(p, &entry->cookie); + entry->len = ntohl(*p++); + entry->name = (const char *) p; + p += XDR_QUADLEN(entry->len); + + /* + * In case the server doesn't return an inode number, + * we fake one here. (We don't use inode number 0, + * since glibc seems to choke on it...) + */ + entry->ino = 1; + + len = ntohl(*p++); /* bitmap length */ + if (len-- > 0) { + bitmap[0] = ntohl(*p++); + if (len-- > 0) { + bitmap[1] = ntohl(*p++); + p += len; + } + } + len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ + if (len > 0) { + if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { + bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; + /* Ignore the return value of rdattr_error for now */ + p++; + len--; + } + if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) + xdr_decode_hyper(p, &entry->ino); + else if (bitmap[0] == FATTR4_WORD0_FILEID) + xdr_decode_hyper(p, &entry->ino); + p += len; + } + + entry->eof = !p[0] && p[1]; + return p; +} + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. + */ +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, -EPERM }, + { NFS4ERR_NOENT, -ENOENT }, + { NFS4ERR_IO, -errno_NFSERR_IO}, + { NFS4ERR_NXIO, -ENXIO }, + { NFS4ERR_ACCESS, -EACCES }, + { NFS4ERR_EXIST, -EEXIST }, + { NFS4ERR_XDEV, -EXDEV }, + { NFS4ERR_NOTDIR, -ENOTDIR }, + { NFS4ERR_ISDIR, -EISDIR }, + { NFS4ERR_INVAL, -EINVAL }, + { NFS4ERR_FBIG, -EFBIG }, + { NFS4ERR_NOSPC, -ENOSPC }, + { NFS4ERR_ROFS, -EROFS }, + { NFS4ERR_MLINK, -EMLINK }, + { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, -ENOTEMPTY }, + { NFS4ERR_DQUOT, -EDQUOT }, + { NFS4ERR_STALE, -ESTALE }, + { NFS4ERR_BADHANDLE, -EBADHANDLE }, + { NFS4ERR_BADOWNER, -EINVAL }, + { NFS4ERR_BADNAME, -EINVAL }, + { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, + { NFS4ERR_NOTSUPP, -ENOTSUPP }, + { NFS4ERR_TOOSMALL, -ETOOSMALL }, + { NFS4ERR_SERVERFAULT, -EREMOTEIO }, + { NFS4ERR_BADTYPE, -EBADTYPE }, + { NFS4ERR_LOCKED, -EAGAIN }, + { NFS4ERR_SYMLINK, -ELOOP }, + { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, -EDEADLK }, + { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs + * to be handled by a + * middle-layer. + */ + { -1, -EIO } +}; + +/* + * Convert an NFS error code to a local one. + * This one is used jointly by NFSv2 and NFSv3. + */ +static int +nfs4_stat_to_errno(int stat) +{ + int i; + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + if (stat <= 10000 || stat > 10100) { + /* The server is looney tunes. */ + return -EREMOTEIO; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return -stat; +} + +#define PROC(proc, argtype, restype) \ +[NFSPROC4_CLNT_##proc] = { \ + .p_proc = NFSPROC4_COMPOUND, \ + .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ + .p_arglen = NFS4_##argtype##_sz, \ + .p_replen = NFS4_##restype##_sz, \ + .p_statidx = NFSPROC4_CLNT_##proc, \ + .p_name = #proc, \ +} + +struct rpc_procinfo nfs4_procedures[] = { + PROC(READ, enc_read, dec_read), + PROC(WRITE, enc_write, dec_write), + PROC(COMMIT, enc_commit, dec_commit), + PROC(OPEN, enc_open, dec_open), + PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), + PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr), + PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), + PROC(CLOSE, enc_close, dec_close), + PROC(SETATTR, enc_setattr, dec_setattr), + PROC(FSINFO, enc_fsinfo, dec_fsinfo), + PROC(RENEW, enc_renew, dec_renew), + PROC(SETCLIENTID, enc_setclientid, dec_setclientid), + PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), + PROC(LOCK, enc_lock, dec_lock), + PROC(LOCKT, enc_lockt, dec_lockt), + PROC(LOCKU, enc_locku, dec_locku), + PROC(ACCESS, enc_access, dec_access), + PROC(GETATTR, enc_getattr, dec_getattr), + PROC(LOOKUP, enc_lookup, dec_lookup), + PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root), + PROC(REMOVE, enc_remove, dec_remove), + PROC(RENAME, enc_rename, dec_rename), + PROC(LINK, enc_link, dec_link), + PROC(SYMLINK, enc_symlink, dec_symlink), + PROC(CREATE, enc_create, dec_create), + PROC(PATHCONF, enc_pathconf, dec_pathconf), + PROC(STATFS, enc_statfs, dec_statfs), + PROC(READLINK, enc_readlink, dec_readlink), + PROC(READDIR, enc_readdir, dec_readdir), + PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), + PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), + PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), +#if defined(CONFIG_NFS_V4_1) + PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), + PROC(CREATE_SESSION, enc_create_session, dec_create_session), + PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), + PROC(SEQUENCE, enc_sequence, dec_sequence), + PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), + PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), +#endif /* CONFIG_NFS_V4_1 */ +}; + +struct rpc_version nfs_version4 = { + .number = 4, + .nrprocs = ARRAY_SIZE(nfs4_procedures), + .procs = nfs4_procedures +}; + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c new file mode 100644 index 00000000000..8c55b27c0de --- /dev/null +++ b/fs/nfs/nfsroot.c @@ -0,0 +1,532 @@ +/* + * Copyright (C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de> + * + * Allow an NFS filesystem to be mounted as root. The way this works is: + * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. + * (2) Handle RPC negotiation with the system which replied to RARP or + * was reported as a boot server by BOOTP or manually. + * (3) The actual mounting is done later, when init() is running. + * + * + * Changes: + * + * Alan Cox : Removed get_address name clash with FPU. + * Alan Cox : Reformatted a bit. + * Gero Kuhlmann : Code cleanup + * Michael Rausch : Fixed recognition of an incoming RARP answer. + * Martin Mares : (2.0) Auto-configuration via BOOTP supported. + * Martin Mares : Manual selection of interface & BOOTP/RARP. + * Martin Mares : Using network routes instead of host routes, + * allowing the default configuration to be used + * for normal operation of the host. + * Martin Mares : Randomized timer with exponential backoff + * installed to minimize network congestion. + * Martin Mares : Code cleanup. + * Martin Mares : (2.1) BOOTP and RARP made configuration options. + * Martin Mares : Server hostname generation fixed. + * Gerd Knorr : Fixed wired inode handling + * Martin Mares : (2.2) "0.0.0.0" addresses from command line ignored. + * Martin Mares : RARP replies not tested for server address. + * Gero Kuhlmann : (2.3) Some bug fixes and code cleanup again (please + * send me your new patches _before_ bothering + * Linus so that I don' always have to cleanup + * _afterwards_ - thanks) + * Gero Kuhlmann : Last changes of Martin Mares undone. + * Gero Kuhlmann : RARP replies are tested for specified server + * again. However, it's now possible to have + * different RARP and NFS servers. + * Gero Kuhlmann : "0.0.0.0" addresses from command line are + * now mapped to INADDR_NONE. + * Gero Kuhlmann : Fixed a bug which prevented BOOTP path name + * from being used (thanks to Leo Spiekman) + * Andy Walker : Allow to specify the NFS server in nfs_root + * without giving a path name + * Swen Thümmler : Allow to specify the NFS options in nfs_root + * without giving a path name. Fix BOOTP request + * for domainname (domainname is NIS domain, not + * DNS domain!). Skip dummy devices for BOOTP. + * Jacek Zapala : Fixed a bug which prevented server-ip address + * from nfsroot parameter from being used. + * Olaf Kirch : Adapted to new NFS code. + * Jakub Jelinek : Free used code segment. + * Marko Kohtala : Fixed some bugs. + * Martin Mares : Debug message cleanup + * Martin Mares : Changed to use the new generic IP layer autoconfig + * code. BOOTP and RARP moved there. + * Martin Mares : Default path now contains host name instead of + * host IP address (but host name defaults to IP + * address anyway). + * Martin Mares : Use root_server_addr appropriately during setup. + * Martin Mares : Rewrote parameter parsing, now hopefully giving + * correct overriding. + * Trond Myklebust : Add in preliminary support for NFSv3 and TCP. + * Fix bug in root_nfs_addr(). nfs_data.namlen + * is NOT for the length of the hostname. + * Hua Qin : Support for mounting root file system via + * NFS over TCP. + * Fabian Frederick: Option parser rebuilt (using parser lib) +*/ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprtsock.h> +#include <linux/nfs.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/in.h> +#include <linux/major.h> +#include <linux/utsname.h> +#include <linux/inet.h> +#include <linux/root_dev.h> +#include <net/ipconfig.h> +#include <linux/parser.h> + +#include "internal.h" + +/* Define this to allow debugging output */ +#undef NFSROOT_DEBUG +#define NFSDBG_FACILITY NFSDBG_ROOT + +/* Default port to use if server is not running a portmapper */ +#define NFS_MNT_PORT 627 + +/* Default path we try to mount. "%s" gets replaced by our IP address */ +#define NFS_ROOT "/tftpboot/%s" + +/* Parameters passed from the kernel command line */ +static char nfs_root_name[256] __initdata = ""; + +/* Address of NFS server */ +static __be32 servaddr __initdata = 0; + +/* Name of directory to mount */ +static char nfs_export_path[NFS_MAXPATHLEN] __initdata = { 0, }; + +/* NFS-related data */ +static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ +static int nfs_port __initdata = 0; /* Port to connect to for NFS */ +static int mount_port __initdata = 0; /* Mount daemon port number */ + + +/*************************************************************************** + + Parsing of options + + ***************************************************************************/ + +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin, + Opt_acregmax, Opt_acdirmin, Opt_acdirmax, + /* Options that take no arguments */ + Opt_soft, Opt_hard, Opt_intr, + Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, + Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, + /* Error token */ + Opt_err +}; + +static const match_table_t tokens __initconst = { + {Opt_port, "port=%u"}, + {Opt_rsize, "rsize=%u"}, + {Opt_wsize, "wsize=%u"}, + {Opt_timeo, "timeo=%u"}, + {Opt_retrans, "retrans=%u"}, + {Opt_acregmin, "acregmin=%u"}, + {Opt_acregmax, "acregmax=%u"}, + {Opt_acdirmin, "acdirmin=%u"}, + {Opt_acdirmax, "acdirmax=%u"}, + {Opt_soft, "soft"}, + {Opt_hard, "hard"}, + {Opt_intr, "intr"}, + {Opt_nointr, "nointr"}, + {Opt_posix, "posix"}, + {Opt_noposix, "noposix"}, + {Opt_cto, "cto"}, + {Opt_nocto, "nocto"}, + {Opt_ac, "ac"}, + {Opt_noac, "noac"}, + {Opt_lock, "lock"}, + {Opt_nolock, "nolock"}, + {Opt_v2, "nfsvers=2"}, + {Opt_v2, "v2"}, + {Opt_v3, "nfsvers=3"}, + {Opt_v3, "v3"}, + {Opt_udp, "proto=udp"}, + {Opt_udp, "udp"}, + {Opt_tcp, "proto=tcp"}, + {Opt_tcp, "tcp"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_err, NULL} + +}; + +/* + * Parse option string. + */ + +static int __init root_nfs_parse(char *name, char *buf) +{ + + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + + if (!name) + return 1; + + /* Set the NFS remote path */ + p = strsep(&name, ","); + if (p[0] != '\0' && strcmp(p, "default") != 0) + strlcpy(buf, p, NFS_MAXPATHLEN); + + while ((p = strsep (&name, ",")) != NULL) { + int token; + if (!*p) + continue; + token = match_token(p, tokens, args); + + /* %u tokens only. Beware if you add new tokens! */ + if (token < Opt_soft && match_int(&args[0], &option)) + return 0; + switch (token) { + case Opt_port: + nfs_port = option; + break; + case Opt_rsize: + nfs_data.rsize = option; + break; + case Opt_wsize: + nfs_data.wsize = option; + break; + case Opt_timeo: + nfs_data.timeo = option; + break; + case Opt_retrans: + nfs_data.retrans = option; + break; + case Opt_acregmin: + nfs_data.acregmin = option; + break; + case Opt_acregmax: + nfs_data.acregmax = option; + break; + case Opt_acdirmin: + nfs_data.acdirmin = option; + break; + case Opt_acdirmax: + nfs_data.acdirmax = option; + break; + case Opt_soft: + nfs_data.flags |= NFS_MOUNT_SOFT; + break; + case Opt_hard: + nfs_data.flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_intr: + case Opt_nointr: + break; + case Opt_posix: + nfs_data.flags |= NFS_MOUNT_POSIX; + break; + case Opt_noposix: + nfs_data.flags &= ~NFS_MOUNT_POSIX; + break; + case Opt_cto: + nfs_data.flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_nocto: + nfs_data.flags |= NFS_MOUNT_NOCTO; + break; + case Opt_ac: + nfs_data.flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_noac: + nfs_data.flags |= NFS_MOUNT_NOAC; + break; + case Opt_lock: + nfs_data.flags &= ~NFS_MOUNT_NONLM; + break; + case Opt_nolock: + nfs_data.flags |= NFS_MOUNT_NONLM; + break; + case Opt_v2: + nfs_data.flags &= ~NFS_MOUNT_VER3; + break; + case Opt_v3: + nfs_data.flags |= NFS_MOUNT_VER3; + break; + case Opt_udp: + nfs_data.flags &= ~NFS_MOUNT_TCP; + break; + case Opt_tcp: + nfs_data.flags |= NFS_MOUNT_TCP; + break; + case Opt_acl: + nfs_data.flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + nfs_data.flags |= NFS_MOUNT_NOACL; + break; + default: + printk(KERN_WARNING "Root-NFS: unknown " + "option: %s\n", p); + return 0; + } + } + + return 1; +} + +/* + * Prepare the NFS data structure and parse all options. + */ +static int __init root_nfs_name(char *name) +{ + static char buf[NFS_MAXPATHLEN] __initdata; + char *cp; + + /* Set some default values */ + memset(&nfs_data, 0, sizeof(nfs_data)); + nfs_port = -1; + nfs_data.version = NFS_MOUNT_VERSION; + nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ + nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; + nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; + nfs_data.acregmin = NFS_DEF_ACREGMIN; + nfs_data.acregmax = NFS_DEF_ACREGMAX; + nfs_data.acdirmin = NFS_DEF_ACDIRMIN; + nfs_data.acdirmax = NFS_DEF_ACDIRMAX; + strcpy(buf, NFS_ROOT); + + /* Process options received from the remote server */ + root_nfs_parse(root_server_path, buf); + + /* Override them by options set on kernel command-line */ + root_nfs_parse(name, buf); + + cp = utsname()->nodename; + if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { + printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); + return -1; + } + sprintf(nfs_export_path, buf, cp); + + return 1; +} + + +/* + * Get NFS server address. + */ +static int __init root_nfs_addr(void) +{ + if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) { + printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n"); + return -1; + } + + snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), + "%pI4", &servaddr); + return 0; +} + +/* + * Tell the user what's going on. + */ +#ifdef NFSROOT_DEBUG +static void __init root_nfs_print(void) +{ + printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", + nfs_export_path, nfs_data.hostname); + printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", + nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans); + printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n", + nfs_data.acregmin, nfs_data.acregmax, + nfs_data.acdirmin, nfs_data.acdirmax); + printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n", + nfs_port, mount_port, nfs_data.flags); +} +#endif + + +static int __init root_nfs_init(void) +{ +#ifdef NFSROOT_DEBUG + nfs_debug |= NFSDBG_ROOT; +#endif + + /* + * Decode the root directory path name and NFS options from + * the kernel command line. This has to go here in order to + * be able to use the client IP address for the remote root + * directory (necessary for pure RARP booting). + */ + if (root_nfs_name(nfs_root_name) < 0 || + root_nfs_addr() < 0) + return -1; + +#ifdef NFSROOT_DEBUG + root_nfs_print(); +#endif + + return 0; +} + + +/* + * Parse NFS server and directory information passed on the kernel + * command line. + */ +static int __init nfs_root_setup(char *line) +{ + ROOT_DEV = Root_NFS; + if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { + strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); + } else { + int n = strlen(line) + sizeof(NFS_ROOT) - 1; + if (n >= sizeof(nfs_root_name)) + line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; + sprintf(nfs_root_name, NFS_ROOT, line); + } + root_server_addr = root_nfs_parse_addr(nfs_root_name); + return 1; +} + +__setup("nfsroot=", nfs_root_setup); + +/*************************************************************************** + + Routines to actually mount the root directory + + ***************************************************************************/ + +/* + * Construct sockaddr_in from address and port number. + */ +static inline void +set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) +{ + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = addr; + sin->sin_port = port; +} + +/* + * Query server portmapper for the port of a daemon program. + */ +static int __init root_nfs_getport(int program, int version, int proto) +{ + struct sockaddr_in sin; + + printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n", + program, version, &servaddr); + set_sockaddr(&sin, servaddr, 0); + return rpcb_getport_sync(&sin, program, version, proto); +} + + +/* + * Use portmapper to find mountd and nfsd port numbers if not overriden + * by the user. Use defaults if portmapper is not available. + * XXX: Is there any nfs server with no portmapper? + */ +static int __init root_nfs_ports(void) +{ + int port; + int nfsd_ver, mountd_ver; + int nfsd_port, mountd_port; + int proto; + + if (nfs_data.flags & NFS_MOUNT_VER3) { + nfsd_ver = NFS3_VERSION; + mountd_ver = NFS_MNT3_VERSION; + nfsd_port = NFS_PORT; + mountd_port = NFS_MNT_PORT; + } else { + nfsd_ver = NFS2_VERSION; + mountd_ver = NFS_MNT_VERSION; + nfsd_port = NFS_PORT; + mountd_port = NFS_MNT_PORT; + } + + proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + + if (nfs_port < 0) { + if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) { + printk(KERN_ERR "Root-NFS: Unable to get nfsd port " + "number from server, using default\n"); + port = nfsd_port; + } + nfs_port = port; + dprintk("Root-NFS: Portmapper on server returned %d " + "as nfsd port\n", port); + } + + if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) { + printk(KERN_ERR "Root-NFS: Unable to get mountd port " + "number from server, using default\n"); + port = mountd_port; + } + mount_port = port; + dprintk("Root-NFS: mountd port is %d\n", port); + + return 0; +} + + +/* + * Get a file handle from the server for the directory which is to be + * mounted. + */ +static int __init root_nfs_get_handle(void) +{ + struct nfs_fh fh; + struct sockaddr_in sin; + unsigned int auth_flav_len = 0; + struct nfs_mount_request request = { + .sap = (struct sockaddr *)&sin, + .salen = sizeof(sin), + .dirpath = nfs_export_path, + .version = (nfs_data.flags & NFS_MOUNT_VER3) ? + NFS_MNT3_VERSION : NFS_MNT_VERSION, + .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? + XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, + .fh = &fh, + .auth_flav_len = &auth_flav_len, + }; + int status; + + set_sockaddr(&sin, servaddr, htons(mount_port)); + status = nfs_mount(&request); + if (status < 0) + printk(KERN_ERR "Root-NFS: Server returned error %d " + "while mounting %s\n", status, nfs_export_path); + else { + nfs_data.root.size = fh.size; + memcpy(nfs_data.root.data, fh.data, fh.size); + } + + return status; +} + +/* + * Get the NFS port numbers and file handle, and return the prepared 'data' + * argument for mount() if everything went OK. Return NULL otherwise. + */ +void * __init nfs_root_data(void) +{ + if (root_nfs_init() < 0 + || root_nfs_ports() < 0 + || root_nfs_get_handle() < 0) + return NULL; + set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port)); + return (void*)&nfs_data; +} diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c new file mode 100644 index 00000000000..29d9d36cd5f --- /dev/null +++ b/fs/nfs/pagelist.c @@ -0,0 +1,443 @@ +/* + * linux/fs/nfs/pagelist.c + * + * A set of helper functions for managing NFS read and write requests. + * The main purpose of these routines is to provide support for the + * coalescing of several requests into a single RPC call. + * + * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no> + * + */ + +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/sched.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs3.h> +#include <linux/nfs4.h> +#include <linux/nfs_page.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> + +#include "internal.h" + +static struct kmem_cache *nfs_page_cachep; + +static inline struct nfs_page * +nfs_page_alloc(void) +{ + struct nfs_page *p; + p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->wb_list); + } + return p; +} + +static inline void +nfs_page_free(struct nfs_page *p) +{ + kmem_cache_free(nfs_page_cachep, p); +} + +/** + * nfs_create_request - Create an NFS read/write request. + * @file: file descriptor to use + * @inode: inode to which the request is attached + * @page: page to write + * @offset: starting offset within the page for the write + * @count: number of bytes to read/write + * + * The page must be locked by the caller. This makes sure we never + * create two different requests for the same page. + * User should ensure it is safe to sleep in this function. + */ +struct nfs_page * +nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, + struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_page *req; + + for (;;) { + /* try to allocate the request struct */ + req = nfs_page_alloc(); + if (req != NULL) + break; + + if (fatal_signal_pending(current)) + return ERR_PTR(-ERESTARTSYS); + yield(); + } + + /* Initialize the request struct. Initially, we assume a + * long write-back delay. This will be adjusted in + * update_nfs_request below if the region is not locked. */ + req->wb_page = page; + atomic_set(&req->wb_complete, 0); + req->wb_index = page->index; + page_cache_get(page); + BUG_ON(PagePrivate(page)); + BUG_ON(!PageLocked(page)); + BUG_ON(page->mapping->host != inode); + req->wb_offset = offset; + req->wb_pgbase = offset; + req->wb_bytes = count; + req->wb_context = get_nfs_open_context(ctx); + kref_init(&req->wb_kref); + return req; +} + +/** + * nfs_unlock_request - Unlock request and wake up sleepers. + * @req: + */ +void nfs_unlock_request(struct nfs_page *req) +{ + if (!NFS_WBACK_BUSY(req)) { + printk(KERN_ERR "NFS: Invalid unlock attempted\n"); + BUG(); + } + smp_mb__before_clear_bit(); + clear_bit(PG_BUSY, &req->wb_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&req->wb_flags, PG_BUSY); + nfs_release_request(req); +} + +/** + * nfs_set_page_tag_locked - Tag a request as locked + * @req: + */ +int nfs_set_page_tag_locked(struct nfs_page *req) +{ + if (!nfs_lock_request_dontget(req)) + return 0; + if (req->wb_page != NULL) + radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + return 1; +} + +/** + * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers + */ +void nfs_clear_page_tag_locked(struct nfs_page *req) +{ + if (req->wb_page != NULL) { + struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + nfs_unlock_request(req); + spin_unlock(&inode->i_lock); + } else + nfs_unlock_request(req); +} + +/** + * nfs_clear_request - Free up all resources allocated to the request + * @req: + * + * Release page and open context resources associated with a read/write + * request after it has completed. + */ +void nfs_clear_request(struct nfs_page *req) +{ + struct page *page = req->wb_page; + struct nfs_open_context *ctx = req->wb_context; + + if (page != NULL) { + page_cache_release(page); + req->wb_page = NULL; + } + if (ctx != NULL) { + put_nfs_open_context(ctx); + req->wb_context = NULL; + } +} + + +/** + * nfs_release_request - Release the count on an NFS read/write request + * @req: request to release + * + * Note: Should never be called with the spinlock held! + */ +static void nfs_free_request(struct kref *kref) +{ + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + + /* Release struct file and open context */ + nfs_clear_request(req); + nfs_page_free(req); +} + +void nfs_release_request(struct nfs_page *req) +{ + kref_put(&req->wb_kref, nfs_free_request); +} + +static int nfs_wait_bit_uninterruptible(void *word) +{ + io_schedule(); + return 0; +} + +/** + * nfs_wait_on_request - Wait for a request to complete. + * @req: request to wait upon. + * + * Interruptible by fatal signals only. + * The user is responsible for holding a count on the request. + */ +int +nfs_wait_on_request(struct nfs_page *req) +{ + return wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_uninterruptible, + TASK_UNINTERRUPTIBLE); +} + +/** + * nfs_pageio_init - initialise a page io descriptor + * @desc: pointer to descriptor + * @inode: pointer to inode + * @doio: pointer to io function + * @bsize: io block size + * @io_flags: extra parameters for the io function + */ +void nfs_pageio_init(struct nfs_pageio_descriptor *desc, + struct inode *inode, + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), + size_t bsize, + int io_flags) +{ + INIT_LIST_HEAD(&desc->pg_list); + desc->pg_bytes_written = 0; + desc->pg_count = 0; + desc->pg_bsize = bsize; + desc->pg_base = 0; + desc->pg_inode = inode; + desc->pg_doio = doio; + desc->pg_ioflags = io_flags; + desc->pg_error = 0; +} + +/** + * nfs_can_coalesce_requests - test two requests for compatibility + * @prev: pointer to nfs_page + * @req: pointer to nfs_page + * + * The nfs_page structures 'prev' and 'req' are compared to ensure that the + * page data area they describe is contiguous, and that their RPC + * credentials, NFSv4 open state, and lockowners are the same. + * + * Return 'true' if this is the case, else return 'false'. + */ +static int nfs_can_coalesce_requests(struct nfs_page *prev, + struct nfs_page *req) +{ + if (req->wb_context->cred != prev->wb_context->cred) + return 0; + if (req->wb_context->lockowner != prev->wb_context->lockowner) + return 0; + if (req->wb_context->state != prev->wb_context->state) + return 0; + if (req->wb_index != (prev->wb_index + 1)) + return 0; + if (req->wb_pgbase != 0) + return 0; + if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) + return 0; + return 1; +} + +/** + * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list. + * @desc: destination io descriptor + * @req: request + * + * Returns true if the request 'req' was successfully coalesced into the + * existing list of pages 'desc'. + */ +static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + size_t newlen = req->wb_bytes; + + if (desc->pg_count != 0) { + struct nfs_page *prev; + + /* + * FIXME: ideally we should be able to coalesce all requests + * that are not block boundary aligned, but currently this + * is problematic for the case of bsize < PAGE_CACHE_SIZE, + * since nfs_flush_multi and nfs_pagein_multi assume you + * can have only one struct nfs_page. + */ + if (desc->pg_bsize < PAGE_SIZE) + return 0; + newlen += desc->pg_count; + if (newlen > desc->pg_bsize) + return 0; + prev = nfs_list_entry(desc->pg_list.prev); + if (!nfs_can_coalesce_requests(prev, req)) + return 0; + } else + desc->pg_base = req->wb_pgbase; + nfs_list_remove_request(req); + nfs_list_add_request(req, &desc->pg_list); + desc->pg_count = newlen; + return 1; +} + +/* + * Helper for nfs_pageio_add_request and nfs_pageio_complete + */ +static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) +{ + if (!list_empty(&desc->pg_list)) { + int error = desc->pg_doio(desc->pg_inode, + &desc->pg_list, + nfs_page_array_len(desc->pg_base, + desc->pg_count), + desc->pg_count, + desc->pg_ioflags); + if (error < 0) + desc->pg_error = error; + else + desc->pg_bytes_written += desc->pg_count; + } + if (list_empty(&desc->pg_list)) { + desc->pg_count = 0; + desc->pg_base = 0; + } +} + +/** + * nfs_pageio_add_request - Attempt to coalesce a request into a page list. + * @desc: destination io descriptor + * @req: request + * + * Returns true if the request 'req' was successfully coalesced into the + * existing list of pages 'desc'. + */ +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + while (!nfs_pageio_do_add_request(desc, req)) { + nfs_pageio_doio(desc); + if (desc->pg_error < 0) + return 0; + } + return 1; +} + +/** + * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor + * @desc: pointer to io descriptor + */ +void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) +{ + nfs_pageio_doio(desc); +} + +/** + * nfs_pageio_cond_complete - Conditional I/O completion + * @desc: pointer to io descriptor + * @index: page index + * + * It is important to ensure that processes don't try to take locks + * on non-contiguous ranges of pages as that might deadlock. This + * function should be called before attempting to wait on a locked + * nfs_page. It will complete the I/O if the page index 'index' + * is not contiguous with the existing list of pages in 'desc'. + */ +void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) +{ + if (!list_empty(&desc->pg_list)) { + struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); + if (index != prev->wb_index + 1) + nfs_pageio_doio(desc); + } +} + +#define NFS_SCAN_MAXENTRIES 16 +/** + * nfs_scan_list - Scan a list for matching requests + * @nfsi: NFS inode + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. + * @tag: tag to scan for + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. + * You must be holding the inode's i_lock when calling this function + */ +int nfs_scan_list(struct nfs_inode *nfsi, + struct list_head *dst, pgoff_t idx_start, + unsigned int npages, int tag) +{ + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + pgoff_t idx_end; + int found, i; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + for (;;) { + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, + NFS_SCAN_MAXENTRIES, tag); + if (found <= 0) + break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + idx_start = req->wb_index + 1; + if (nfs_set_page_tag_locked(req)) { + kref_get(&req->wb_kref); + nfs_list_remove_request(req); + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, tag); + nfs_list_add_request(req, dst); + res++; + if (res == INT_MAX) + goto out; + } + } + /* for latency reduction */ + cond_resched_lock(&nfsi->vfs_inode.i_lock); + } +out: + return res; +} + +int __init nfs_init_nfspagecache(void) +{ + nfs_page_cachep = kmem_cache_create("nfs_page", + sizeof(struct nfs_page), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (nfs_page_cachep == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_nfspagecache(void) +{ + kmem_cache_destroy(nfs_page_cachep); +} + diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c new file mode 100644 index 00000000000..ef583854d8d --- /dev/null +++ b/fs/nfs/proc.c @@ -0,0 +1,666 @@ +/* + * linux/fs/nfs/proc.c + * + * Copyright (C) 1992, 1993, 1994 Rick Sladkey + * + * OS-independent nfs remote procedure call functions + * + * Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers + * so at last we can have decent(ish) throughput off a + * Sun server. + * + * Coding optimized and cleaned up by Florian La Roche. + * Note: Error returns are optimized for NFS_OK, which isn't translated via + * nfs_stat_to_errno(), but happens to be already the right return code. + * + * Also, the code currently doesn't check the size of the packet, when + * it decodes the packet. + * + * Feel free to fix it and mail me the diffs if it worries you. + * + * Completely rewritten to support the new RPC call interface; + * rewrote and moved the entire XDR stuff to xdr.c + * --Olaf Kirch June 1996 + * + * The code below initializes all auto variables explicitly, otherwise + * it will fail to work as a module (gcc generates a memset call for an + * incomplete struct). + */ + +#include <linux/types.h> +#include <linux/param.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/lockd/bind.h> +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PROC + +/* + * Bare-bones access to getattr: this is for nfs_read_super. + */ +static int +nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs_fattr *fattr = info->fattr; + struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; + int status; + + dprintk("%s: call getattr\n", __func__); + nfs_fattr_init(fattr); + status = rpc_call_sync(server->client, &msg, 0); + /* Retry with default authentication if different */ + if (status && server->nfs_client->cl_rpcclient != server->client) + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); + dprintk("%s: reply getattr: %d\n", __func__, status); + if (status) + return status; + dprintk("%s: call statfs\n", __func__); + msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; + msg.rpc_resp = &fsinfo; + status = rpc_call_sync(server->client, &msg, 0); + /* Retry with default authentication if different */ + if (status && server->nfs_client->cl_rpcclient != server->client) + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); + dprintk("%s: reply statfs: %d\n", __func__, status); + if (status) + return status; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->lease_time = 0; + return 0; +} + +/* + * One function for each procedure in the NFS protocol. + */ +static int +nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; + int status; + + dprintk("NFS call getattr\n"); + nfs_fattr_init(fattr); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply getattr: %d\n", status); + return status; +} + +static int +nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) +{ + struct inode *inode = dentry->d_inode; + struct nfs_sattrargs arg = { + .fh = NFS_FH(inode), + .sattr = sattr + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; + int status; + + /* Mask out the non-modebit related stuff from attr->ia_mode */ + sattr->ia_mode &= S_IALLUGO; + + dprintk("NFS call setattr\n"); + if (sattr->ia_valid & ATTR_FILE) + msg.rpc_cred = nfs_file_cred(sattr->ia_file); + nfs_fattr_init(fattr); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); + dprintk("NFS reply setattr: %d\n", status); + return status; +} + +static int +nfs_proc_lookup(struct inode *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + struct nfs_diropok res = { + .fh = fhandle, + .fattr = fattr + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + dprintk("NFS call lookup %s\n", name->name); + nfs_fattr_init(fattr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + dprintk("NFS reply lookup: %d\n", status); + return status; +} + +static int nfs_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) +{ + struct nfs_readlinkargs args = { + .fh = NFS_FH(inode), + .pgbase = pgbase, + .pglen = pglen, + .pages = &page + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_READLINK], + .rpc_argp = &args, + }; + int status; + + dprintk("NFS call readlink\n"); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + dprintk("NFS reply readlink: %d\n", status); + return status; +} + +static int +nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + int flags, struct nameidata *nd) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs_createargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr + }; + struct nfs_diropok res = { + .fh = &fhandle, + .fattr = &fattr + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + nfs_fattr_init(&fattr); + dprintk("NFS call create %s\n", dentry->d_name.name); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_mark_for_revalidate(dir); + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); + dprintk("NFS reply create: %d\n", status); + return status; +} + +/* + * In NFSv2, mknod is grafted onto the create call. + */ +static int +nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + dev_t rdev) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs_createargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr + }; + struct nfs_diropok res = { + .fh = &fhandle, + .fattr = &fattr + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status, mode; + + dprintk("NFS call mknod %s\n", dentry->d_name.name); + + mode = sattr->ia_mode; + if (S_ISFIFO(mode)) { + sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR; + sattr->ia_valid &= ~ATTR_SIZE; + } else if (S_ISCHR(mode) || S_ISBLK(mode)) { + sattr->ia_valid |= ATTR_SIZE; + sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */ + } + + nfs_fattr_init(&fattr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_mark_for_revalidate(dir); + + if (status == -EINVAL && S_ISFIFO(mode)) { + sattr->ia_mode = mode; + nfs_fattr_init(&fattr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + } + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); + dprintk("NFS reply mknod: %d\n", status); + return status; +} + +static int +nfs_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs_removeargs arg = { + .fh = NFS_FH(dir), + .name.len = name->len, + .name.name = name->name, + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], + .rpc_argp = &arg, + }; + int status; + + dprintk("NFS call remove %s\n", name->name); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_mark_for_revalidate(dir); + + dprintk("NFS reply remove: %d\n", status); + return status; +} + +static void +nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +{ + msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; +} + +static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) +{ + nfs_mark_for_revalidate(dir); + return 1; +} + +static int +nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs_renameargs arg = { + .fromfh = NFS_FH(old_dir), + .fromname = old_name->name, + .fromlen = old_name->len, + .tofh = NFS_FH(new_dir), + .toname = new_name->name, + .tolen = new_name->len + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_RENAME], + .rpc_argp = &arg, + }; + int status; + + dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); + nfs_mark_for_revalidate(old_dir); + nfs_mark_for_revalidate(new_dir); + dprintk("NFS reply rename: %d\n", status); + return status; +} + +static int +nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs_linkargs arg = { + .fromfh = NFS_FH(inode), + .tofh = NFS_FH(dir), + .toname = name->name, + .tolen = name->len + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_LINK], + .rpc_argp = &arg, + }; + int status; + + dprintk("NFS call link %s\n", name->name); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_mark_for_revalidate(inode); + nfs_mark_for_revalidate(dir); + dprintk("NFS reply link: %d\n", status); + return status; +} + +static int +nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, + unsigned int len, struct iattr *sattr) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs_symlinkargs arg = { + .fromfh = NFS_FH(dir), + .fromname = dentry->d_name.name, + .fromlen = dentry->d_name.len, + .pages = &page, + .pathlen = len, + .sattr = sattr + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK], + .rpc_argp = &arg, + }; + int status; + + if (len > NFS2_MAXPATHLEN) + return -ENAMETOOLONG; + + dprintk("NFS call symlink %s\n", dentry->d_name.name); + + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_mark_for_revalidate(dir); + + /* + * V2 SYMLINK requests don't return any attributes. Setting the + * filehandle size to zero indicates to nfs_instantiate that it + * should fill in the data with a LOOKUP call on the wire. + */ + if (status == 0) { + nfs_fattr_init(&fattr); + fhandle.size = 0; + status = nfs_instantiate(dentry, &fhandle, &fattr); + } + + dprintk("NFS reply symlink: %d\n", status); + return status; +} + +static int +nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) +{ + struct nfs_fh fhandle; + struct nfs_fattr fattr; + struct nfs_createargs arg = { + .fh = NFS_FH(dir), + .name = dentry->d_name.name, + .len = dentry->d_name.len, + .sattr = sattr + }; + struct nfs_diropok res = { + .fh = &fhandle, + .fattr = &fattr + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_MKDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + dprintk("NFS call mkdir %s\n", dentry->d_name.name); + nfs_fattr_init(&fattr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_mark_for_revalidate(dir); + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); + dprintk("NFS reply mkdir: %d\n", status); + return status; +} + +static int +nfs_proc_rmdir(struct inode *dir, struct qstr *name) +{ + struct nfs_diropargs arg = { + .fh = NFS_FH(dir), + .name = name->name, + .len = name->len + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_RMDIR], + .rpc_argp = &arg, + }; + int status; + + dprintk("NFS call rmdir %s\n", name->name); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_mark_for_revalidate(dir); + dprintk("NFS reply rmdir: %d\n", status); + return status; +} + +/* + * The READDIR implementation is somewhat hackish - we pass a temporary + * buffer to the encode function, which installs it in the receive + * the receive iovec. The decode function just parses the reply to make + * sure it is syntactically correct; the entries itself are decoded + * from nfs_readdir by calling the decode_entry function directly. + */ +static int +nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) +{ + struct inode *dir = dentry->d_inode; + struct nfs_readdirargs arg = { + .fh = NFS_FH(dir), + .cookie = cookie, + .count = count, + .pages = &page, + }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_READDIR], + .rpc_argp = &arg, + .rpc_cred = cred, + }; + int status; + + dprintk("NFS call readdir %d\n", (unsigned int)cookie); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + nfs_invalidate_atime(dir); + + dprintk("NFS reply readdir: %d\n", status); + return status; +} + +static int +nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsstat *stat) +{ + struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_STATFS], + .rpc_argp = fhandle, + .rpc_resp = &fsinfo, + }; + int status; + + dprintk("NFS call statfs\n"); + nfs_fattr_init(stat->fattr); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply statfs: %d\n", status); + if (status) + goto out; + stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize; + stat->fbytes = (u64)fsinfo.bfree * fsinfo.bsize; + stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize; + stat->tfiles = 0; + stat->ffiles = 0; + stat->afiles = 0; +out: + return status; +} + +static int +nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_STATFS], + .rpc_argp = fhandle, + .rpc_resp = &fsinfo, + }; + int status; + + dprintk("NFS call fsinfo\n"); + nfs_fattr_init(info->fattr); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply fsinfo: %d\n", status); + if (status) + goto out; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->lease_time = 0; +out: + return status; +} + +static int +nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + info->max_link = 0; + info->max_namelen = NFS2_MAXNAMLEN; + return 0; +} + +static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) +{ + nfs_invalidate_atime(data->inode); + if (task->tk_status >= 0) { + nfs_refresh_inode(data->inode, data->res.fattr); + /* Emulate the eof flag, which isn't normally needed in NFSv2 + * as it is guaranteed to always return the file attributes + */ + if (data->args.offset + data->args.count >= data->res.fattr->size) + data->res.eof = 1; + } + return 0; +} + +static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +{ + msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; +} + +static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) +{ + if (task->tk_status >= 0) + nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); + return 0; +} + +static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +{ + /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ + data->args.stable = NFS_FILE_SYNC; + msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; +} + +static void +nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +{ + BUG(); +} + +static int +nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + + return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); +} + +/* Helper functions for NFS lock bounds checking */ +#define NFS_LOCK32_OFFSET_MAX ((__s32)0x7fffffffUL) +static int nfs_lock_check_bounds(const struct file_lock *fl) +{ + __s32 start, end; + + start = (__s32)fl->fl_start; + if ((loff_t)start != fl->fl_start) + goto out_einval; + + if (fl->fl_end != OFFSET_MAX) { + end = (__s32)fl->fl_end; + if ((loff_t)end != fl->fl_end) + goto out_einval; + } else + end = NFS_LOCK32_OFFSET_MAX; + + if (start < 0 || start > end) + goto out_einval; + return 0; +out_einval: + return -EINVAL; +} + +const struct nfs_rpc_ops nfs_v2_clientops = { + .version = 2, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, + .getroot = nfs_proc_get_root, + .getattr = nfs_proc_getattr, + .setattr = nfs_proc_setattr, + .lookup = nfs_proc_lookup, + .access = NULL, /* access */ + .readlink = nfs_proc_readlink, + .create = nfs_proc_create, + .remove = nfs_proc_remove, + .unlink_setup = nfs_proc_unlink_setup, + .unlink_done = nfs_proc_unlink_done, + .rename = nfs_proc_rename, + .link = nfs_proc_link, + .symlink = nfs_proc_symlink, + .mkdir = nfs_proc_mkdir, + .rmdir = nfs_proc_rmdir, + .readdir = nfs_proc_readdir, + .mknod = nfs_proc_mknod, + .statfs = nfs_proc_statfs, + .fsinfo = nfs_proc_fsinfo, + .pathconf = nfs_proc_pathconf, + .decode_dirent = nfs_decode_dirent, + .read_setup = nfs_proc_read_setup, + .read_done = nfs_read_done, + .write_setup = nfs_proc_write_setup, + .write_done = nfs_write_done, + .commit_setup = nfs_proc_commit_setup, + .lock = nfs_proc_lock, + .lock_check_bounds = nfs_lock_check_bounds, + .close_context = nfs_close_context, +}; diff --git a/fs/nfs/read.c b/fs/nfs/read.c new file mode 100644 index 00000000000..db9b360ae19 --- /dev/null +++ b/fs/nfs/read.c @@ -0,0 +1,664 @@ +/* + * linux/fs/nfs/read.c + * + * Block I/O for NFS + * + * Partial copy of Linus' read cache modifications to fs/nfs/file.c + * modified for async RPC by okir@monad.swb.de + */ + +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> + +#include <asm/system.h> + +#include "nfs4_fs.h" +#include "internal.h" +#include "iostat.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + +static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); +static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); +static const struct rpc_call_ops nfs_read_partial_ops; +static const struct rpc_call_ops nfs_read_full_ops; + +static struct kmem_cache *nfs_rdata_cachep; +static mempool_t *nfs_rdata_mempool; + +#define MIN_POOL_READ (32) + +struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +{ + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + p->npages = pagecount; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; + else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!p->pagevec) { + mempool_free(p, nfs_rdata_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_readdata_free(struct nfs_read_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_rdata_mempool); +} + +static void nfs_readdata_release(struct nfs_read_data *rdata) +{ + put_nfs_open_context(rdata->args.context); + nfs_readdata_free(rdata); +} + +static +int nfs_return_empty_page(struct page *page) +{ + zero_user(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + unlock_page(page); + return 0; +} + +static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) +{ + unsigned int remainder = data->args.count - data->res.count; + unsigned int base = data->args.pgbase + data->res.count; + unsigned int pglen; + struct page **pages; + + if (data->res.eof == 0 || remainder == 0) + return; + /* + * Note: "remainder" can never be negative, since we check for + * this in the XDR code. + */ + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + pglen = PAGE_CACHE_SIZE - base; + for (;;) { + if (remainder <= pglen) { + zero_user(*pages, base, remainder); + break; + } + zero_user(*pages, base, pglen); + pages++; + remainder -= pglen; + pglen = PAGE_CACHE_SIZE; + base = 0; + } +} + +int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, + struct page *page) +{ + LIST_HEAD(one_request); + struct nfs_page *new; + unsigned int len; + + len = nfs_page_length(page); + if (len == 0) + return nfs_return_empty_page(page); + new = nfs_create_request(ctx, inode, page, 0, len); + if (IS_ERR(new)) { + unlock_page(page); + return PTR_ERR(new); + } + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + + nfs_list_add_request(new, &one_request); + if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) + nfs_pagein_multi(inode, &one_request, 1, len, 0); + else + nfs_pagein_one(inode, &one_request, 1, len, 0); + return 0; +} + +static void nfs_readpage_release(struct nfs_page *req) +{ + struct inode *d_inode = req->wb_context->path.dentry->d_inode; + + if (PageUptodate(req->wb_page)) + nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + + unlock_page(req->wb_page); + + dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + nfs_clear_request(req); + nfs_release_request(req); +} + +/* + * Set up the NFS read request struct + */ +static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + const struct rpc_call_ops *call_ops, + unsigned int count, unsigned int offset) +{ + struct inode *inode = req->wb_context->path.dentry->d_inode; + int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = req->wb_context->cred, + }; + struct rpc_task_setup task_setup_data = { + .task = &data->task, + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = call_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC | swap_flags, + }; + + data->req = req; + data->inode = inode; + data->cred = msg.rpc_cred; + + data->args.fh = NFS_FH(inode); + data->args.offset = req_offset(req) + offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.pages = data->pagevec; + data->args.count = count; + data->args.context = get_nfs_open_context(req->wb_context); + + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.eof = 0; + nfs_fattr_init(&data->fattr); + + /* Set up the initial task struct. */ + NFS_PROTO(inode)->read_setup(data, &msg); + + dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + count, + (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; +} + +static void +nfs_async_read_error(struct list_head *head) +{ + struct nfs_page *req; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + SetPageError(req->wb_page); + nfs_readpage_release(req); + } +} + +/* + * Generate multiple requests to fill a single page. + * + * We optimize to reduce the number of read operations on the wire. If we + * detect that we're reading a page, or an area of a page, that is past the + * end of file, we do not generate NFS read operations but just clear the + * parts of the page that would have come back zero from the server anyway. + * + * We rely on the cached value of i_size to make this determination; another + * client can fill pages on the server past our cached end-of-file, but we + * won't see the new data until our attribute cache is updated. This is more + * or less conventional NFS client behavior. + */ +static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) +{ + struct nfs_page *req = nfs_list_entry(head->next); + struct page *page = req->wb_page; + struct nfs_read_data *data; + size_t rsize = NFS_SERVER(inode)->rsize, nbytes; + unsigned int offset; + int requests = 0; + int ret = 0; + LIST_HEAD(list); + + nfs_list_remove_request(req); + + nbytes = count; + do { + size_t len = min(nbytes,rsize); + + data = nfs_readdata_alloc(1); + if (!data) + goto out_bad; + list_add(&data->pages, &list); + requests++; + nbytes -= len; + } while(nbytes != 0); + atomic_set(&req->wb_complete, requests); + + ClearPageError(page); + offset = 0; + nbytes = count; + do { + int ret2; + + data = list_entry(list.next, struct nfs_read_data, pages); + list_del_init(&data->pages); + + data->pagevec[0] = page; + + if (nbytes < rsize) + rsize = nbytes; + ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + rsize, offset); + if (ret == 0) + ret = ret2; + offset += rsize; + nbytes -= rsize; + } while (nbytes != 0); + + return ret; + +out_bad: + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_read_data, pages); + list_del(&data->pages); + nfs_readdata_free(data); + } + SetPageError(page); + nfs_readpage_release(req); + return -ENOMEM; +} + +static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) +{ + struct nfs_page *req; + struct page **pages; + struct nfs_read_data *data; + int ret = -ENOMEM; + + data = nfs_readdata_alloc(npages); + if (!data) + goto out_bad; + + pages = data->pagevec; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + ClearPageError(req->wb_page); + *pages++ = req->wb_page; + } + req = nfs_list_entry(data->pages.next); + + return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); +out_bad: + nfs_async_read_error(head); + return ret; +} + +/* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). + */ +int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) +{ + int status; + + dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, + task->tk_status); + + status = NFS_PROTO(data->inode)->read_done(task, data); + if (status != 0) + return status; + + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); + + if (task->tk_status == -ESTALE) { + set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); + nfs_mark_for_revalidate(data->inode); + } + return 0; +} + +static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) +{ + struct nfs_readargs *argp = &data->args; + struct nfs_readres *resp = &data->res; + + if (resp->eof || resp->count == argp->count) + return; + + /* This is a short read! */ + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); + /* Has the server at least made some progress? */ + if (resp->count == 0) + return; + + /* Yes, so retry the read at the end of the data */ + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); +} + +/* + * Handle a read reply that fills part of a page. + */ +static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) +{ + struct nfs_read_data *data = calldata; + + if (nfs_readpage_result(task, data) != 0) + return; + if (task->tk_status < 0) + return; + + nfs_readpage_truncate_uninitialised_page(data); + nfs_readpage_retry(task, data); +} + +static void nfs_readpage_release_partial(void *calldata) +{ + struct nfs_read_data *data = calldata; + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + int status = data->task.tk_status; + + if (status < 0) + SetPageError(page); + + if (atomic_dec_and_test(&req->wb_complete)) { + if (!PageError(page)) + SetPageUptodate(page); + nfs_readpage_release(req); + } + nfs_readdata_release(calldata); +} + +#if defined(CONFIG_NFS_V4_1) +void nfs_read_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_read_data *data = calldata; + + if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, + &data->args.seq_args, &data->res.seq_res, + 0, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + +static const struct rpc_call_ops nfs_read_partial_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs_readpage_result_partial, + .rpc_release = nfs_readpage_release_partial, +}; + +static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) +{ + unsigned int count = data->res.count; + unsigned int base = data->args.pgbase; + struct page **pages; + + if (data->res.eof) + count = data->args.count; + if (unlikely(count == 0)) + return; + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + count += base; + for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) + SetPageUptodate(*pages); + if (count == 0) + return; + /* Was this a short read? */ + if (data->res.eof || data->res.count == data->args.count) + SetPageUptodate(*pages); +} + +/* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). + */ +static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) +{ + struct nfs_read_data *data = calldata; + + if (nfs_readpage_result(task, data) != 0) + return; + if (task->tk_status < 0) + return; + /* + * Note: nfs_readpage_retry may change the values of + * data->args. In the multi-page case, we therefore need + * to ensure that we call nfs_readpage_set_pages_uptodate() + * first. + */ + nfs_readpage_truncate_uninitialised_page(data); + nfs_readpage_set_pages_uptodate(data); + nfs_readpage_retry(task, data); +} + +static void nfs_readpage_release_full(void *calldata) +{ + struct nfs_read_data *data = calldata; + + while (!list_empty(&data->pages)) { + struct nfs_page *req = nfs_list_entry(data->pages.next); + + nfs_list_remove_request(req); + nfs_readpage_release(req); + } + nfs_readdata_release(calldata); +} + +static const struct rpc_call_ops nfs_read_full_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs_readpage_result_full, + .rpc_release = nfs_readpage_release_full, +}; + +/* + * Read a page over NFS. + * We read the page synchronously in the following case: + * - The error flag is set for this page. This happens only when a + * previous async read operation failed. + */ +int nfs_readpage(struct file *file, struct page *page) +{ + struct nfs_open_context *ctx; + struct inode *inode = page->mapping->host; + int error; + + dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", + page, PAGE_CACHE_SIZE, page->index); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + nfs_add_stats(inode, NFSIOS_READPAGES, 1); + + /* + * Try to flush any pending writes to the file.. + * + * NOTE! Because we own the page lock, there cannot + * be any new pending writes generated at this point + * for this page (other pages can be written to). + */ + error = nfs_wb_page(inode, page); + if (error) + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; + + error = -ESTALE; + if (NFS_STALE(inode)) + goto out_unlock; + + if (file == NULL) { + error = -EBADF; + ctx = nfs_find_open_context(inode, NULL, FMODE_READ); + if (ctx == NULL) + goto out_unlock; + } else + ctx = get_nfs_open_context(nfs_file_open_context(file)); + + if (!IS_SYNC(inode)) { + error = nfs_readpage_from_fscache(ctx, inode, page); + if (error == 0) + goto out; + } + + error = nfs_readpage_async(ctx, inode, page); + +out: + put_nfs_open_context(ctx); + return error; +out_unlock: + unlock_page(page); + return error; +} + +struct nfs_readdesc { + struct nfs_pageio_descriptor *pgio; + struct nfs_open_context *ctx; +}; + +static int +readpage_async_filler(void *data, struct page *page) +{ + struct nfs_readdesc *desc = (struct nfs_readdesc *)data; + struct inode *inode = page->mapping->host; + struct nfs_page *new; + unsigned int len; + int error; + + len = nfs_page_length(page); + if (len == 0) + return nfs_return_empty_page(page); + + new = nfs_create_request(desc->ctx, inode, page, 0, len); + if (IS_ERR(new)) + goto out_error; + + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + if (!nfs_pageio_add_request(desc->pgio, new)) { + error = desc->pgio->pg_error; + goto out_unlock; + } + return 0; +out_error: + error = PTR_ERR(new); + SetPageError(page); +out_unlock: + unlock_page(page); + return error; +} + +int nfs_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + struct nfs_pageio_descriptor pgio; + struct nfs_readdesc desc = { + .pgio = &pgio, + }; + struct inode *inode = mapping->host; + struct nfs_server *server = NFS_SERVER(inode); + size_t rsize = server->rsize; + unsigned long npages; + int ret = -ESTALE; + + dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + nr_pages); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); + + if (NFS_STALE(inode)) + goto out; + + if (filp == NULL) { + desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); + if (desc.ctx == NULL) + return -EBADF; + } else + desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); + + /* attempt to read as many of the pages as possible from the cache + * - this returns -ENOBUFS immediately if the cookie is negative + */ + ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, + pages, &nr_pages); + if (ret == 0) + goto read_complete; /* all pages were read */ + + if (rsize < PAGE_CACHE_SIZE) + nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); + else + nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); + + ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); + + nfs_pageio_complete(&pgio); + npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + nfs_add_stats(inode, NFSIOS_READPAGES, npages); +read_complete: + put_nfs_open_context(desc.ctx); +out: + return ret; +} + +int __init nfs_init_readpagecache(void) +{ + nfs_rdata_cachep = kmem_cache_create("nfs_read_data", + sizeof(struct nfs_read_data), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (nfs_rdata_cachep == NULL) + return -ENOMEM; + + nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, + nfs_rdata_cachep); + if (nfs_rdata_mempool == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_readpagecache(void) +{ + mempool_destroy(nfs_rdata_mempool); + kmem_cache_destroy(nfs_rdata_cachep); +} diff --git a/fs/nfs/super.c b/fs/nfs/super.c new file mode 100644 index 00000000000..f1afee4eea7 --- /dev/null +++ b/fs/nfs/super.c @@ -0,0 +1,2969 @@ +/* + * linux/fs/nfs/super.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs superblock handling functions + * + * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some + * experimental NFS changes. Modularisation taken straight from SYS5 fs. + * + * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. + * J.S.Peatfield@damtp.cam.ac.uk + * + * Split from inode.c by David Howells <dhowells@redhat.com> + * + * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a + * particular server are held in the same superblock + * - NFS superblocks can have several effective roots to the dentry tree + * - directory type roots are spliced into the tree when a path from one root reaches the root + * of another (see nfs_lookup()) + */ + +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/stats.h> +#include <linux/sunrpc/metrics.h> +#include <linux/sunrpc/xprtsock.h> +#include <linux/sunrpc/xprtrdma.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include <linux/lockd/bind.h> +#include <linux/smp_lock.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/mnt_namespace.h> +#include <linux/namei.h> +#include <linux/nfs_idmap.h> +#include <linux/vfs.h> +#include <linux/inet.h> +#include <linux/in6.h> +#include <net/ipv6.h> +#include <linux/netdevice.h> +#include <linux/nfs_xdr.h> +#include <linux/magic.h> +#include <linux/parser.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +enum { + /* Mount options that take no arguments */ + Opt_soft, Opt_hard, + Opt_posix, Opt_noposix, + Opt_cto, Opt_nocto, + Opt_ac, Opt_noac, + Opt_lock, Opt_nolock, + Opt_v2, Opt_v3, Opt_v4, + Opt_udp, Opt_tcp, Opt_rdma, + Opt_acl, Opt_noacl, + Opt_rdirplus, Opt_nordirplus, + Opt_sharecache, Opt_nosharecache, + Opt_resvport, Opt_noresvport, + Opt_fscache, Opt_nofscache, + + /* Mount options that take integer arguments */ + Opt_port, + Opt_rsize, Opt_wsize, Opt_bsize, + Opt_timeo, Opt_retrans, + Opt_acregmin, Opt_acregmax, + Opt_acdirmin, Opt_acdirmax, + Opt_actimeo, + Opt_namelen, + Opt_mountport, + Opt_mountvers, + Opt_nfsvers, + Opt_minorversion, + + /* Mount options that take string arguments */ + Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, + Opt_addr, Opt_mountaddr, Opt_clientaddr, + Opt_lookupcache, + Opt_fscache_uniq, + + /* Special mount options */ + Opt_userspace, Opt_deprecated, Opt_sloppy, + + Opt_err +}; + +static const match_table_t nfs_mount_option_tokens = { + { Opt_userspace, "bg" }, + { Opt_userspace, "fg" }, + { Opt_userspace, "retry=%s" }, + + { Opt_sloppy, "sloppy" }, + + { Opt_soft, "soft" }, + { Opt_hard, "hard" }, + { Opt_deprecated, "intr" }, + { Opt_deprecated, "nointr" }, + { Opt_posix, "posix" }, + { Opt_noposix, "noposix" }, + { Opt_cto, "cto" }, + { Opt_nocto, "nocto" }, + { Opt_ac, "ac" }, + { Opt_noac, "noac" }, + { Opt_lock, "lock" }, + { Opt_nolock, "nolock" }, + { Opt_v2, "v2" }, + { Opt_v3, "v3" }, + { Opt_v4, "v4" }, + { Opt_udp, "udp" }, + { Opt_tcp, "tcp" }, + { Opt_rdma, "rdma" }, + { Opt_acl, "acl" }, + { Opt_noacl, "noacl" }, + { Opt_rdirplus, "rdirplus" }, + { Opt_nordirplus, "nordirplus" }, + { Opt_sharecache, "sharecache" }, + { Opt_nosharecache, "nosharecache" }, + { Opt_resvport, "resvport" }, + { Opt_noresvport, "noresvport" }, + { Opt_fscache, "fsc" }, + { Opt_fscache_uniq, "fsc=%s" }, + { Opt_nofscache, "nofsc" }, + + { Opt_port, "port=%s" }, + { Opt_rsize, "rsize=%s" }, + { Opt_wsize, "wsize=%s" }, + { Opt_bsize, "bsize=%s" }, + { Opt_timeo, "timeo=%s" }, + { Opt_retrans, "retrans=%s" }, + { Opt_acregmin, "acregmin=%s" }, + { Opt_acregmax, "acregmax=%s" }, + { Opt_acdirmin, "acdirmin=%s" }, + { Opt_acdirmax, "acdirmax=%s" }, + { Opt_actimeo, "actimeo=%s" }, + { Opt_namelen, "namlen=%s" }, + { Opt_mountport, "mountport=%s" }, + { Opt_mountvers, "mountvers=%s" }, + { Opt_nfsvers, "nfsvers=%s" }, + { Opt_nfsvers, "vers=%s" }, + { Opt_minorversion, "minorversion=%s" }, + + { Opt_sec, "sec=%s" }, + { Opt_proto, "proto=%s" }, + { Opt_mountproto, "mountproto=%s" }, + { Opt_addr, "addr=%s" }, + { Opt_clientaddr, "clientaddr=%s" }, + { Opt_mounthost, "mounthost=%s" }, + { Opt_mountaddr, "mountaddr=%s" }, + + { Opt_lookupcache, "lookupcache=%s" }, + + { Opt_err, NULL } +}; + +enum { + Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, + + Opt_xprt_err +}; + +static const match_table_t nfs_xprt_protocol_tokens = { + { Opt_xprt_udp, "udp" }, + { Opt_xprt_udp6, "udp6" }, + { Opt_xprt_tcp, "tcp" }, + { Opt_xprt_tcp6, "tcp6" }, + { Opt_xprt_rdma, "rdma" }, + + { Opt_xprt_err, NULL } +}; + +enum { + Opt_sec_none, Opt_sec_sys, + Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, + Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, + Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, + + Opt_sec_err +}; + +static const match_table_t nfs_secflavor_tokens = { + { Opt_sec_none, "none" }, + { Opt_sec_none, "null" }, + { Opt_sec_sys, "sys" }, + + { Opt_sec_krb5, "krb5" }, + { Opt_sec_krb5i, "krb5i" }, + { Opt_sec_krb5p, "krb5p" }, + + { Opt_sec_lkey, "lkey" }, + { Opt_sec_lkeyi, "lkeyi" }, + { Opt_sec_lkeyp, "lkeyp" }, + + { Opt_sec_spkm, "spkm3" }, + { Opt_sec_spkmi, "spkm3i" }, + { Opt_sec_spkmp, "spkm3p" }, + + { Opt_sec_err, NULL } +}; + +enum { + Opt_lookupcache_all, Opt_lookupcache_positive, + Opt_lookupcache_none, + + Opt_lookupcache_err +}; + +static match_table_t nfs_lookupcache_tokens = { + { Opt_lookupcache_all, "all" }, + { Opt_lookupcache_positive, "pos" }, + { Opt_lookupcache_positive, "positive" }, + { Opt_lookupcache_none, "none" }, + + { Opt_lookupcache_err, NULL } +}; + + +static void nfs_umount_begin(struct super_block *); +static int nfs_statfs(struct dentry *, struct kstatfs *); +static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); +static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); +static int nfs_xdev_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static void nfs_put_super(struct super_block *); +static void nfs_kill_super(struct super_block *); +static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); + +static struct file_system_type nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_get_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs_xdev_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_xdev_get_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static const struct super_operations nfs_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .clear_inode = nfs_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, +}; + +#ifdef CONFIG_NFS_V4 +static int nfs4_validate_text_mount_data(void *options, + struct nfs_parsed_mount_data *args, const char *dev_name); +static int nfs4_try_mount(int flags, const char *dev_name, + struct nfs_parsed_mount_data *data, struct vfsmount *mnt); +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_remote_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static void nfs4_kill_super(struct super_block *sb); + +static struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct file_system_type nfs4_remote_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_remote_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs4_xdev_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_xdev_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct file_system_type nfs4_remote_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_remote_referral_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs4_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_referral_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static const struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, +}; +#endif + +static struct shrinker acl_shrinker = { + .shrink = nfs_access_cache_shrinker, + .seeks = DEFAULT_SEEKS, +}; + +/* + * Register the NFS filesystems + */ +int __init register_nfs_fs(void) +{ + int ret; + + ret = register_filesystem(&nfs_fs_type); + if (ret < 0) + goto error_0; + + ret = nfs_register_sysctl(); + if (ret < 0) + goto error_1; +#ifdef CONFIG_NFS_V4 + ret = register_filesystem(&nfs4_fs_type); + if (ret < 0) + goto error_2; +#endif + register_shrinker(&acl_shrinker); + return 0; + +#ifdef CONFIG_NFS_V4 +error_2: + nfs_unregister_sysctl(); +#endif +error_1: + unregister_filesystem(&nfs_fs_type); +error_0: + return ret; +} + +/* + * Unregister the NFS filesystems + */ +void __exit unregister_nfs_fs(void) +{ + unregister_shrinker(&acl_shrinker); +#ifdef CONFIG_NFS_V4 + unregister_filesystem(&nfs4_fs_type); +#endif + nfs_unregister_sysctl(); + unregister_filesystem(&nfs_fs_type); +} + +void nfs_sb_active(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + if (atomic_inc_return(&server->active) == 1) + atomic_inc(&sb->s_active); +} + +void nfs_sb_deactive(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + if (atomic_dec_and_test(&server->active)) + deactivate_super(sb); +} + +/* + * Deliver file system statistics to userspace + */ +static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct nfs_server *server = NFS_SB(dentry->d_sb); + unsigned char blockbits; + unsigned long blockres; + struct nfs_fh *fh = NFS_FH(dentry->d_inode); + struct nfs_fattr fattr; + struct nfs_fsstat res = { + .fattr = &fattr, + }; + int error; + + error = server->nfs_client->rpc_ops->statfs(server, fh, &res); + if (error < 0) + goto out_err; + buf->f_type = NFS_SUPER_MAGIC; + + /* + * Current versions of glibc do not correctly handle the + * case where f_frsize != f_bsize. Eventually we want to + * report the value of wtmult in this field. + */ + buf->f_frsize = dentry->d_sb->s_blocksize; + + /* + * On most *nix systems, f_blocks, f_bfree, and f_bavail + * are reported in units of f_frsize. Linux hasn't had + * an f_frsize field in its statfs struct until recently, + * thus historically Linux's sys_statfs reports these + * fields in units of f_bsize. + */ + buf->f_bsize = dentry->d_sb->s_blocksize; + blockbits = dentry->d_sb->s_blocksize_bits; + blockres = (1 << blockbits) - 1; + buf->f_blocks = (res.tbytes + blockres) >> blockbits; + buf->f_bfree = (res.fbytes + blockres) >> blockbits; + buf->f_bavail = (res.abytes + blockres) >> blockbits; + + buf->f_files = res.tfiles; + buf->f_ffree = res.afiles; + + buf->f_namelen = server->namelen; + + return 0; + + out_err: + dprintk("%s: statfs error = %d\n", __func__, -error); + return error; +} + +/* + * Map the security flavour number to a name + */ +static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) +{ + static const struct { + rpc_authflavor_t flavour; + const char *str; + } sec_flavours[] = { + { RPC_AUTH_NULL, "null" }, + { RPC_AUTH_UNIX, "sys" }, + { RPC_AUTH_GSS_KRB5, "krb5" }, + { RPC_AUTH_GSS_KRB5I, "krb5i" }, + { RPC_AUTH_GSS_KRB5P, "krb5p" }, + { RPC_AUTH_GSS_LKEY, "lkey" }, + { RPC_AUTH_GSS_LKEYI, "lkeyi" }, + { RPC_AUTH_GSS_LKEYP, "lkeyp" }, + { RPC_AUTH_GSS_SPKM, "spkm" }, + { RPC_AUTH_GSS_SPKMI, "spkmi" }, + { RPC_AUTH_GSS_SPKMP, "spkmp" }, + { UINT_MAX, "unknown" } + }; + int i; + + for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) { + if (sec_flavours[i].flavour == flavour) + break; + } + return sec_flavours[i].str; +} + +static void nfs_show_mountd_netid(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ + struct sockaddr *sap = (struct sockaddr *) &nfss->mountd_address; + + seq_printf(m, ",mountproto="); + switch (sap->sa_family) { + case AF_INET: + switch (nfss->mountd_protocol) { + case IPPROTO_UDP: + seq_printf(m, RPCBIND_NETID_UDP); + break; + case IPPROTO_TCP: + seq_printf(m, RPCBIND_NETID_TCP); + break; + default: + if (showdefaults) + seq_printf(m, "auto"); + } + break; + case AF_INET6: + switch (nfss->mountd_protocol) { + case IPPROTO_UDP: + seq_printf(m, RPCBIND_NETID_UDP6); + break; + case IPPROTO_TCP: + seq_printf(m, RPCBIND_NETID_TCP6); + break; + default: + if (showdefaults) + seq_printf(m, "auto"); + } + break; + default: + if (showdefaults) + seq_printf(m, "auto"); + } +} + +static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ + struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address; + + switch (sap->sa_family) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + seq_printf(m, ",mountaddr=%pI4", &sin->sin_addr.s_addr); + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + seq_printf(m, ",mountaddr=%pI6c", &sin6->sin6_addr); + break; + } + default: + if (showdefaults) + seq_printf(m, ",mountaddr=unspecified"); + } + + if (nfss->mountd_version || showdefaults) + seq_printf(m, ",mountvers=%u", nfss->mountd_version); + if (nfss->mountd_port || showdefaults) + seq_printf(m, ",mountport=%u", nfss->mountd_port); + + nfs_show_mountd_netid(m, nfss, showdefaults); +} + +/* + * Describe the mount options in force on this server representation + */ +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ + static const struct proc_nfs_info { + int flag; + const char *str; + const char *nostr; + } nfs_info[] = { + { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_POSIX, ",posix", "" }, + { NFS_MOUNT_NOCTO, ",nocto", "" }, + { NFS_MOUNT_NOAC, ",noac", "" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, + { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, + { 0, NULL, NULL } + }; + const struct proc_nfs_info *nfs_infop; + struct nfs_client *clp = nfss->nfs_client; + u32 version = clp->rpc_ops->version; + + seq_printf(m, ",vers=%u", version); + seq_printf(m, ",rsize=%u", nfss->rsize); + seq_printf(m, ",wsize=%u", nfss->wsize); + if (nfss->bsize != 0) + seq_printf(m, ",bsize=%u", nfss->bsize); + seq_printf(m, ",namlen=%u", nfss->namelen); + if (nfss->acregmin != NFS_DEF_ACREGMIN*HZ || showdefaults) + seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ); + if (nfss->acregmax != NFS_DEF_ACREGMAX*HZ || showdefaults) + seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ); + if (nfss->acdirmin != NFS_DEF_ACDIRMIN*HZ || showdefaults) + seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); + if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults) + seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { + if (nfss->flags & nfs_infop->flag) + seq_puts(m, nfs_infop->str); + else + seq_puts(m, nfs_infop->nostr); + } + seq_printf(m, ",proto=%s", + rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); + if (version == 4) { + if (nfss->port != NFS_PORT) + seq_printf(m, ",port=%u", nfss->port); + } else + if (nfss->port) + seq_printf(m, ",port=%u", nfss->port); + + seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ); + seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries); + seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); + + if (version != 4) + nfs_show_mountd_options(m, nfss, showdefaults); + +#ifdef CONFIG_NFS_V4 + if (clp->rpc_ops->version == 4) + seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); +#endif + if (nfss->options & NFS_OPTION_FSCACHE) + seq_printf(m, ",fsc"); +} + +/* + * Describe the mount options on this VFS mountpoint + */ +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + + seq_printf(m, ",addr=%s", + rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, + RPC_DISPLAY_ADDR)); + + return 0; +} + +/* + * Present statistical information for this VFS mountpoint + */ +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%u", nfss->wtmult); + seq_printf(m, ",dtsize=%u", nfss->dtsize); + seq_printf(m, ",bsize=%u", nfss->bsize); + seq_printf(m, ",namlen=%u", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->nfs_client->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%u", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%u", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for_each_possible_cpu(cpu) { + struct nfs_iostats *stats; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; +#ifdef CONFIG_NFS_FSCACHE + for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) + totals.fscache[i] += stats->fscache[i]; +#endif + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); +#ifdef CONFIG_NFS_FSCACHE + if (nfss->options & NFS_OPTION_FSCACHE) { + seq_printf(m, "\n\tfsc:\t"); + for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + } +#endif + seq_printf(m, "\n"); + + rpc_print_iostats(m, nfss->client); + + return 0; +} + +/* + * Begin unmount by attempting to remove all automounted mountpoints we added + * in response to xdev traversals and referrals + */ +static void nfs_umount_begin(struct super_block *sb) +{ + struct nfs_server *server; + struct rpc_clnt *rpc; + + server = NFS_SB(sb); + /* -EIO all pending I/O */ + rpc = server->client_acl; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = server->client; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); +} + +static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) +{ + struct nfs_parsed_mount_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data) { + data->acregmin = NFS_DEF_ACREGMIN; + data->acregmax = NFS_DEF_ACREGMAX; + data->acdirmin = NFS_DEF_ACDIRMIN; + data->acdirmax = NFS_DEF_ACDIRMAX; + data->mount_server.port = NFS_UNSPEC_PORT; + data->nfs_server.port = NFS_UNSPEC_PORT; + data->nfs_server.protocol = XPRT_TRANSPORT_TCP; + data->auth_flavors[0] = RPC_AUTH_UNIX; + data->auth_flavor_len = 1; + data->version = version; + data->minorversion = 0; + } + return data; +} + +/* + * Sanity-check a server address provided by the mount command. + * + * Address family must be initialized, and address must not be + * the ANY address for that family. + */ +static int nfs_verify_server_address(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: { + struct sockaddr_in *sa = (struct sockaddr_in *)addr; + return sa->sin_addr.s_addr != htonl(INADDR_ANY); + } + case AF_INET6: { + struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; + return !ipv6_addr_any(sa); + } + } + + dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); + return 0; +} + +/* + * Select between a default port value and a user-specified port value. + * If a zero value is set, then autobind will be used. + */ +static void nfs_set_port(struct sockaddr *sap, int *port, + const unsigned short default_port) +{ + if (*port == NFS_UNSPEC_PORT) + *port = default_port; + + rpc_set_port(sap, *port); +} + +/* + * Sanity check the NFS transport protocol. + * + */ +static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ + switch (mnt->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + break; + default: + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * For text based NFSv2/v3 mounts, the mount protocol transport default + * settings should depend upon the specified NFS transport. + */ +static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ + nfs_validate_transport_protocol(mnt); + + if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || + mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) + return; + switch (mnt->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * Parse the value of the 'sec=' option. + */ +static int nfs_parse_security_flavors(char *value, + struct nfs_parsed_mount_data *mnt) +{ + substring_t args[MAX_OPT_ARGS]; + + dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); + + switch (match_token(value, nfs_secflavor_tokens, args)) { + case Opt_sec_none: + mnt->auth_flavors[0] = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + mnt->auth_flavors[0] = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; + break; + default: + return 0; + } + + mnt->auth_flavor_len = 1; + return 1; +} + +/* + * Error-check and convert a string of mount options from user space into + * a data structure. The whole mount string is processed; bad options are + * skipped as they are encountered. If there were no errors, return 1; + * otherwise return 0 (zero). + */ +static int nfs_parse_mount_options(char *raw, + struct nfs_parsed_mount_data *mnt) +{ + char *p, *string, *secdata; + int rc, sloppy = 0, invalid_option = 0; + unsigned short protofamily = AF_UNSPEC; + unsigned short mountfamily = AF_UNSPEC; + + if (!raw) { + dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); + return 1; + } + dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); + + secdata = alloc_secdata(); + if (!secdata) + goto out_nomem; + + rc = security_sb_copy_data(raw, secdata); + if (rc) + goto out_security_failure; + + rc = security_sb_parse_opts_str(secdata, &mnt->lsm_opts); + if (rc) + goto out_security_failure; + + free_secdata(secdata); + + while ((p = strsep(&raw, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + unsigned long option; + int token; + + if (!*p) + continue; + + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); + + token = match_token(p, nfs_mount_option_tokens, args); + switch (token) { + + /* + * boolean options: foo/nofoo + */ + case Opt_soft: + mnt->flags |= NFS_MOUNT_SOFT; + break; + case Opt_hard: + mnt->flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_posix: + mnt->flags |= NFS_MOUNT_POSIX; + break; + case Opt_noposix: + mnt->flags &= ~NFS_MOUNT_POSIX; + break; + case Opt_cto: + mnt->flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_nocto: + mnt->flags |= NFS_MOUNT_NOCTO; + break; + case Opt_ac: + mnt->flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_noac: + mnt->flags |= NFS_MOUNT_NOAC; + break; + case Opt_lock: + mnt->flags &= ~NFS_MOUNT_NONLM; + break; + case Opt_nolock: + mnt->flags |= NFS_MOUNT_NONLM; + break; + case Opt_v2: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 2; + break; + case Opt_v3: + mnt->flags |= NFS_MOUNT_VER3; + mnt->version = 3; + break; +#ifdef CONFIG_NFS_V4 + case Opt_v4: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 4; + break; +#endif + case Opt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_rdma: + mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ + mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(p); + break; + case Opt_acl: + mnt->flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + mnt->flags |= NFS_MOUNT_NOACL; + break; + case Opt_rdirplus: + mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; + break; + case Opt_nordirplus: + mnt->flags |= NFS_MOUNT_NORDIRPLUS; + break; + case Opt_sharecache: + mnt->flags &= ~NFS_MOUNT_UNSHARED; + break; + case Opt_nosharecache: + mnt->flags |= NFS_MOUNT_UNSHARED; + break; + case Opt_resvport: + mnt->flags &= ~NFS_MOUNT_NORESVPORT; + break; + case Opt_noresvport: + mnt->flags |= NFS_MOUNT_NORESVPORT; + break; + case Opt_fscache: + mnt->options |= NFS_OPTION_FSCACHE; + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = NULL; + break; + case Opt_nofscache: + mnt->options &= ~NFS_OPTION_FSCACHE; + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = NULL; + break; + case Opt_fscache_uniq: + string = match_strdup(args); + if (!string) + goto out_nomem; + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = string; + mnt->options |= NFS_OPTION_FSCACHE; + break; + + /* + * options that take numeric values + */ + case Opt_port: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option > USHORT_MAX) + goto out_invalid_value; + mnt->nfs_server.port = option; + break; + case Opt_rsize: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->rsize = option; + break; + case Opt_wsize: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->wsize = option; + break; + case Opt_bsize: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->bsize = option; + break; + case Opt_timeo: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option == 0) + goto out_invalid_value; + mnt->timeo = option; + break; + case Opt_retrans: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option == 0) + goto out_invalid_value; + mnt->retrans = option; + break; + case Opt_acregmin: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmin = option; + break; + case Opt_acregmax: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmax = option; + break; + case Opt_acdirmin: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acdirmin = option; + break; + case Opt_acdirmax: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acdirmax = option; + break; + case Opt_actimeo: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmin = mnt->acregmax = + mnt->acdirmin = mnt->acdirmax = option; + break; + case Opt_namelen: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->namlen = option; + break; + case Opt_mountport: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option > USHORT_MAX) + goto out_invalid_value; + mnt->mount_server.port = option; + break; + case Opt_mountvers: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || + option < NFS_MNT_VERSION || + option > NFS_MNT3_VERSION) + goto out_invalid_value; + mnt->mount_server.version = option; + break; + case Opt_nfsvers: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + switch (option) { + case NFS2_VERSION: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 2; + break; + case NFS3_VERSION: + mnt->flags |= NFS_MOUNT_VER3; + mnt->version = 3; + break; +#ifdef CONFIG_NFS_V4 + case NFS4_VERSION: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 4; + break; +#endif + default: + goto out_invalid_value; + } + break; + case Opt_minorversion: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + if (option > NFS4_MAX_MINOR_VERSION) + goto out_invalid_value; + mnt->minorversion = option; + break; + + /* + * options that take text values + */ + case Opt_sec: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = nfs_parse_security_flavors(string, mnt); + kfree(string); + if (!rc) { + dfprintk(MOUNT, "NFS: unrecognized " + "security flavor\n"); + return 0; + } + break; + case Opt_proto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + + protofamily = AF_INET; + switch (token) { + case Opt_xprt_udp6: + protofamily = AF_INET6; + case Opt_xprt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + kfree(string); + break; + case Opt_xprt_tcp6: + protofamily = AF_INET6; + case Opt_xprt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + kfree(string); + break; + case Opt_xprt_rdma: + /* vector side protocols to TCP */ + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(string); + kfree(string); + break; + default: + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); + kfree(string); + return 0; + } + break; + case Opt_mountproto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); + + mountfamily = AF_INET; + switch (token) { + case Opt_xprt_udp6: + mountfamily = AF_INET6; + case Opt_xprt_udp: + mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_xprt_tcp6: + mountfamily = AF_INET6; + case Opt_xprt_tcp: + mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_xprt_rdma: /* not used for side protocols */ + default: + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); + return 0; + } + break; + case Opt_addr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->nfs_server.addrlen = + rpc_pton(string, strlen(string), + (struct sockaddr *) + &mnt->nfs_server.address, + sizeof(mnt->nfs_server.address)); + kfree(string); + if (mnt->nfs_server.addrlen == 0) + goto out_invalid_address; + break; + case Opt_clientaddr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + kfree(mnt->client_address); + mnt->client_address = string; + break; + case Opt_mounthost: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + kfree(mnt->mount_server.hostname); + mnt->mount_server.hostname = string; + break; + case Opt_mountaddr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->mount_server.addrlen = + rpc_pton(string, strlen(string), + (struct sockaddr *) + &mnt->mount_server.address, + sizeof(mnt->mount_server.address)); + kfree(string); + if (mnt->mount_server.addrlen == 0) + goto out_invalid_address; + break; + case Opt_lookupcache: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_lookupcache_tokens, args); + kfree(string); + switch (token) { + case Opt_lookupcache_all: + mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); + break; + case Opt_lookupcache_positive: + mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; + mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; + break; + case Opt_lookupcache_none: + mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; + break; + default: + dfprintk(MOUNT, "NFS: invalid " + "lookupcache argument\n"); + return 0; + }; + break; + + /* + * Special options + */ + case Opt_sloppy: + sloppy = 1; + dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); + break; + case Opt_userspace: + case Opt_deprecated: + dfprintk(MOUNT, "NFS: ignoring mount option " + "'%s'\n", p); + break; + + default: + invalid_option = 1; + dfprintk(MOUNT, "NFS: unrecognized mount option " + "'%s'\n", p); + } + } + + if (!sloppy && invalid_option) + return 0; + + /* + * verify that any proto=/mountproto= options match the address + * familiies in the addr=/mountaddr= options. + */ + if (protofamily != AF_UNSPEC && + protofamily != mnt->nfs_server.address.ss_family) + goto out_proto_mismatch; + + if (mountfamily != AF_UNSPEC) { + if (mnt->mount_server.addrlen) { + if (mountfamily != mnt->mount_server.address.ss_family) + goto out_mountproto_mismatch; + } else { + if (mountfamily != mnt->nfs_server.address.ss_family) + goto out_mountproto_mismatch; + } + } + + return 1; + +out_mountproto_mismatch: + printk(KERN_INFO "NFS: mount server address does not match mountproto= " + "option\n"); + return 0; +out_proto_mismatch: + printk(KERN_INFO "NFS: server address does not match proto= option\n"); + return 0; +out_invalid_address: + printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); + return 0; +out_invalid_value: + printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); + return 0; +out_nomem: + printk(KERN_INFO "NFS: not enough memory to parse option\n"); + return 0; +out_security_failure: + free_secdata(secdata); + printk(KERN_INFO "NFS: security options invalid: %d\n", rc); + return 0; +} + +/* + * Match the requested auth flavors with the list returned by + * the server. Returns zero and sets the mount's authentication + * flavor on success; returns -EACCES if server does not support + * the requested flavor. + */ +static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, + struct nfs_mount_request *request) +{ + unsigned int i, j, server_authlist_len = *(request->auth_flav_len); + + /* + * Certain releases of Linux's mountd return an empty + * flavor list. To prevent behavioral regression with + * these servers (ie. rejecting mounts that used to + * succeed), revert to pre-2.6.32 behavior (no checking) + * if the returned flavor list is empty. + */ + if (server_authlist_len == 0) + return 0; + + /* + * We avoid sophisticated negotiating here, as there are + * plenty of cases where we can get it wrong, providing + * either too little or too much security. + * + * RFC 2623, section 2.7 suggests we SHOULD prefer the + * flavor listed first. However, some servers list + * AUTH_NULL first. Our caller plants AUTH_SYS, the + * preferred default, in args->auth_flavors[0] if user + * didn't specify sec= mount option. + */ + for (i = 0; i < args->auth_flavor_len; i++) + for (j = 0; j < server_authlist_len; j++) + if (args->auth_flavors[i] == request->auth_flavs[j]) { + dfprintk(MOUNT, "NFS: using auth flavor %d\n", + request->auth_flavs[j]); + args->auth_flavors[0] = request->auth_flavs[j]; + return 0; + } + + dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n"); + nfs_umount(request); + return -EACCES; +} + +/* + * Use the remote server's MOUNT service to request the NFS file handle + * corresponding to the provided path. + */ +static int nfs_try_mount(struct nfs_parsed_mount_data *args, + struct nfs_fh *root_fh) +{ + rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; + unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); + struct nfs_mount_request request = { + .sap = (struct sockaddr *) + &args->mount_server.address, + .dirpath = args->nfs_server.export_path, + .protocol = args->mount_server.protocol, + .fh = root_fh, + .noresvport = args->flags & NFS_MOUNT_NORESVPORT, + .auth_flav_len = &server_authlist_len, + .auth_flavs = server_authlist, + }; + int status; + + if (args->mount_server.version == 0) { + switch (args->version) { + default: + args->mount_server.version = NFS_MNT3_VERSION; + break; + case 2: + args->mount_server.version = NFS_MNT_VERSION; + } + } + request.version = args->mount_server.version; + + if (args->mount_server.hostname) + request.hostname = args->mount_server.hostname; + else + request.hostname = args->nfs_server.hostname; + + /* + * Construct the mount server's address. + */ + if (args->mount_server.address.ss_family == AF_UNSPEC) { + memcpy(request.sap, &args->nfs_server.address, + args->nfs_server.addrlen); + args->mount_server.addrlen = args->nfs_server.addrlen; + } + request.salen = args->mount_server.addrlen; + nfs_set_port(request.sap, &args->mount_server.port, 0); + + /* + * Now ask the mount server to map our export path + * to a file handle. + */ + status = nfs_mount(&request); + if (status != 0) { + dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", + request.hostname, status); + return status; + } + + /* + * MNTv1 (NFSv2) does not support auth flavor negotiation. + */ + if (args->mount_server.version != NFS_MNT3_VERSION) + return 0; + return nfs_walk_authlist(args, &request); +} + +static int nfs_parse_simple_hostname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + size_t len; + char *colon, *comma; + + colon = strchr(dev_name, ':'); + if (colon == NULL) + goto out_bad_devname; + + len = colon - dev_name; + if (len > maxnamlen) + goto out_hostname; + + /* N.B. caller will free nfs_server.hostname in all cases */ + *hostname = kstrndup(dev_name, len, GFP_KERNEL); + if (!*hostname) + goto out_nomem; + + /* kill possible hostname list: not supported */ + comma = strchr(*hostname, ','); + if (comma != NULL) { + if (comma == *hostname) + goto out_bad_devname; + *comma = '\0'; + } + + colon++; + len = strlen(colon); + if (len > maxpathlen) + goto out_path; + *export_path = kstrndup(colon, len, GFP_KERNEL); + if (!*export_path) + goto out_nomem; + + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); + return 0; + +out_bad_devname: + dfprintk(MOUNT, "NFS: device name not in host:path format\n"); + return -EINVAL; + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); + return -ENOMEM; + +out_hostname: + dfprintk(MOUNT, "NFS: server hostname too long\n"); + return -ENAMETOOLONG; + +out_path: + dfprintk(MOUNT, "NFS: export pathname too long\n"); + return -ENAMETOOLONG; +} + +/* + * Hostname has square brackets around it because it contains one or + * more colons. We look for the first closing square bracket, and a + * colon must follow it. + */ +static int nfs_parse_protected_hostname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + size_t len; + char *start, *end; + + start = (char *)(dev_name + 1); + + end = strchr(start, ']'); + if (end == NULL) + goto out_bad_devname; + if (*(end + 1) != ':') + goto out_bad_devname; + + len = end - start; + if (len > maxnamlen) + goto out_hostname; + + /* N.B. caller will free nfs_server.hostname in all cases */ + *hostname = kstrndup(start, len, GFP_KERNEL); + if (*hostname == NULL) + goto out_nomem; + + end += 2; + len = strlen(end); + if (len > maxpathlen) + goto out_path; + *export_path = kstrndup(end, len, GFP_KERNEL); + if (!*export_path) + goto out_nomem; + + return 0; + +out_bad_devname: + dfprintk(MOUNT, "NFS: device name not in host:path format\n"); + return -EINVAL; + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); + return -ENOMEM; + +out_hostname: + dfprintk(MOUNT, "NFS: server hostname too long\n"); + return -ENAMETOOLONG; + +out_path: + dfprintk(MOUNT, "NFS: export pathname too long\n"); + return -ENAMETOOLONG; +} + +/* + * Split "dev_name" into "hostname:export_path". + * + * The leftmost colon demarks the split between the server's hostname + * and the export path. If the hostname starts with a left square + * bracket, then it may contain colons. + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_devname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + if (*dev_name == '[') + return nfs_parse_protected_hostname(dev_name, + hostname, maxnamlen, + export_path, maxpathlen); + + return nfs_parse_simple_hostname(dev_name, + hostname, maxnamlen, + export_path, maxpathlen); +} + +/* + * Validate the NFS2/NFS3 mount data + * - fills in the mount root filehandle + * + * For option strings, user space handles the following behaviors: + * + * + DNS: mapping server host name to IP address ("addr=" option) + * + * + failure mode: how to behave if a mount request can't be handled + * immediately ("fg/bg" option) + * + * + retry: how often to retry a mount request ("retry=" option) + * + * + breaking back: trying proto=udp after proto=tcp, v2 after v3, + * mountproto=tcp after mountproto=udp, and so on + */ +static int nfs_validate_mount_data(void *options, + struct nfs_parsed_mount_data *args, + struct nfs_fh *mntfh, + const char *dev_name) +{ + struct nfs_mount_data *data = (struct nfs_mount_data *)options; + struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + + if (data == NULL) + goto out_no_data; + + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) + goto out_no_v3; + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) + goto out_no_sec; + case 5: + memset(data->context, 0, sizeof(data->context)); + case 6: + if (data->flags & NFS_MOUNT_VER3) { + if (data->root.size > NFS3_FHSIZE || data->root.size == 0) + goto out_invalid_fh; + mntfh->size = data->root.size; + args->version = 3; + } else { + mntfh->size = NFS2_FHSIZE; + args->version = 2; + } + + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); + + /* + * Translate to nfs_parsed_mount_data, which nfs_fill_super + * can deal with. + */ + args->flags = data->flags & NFS_MOUNT_FLAGMASK; + args->rsize = data->rsize; + args->wsize = data->wsize; + args->timeo = data->timeo; + args->retrans = data->retrans; + args->acregmin = data->acregmin; + args->acregmax = data->acregmax; + args->acdirmin = data->acdirmin; + args->acdirmax = data->acdirmax; + + memcpy(sap, &data->addr, sizeof(data->addr)); + args->nfs_server.addrlen = sizeof(data->addr); + if (!nfs_verify_server_address(sap)) + goto out_no_address; + + if (!(data->flags & NFS_MOUNT_TCP)) + args->nfs_server.protocol = XPRT_TRANSPORT_UDP; + /* N.B. caller will free nfs_server.hostname in all cases */ + args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); + args->namlen = data->namlen; + args->bsize = data->bsize; + + if (data->flags & NFS_MOUNT_SECFLAVOUR) + args->auth_flavors[0] = data->pseudoflavor; + if (!args->nfs_server.hostname) + goto out_nomem; + + /* + * The legacy version 6 binary mount data from userspace has a + * field used only to transport selinux information into the + * the kernel. To continue to support that functionality we + * have a touch of selinux knowledge here in the NFS code. The + * userspace code converted context=blah to just blah so we are + * converting back to the full string selinux understands. + */ + if (data->context[0]){ +#ifdef CONFIG_SECURITY_SELINUX + int rc; + char *opts_str = kmalloc(sizeof(data->context) + 8, GFP_KERNEL); + if (!opts_str) + return -ENOMEM; + strcpy(opts_str, "context="); + data->context[NFS_MAX_CONTEXT_LEN] = '\0'; + strcat(opts_str, &data->context[0]); + rc = security_sb_parse_opts_str(opts_str, &args->lsm_opts); + kfree(opts_str); + if (rc) + return rc; +#else + return -EINVAL; +#endif + } + + break; + default: { + int status; + + if (nfs_parse_mount_options((char *)options, args) == 0) + return -EINVAL; + + if (!nfs_verify_server_address(sap)) + goto out_no_address; + + if (args->version == 4) +#ifdef CONFIG_NFS_V4 + return nfs4_validate_text_mount_data(options, + args, dev_name); +#else + goto out_v4_not_compiled; +#endif + + nfs_set_port(sap, &args->nfs_server.port, 0); + + nfs_set_mount_transport_protocol(args); + + status = nfs_parse_devname(dev_name, + &args->nfs_server.hostname, + PAGE_SIZE, + &args->nfs_server.export_path, + NFS_MAXPATHLEN); + if (!status) + status = nfs_try_mount(args, mntfh); + + kfree(args->nfs_server.export_path); + args->nfs_server.export_path = NULL; + + if (status) + return status; + + break; + } + } + +#ifndef CONFIG_NFS_V3 + if (args->version == 3) + goto out_v3_not_compiled; +#endif /* !CONFIG_NFS_V3 */ + + return 0; + +out_no_data: + dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_no_v3: + dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", + data->version); + return -EINVAL; + +out_no_sec: + dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); + return -EINVAL; + +#ifndef CONFIG_NFS_V3 +out_v3_not_compiled: + dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); + return -EPROTONOSUPPORT; +#endif /* !CONFIG_NFS_V3 */ + +#ifndef CONFIG_NFS_V4 +out_v4_not_compiled: + dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); + return -EPROTONOSUPPORT; +#endif /* !CONFIG_NFS_V4 */ + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); + return -ENOMEM; + +out_no_address: + dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); + return -EINVAL; + +out_invalid_fh: + dfprintk(MOUNT, "NFS: invalid root filehandle\n"); + return -EINVAL; +} + +static int +nfs_compare_remount_data(struct nfs_server *nfss, + struct nfs_parsed_mount_data *data) +{ + if (data->flags != nfss->flags || + data->rsize != nfss->rsize || + data->wsize != nfss->wsize || + data->retrans != nfss->client->cl_timeout->to_retries || + data->auth_flavors[0] != nfss->client->cl_auth->au_flavor || + data->acregmin != nfss->acregmin / HZ || + data->acregmax != nfss->acregmax / HZ || + data->acdirmin != nfss->acdirmin / HZ || + data->acdirmax != nfss->acdirmax / HZ || + data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || + data->nfs_server.port != nfss->port || + data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || + !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address, + (struct sockaddr *)&nfss->nfs_client->cl_addr)) + return -EINVAL; + + return 0; +} + +static int +nfs_remount(struct super_block *sb, int *flags, char *raw_data) +{ + int error; + struct nfs_server *nfss = sb->s_fs_info; + struct nfs_parsed_mount_data *data; + struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; + struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; + u32 nfsvers = nfss->nfs_client->rpc_ops->version; + + /* + * Userspace mount programs that send binary options generally send + * them populated with default values. We have no way to know which + * ones were explicitly specified. Fall back to legacy behavior and + * just return success. + */ + if ((nfsvers == 4 && (!options4 || options4->version == 1)) || + (nfsvers <= 3 && (!options || (options->version >= 1 && + options->version <= 6)))) + return 0; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + /* fill out struct with values from existing mount */ + data->flags = nfss->flags; + data->rsize = nfss->rsize; + data->wsize = nfss->wsize; + data->retrans = nfss->client->cl_timeout->to_retries; + data->auth_flavors[0] = nfss->client->cl_auth->au_flavor; + data->acregmin = nfss->acregmin / HZ; + data->acregmax = nfss->acregmax / HZ; + data->acdirmin = nfss->acdirmin / HZ; + data->acdirmax = nfss->acdirmax / HZ; + data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; + data->nfs_server.port = nfss->port; + data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, + data->nfs_server.addrlen); + + /* overwrite those values with any that were specified */ + error = nfs_parse_mount_options((char *)options, data); + if (error < 0) + goto out; + + /* compare new mount options with old ones */ + error = nfs_compare_remount_data(nfss, data); +out: + kfree(data); + return error; +} + +/* + * Initialise the common bits of the superblock + */ +static inline void nfs_initialise_sb(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + sb->s_magic = NFS_SUPER_MAGIC; + + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), + "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); + + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); + + if (server->flags & NFS_MOUNT_NOAC) + sb->s_flags |= MS_SYNCHRONOUS; + + sb->s_bdi = &server->backing_dev_info; + + nfs_super_set_maxbytes(sb, server->maxfilesize); +} + +/* + * Finish setting up an NFS2/3 superblock + */ +static void nfs_fill_super(struct super_block *sb, + struct nfs_parsed_mount_data *data) +{ + struct nfs_server *server = NFS_SB(sb); + + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + if (data->bsize) + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + + if (server->nfs_client->rpc_ops->version == 3) { + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_time_gran = 1; + } + + sb->s_op = &nfs_sops; + nfs_initialise_sb(sb); +} + +/* + * Finish setting up a cloned NFS2/3 superblock + */ +static void nfs_clone_super(struct super_block *sb, + const struct super_block *old_sb) +{ + struct nfs_server *server = NFS_SB(sb); + + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; + + if (server->nfs_client->rpc_ops->version == 3) { + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_time_gran = 1; + } + + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); +} + +static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) +{ + const struct nfs_server *a = s->s_fs_info; + const struct rpc_clnt *clnt_a = a->client; + const struct rpc_clnt *clnt_b = b->client; + + if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) + goto Ebusy; + if (a->nfs_client != b->nfs_client) + goto Ebusy; + if (a->flags != b->flags) + goto Ebusy; + if (a->wsize != b->wsize) + goto Ebusy; + if (a->rsize != b->rsize) + goto Ebusy; + if (a->acregmin != b->acregmin) + goto Ebusy; + if (a->acregmax != b->acregmax) + goto Ebusy; + if (a->acdirmin != b->acdirmin) + goto Ebusy; + if (a->acdirmax != b->acdirmax) + goto Ebusy; + if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + goto Ebusy; + return 1; +Ebusy: + return 0; +} + +struct nfs_sb_mountdata { + struct nfs_server *server; + int mntflags; +}; + +static int nfs_set_super(struct super_block *s, void *data) +{ + struct nfs_sb_mountdata *sb_mntdata = data; + struct nfs_server *server = sb_mntdata->server; + int ret; + + s->s_flags = sb_mntdata->mntflags; + s->s_fs_info = server; + ret = set_anon_super(s, server); + if (ret == 0) + server->s_dev = s->s_dev; + return ret; +} + +static int nfs_compare_super_address(struct nfs_server *server1, + struct nfs_server *server2) +{ + struct sockaddr *sap1, *sap2; + + sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr; + sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr; + + if (sap1->sa_family != sap2->sa_family) + return 0; + + switch (sap1->sa_family) { + case AF_INET: { + struct sockaddr_in *sin1 = (struct sockaddr_in *)sap1; + struct sockaddr_in *sin2 = (struct sockaddr_in *)sap2; + if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr) + return 0; + if (sin1->sin_port != sin2->sin_port) + return 0; + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin1 = (struct sockaddr_in6 *)sap1; + struct sockaddr_in6 *sin2 = (struct sockaddr_in6 *)sap2; + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) + return 0; + if (sin1->sin6_port != sin2->sin6_port) + return 0; + break; + } + default: + return 0; + } + + return 1; +} + +static int nfs_compare_super(struct super_block *sb, void *data) +{ + struct nfs_sb_mountdata *sb_mntdata = data; + struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb); + int mntflags = sb_mntdata->mntflags; + + if (!nfs_compare_super_address(old, server)) + return 0; + /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ + if (old->flags & NFS_MOUNT_UNSHARED) + return 0; + if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) + return 0; + return nfs_compare_mount_options(sb, server, mntflags); +} + +static int nfs_bdi_register(struct nfs_server *server) +{ + return bdi_register_dev(&server->backing_dev_info, server->s_dev); +} + +static int nfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_server *server = NULL; + struct super_block *s; + struct nfs_parsed_mount_data *data; + struct nfs_fh *mntfh; + struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; + int error = -ENOMEM; + + data = nfs_alloc_parsed_mount_data(3); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); + + /* Validate the mount data */ + error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); + if (error < 0) + goto out; + +#ifdef CONFIG_NFS_V4 + if (data->version == 4) { + error = nfs4_try_mount(flags, dev_name, data, mnt); + kfree(data->client_address); + goto out; + } +#endif /* CONFIG_NFS_V4 */ + + /* Get a volume representation */ + server = nfs_create_server(data, mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out; + } + sb_mntdata.server = server; + + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + nfs_fill_super(s, data); + nfs_fscache_get_super_cookie( + s, data ? data->fscache_uniq : NULL, NULL); + } + + mntroot = nfs_get_root(s, mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + + error = security_sb_set_mnt_opts(s, &data->lsm_opts); + if (error) + goto error_splat_root; + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + error = 0; + +out: + kfree(data->nfs_server.hostname); + kfree(data->mount_server.hostname); + kfree(data->fscache_uniq); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); + return error; + +out_err_nosb: + nfs_free_server(server); + goto out; + +error_splat_root: + dput(mntroot); +error_splat_super: + deactivate_locked_super(s); + goto out; +} + +/* + * Ensure that we unregister the bdi before kill_anon_super + * releases the device name + */ +static void nfs_put_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + bdi_unregister(&server->backing_dev_info); +} + +/* + * Destroy an NFS2/3 superblock + */ +static void nfs_kill_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + kill_anon_super(s); + nfs_fscache_release_super_cookie(s); + nfs_free_server(server); +} + +/* + * Clone an NFS2/3 server record on xdev traversal (FSID-change) + */ +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; + int error; + + dprintk("--> nfs_xdev_get_sb()\n"); + + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + sb_mntdata.server = server; + + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + nfs_clone_super(s, data->sb); + nfs_fscache_get_super_cookie(s, NULL, data); + } + + mntroot = nfs_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { + dput(mntroot); + error = -ESTALE; + goto error_splat_super; + } + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + /* clone any lsm security options from the parent to the new sb */ + security_sb_clone_mnt_opts(data->sb, s); + + dprintk("<-- nfs_xdev_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + deactivate_locked_super(s); + dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); + return error; +} + +#ifdef CONFIG_NFS_V4 + +/* + * Finish setting up a cloned NFS4 superblock + */ +static void nfs4_clone_super(struct super_block *sb, + const struct super_block *old_sb) +{ + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; + sb->s_time_gran = 1; + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); +} + +/* + * Set up an NFS4 superblock + */ +static void nfs4_fill_super(struct super_block *sb) +{ + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + nfs_initialise_sb(sb); +} + +static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) +{ + args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); +} + +static int nfs4_validate_text_mount_data(void *options, + struct nfs_parsed_mount_data *args, + const char *dev_name) +{ + struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + + nfs_set_port(sap, &args->nfs_server.port, NFS_PORT); + + nfs_validate_transport_protocol(args); + + nfs4_validate_mount_flags(args); + + if (args->version != 4) { + dfprintk(MOUNT, + "NFS4: Illegal mount version\n"); + return -EINVAL; + } + + if (args->auth_flavor_len > 1) { + dfprintk(MOUNT, + "NFS4: Too many RPC auth flavours specified\n"); + return -EINVAL; + } + + if (args->client_address == NULL) { + dfprintk(MOUNT, + "NFS4: mount program didn't pass callback address\n"); + return -EINVAL; + } + + return nfs_parse_devname(dev_name, + &args->nfs_server.hostname, + NFS4_MAXNAMLEN, + &args->nfs_server.export_path, + NFS4_MAXPATHLEN); +} + +/* + * Validate NFSv4 mount options + */ +static int nfs4_validate_mount_data(void *options, + struct nfs_parsed_mount_data *args, + const char *dev_name) +{ + struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; + char *c; + + if (data == NULL) + goto out_no_data; + + switch (data->version) { + case 1: + if (data->host_addrlen > sizeof(args->nfs_server.address)) + goto out_no_address; + if (data->host_addrlen == 0) + goto out_no_address; + args->nfs_server.addrlen = data->host_addrlen; + if (copy_from_user(sap, data->host_addr, data->host_addrlen)) + return -EFAULT; + if (!nfs_verify_server_address(sap)) + goto out_no_address; + + if (data->auth_flavourlen) { + if (data->auth_flavourlen > 1) + goto out_inval_auth; + if (copy_from_user(&args->auth_flavors[0], + data->auth_flavours, + sizeof(args->auth_flavors[0]))) + return -EFAULT; + } + + c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + args->nfs_server.hostname = c; + + c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + args->nfs_server.export_path = c; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); + + c = strndup_user(data->client_addr.data, 16); + if (IS_ERR(c)) + return PTR_ERR(c); + args->client_address = c; + + /* + * Translate to nfs_parsed_mount_data, which nfs4_fill_super + * can deal with. + */ + + args->flags = data->flags & NFS4_MOUNT_FLAGMASK; + args->rsize = data->rsize; + args->wsize = data->wsize; + args->timeo = data->timeo; + args->retrans = data->retrans; + args->acregmin = data->acregmin; + args->acregmax = data->acregmax; + args->acdirmin = data->acdirmin; + args->acdirmax = data->acdirmax; + args->nfs_server.protocol = data->proto; + nfs_validate_transport_protocol(args); + + break; + default: + if (nfs_parse_mount_options((char *)options, args) == 0) + return -EINVAL; + + if (!nfs_verify_server_address(sap)) + return -EINVAL; + + return nfs4_validate_text_mount_data(options, args, dev_name); + } + + return 0; + +out_no_data: + dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_inval_auth: + dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", + data->auth_flavourlen); + return -EINVAL; + +out_no_address: + dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); + return -EINVAL; +} + +/* + * Get the superblock for the NFS4 root partition + */ +static int nfs4_remote_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_parsed_mount_data *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct nfs_fh *mntfh; + struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; + int error = -ENOMEM; + + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); + + /* Get a volume representation */ + server = nfs4_create_server(data, mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out; + } + sb_mntdata.server = server; + + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_free; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + nfs4_fill_super(s); + nfs_fscache_get_super_cookie( + s, data ? data->fscache_uniq : NULL, NULL); + } + + mntroot = nfs4_get_root(s, mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + + error = security_sb_set_mnt_opts(s, &data->lsm_opts); + if (error) + goto error_splat_root; + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + error = 0; + +out: + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + return error; + +out_free: + nfs_free_server(server); + goto out; + +error_splat_root: + dput(mntroot); +error_splat_super: + deactivate_locked_super(s); + goto out; +} + +static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, + int flags, void *data, const char *hostname) +{ + struct vfsmount *root_mnt; + char *root_devname; + size_t len; + + len = strlen(hostname) + 3; + root_devname = kmalloc(len, GFP_KERNEL); + if (root_devname == NULL) + return ERR_PTR(-ENOMEM); + snprintf(root_devname, len, "%s:/", hostname); + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + kfree(root_devname); + return root_mnt; +} + +static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt) +{ + char *page = (char *) __get_free_page(GFP_KERNEL); + char *devname, *tmp; + + if (page == NULL) + return; + devname = nfs_path(path->mnt->mnt_devname, + path->mnt->mnt_root, path->dentry, + page, PAGE_SIZE); + if (devname == NULL) + goto out_freepage; + tmp = kstrdup(devname, GFP_KERNEL); + if (tmp == NULL) + goto out_freepage; + kfree(mnt->mnt_devname); + mnt->mnt_devname = tmp; +out_freepage: + free_page((unsigned long)page); +} + +static int nfs_follow_remote_path(struct vfsmount *root_mnt, + const char *export_path, struct vfsmount *mnt_target) +{ + struct mnt_namespace *ns_private; + struct nameidata nd; + struct super_block *s; + int ret; + + ns_private = create_mnt_ns(root_mnt); + ret = PTR_ERR(ns_private); + if (IS_ERR(ns_private)) + goto out_mntput; + + ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, + export_path, LOOKUP_FOLLOW, &nd); + + put_mnt_ns(ns_private); + + if (ret != 0) + goto out_err; + + s = nd.path.mnt->mnt_sb; + atomic_inc(&s->s_active); + mnt_target->mnt_sb = s; + mnt_target->mnt_root = dget(nd.path.dentry); + + /* Correct the device pathname */ + nfs_fix_devname(&nd.path, mnt_target); + + path_put(&nd.path); + down_write(&s->s_umount); + return 0; +out_mntput: + mntput(root_mnt); +out_err: + return ret; +} + +static int nfs4_try_mount(int flags, const char *dev_name, + struct nfs_parsed_mount_data *data, + struct vfsmount *mnt) +{ + char *export_path; + struct vfsmount *root_mnt; + int error; + + dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + + export_path = data->nfs_server.export_path; + data->nfs_server.export_path = "/"; + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, + data->nfs_server.hostname); + data->nfs_server.export_path = export_path; + + error = PTR_ERR(root_mnt); + if (IS_ERR(root_mnt)) + goto out; + + error = nfs_follow_remote_path(root_mnt, export_path, mnt); + +out: + dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error, + error != 0 ? " [error]" : ""); + return error; +} + +/* + * Get the superblock for an NFS4 mountpoint + */ +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_parsed_mount_data *data; + int error = -ENOMEM; + + data = nfs_alloc_parsed_mount_data(4); + if (data == NULL) + goto out_free_data; + + /* Validate the mount data */ + error = nfs4_validate_mount_data(raw_data, data, dev_name); + if (error < 0) + goto out; + + error = nfs4_try_mount(flags, dev_name, data, mnt); + +out: + kfree(data->client_address); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + kfree(data->fscache_uniq); +out_free_data: + kfree(data); + dprintk("<-- nfs4_get_sb() = %d%s\n", error, + error != 0 ? " [error]" : ""); + return error; +} + +static void nfs4_kill_super(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + dprintk("--> %s\n", __func__); + nfs_super_return_all_delegations(sb); + kill_anon_super(sb); + nfs_fscache_release_super_cookie(sb); + nfs_free_server(server); + dprintk("<-- %s\n", __func__); +} + +/* + * Clone an NFS4 server record on xdev traversal (FSID-change) + */ +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; + int error; + + dprintk("--> nfs4_xdev_get_sb()\n"); + + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + sb_mntdata.server = server; + + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + nfs4_clone_super(s, data->sb); + nfs_fscache_get_super_cookie(s, NULL, data); + } + + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { + dput(mntroot); + error = -ESTALE; + goto error_splat_super; + } + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + security_sb_clone_mnt_opts(data->sb, s); + + dprintk("<-- nfs4_xdev_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + deactivate_locked_super(s); + dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); + return error; +} + +static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + struct nfs_fh mntfh; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; + int error; + + dprintk("--> nfs4_referral_get_sb()\n"); + + /* create a new volume representation */ + server = nfs4_create_referral_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + sb_mntdata.server = server; + + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + nfs4_fill_super(s); + nfs_fscache_get_super_cookie(s, NULL, data); + } + + mntroot = nfs4_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { + dput(mntroot); + error = -ESTALE; + goto error_splat_super; + } + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + security_sb_clone_mnt_opts(data->sb, s); + + dprintk("<-- nfs4_referral_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + deactivate_locked_super(s); + dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); + return error; +} + +/* + * Create an NFS4 server record on referral traversal + */ +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + char *export_path; + struct vfsmount *root_mnt; + int error; + + dprintk("--> nfs4_referral_get_sb()\n"); + + export_path = data->mnt_path; + data->mnt_path = "/"; + + root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, + flags, data, data->hostname); + data->mnt_path = export_path; + + error = PTR_ERR(root_mnt); + if (IS_ERR(root_mnt)) + goto out; + + error = nfs_follow_remote_path(root_mnt, export_path, mnt); +out: + dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error, + error != 0 ? " [error]" : ""); + return error; +} + +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c new file mode 100644 index 00000000000..412738dbfbc --- /dev/null +++ b/fs/nfs/symlink.c @@ -0,0 +1,79 @@ +/* + * linux/fs/nfs/symlink.c + * + * Copyright (C) 1992 Rick Sladkey + * + * Optimization changes Copyright (C) 1994 Florian La Roche + * + * Jun 7 1999, cache symlink lookups in the page cache. -DaveM + * + * nfs symlink handling code + */ + +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs_fs.h> +#include <linux/pagemap.h> +#include <linux/stat.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/namei.h> + +/* Symlink caching in the page cache is even more simplistic + * and straight-forward than readdir caching. + */ + +static int nfs_symlink_filler(struct inode *inode, struct page *page) +{ + int error; + + error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE); + if (error < 0) + goto error; + SetPageUptodate(page); + unlock_page(page); + return 0; + +error: + SetPageError(page); + unlock_page(page); + return -EIO; +} + +static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + struct page *page; + void *err; + + err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping)); + if (err) + goto read_failed; + page = read_cache_page(&inode->i_data, 0, + (filler_t *)nfs_symlink_filler, inode); + if (IS_ERR(page)) { + err = page; + goto read_failed; + } + nd_set_link(nd, kmap(page)); + return page; + +read_failed: + nd_set_link(nd, err); + return NULL; +} + +/* + * symlinks can't do much... + */ +const struct inode_operations nfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = nfs_follow_link, + .put_link = page_put_link, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c new file mode 100644 index 00000000000..ad4d2e787b2 --- /dev/null +++ b/fs/nfs/sysctl.c @@ -0,0 +1,90 @@ +/* + * linux/fs/nfs/sysctl.c + * + * Sysctl interface to NFS parameters + */ +#include <linux/types.h> +#include <linux/linkage.h> +#include <linux/ctype.h> +#include <linux/fs.h> +#include <linux/sysctl.h> +#include <linux/module.h> +#include <linux/nfs4.h> +#include <linux/nfs_idmap.h> +#include <linux/nfs_fs.h> + +#include "callback.h" + +#ifdef CONFIG_NFS_V4 +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +#endif +static struct ctl_table_header *nfs_callback_sysctl_table; + +static ctl_table nfs_cb_sysctls[] = { +#ifdef CONFIG_NFS_V4 + { + .procname = "nfs_callback_tcpport", + .data = &nfs_callback_set_tcpport, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (int *)&nfs_set_port_min, + .extra2 = (int *)&nfs_set_port_max, + }, + { + .procname = "idmap_cache_timeout", + .data = &nfs_idmap_cache_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, +#endif + { + .procname = "nfs_mountpoint_timeout", + .data = &nfs_mountpoint_expiry_timeout, + .maxlen = sizeof(nfs_mountpoint_expiry_timeout), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "nfs_congestion_kb", + .data = &nfs_congestion_kb, + .maxlen = sizeof(nfs_congestion_kb), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static ctl_table nfs_cb_sysctl_dir[] = { + { + .procname = "nfs", + .mode = 0555, + .child = nfs_cb_sysctls, + }, + { } +}; + +static ctl_table nfs_cb_sysctl_root[] = { + { + .procname = "fs", + .mode = 0555, + .child = nfs_cb_sysctl_dir, + }, + { } +}; + +int nfs_register_sysctl(void) +{ + nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root); + if (nfs_callback_sysctl_table == NULL) + return -ENOMEM; + return 0; +} + +void nfs_unregister_sysctl(void) +{ + unregister_sysctl_table(nfs_callback_sysctl_table); + nfs_callback_sysctl_table = NULL; +} diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c new file mode 100644 index 00000000000..6da3d3ff6ed --- /dev/null +++ b/fs/nfs/unlink.c @@ -0,0 +1,303 @@ +/* + * linux/fs/nfs/unlink.c + * + * nfs sillydelete handling + * + */ + +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/dcache.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> +#include <linux/sched.h> +#include <linux/wait.h> + +#include "internal.h" +#include "nfs4_fs.h" + +struct nfs_unlinkdata { + struct hlist_node list; + struct nfs_removeargs args; + struct nfs_removeres res; + struct inode *dir; + struct rpc_cred *cred; +}; + +/** + * nfs_free_unlinkdata - release data from a sillydelete operation. + * @data: pointer to unlink structure. + */ +static void +nfs_free_unlinkdata(struct nfs_unlinkdata *data) +{ + iput(data->dir); + put_rpccred(data->cred); + kfree(data->args.name.name); + kfree(data); +} + +#define NAME_ALLOC_LEN(len) ((len+16) & ~15) +/** + * nfs_copy_dname - copy dentry name to data structure + * @dentry: pointer to dentry + * @data: nfs_unlinkdata + */ +static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data) +{ + char *str; + int len = dentry->d_name.len; + + str = kmemdup(dentry->d_name.name, NAME_ALLOC_LEN(len), GFP_KERNEL); + if (!str) + return -ENOMEM; + data->args.name.len = len; + data->args.name.name = str; + return 0; +} + +static void nfs_free_dname(struct nfs_unlinkdata *data) +{ + kfree(data->args.name.name); + data->args.name.name = NULL; + data->args.name.len = 0; +} + +static void nfs_dec_sillycount(struct inode *dir) +{ + struct nfs_inode *nfsi = NFS_I(dir); + if (atomic_dec_return(&nfsi->silly_count) == 1) + wake_up(&nfsi->waitqueue); +} + +/** + * nfs_async_unlink_done - Sillydelete post-processing + * @task: rpc_task of the sillydelete + * + * Do the directory attribute update. + */ +static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) +{ + struct nfs_unlinkdata *data = calldata; + struct inode *dir = data->dir; + + if (!NFS_PROTO(dir)->unlink_done(task, dir)) + nfs_restart_rpc(task, NFS_SERVER(dir)->nfs_client); +} + +/** + * nfs_async_unlink_release - Release the sillydelete data. + * @task: rpc_task of the sillydelete + * + * We need to call nfs_put_unlinkdata as a 'tk_release' task since the + * rpc_task would be freed too. + */ +static void nfs_async_unlink_release(void *calldata) +{ + struct nfs_unlinkdata *data = calldata; + struct super_block *sb = data->dir->i_sb; + + nfs_dec_sillycount(data->dir); + nfs_free_unlinkdata(data); + nfs_sb_deactive(sb); +} + +#if defined(CONFIG_NFS_V4_1) +void nfs_unlink_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_unlinkdata *data = calldata; + struct nfs_server *server = NFS_SERVER(data->dir); + + if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + +static const struct rpc_call_ops nfs_unlink_ops = { + .rpc_call_done = nfs_async_unlink_done, + .rpc_release = nfs_async_unlink_release, +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_unlink_prepare, +#endif /* CONFIG_NFS_V4_1 */ +}; + +static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) +{ + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_message = &msg, + .callback_ops = &nfs_unlink_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + struct dentry *alias; + + alias = d_lookup(parent, &data->args.name); + if (alias != NULL) { + int ret = 0; + + /* + * Hey, we raced with lookup... See if we need to transfer + * the sillyrename information to the aliased dentry. + */ + nfs_free_dname(data); + spin_lock(&alias->d_lock); + if (alias->d_inode != NULL && + !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { + alias->d_fsdata = data; + alias->d_flags |= DCACHE_NFSFS_RENAMED; + ret = 1; + } + spin_unlock(&alias->d_lock); + nfs_dec_sillycount(dir); + dput(alias); + return ret; + } + data->dir = igrab(dir); + if (!data->dir) { + nfs_dec_sillycount(dir); + return 0; + } + nfs_sb_active(dir->i_sb); + data->args.fh = NFS_FH(dir); + nfs_fattr_init(&data->res.dir_attr); + + NFS_PROTO(dir)->unlink_setup(&msg, dir); + + task_setup_data.rpc_client = NFS_CLIENT(dir); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); + return 1; +} + +static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +{ + struct dentry *parent; + struct inode *dir; + int ret = 0; + + + parent = dget_parent(dentry); + if (parent == NULL) + goto out_free; + dir = parent->d_inode; + if (nfs_copy_dname(dentry, data) != 0) + goto out_dput; + /* Non-exclusive lock protects against concurrent lookup() calls */ + spin_lock(&dir->i_lock); + if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { + /* Deferred delete */ + hlist_add_head(&data->list, &NFS_I(dir)->silly_list); + spin_unlock(&dir->i_lock); + ret = 1; + goto out_dput; + } + spin_unlock(&dir->i_lock); + ret = nfs_do_call_unlink(parent, dir, data); +out_dput: + dput(parent); +out_free: + return ret; +} + +void nfs_block_sillyrename(struct dentry *dentry) +{ + struct nfs_inode *nfsi = NFS_I(dentry->d_inode); + + wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1); +} + +void nfs_unblock_sillyrename(struct dentry *dentry) +{ + struct inode *dir = dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(dir); + struct nfs_unlinkdata *data; + + atomic_inc(&nfsi->silly_count); + spin_lock(&dir->i_lock); + while (!hlist_empty(&nfsi->silly_list)) { + if (!atomic_inc_not_zero(&nfsi->silly_count)) + break; + data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list); + hlist_del(&data->list); + spin_unlock(&dir->i_lock); + if (nfs_do_call_unlink(dentry, dir, data) == 0) + nfs_free_unlinkdata(data); + spin_lock(&dir->i_lock); + } + spin_unlock(&dir->i_lock); +} + +/** + * nfs_async_unlink - asynchronous unlinking of a file + * @dir: parent directory of dentry + * @dentry: dentry to unlink + */ +int +nfs_async_unlink(struct inode *dir, struct dentry *dentry) +{ + struct nfs_unlinkdata *data; + int status = -ENOMEM; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + goto out; + + data->cred = rpc_lookup_cred(); + if (IS_ERR(data->cred)) { + status = PTR_ERR(data->cred); + goto out_free; + } + data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + + status = -EBUSY; + spin_lock(&dentry->d_lock); + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + goto out_unlock; + dentry->d_flags |= DCACHE_NFSFS_RENAMED; + dentry->d_fsdata = data; + spin_unlock(&dentry->d_lock); + return 0; +out_unlock: + spin_unlock(&dentry->d_lock); + put_rpccred(data->cred); +out_free: + kfree(data); +out: + return status; +} + +/** + * nfs_complete_unlink - Initialize completion of the sillydelete + * @dentry: dentry to delete + * @inode: inode + * + * Since we're most likely to be called by dentry_iput(), we + * only use the dentry to find the sillydelete. We then copy the name + * into the qstr. + */ +void +nfs_complete_unlink(struct dentry *dentry, struct inode *inode) +{ + struct nfs_unlinkdata *data = NULL; + + spin_lock(&dentry->d_lock); + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + data = dentry->d_fsdata; + } + spin_unlock(&dentry->d_lock); + + if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) + nfs_free_unlinkdata(data); +} diff --git a/fs/nfs/write.c b/fs/nfs/write.c new file mode 100644 index 00000000000..d63d964a039 --- /dev/null +++ b/fs/nfs/write.c @@ -0,0 +1,1677 @@ +/* + * linux/fs/nfs/write.c + * + * Write file data over NFS. + * + * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/file.h> +#include <linux/writeback.h> +#include <linux/swap.h> +#include <linux/migrate.h> + +#include <linux/sunrpc/clnt.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs_page.h> +#include <linux/backing-dev.h> + +#include <asm/uaccess.h> + +#include "delegation.h" +#include "internal.h" +#include "iostat.h" +#include "nfs4_fs.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + +#define MIN_POOL_WRITE (32) +#define MIN_POOL_COMMIT (4) + +/* + * Local function declarations + */ +static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, + struct inode *inode, int ioflags); +static void nfs_redirty_request(struct nfs_page *req); +static const struct rpc_call_ops nfs_write_partial_ops; +static const struct rpc_call_ops nfs_write_full_ops; +static const struct rpc_call_ops nfs_commit_ops; + +static struct kmem_cache *nfs_wdata_cachep; +static mempool_t *nfs_wdata_mempool; +static mempool_t *nfs_commit_mempool; + +struct nfs_write_data *nfs_commitdata_alloc(void) +{ + struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + } + return p; +} + +void nfs_commit_free(struct nfs_write_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_commit_mempool); +} + +struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +{ + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + p->npages = pagecount; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; + else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!p->pagevec) { + mempool_free(p, nfs_wdata_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_writedata_free(struct nfs_write_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_wdata_mempool); +} + +static void nfs_writedata_release(struct nfs_write_data *wdata) +{ + put_nfs_open_context(wdata->args.context); + nfs_writedata_free(wdata); +} + +static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) +{ + ctx->error = error; + smp_wmb(); + set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); +} + +static struct nfs_page *nfs_page_find_request_locked(struct page *page) +{ + struct nfs_page *req = NULL; + + if (PagePrivate(page)) { + req = (struct nfs_page *)page_private(page); + if (req != NULL) + kref_get(&req->wb_kref); + } + return req; +} + +static struct nfs_page *nfs_page_find_request(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct nfs_page *req = NULL; + + spin_lock(&inode->i_lock); + req = nfs_page_find_request_locked(page); + spin_unlock(&inode->i_lock); + return req; +} + +/* Adjust the file length if we're writing beyond the end */ +static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) +{ + struct inode *inode = page->mapping->host; + loff_t end, i_size; + pgoff_t end_index; + + spin_lock(&inode->i_lock); + i_size = i_size_read(inode); + end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + if (i_size > 0 && page->index < end_index) + goto out; + end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); + if (i_size >= end) + goto out; + i_size_write(inode, end); + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); +out: + spin_unlock(&inode->i_lock); +} + +/* A writeback failed: mark the page as bad, and invalidate the page cache */ +static void nfs_set_pageerror(struct page *page) +{ + SetPageError(page); + nfs_zap_mapping(page->mapping->host, page->mapping); +} + +/* We can set the PG_uptodate flag if we see that a write request + * covers the full page. + */ +static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) +{ + if (PageUptodate(page)) + return; + if (base != 0) + return; + if (count != nfs_page_length(page)) + return; + SetPageUptodate(page); +} + +static int wb_priority(struct writeback_control *wbc) +{ + if (wbc->for_reclaim) + return FLUSH_HIGHPRI | FLUSH_STABLE; + if (wbc->for_kupdate || wbc->for_background) + return FLUSH_LOWPRI; + return 0; +} + +/* + * NFS congestion control + */ + +int nfs_congestion_kb; + +#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10)) +#define NFS_CONGESTION_OFF_THRESH \ + (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) + +static int nfs_set_page_writeback(struct page *page) +{ + int ret = test_set_page_writeback(page); + + if (!ret) { + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + if (atomic_long_inc_return(&nfss->writeback) > + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); + } + } + return ret; +} + +static void nfs_end_page_writeback(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + end_page_writeback(page); + if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); +} + +static struct nfs_page *nfs_find_and_lock_request(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct nfs_page *req; + int ret; + + spin_lock(&inode->i_lock); + for (;;) { + req = nfs_page_find_request_locked(page); + if (req == NULL) + break; + if (nfs_set_page_tag_locked(req)) + break; + /* Note: If we hold the page lock, as is the case in nfs_writepage, + * then the call to nfs_set_page_tag_locked() will always + * succeed provided that someone hasn't already marked the + * request as dirty (in which case we don't care). + */ + spin_unlock(&inode->i_lock); + ret = nfs_wait_on_request(req); + nfs_release_request(req); + if (ret != 0) + return ERR_PTR(ret); + spin_lock(&inode->i_lock); + } + spin_unlock(&inode->i_lock); + return req; +} + +/* + * Find an associated nfs write request, and prepare to flush it out + * May return an error if the user signalled nfs_wait_on_request(). + */ +static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, + struct page *page) +{ + struct nfs_page *req; + int ret = 0; + + req = nfs_find_and_lock_request(page); + if (!req) + goto out; + ret = PTR_ERR(req); + if (IS_ERR(req)) + goto out; + + ret = nfs_set_page_writeback(page); + BUG_ON(ret != 0); + BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); + + if (!nfs_pageio_add_request(pgio, req)) { + nfs_redirty_request(req); + ret = pgio->pg_error; + } +out: + return ret; +} + +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) +{ + struct inode *inode = page->mapping->host; + + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + + nfs_pageio_cond_complete(pgio, page->index); + return nfs_page_async_flush(pgio, page); +} + +/* + * Write an mmapped page to the server. + */ +static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +{ + struct nfs_pageio_descriptor pgio; + int err; + + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); + err = nfs_do_writepage(page, wbc, &pgio); + nfs_pageio_complete(&pgio); + if (err < 0) + return err; + if (pgio.pg_error < 0) + return pgio.pg_error; + return 0; +} + +int nfs_writepage(struct page *page, struct writeback_control *wbc) +{ + int ret; + + ret = nfs_writepage_locked(page, wbc); + unlock_page(page); + return ret; +} + +static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data) +{ + int ret; + + ret = nfs_do_writepage(page, wbc, data); + unlock_page(page); + return ret; +} + +int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + unsigned long *bitlock = &NFS_I(inode)->flags; + struct nfs_pageio_descriptor pgio; + int err; + + /* Stop dirtying of new pages while we sync */ + err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (err) + goto out_err; + + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + + nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); + err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); + nfs_pageio_complete(&pgio); + + clear_bit_unlock(NFS_INO_FLUSHING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_FLUSHING); + + if (err < 0) + goto out_err; + err = pgio.pg_error; + if (err < 0) + goto out_err; + return 0; +out_err: + return err; +} + +/* + * Insert a write request into an inode + */ +static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int error; + + error = radix_tree_preload(GFP_NOFS); + if (error != 0) + goto out; + + /* Lock the request! */ + nfs_lock_request_dontget(req); + + spin_lock(&inode->i_lock); + error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); + BUG_ON(error); + if (!nfsi->npages) { + igrab(inode); + if (nfs_have_delegation(inode, FMODE_WRITE)) + nfsi->change_attr++; + } + SetPagePrivate(req->wb_page); + set_page_private(req->wb_page, (unsigned long)req); + nfsi->npages++; + kref_get(&req->wb_kref); + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, + NFS_PAGE_TAG_LOCKED); + spin_unlock(&inode->i_lock); + radix_tree_preload_end(); +out: + return error; +} + +/* + * Remove a write request from an inode + */ +static void nfs_inode_remove_request(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + BUG_ON (!NFS_WBACK_BUSY(req)); + + spin_lock(&inode->i_lock); + set_page_private(req->wb_page, 0); + ClearPagePrivate(req->wb_page); + radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); + nfsi->npages--; + if (!nfsi->npages) { + spin_unlock(&inode->i_lock); + iput(inode); + } else + spin_unlock(&inode->i_lock); + nfs_clear_request(req); + nfs_release_request(req); +} + +static void +nfs_mark_request_dirty(struct nfs_page *req) +{ + __set_page_dirty_nobuffers(req->wb_page); +} + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +/* + * Add a request to the inode's commit list. + */ +static void +nfs_mark_request_commit(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + set_bit(PG_CLEAN, &(req)->wb_flags); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, + NFS_PAGE_TAG_COMMIT); + spin_unlock(&inode->i_lock); + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); +} + +static int +nfs_clear_request_commit(struct nfs_page *req) +{ + struct page *page = req->wb_page; + + if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { + dec_zone_page_state(page, NR_UNSTABLE_NFS); + dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + return 1; + } + return 0; +} + +static inline +int nfs_write_need_commit(struct nfs_write_data *data) +{ + return data->verf.committed != NFS_FILE_SYNC; +} + +static inline +int nfs_reschedule_unstable_write(struct nfs_page *req) +{ + if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_mark_request_commit(req); + return 1; + } + if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { + nfs_mark_request_dirty(req); + return 1; + } + return 0; +} +#else +static inline void +nfs_mark_request_commit(struct nfs_page *req) +{ +} + +static inline int +nfs_clear_request_commit(struct nfs_page *req) +{ + return 0; +} + +static inline +int nfs_write_need_commit(struct nfs_write_data *data) +{ + return 0; +} + +static inline +int nfs_reschedule_unstable_write(struct nfs_page *req) +{ + return 0; +} +#endif + +/* + * Wait for a request to complete. + * + * Interruptible by fatal signals only. + */ +static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *req; + pgoff_t idx_end, next; + unsigned int res = 0; + int error; + + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + next = idx_start; + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { + if (req->wb_index > idx_end) + break; + + next = req->wb_index + 1; + BUG_ON(!NFS_WBACK_BUSY(req)); + + kref_get(&req->wb_kref); + spin_unlock(&inode->i_lock); + error = nfs_wait_on_request(req); + nfs_release_request(req); + spin_lock(&inode->i_lock); + if (error < 0) + return error; + res++; + } + return res; +} + +static void nfs_cancel_commit_list(struct list_head *head) +{ + struct nfs_page *req; + + while(!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_clear_request_commit(req); + nfs_inode_remove_request(req); + nfs_unlock_request(req); + } +} + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static int +nfs_need_commit(struct nfs_inode *nfsi) +{ + return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); +} + +/* + * nfs_scan_commit - Scan an inode for commit requests + * @inode: NFS inode to scan + * @dst: destination list + * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves requests from the inode's 'commit' request list. + * The requests are *not* checked to ensure that they form a contiguous set. + */ +static int +nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (!nfs_need_commit(nfsi)) + return 0; + + return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); +} +#else +static inline int nfs_need_commit(struct nfs_inode *nfsi) +{ + return 0; +} + +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) +{ + return 0; +} +#endif + +/* + * Search for an existing write request, and attempt to update + * it to reflect a new dirty region on a given page. + * + * If the attempt fails, then the existing request is flushed out + * to disk. + */ +static struct nfs_page *nfs_try_to_update_request(struct inode *inode, + struct page *page, + unsigned int offset, + unsigned int bytes) +{ + struct nfs_page *req; + unsigned int rqend; + unsigned int end; + int error; + + if (!PagePrivate(page)) + return NULL; + + end = offset + bytes; + spin_lock(&inode->i_lock); + + for (;;) { + req = nfs_page_find_request_locked(page); + if (req == NULL) + goto out_unlock; + + rqend = req->wb_offset + req->wb_bytes; + /* + * Tell the caller to flush out the request if + * the offsets are non-contiguous. + * Note: nfs_flush_incompatible() will already + * have flushed out requests having wrong owners. + */ + if (offset > rqend + || end < req->wb_offset) + goto out_flushme; + + if (nfs_set_page_tag_locked(req)) + break; + + /* The request is locked, so wait and then retry */ + spin_unlock(&inode->i_lock); + error = nfs_wait_on_request(req); + nfs_release_request(req); + if (error != 0) + goto out_err; + spin_lock(&inode->i_lock); + } + + if (nfs_clear_request_commit(req)) + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT); + + /* Okay, the request matches. Update the region */ + if (offset < req->wb_offset) { + req->wb_offset = offset; + req->wb_pgbase = offset; + } + if (end > rqend) + req->wb_bytes = end - req->wb_offset; + else + req->wb_bytes = rqend - req->wb_offset; +out_unlock: + spin_unlock(&inode->i_lock); + return req; +out_flushme: + spin_unlock(&inode->i_lock); + nfs_release_request(req); + error = nfs_wb_page(inode, page); +out_err: + return ERR_PTR(error); +} + +/* + * Try to update an existing write request, or create one if there is none. + * + * Note: Should always be called with the Page Lock held to prevent races + * if we have to add a new request. Also assumes that the caller has + * already called nfs_flush_incompatible() if necessary. + */ +static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, + struct page *page, unsigned int offset, unsigned int bytes) +{ + struct inode *inode = page->mapping->host; + struct nfs_page *req; + int error; + + req = nfs_try_to_update_request(inode, page, offset, bytes); + if (req != NULL) + goto out; + req = nfs_create_request(ctx, inode, page, offset, bytes); + if (IS_ERR(req)) + goto out; + error = nfs_inode_add_request(inode, req); + if (error != 0) { + nfs_release_request(req); + req = ERR_PTR(error); + } +out: + return req; +} + +static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_page *req; + + req = nfs_setup_write_request(ctx, page, offset, count); + if (IS_ERR(req)) + return PTR_ERR(req); + /* Update file length */ + nfs_grow_file(page, offset, count); + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_clear_page_tag_locked(req); + return 0; +} + +int nfs_flush_incompatible(struct file *file, struct page *page) +{ + struct nfs_open_context *ctx = nfs_file_open_context(file); + struct nfs_page *req; + int do_flush, status; + /* + * Look for a request corresponding to this page. If there + * is one, and it belongs to another file, we flush it out + * before we try to copy anything into the page. Do this + * due to the lack of an ACCESS-type call in NFSv2. + * Also do the same if we find a request from an existing + * dropped page. + */ + do { + req = nfs_page_find_request(page); + if (req == NULL) + return 0; + do_flush = req->wb_page != page || req->wb_context != ctx; + nfs_release_request(req); + if (!do_flush) + return 0; + status = nfs_wb_page(page->mapping->host, page); + } while (status == 0); + return status; +} + +/* + * If the page cache is marked as unsafe or invalid, then we can't rely on + * the PageUptodate() flag. In this case, we will need to turn off + * write optimisations that depend on the page contents being correct. + */ +static int nfs_write_pageuptodate(struct page *page, struct inode *inode) +{ + return PageUptodate(page) && + !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); +} + +/* + * Update and possibly write a cached page of an NFS file. + * + * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad + * things with a page scheduled for an RPC call (e.g. invalidate it). + */ +int nfs_updatepage(struct file *file, struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_open_context *ctx = nfs_file_open_context(file); + struct inode *inode = page->mapping->host; + int status = 0; + + nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); + + dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, count, + (long long)(page_offset(page) + offset)); + + /* If we're not using byte range locks, and we know the page + * is up to date, it may be more efficient to extend the write + * to cover the entire page in order to avoid fragmentation + * inefficiencies. + */ + if (nfs_write_pageuptodate(page, inode) && + inode->i_flock == NULL && + !(file->f_flags & O_DSYNC)) { + count = max(count + offset, nfs_page_length(page)); + offset = 0; + } + + status = nfs_writepage_setup(ctx, page, offset, count); + if (status < 0) + nfs_set_pageerror(page); + else + __set_page_dirty_nobuffers(page); + + dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", + status, (long long)i_size_read(inode)); + return status; +} + +static void nfs_writepage_release(struct nfs_page *req) +{ + + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else + nfs_end_page_writeback(req->wb_page); + nfs_clear_page_tag_locked(req); +} + +static int flush_task_priority(int how) +{ + switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { + case FLUSH_HIGHPRI: + return RPC_PRIORITY_HIGH; + case FLUSH_LOWPRI: + return RPC_PRIORITY_LOW; + } + return RPC_PRIORITY_NORMAL; +} + +/* + * Set up the argument/result storage required for the RPC call. + */ +static int nfs_write_rpcsetup(struct nfs_page *req, + struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, + unsigned int count, unsigned int offset, + int how) +{ + struct inode *inode = req->wb_context->path.dentry->d_inode; + int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + int priority = flush_task_priority(how); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = req->wb_context->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .task = &data->task, + .rpc_message = &msg, + .callback_ops = call_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = flags, + .priority = priority, + }; + + /* Set up the RPC argument and reply structs + * NB: take care not to mess about with data->commit et al. */ + + data->req = req; + data->inode = inode = req->wb_context->path.dentry->d_inode; + data->cred = msg.rpc_cred; + + data->args.fh = NFS_FH(inode); + data->args.offset = req_offset(req) + offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.pages = data->pagevec; + data->args.count = count; + data->args.context = get_nfs_open_context(req->wb_context); + data->args.stable = NFS_UNSTABLE; + if (how & FLUSH_STABLE) { + data->args.stable = NFS_DATA_SYNC; + if (!nfs_need_commit(NFS_I(inode))) + data->args.stable = NFS_FILE_SYNC; + } + + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); + + /* Set up the initial task struct. */ + NFS_PROTO(inode)->write_setup(data, &msg); + + dprintk("NFS: %5u initiated write call " + "(req %s/%lld, %u bytes @ offset %llu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + count, + (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; +} + +/* If a nfs_flush_* function fails, it should remove reqs from @head and + * call this on each, which will prepare them to be retried on next + * writeback using standard nfs. + */ +static void nfs_redirty_request(struct nfs_page *req) +{ + nfs_mark_request_dirty(req); + nfs_end_page_writeback(req->wb_page); + nfs_clear_page_tag_locked(req); +} + +/* + * Generate multiple small requests to write out a single + * contiguous dirty area on one page. + */ +static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) +{ + struct nfs_page *req = nfs_list_entry(head->next); + struct page *page = req->wb_page; + struct nfs_write_data *data; + size_t wsize = NFS_SERVER(inode)->wsize, nbytes; + unsigned int offset; + int requests = 0; + int ret = 0; + LIST_HEAD(list); + + nfs_list_remove_request(req); + + nbytes = count; + do { + size_t len = min(nbytes, wsize); + + data = nfs_writedata_alloc(1); + if (!data) + goto out_bad; + list_add(&data->pages, &list); + requests++; + nbytes -= len; + } while (nbytes != 0); + atomic_set(&req->wb_complete, requests); + + ClearPageError(page); + offset = 0; + nbytes = count; + do { + int ret2; + + data = list_entry(list.next, struct nfs_write_data, pages); + list_del_init(&data->pages); + + data->pagevec[0] = page; + + if (nbytes < wsize) + wsize = nbytes; + ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + wsize, offset, how); + if (ret == 0) + ret = ret2; + offset += wsize; + nbytes -= wsize; + } while (nbytes != 0); + + return ret; + +out_bad: + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_release(data); + } + nfs_redirty_request(req); + return -ENOMEM; +} + +/* + * Create an RPC task for the given write request and kick it. + * The page must have been locked by the caller. + * + * It may happen that the page we're passed is not marked dirty. + * This is the case if nfs_updatepage detects a conflicting request + * that has been written but not committed. + */ +static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) +{ + struct nfs_page *req; + struct page **pages; + struct nfs_write_data *data; + + data = nfs_writedata_alloc(npages); + if (!data) + goto out_bad; + + pages = data->pagevec; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + ClearPageError(req->wb_page); + *pages++ = req->wb_page; + } + req = nfs_list_entry(data->pages.next); + + /* Set up the argument struct */ + return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); + out_bad: + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_redirty_request(req); + } + return -ENOMEM; +} + +static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags) +{ + size_t wsize = NFS_SERVER(inode)->wsize; + + if (wsize < PAGE_CACHE_SIZE) + nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); + else + nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); +} + +/* + * Handle a write reply that flushed part of a page. + */ +static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + + dprintk("NFS: %5u write(%s/%lld %d@%lld)", + task->tk_pid, + data->req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long) + NFS_FILEID(data->req->wb_context->path.dentry->d_inode), + data->req->wb_bytes, (long long)req_offset(data->req)); + + nfs_writeback_done(task, data); +} + +static void nfs_writeback_release_partial(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + int status = data->task.tk_status; + + if (status < 0) { + nfs_set_pageerror(page); + nfs_context_set_write_error(req->wb_context, status); + dprintk(", error = %d\n", status); + goto out; + } + + if (nfs_write_need_commit(data)) { + struct inode *inode = page->mapping->host; + + spin_lock(&inode->i_lock); + if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { + /* Do nothing we need to resend the writes */ + } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + dprintk(" defer commit\n"); + } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { + set_bit(PG_NEED_RESCHED, &req->wb_flags); + clear_bit(PG_NEED_COMMIT, &req->wb_flags); + dprintk(" server reboot detected\n"); + } + spin_unlock(&inode->i_lock); + } else + dprintk(" OK\n"); + +out: + if (atomic_dec_and_test(&req->wb_complete)) + nfs_writepage_release(req); + nfs_writedata_release(calldata); +} + +#if defined(CONFIG_NFS_V4_1) +void nfs_write_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client; + + if (nfs4_setup_sequence(clp, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + +static const struct rpc_call_ops nfs_write_partial_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs_writeback_done_partial, + .rpc_release = nfs_writeback_release_partial, +}; + +/* + * Handle a write reply that flushes a whole page. + * + * FIXME: There is an inherent race with invalidate_inode_pages and + * writebacks since the page->count is kept > 1 for as long + * as the page has a write request pending. + */ +static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + + nfs_writeback_done(task, data); +} + +static void nfs_writeback_release_full(void *calldata) +{ + struct nfs_write_data *data = calldata; + int status = data->task.tk_status; + + /* Update attributes as result of writeback. */ + while (!list_empty(&data->pages)) { + struct nfs_page *req = nfs_list_entry(data->pages.next); + struct page *page = req->wb_page; + + nfs_list_remove_request(req); + + dprintk("NFS: %5u write (%s/%lld %d@%lld)", + data->task.tk_pid, + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + + if (status < 0) { + nfs_set_pageerror(page); + nfs_context_set_write_error(req->wb_context, status); + dprintk(", error = %d\n", status); + goto remove_request; + } + + if (nfs_write_need_commit(data)) { + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + nfs_mark_request_commit(req); + nfs_end_page_writeback(page); + dprintk(" marked for commit\n"); + goto next; + } + dprintk(" OK\n"); +remove_request: + nfs_end_page_writeback(page); + nfs_inode_remove_request(req); + next: + nfs_clear_page_tag_locked(req); + } + nfs_writedata_release(calldata); +} + +static const struct rpc_call_ops nfs_write_full_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs_writeback_done_full, + .rpc_release = nfs_writeback_release_full, +}; + + +/* + * This function is called when the WRITE call is complete. + */ +int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +{ + struct nfs_writeargs *argp = &data->args; + struct nfs_writeres *resp = &data->res; + struct nfs_server *server = NFS_SERVER(data->inode); + int status; + + dprintk("NFS: %5u nfs_writeback_done (status %d)\n", + task->tk_pid, task->tk_status); + + /* + * ->write_done will attempt to use post-op attributes to detect + * conflicting writes by other clients. A strict interpretation + * of close-to-open would allow us to continue caching even if + * another writer had changed the file, but some applications + * depend on tighter cache coherency when writing. + */ + status = NFS_PROTO(data->inode)->write_done(task, data); + if (status != 0) + return status; + nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (resp->verf->committed < argp->stable && task->tk_status >= 0) { + /* We tried a write call, but the server did not + * commit data to stable storage even though we + * requested it. + * Note: There is a known bug in Tru64 < 5.0 in which + * the server reports NFS_DATA_SYNC, but performs + * NFS_FILE_SYNC. We therefore implement this checking + * as a dprintk() in order to avoid filling syslog. + */ + static unsigned long complain; + + if (time_before(complain, jiffies)) { + dprintk("NFS: faulty NFS server %s:" + " (committed = %d) != (stable = %d)\n", + server->nfs_client->cl_hostname, + resp->verf->committed, argp->stable); + complain = jiffies + 300 * HZ; + } + } +#endif + /* Is this a short write? */ + if (task->tk_status >= 0 && resp->count < argp->count) { + static unsigned long complain; + + nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + + /* Has the server at least made some progress? */ + if (resp->count != 0) { + /* Was this an NFSv2 write or an NFSv3 stable write? */ + if (resp->verf->committed != NFS_UNSTABLE) { + /* Resend from where the server left off */ + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + } else { + /* Resend as a stable write in order to avoid + * headaches in the case of a server crash. + */ + argp->stable = NFS_FILE_SYNC; + } + nfs_restart_rpc(task, server->nfs_client); + return -EAGAIN; + } + if (time_before(complain, jiffies)) { + printk(KERN_WARNING + "NFS: Server wrote zero bytes, expected %u.\n", + argp->count); + complain = jiffies + 300 * HZ; + } + /* Can't do anything about it except throw an error. */ + task->tk_status = -EIO; + } + return 0; +} + + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static void nfs_commitdata_release(void *data) +{ + struct nfs_write_data *wdata = data; + + put_nfs_open_context(wdata->args.context); + nfs_commit_free(wdata); +} + +/* + * Set up the argument/result storage required for the RPC call. + */ +static int nfs_commit_rpcsetup(struct list_head *head, + struct nfs_write_data *data, + int how) +{ + struct nfs_page *first = nfs_list_entry(head->next); + struct inode *inode = first->wb_context->path.dentry->d_inode; + int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + int priority = flush_task_priority(how); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = first->wb_context->cred, + }; + struct rpc_task_setup task_setup_data = { + .task = &data->task, + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = &nfs_commit_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = flags, + .priority = priority, + }; + + /* Set up the RPC argument and reply structs + * NB: take care not to mess about with data->commit et al. */ + + list_splice_init(head, &data->pages); + + data->inode = inode; + data->cred = msg.rpc_cred; + + data->args.fh = NFS_FH(data->inode); + /* Note: we always request a commit of the entire inode */ + data->args.offset = 0; + data->args.count = 0; + data->args.context = get_nfs_open_context(first->wb_context); + data->res.count = 0; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); + + /* Set up the initial task struct. */ + NFS_PROTO(inode)->commit_setup(data, &msg); + + dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; +} + +/* + * Commit dirty pages + */ +static int +nfs_commit_list(struct inode *inode, struct list_head *head, int how) +{ + struct nfs_write_data *data; + struct nfs_page *req; + + data = nfs_commitdata_alloc(); + + if (!data) + goto out_bad; + + /* Set up the argument struct */ + return nfs_commit_rpcsetup(head, data, how); + out_bad: + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_mark_request_commit(req); + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + BDI_RECLAIMABLE); + nfs_clear_page_tag_locked(req); + } + return -ENOMEM; +} + +/* + * COMMIT call returned + */ +static void nfs_commit_done(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + + dprintk("NFS: %5u nfs_commit_done (status %d)\n", + task->tk_pid, task->tk_status); + + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; +} + +static void nfs_commit_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_page *req; + int status = data->task.tk_status; + + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + nfs_clear_request_commit(req); + + dprintk("NFS: commit (%s/%lld %d@%lld)", + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + if (status < 0) { + nfs_context_set_write_error(req->wb_context, status); + nfs_inode_remove_request(req); + dprintk(", error = %d\n", status); + goto next; + } + + /* Okay, COMMIT succeeded, apparently. Check the verifier + * returned by the server against all stored verfs. */ + if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { + /* We have a match */ + nfs_inode_remove_request(req); + dprintk(" OK\n"); + goto next; + } + /* We have a mismatch. Write the page again */ + dprintk(" mismatch\n"); + nfs_mark_request_dirty(req); + next: + nfs_clear_page_tag_locked(req); + } + nfs_commitdata_release(calldata); +} + +static const struct rpc_call_ops nfs_commit_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ + .rpc_call_done = nfs_commit_done, + .rpc_release = nfs_commit_release, +}; + +int nfs_commit_inode(struct inode *inode, int how) +{ + LIST_HEAD(head); + int res; + + spin_lock(&inode->i_lock); + res = nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&inode->i_lock); + if (res) { + int error = nfs_commit_list(inode, &head, how); + if (error < 0) + return error; + } + return res; +} +#else +static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +{ + return 0; +} +#endif + +long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) +{ + struct inode *inode = mapping->host; + pgoff_t idx_start, idx_end; + unsigned int npages = 0; + LIST_HEAD(head); + int nocommit = how & FLUSH_NOCOMMIT; + long pages, ret; + + /* FIXME */ + if (wbc->range_cyclic) + idx_start = 0; + else { + idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; + idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (idx_end > idx_start) { + pgoff_t l_npages = 1 + idx_end - idx_start; + npages = l_npages; + if (sizeof(npages) != sizeof(l_npages) && + (pgoff_t)npages != l_npages) + npages = 0; + } + } + how &= ~FLUSH_NOCOMMIT; + spin_lock(&inode->i_lock); + do { + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + if (ret != 0) + continue; + if (nocommit) + break; + pages = nfs_scan_commit(inode, &head, idx_start, npages); + if (pages == 0) + break; + if (how & FLUSH_INVALIDATE) { + spin_unlock(&inode->i_lock); + nfs_cancel_commit_list(&head); + ret = pages; + spin_lock(&inode->i_lock); + continue; + } + pages += nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&inode->i_lock); + ret = nfs_commit_list(inode, &head, how); + spin_lock(&inode->i_lock); + + } while (ret >= 0); + spin_unlock(&inode->i_lock); + return ret; +} + +static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) +{ + int ret; + + ret = nfs_writepages(mapping, wbc); + if (ret < 0) + goto out; + ret = nfs_sync_mapping_wait(mapping, wbc, how); + if (ret < 0) + goto out; + return 0; +out: + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return ret; +} + +/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ +static int nfs_write_mapping(struct address_space *mapping, int how) +{ + struct writeback_control wbc = { + .bdi = mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = 0, + .range_end = LLONG_MAX, + }; + + return __nfs_write_mapping(mapping, &wbc, how); +} + +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) +{ + return nfs_write_mapping(inode->i_mapping, 0); +} + +int nfs_wb_nocommit(struct inode *inode) +{ + return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); +} + +int nfs_wb_page_cancel(struct inode *inode, struct page *page) +{ + struct nfs_page *req; + loff_t range_start = page_offset(page); + loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); + struct writeback_control wbc = { + .bdi = page->mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = range_start, + .range_end = range_end, + }; + int ret = 0; + + BUG_ON(!PageLocked(page)); + for (;;) { + req = nfs_page_find_request(page); + if (req == NULL) + goto out; + if (test_bit(PG_CLEAN, &req->wb_flags)) { + nfs_release_request(req); + break; + } + if (nfs_lock_request_dontget(req)) { + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_request(req); + break; + } + ret = nfs_wait_on_request(req); + nfs_release_request(req); + if (ret < 0) + goto out; + } + if (!PagePrivate(page)) + return 0; + ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); +out: + return ret; +} + +static int nfs_wb_page_priority(struct inode *inode, struct page *page, + int how) +{ + loff_t range_start = page_offset(page); + loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); + struct writeback_control wbc = { + .bdi = page->mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = range_start, + .range_end = range_end, + }; + int ret; + + do { + if (clear_page_dirty_for_io(page)) { + ret = nfs_writepage_locked(page, &wbc); + if (ret < 0) + goto out_error; + } else if (!PagePrivate(page)) + break; + ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); + if (ret < 0) + goto out_error; + } while (PagePrivate(page)); + return 0; +out_error: + __mark_inode_dirty(inode, I_DIRTY_PAGES); + return ret; +} + +/* + * Write back all requests on one page - we do this before reading it. + */ +int nfs_wb_page(struct inode *inode, struct page* page) +{ + return nfs_wb_page_priority(inode, page, FLUSH_STABLE); +} + +#ifdef CONFIG_MIGRATION +int nfs_migrate_page(struct address_space *mapping, struct page *newpage, + struct page *page) +{ + struct nfs_page *req; + int ret; + + nfs_fscache_release_page(page, GFP_KERNEL); + + req = nfs_find_and_lock_request(page); + ret = PTR_ERR(req); + if (IS_ERR(req)) + goto out; + + ret = migrate_page(mapping, newpage, page); + if (!req) + goto out; + if (ret) + goto out_unlock; + page_cache_get(newpage); + spin_lock(&mapping->host->i_lock); + req->wb_page = newpage; + SetPagePrivate(newpage); + set_page_private(newpage, (unsigned long)req); + ClearPagePrivate(page); + set_page_private(page, 0); + spin_unlock(&mapping->host->i_lock); + page_cache_release(page); +out_unlock: + nfs_clear_page_tag_locked(req); +out: + return ret; +} +#endif + +int __init nfs_init_writepagecache(void) +{ + nfs_wdata_cachep = kmem_cache_create("nfs_write_data", + sizeof(struct nfs_write_data), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (nfs_wdata_cachep == NULL) + return -ENOMEM; + + nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, + nfs_wdata_cachep); + if (nfs_wdata_mempool == NULL) + return -ENOMEM; + + nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, + nfs_wdata_cachep); + if (nfs_commit_mempool == NULL) + return -ENOMEM; + + /* + * NFS congestion size, scale with available memory. + * + * 64MB: 8192k + * 128MB: 11585k + * 256MB: 16384k + * 512MB: 23170k + * 1GB: 32768k + * 2GB: 46340k + * 4GB: 65536k + * 8GB: 92681k + * 16GB: 131072k + * + * This allows larger machines to have larger/more transfers. + * Limit the default to 256M + */ + nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); + if (nfs_congestion_kb > 256*1024) + nfs_congestion_kb = 256*1024; + + return 0; +} + +void nfs_destroy_writepagecache(void) +{ + mempool_destroy(nfs_commit_mempool); + mempool_destroy(nfs_wdata_mempool); + kmem_cache_destroy(nfs_wdata_cachep); +} + |