summaryrefslogtreecommitdiffstats
path: root/fs/nfsd
diff options
context:
space:
mode:
authorAnton Arapov <anton@redhat.com>2012-04-16 10:05:28 +0200
committerAnton Arapov <anton@redhat.com>2012-04-16 10:05:28 +0200
commitb4b6116a13633898cf868f2f103c96a90c4c20f8 (patch)
tree93d1b7e2cfcdf473d8d4ff3ad141fa864f8491f6 /fs/nfsd
parentedd4be777c953e5faafc80d091d3084b4343f5d3 (diff)
downloadkernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.gz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.xz
kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.zip
fedora kernel: d9aad82f3319f3cfd1aebc01234254ef0c37ad84v3.3.2-1
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/Kconfig92
-rw-r--r--fs/nfsd/Makefile14
-rw-r--r--fs/nfsd/acl.h59
-rw-r--r--fs/nfsd/auth.c95
-rw-r--r--fs/nfsd/auth.h22
-rw-r--r--fs/nfsd/cache.h83
-rw-r--r--fs/nfsd/export.c1263
-rw-r--r--fs/nfsd/fault_inject.c91
-rw-r--r--fs/nfsd/fault_inject.h28
-rw-r--r--fs/nfsd/idmap.h62
-rw-r--r--fs/nfsd/lockd.c77
-rw-r--r--fs/nfsd/nfs2acl.c356
-rw-r--r--fs/nfsd/nfs3acl.c267
-rw-r--r--fs/nfsd/nfs3proc.c896
-rw-r--r--fs/nfsd/nfs3xdr.c1118
-rw-r--r--fs/nfsd/nfs4acl.c839
-rw-r--r--fs/nfsd/nfs4callback.c1036
-rw-r--r--fs/nfsd/nfs4idmap.c588
-rw-r--r--fs/nfsd/nfs4proc.c1720
-rw-r--r--fs/nfsd/nfs4recover.c427
-rw-r--r--fs/nfsd/nfs4state.c4693
-rw-r--r--fs/nfsd/nfs4xdr.c3673
-rw-r--r--fs/nfsd/nfscache.c323
-rw-r--r--fs/nfsd/nfsctl.c1192
-rw-r--r--fs/nfsd/nfsd.h375
-rw-r--r--fs/nfsd/nfsfh.c690
-rw-r--r--fs/nfsd/nfsfh.h206
-rw-r--r--fs/nfsd/nfsproc.c755
-rw-r--r--fs/nfsd/nfssvc.c663
-rw-r--r--fs/nfsd/nfsxdr.c545
-rw-r--r--fs/nfsd/state.h492
-rw-r--r--fs/nfsd/stats.c104
-rw-r--r--fs/nfsd/vfs.c2304
-rw-r--r--fs/nfsd/vfs.h119
-rw-r--r--fs/nfsd/xdr.h173
-rw-r--r--fs/nfsd/xdr3.h344
-rw-r--r--fs/nfsd/xdr4.h601
37 files changed, 26385 insertions, 0 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
new file mode 100644
index 00000000000..8df1ea4a6ff
--- /dev/null
+++ b/fs/nfsd/Kconfig
@@ -0,0 +1,92 @@
+config NFSD
+ tristate "NFS server support"
+ depends on INET
+ depends on FILE_LOCKING
+ select LOCKD
+ select SUNRPC
+ select EXPORTFS
+ select NFS_ACL_SUPPORT if NFSD_V2_ACL
+ help
+ Choose Y here if you want to allow other computers to access
+ files residing on this system using Sun's Network File System
+ protocol. To compile the NFS server support as a module,
+ choose M here: the module will be called nfsd.
+
+ You may choose to use a user-space NFS server instead, in which
+ case you can choose N here.
+
+ To export local file systems using NFS, you also need to install
+ user space programs which can be found in the Linux nfs-utils
+ package, available from http://linux-nfs.org/. More detail about
+ the Linux NFS server implementation is available via the
+ exports(5) man page.
+
+ Below you can choose which versions of the NFS protocol are
+ available to clients mounting the NFS server on this system.
+ Support for NFS version 2 (RFC 1094) is always available when
+ CONFIG_NFSD is selected.
+
+ If unsure, say N.
+
+config NFSD_V2_ACL
+ bool
+ depends on NFSD
+
+config NFSD_V3
+ bool "NFS server support for NFS version 3"
+ depends on NFSD
+ help
+ This option enables support in your system's NFS server for
+ version 3 of the NFS protocol (RFC 1813).
+
+ If unsure, say Y.
+
+config NFSD_V3_ACL
+ bool "NFS server support for the NFSv3 ACL protocol extension"
+ depends on NFSD_V3
+ select NFSD_V2_ACL
+ help
+ Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
+ never became an official part of the NFS version 3 protocol.
+ This protocol extension allows applications on NFS clients to
+ manipulate POSIX Access Control Lists on files residing on NFS
+ servers. NFS servers enforce POSIX ACLs on local files whether
+ this protocol is available or not.
+
+ This option enables support in your system's NFS server for the
+ NFSv3 ACL protocol extension allowing NFS clients to manipulate
+ POSIX ACLs on files exported by your system's NFS server. NFS
+ clients which support the Solaris NFSv3 ACL protocol can then
+ access and modify ACLs on your NFS server.
+
+ To store ACLs on your NFS server, you also need to enable ACL-
+ related CONFIG options for your local file systems of choice.
+
+ If unsure, say N.
+
+config NFSD_V4
+ bool "NFS server support for NFS version 4 (EXPERIMENTAL)"
+ depends on NFSD && PROC_FS && EXPERIMENTAL
+ select NFSD_V3
+ select FS_POSIX_ACL
+ select SUNRPC_GSS
+ select CRYPTO
+ help
+ This option enables support in your system's NFS server for
+ version 4 of the NFS protocol (RFC 3530).
+
+ To export files using NFSv4, you need to install additional user
+ space programs which can be found in the Linux nfs-utils package,
+ available from http://linux-nfs.org/.
+
+ If unsure, say N.
+
+config NFSD_FAULT_INJECTION
+ bool "NFS server manual fault injection"
+ depends on NFSD_V4 && DEBUG_KERNEL
+ help
+ This option enables support for manually injecting faults
+ into the NFS server. This is intended to be used for
+ testing error recovery on the NFS client.
+
+ If unsure, say N.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
new file mode 100644
index 00000000000..af32ef06b4f
--- /dev/null
+++ b/fs/nfsd/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the Linux nfs server
+#
+
+obj-$(CONFIG_NFSD) += nfsd.o
+
+nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
+ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
+nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
+nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
+nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ nfs4acl.o nfs4callback.o nfs4recover.o
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
new file mode 100644
index 00000000000..34e5c40af5e
--- /dev/null
+++ b/fs/nfsd/acl.h
@@ -0,0 +1,59 @@
+/*
+ * Common NFSv4 ACL handling definitions.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LINUX_NFS4_ACL_H
+#define LINUX_NFS4_ACL_H
+
+#include <linux/posix_acl.h>
+
+/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to
+ * fit in a page: */
+#define NFS4_ACL_MAX 170
+
+struct nfs4_acl *nfs4_acl_new(int);
+int nfs4_acl_get_whotype(char *, u32);
+int nfs4_acl_write_who(int who, char *p);
+int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group,
+ uid_t who, u32 mask);
+
+#define NFS4_ACL_TYPE_DEFAULT 0x01
+#define NFS4_ACL_DIR 0x02
+#define NFS4_ACL_OWNER 0x04
+
+struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *,
+ struct posix_acl *, unsigned int flags);
+int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **,
+ struct posix_acl **, unsigned int flags);
+
+#endif /* LINUX_NFS4_ACL_H */
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
new file mode 100644
index 00000000000..79717a40dab
--- /dev/null
+++ b/fs/nfsd/auth.c
@@ -0,0 +1,95 @@
+/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
+
+#include <linux/sched.h>
+#include "nfsd.h"
+#include "auth.h"
+
+int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
+{
+ struct exp_flavor_info *f;
+ struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
+
+ for (f = exp->ex_flavors; f < end; f++) {
+ if (f->pseudoflavor == rqstp->rq_flavor)
+ return f->flags;
+ }
+ return exp->ex_flags;
+
+}
+
+int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
+{
+ struct group_info *rqgi;
+ struct group_info *gi;
+ struct cred *new;
+ int i;
+ int flags = nfsexp_flags(rqstp, exp);
+ int ret;
+
+ validate_process_creds();
+
+ /* discard any old override before preparing the new set */
+ revert_creds(get_cred(current->real_cred));
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ new->fsuid = rqstp->rq_cred.cr_uid;
+ new->fsgid = rqstp->rq_cred.cr_gid;
+
+ rqgi = rqstp->rq_cred.cr_group_info;
+
+ if (flags & NFSEXP_ALLSQUASH) {
+ new->fsuid = exp->ex_anon_uid;
+ new->fsgid = exp->ex_anon_gid;
+ gi = groups_alloc(0);
+ if (!gi)
+ goto oom;
+ } else if (flags & NFSEXP_ROOTSQUASH) {
+ if (!new->fsuid)
+ new->fsuid = exp->ex_anon_uid;
+ if (!new->fsgid)
+ new->fsgid = exp->ex_anon_gid;
+
+ gi = groups_alloc(rqgi->ngroups);
+ if (!gi)
+ goto oom;
+
+ for (i = 0; i < rqgi->ngroups; i++) {
+ if (!GROUP_AT(rqgi, i))
+ GROUP_AT(gi, i) = exp->ex_anon_gid;
+ else
+ GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
+ }
+ } else {
+ gi = get_group_info(rqgi);
+ }
+
+ if (new->fsuid == (uid_t) -1)
+ new->fsuid = exp->ex_anon_uid;
+ if (new->fsgid == (gid_t) -1)
+ new->fsgid = exp->ex_anon_gid;
+
+ ret = set_groups(new, gi);
+ put_group_info(gi);
+ if (ret < 0)
+ goto error;
+
+ if (new->fsuid)
+ new->cap_effective = cap_drop_nfsd_set(new->cap_effective);
+ else
+ new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
+ new->cap_permitted);
+ validate_process_creds();
+ put_cred(override_creds(new));
+ put_cred(new);
+ validate_process_creds();
+ return 0;
+
+oom:
+ ret = -ENOMEM;
+error:
+ abort_creds(new);
+ return ret;
+}
+
diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h
new file mode 100644
index 00000000000..78b3c0e9382
--- /dev/null
+++ b/fs/nfsd/auth.h
@@ -0,0 +1,22 @@
+/*
+ * nfsd-specific authentication stuff.
+ * uid/gid mapping not yet implemented.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_AUTH_H
+#define LINUX_NFSD_AUTH_H
+
+#define nfsd_luid(rq, uid) ((u32)(uid))
+#define nfsd_lgid(rq, gid) ((u32)(gid))
+#define nfsd_ruid(rq, uid) ((u32)(uid))
+#define nfsd_rgid(rq, gid) ((u32)(gid))
+
+/*
+ * Set the current process's fsuid/fsgid etc to those of the NFS
+ * client user
+ */
+int nfsd_setuser(struct svc_rqst *, struct svc_export *);
+
+#endif /* LINUX_NFSD_AUTH_H */
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
new file mode 100644
index 00000000000..93cc9d34c45
--- /dev/null
+++ b/fs/nfsd/cache.h
@@ -0,0 +1,83 @@
+/*
+ * Request reply cache. This was heavily inspired by the
+ * implementation in 4.3BSD/4.4BSD.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef NFSCACHE_H
+#define NFSCACHE_H
+
+#include <linux/sunrpc/svc.h>
+
+/*
+ * Representation of a reply cache entry.
+ */
+struct svc_cacherep {
+ struct hlist_node c_hash;
+ struct list_head c_lru;
+
+ unsigned char c_state, /* unused, inprog, done */
+ c_type, /* status, buffer */
+ c_secure : 1; /* req came from port < 1024 */
+ struct sockaddr_in c_addr;
+ __be32 c_xid;
+ u32 c_prot;
+ u32 c_proc;
+ u32 c_vers;
+ unsigned long c_timestamp;
+ union {
+ struct kvec u_vec;
+ __be32 u_status;
+ } c_u;
+};
+
+#define c_replvec c_u.u_vec
+#define c_replstat c_u.u_status
+
+/* cache entry states */
+enum {
+ RC_UNUSED,
+ RC_INPROG,
+ RC_DONE
+};
+
+/* return values */
+enum {
+ RC_DROPIT,
+ RC_REPLY,
+ RC_DOIT,
+ RC_INTR
+};
+
+/*
+ * Cache types.
+ * We may want to add more types one day, e.g. for diropres and
+ * attrstat replies. Using cache entries with fixed length instead
+ * of buffer pointers may be more efficient.
+ */
+enum {
+ RC_NOCACHE,
+ RC_REPLSTAT,
+ RC_REPLBUFF,
+};
+
+/*
+ * If requests are retransmitted within this interval, they're dropped.
+ */
+#define RC_DELAY (HZ/5)
+
+int nfsd_reply_cache_init(void);
+void nfsd_reply_cache_shutdown(void);
+int nfsd_cache_lookup(struct svc_rqst *);
+void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+
+#ifdef CONFIG_NFSD_V4
+void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
+#else /* CONFIG_NFSD_V4 */
+static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
+{
+}
+#endif /* CONFIG_NFSD_V4 */
+
+#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
new file mode 100644
index 00000000000..cf8a6bd062f
--- /dev/null
+++ b/fs/nfsd/export.c
@@ -0,0 +1,1263 @@
+/*
+ * NFS exporting and validation.
+ *
+ * We maintain a list of clients, each of which has a list of
+ * exports. To export an fs to a given client, you first have
+ * to create the client entry with NFSCTL_ADDCLIENT, which
+ * creates a client control block and adds it to the hash
+ * table. Then, you call NFSCTL_EXPORT for each fs.
+ *
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
+ */
+
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/module.h>
+#include <linux/exportfs.h>
+
+#include <net/ipv6.h>
+
+#include "nfsd.h"
+#include "nfsfh.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_EXPORT
+
+typedef struct auth_domain svc_client;
+typedef struct svc_export svc_export;
+
+/*
+ * We have two caches.
+ * One maps client+vfsmnt+dentry to export options - the export map
+ * The other maps client+filehandle-fragment to export options. - the expkey map
+ *
+ * The export options are actually stored in the first map, and the
+ * second map contains a reference to the entry in the first map.
+ */
+
+#define EXPKEY_HASHBITS 8
+#define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS)
+#define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1)
+static struct cache_head *expkey_table[EXPKEY_HASHMAX];
+
+static void expkey_put(struct kref *ref)
+{
+ struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+
+ if (test_bit(CACHE_VALID, &key->h.flags) &&
+ !test_bit(CACHE_NEGATIVE, &key->h.flags))
+ path_put(&key->ek_path);
+ auth_domain_put(key->ek_client);
+ kfree(key);
+}
+
+static void expkey_request(struct cache_detail *cd,
+ struct cache_head *h,
+ char **bpp, int *blen)
+{
+ /* client fsidtype \xfsid */
+ struct svc_expkey *ek = container_of(h, struct svc_expkey, h);
+ char type[5];
+
+ qword_add(bpp, blen, ek->ek_client->name);
+ snprintf(type, 5, "%d", ek->ek_fsidtype);
+ qword_add(bpp, blen, type);
+ qword_addhex(bpp, blen, (char*)ek->ek_fsid, key_len(ek->ek_fsidtype));
+ (*bpp)[-1] = '\n';
+}
+
+static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall(cd, h, expkey_request);
+}
+
+static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
+static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
+static struct cache_detail svc_expkey_cache;
+
+static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
+{
+ /* client fsidtype fsid [path] */
+ char *buf;
+ int len;
+ struct auth_domain *dom = NULL;
+ int err;
+ int fsidtype;
+ char *ep;
+ struct svc_expkey key;
+ struct svc_expkey *ek = NULL;
+
+ if (mlen < 1 || mesg[mlen-1] != '\n')
+ return -EINVAL;
+ mesg[mlen-1] = 0;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ err = -ENOMEM;
+ if (!buf)
+ goto out;
+
+ err = -EINVAL;
+ if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ goto out;
+
+ err = -ENOENT;
+ dom = auth_domain_find(buf);
+ if (!dom)
+ goto out;
+ dprintk("found domain %s\n", buf);
+
+ err = -EINVAL;
+ if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ goto out;
+ fsidtype = simple_strtoul(buf, &ep, 10);
+ if (*ep)
+ goto out;
+ dprintk("found fsidtype %d\n", fsidtype);
+ if (key_len(fsidtype)==0) /* invalid type */
+ goto out;
+ if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ goto out;
+ dprintk("found fsid length %d\n", len);
+ if (len != key_len(fsidtype))
+ goto out;
+
+ /* OK, we seem to have a valid key */
+ key.h.flags = 0;
+ key.h.expiry_time = get_expiry(&mesg);
+ if (key.h.expiry_time == 0)
+ goto out;
+
+ key.ek_client = dom;
+ key.ek_fsidtype = fsidtype;
+ memcpy(key.ek_fsid, buf, len);
+
+ ek = svc_expkey_lookup(&key);
+ err = -ENOMEM;
+ if (!ek)
+ goto out;
+
+ /* now we want a pathname, or empty meaning NEGATIVE */
+ err = -EINVAL;
+ len = qword_get(&mesg, buf, PAGE_SIZE);
+ if (len < 0)
+ goto out;
+ dprintk("Path seems to be <%s>\n", buf);
+ err = 0;
+ if (len == 0) {
+ set_bit(CACHE_NEGATIVE, &key.h.flags);
+ ek = svc_expkey_update(&key, ek);
+ if (!ek)
+ err = -ENOMEM;
+ } else {
+ err = kern_path(buf, 0, &key.ek_path);
+ if (err)
+ goto out;
+
+ dprintk("Found the path %s\n", buf);
+
+ ek = svc_expkey_update(&key, ek);
+ if (!ek)
+ err = -ENOMEM;
+ path_put(&key.ek_path);
+ }
+ cache_flush();
+ out:
+ if (ek)
+ cache_put(&ek->h, &svc_expkey_cache);
+ if (dom)
+ auth_domain_put(dom);
+ kfree(buf);
+ return err;
+}
+
+static int expkey_show(struct seq_file *m,
+ struct cache_detail *cd,
+ struct cache_head *h)
+{
+ struct svc_expkey *ek ;
+ int i;
+
+ if (h ==NULL) {
+ seq_puts(m, "#domain fsidtype fsid [path]\n");
+ return 0;
+ }
+ ek = container_of(h, struct svc_expkey, h);
+ seq_printf(m, "%s %d 0x", ek->ek_client->name,
+ ek->ek_fsidtype);
+ for (i=0; i < key_len(ek->ek_fsidtype)/4; i++)
+ seq_printf(m, "%08x", ek->ek_fsid[i]);
+ if (test_bit(CACHE_VALID, &h->flags) &&
+ !test_bit(CACHE_NEGATIVE, &h->flags)) {
+ seq_printf(m, " ");
+ seq_path(m, &ek->ek_path, "\\ \t\n");
+ }
+ seq_printf(m, "\n");
+ return 0;
+}
+
+static inline int expkey_match (struct cache_head *a, struct cache_head *b)
+{
+ struct svc_expkey *orig = container_of(a, struct svc_expkey, h);
+ struct svc_expkey *new = container_of(b, struct svc_expkey, h);
+
+ if (orig->ek_fsidtype != new->ek_fsidtype ||
+ orig->ek_client != new->ek_client ||
+ memcmp(orig->ek_fsid, new->ek_fsid, key_len(orig->ek_fsidtype)) != 0)
+ return 0;
+ return 1;
+}
+
+static inline void expkey_init(struct cache_head *cnew,
+ struct cache_head *citem)
+{
+ struct svc_expkey *new = container_of(cnew, struct svc_expkey, h);
+ struct svc_expkey *item = container_of(citem, struct svc_expkey, h);
+
+ kref_get(&item->ek_client->ref);
+ new->ek_client = item->ek_client;
+ new->ek_fsidtype = item->ek_fsidtype;
+
+ memcpy(new->ek_fsid, item->ek_fsid, sizeof(new->ek_fsid));
+}
+
+static inline void expkey_update(struct cache_head *cnew,
+ struct cache_head *citem)
+{
+ struct svc_expkey *new = container_of(cnew, struct svc_expkey, h);
+ struct svc_expkey *item = container_of(citem, struct svc_expkey, h);
+
+ new->ek_path = item->ek_path;
+ path_get(&item->ek_path);
+}
+
+static struct cache_head *expkey_alloc(void)
+{
+ struct svc_expkey *i = kmalloc(sizeof(*i), GFP_KERNEL);
+ if (i)
+ return &i->h;
+ else
+ return NULL;
+}
+
+static struct cache_detail svc_expkey_cache = {
+ .owner = THIS_MODULE,
+ .hash_size = EXPKEY_HASHMAX,
+ .hash_table = expkey_table,
+ .name = "nfsd.fh",
+ .cache_put = expkey_put,
+ .cache_upcall = expkey_upcall,
+ .cache_parse = expkey_parse,
+ .cache_show = expkey_show,
+ .match = expkey_match,
+ .init = expkey_init,
+ .update = expkey_update,
+ .alloc = expkey_alloc,
+};
+
+static int
+svc_expkey_hash(struct svc_expkey *item)
+{
+ int hash = item->ek_fsidtype;
+ char * cp = (char*)item->ek_fsid;
+ int len = key_len(item->ek_fsidtype);
+
+ hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+ hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
+ hash &= EXPKEY_HASHMASK;
+ return hash;
+}
+
+static struct svc_expkey *
+svc_expkey_lookup(struct svc_expkey *item)
+{
+ struct cache_head *ch;
+ int hash = svc_expkey_hash(item);
+
+ ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
+ hash);
+ if (ch)
+ return container_of(ch, struct svc_expkey, h);
+ else
+ return NULL;
+}
+
+static struct svc_expkey *
+svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
+{
+ struct cache_head *ch;
+ int hash = svc_expkey_hash(new);
+
+ ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
+ &old->h, hash);
+ if (ch)
+ return container_of(ch, struct svc_expkey, h);
+ else
+ return NULL;
+}
+
+
+#define EXPORT_HASHBITS 8
+#define EXPORT_HASHMAX (1<< EXPORT_HASHBITS)
+
+static struct cache_head *export_table[EXPORT_HASHMAX];
+
+static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
+{
+ int i;
+
+ for (i = 0; i < fsloc->locations_count; i++) {
+ kfree(fsloc->locations[i].path);
+ kfree(fsloc->locations[i].hosts);
+ }
+ kfree(fsloc->locations);
+}
+
+static void svc_export_put(struct kref *ref)
+{
+ struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+ path_put(&exp->ex_path);
+ auth_domain_put(exp->ex_client);
+ nfsd4_fslocs_free(&exp->ex_fslocs);
+ kfree(exp);
+}
+
+static void svc_export_request(struct cache_detail *cd,
+ struct cache_head *h,
+ char **bpp, int *blen)
+{
+ /* client path */
+ struct svc_export *exp = container_of(h, struct svc_export, h);
+ char *pth;
+
+ qword_add(bpp, blen, exp->ex_client->name);
+ pth = d_path(&exp->ex_path, *bpp, *blen);
+ if (IS_ERR(pth)) {
+ /* is this correct? */
+ (*bpp)[0] = '\n';
+ return;
+ }
+ qword_add(bpp, blen, pth);
+ (*bpp)[-1] = '\n';
+}
+
+static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+}
+
+static struct svc_export *svc_export_update(struct svc_export *new,
+ struct svc_export *old);
+static struct svc_export *svc_export_lookup(struct svc_export *);
+
+static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
+{
+
+ /*
+ * We currently export only dirs, regular files, and (for v4
+ * pseudoroot) symlinks.
+ */
+ if (!S_ISDIR(inode->i_mode) &&
+ !S_ISLNK(inode->i_mode) &&
+ !S_ISREG(inode->i_mode))
+ return -ENOTDIR;
+
+ /*
+ * Mountd should never pass down a writeable V4ROOT export, but,
+ * just to make sure:
+ */
+ if (*flags & NFSEXP_V4ROOT)
+ *flags |= NFSEXP_READONLY;
+
+ /* There are two requirements on a filesystem to be exportable.
+ * 1: We must be able to identify the filesystem from a number.
+ * either a device number (so FS_REQUIRES_DEV needed)
+ * or an FSID number (so NFSEXP_FSID or ->uuid is needed).
+ * 2: We must be able to find an inode from a filehandle.
+ * This means that s_export_op must be set.
+ */
+ if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
+ !(*flags & NFSEXP_FSID) &&
+ uuid == NULL) {
+ dprintk("exp_export: export of non-dev fs without fsid\n");
+ return -EINVAL;
+ }
+
+ if (!inode->i_sb->s_export_op ||
+ !inode->i_sb->s_export_op->fh_to_dentry) {
+ dprintk("exp_export: export of invalid fs type.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+
+}
+
+#ifdef CONFIG_NFSD_V4
+
+static int
+fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
+{
+ int len;
+ int migrated, i, err;
+
+ /* listsize */
+ err = get_int(mesg, &fsloc->locations_count);
+ if (err)
+ return err;
+ if (fsloc->locations_count > MAX_FS_LOCATIONS)
+ return -EINVAL;
+ if (fsloc->locations_count == 0)
+ return 0;
+
+ fsloc->locations = kzalloc(fsloc->locations_count
+ * sizeof(struct nfsd4_fs_location), GFP_KERNEL);
+ if (!fsloc->locations)
+ return -ENOMEM;
+ for (i=0; i < fsloc->locations_count; i++) {
+ /* colon separated host list */
+ err = -EINVAL;
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out_free_all;
+ err = -ENOMEM;
+ fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL);
+ if (!fsloc->locations[i].hosts)
+ goto out_free_all;
+ err = -EINVAL;
+ /* slash separated path component list */
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out_free_all;
+ err = -ENOMEM;
+ fsloc->locations[i].path = kstrdup(buf, GFP_KERNEL);
+ if (!fsloc->locations[i].path)
+ goto out_free_all;
+ }
+ /* migrated */
+ err = get_int(mesg, &migrated);
+ if (err)
+ goto out_free_all;
+ err = -EINVAL;
+ if (migrated < 0 || migrated > 1)
+ goto out_free_all;
+ fsloc->migrated = migrated;
+ return 0;
+out_free_all:
+ nfsd4_fslocs_free(fsloc);
+ return err;
+}
+
+static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
+{
+ int listsize, err;
+ struct exp_flavor_info *f;
+
+ err = get_int(mesg, &listsize);
+ if (err)
+ return err;
+ if (listsize < 0 || listsize > MAX_SECINFO_LIST)
+ return -EINVAL;
+
+ for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) {
+ err = get_int(mesg, &f->pseudoflavor);
+ if (err)
+ return err;
+ /*
+ * XXX: It would be nice to also check whether this
+ * pseudoflavor is supported, so we can discover the
+ * problem at export time instead of when a client fails
+ * to authenticate.
+ */
+ err = get_int(mesg, &f->flags);
+ if (err)
+ return err;
+ /* Only some flags are allowed to differ between flavors: */
+ if (~NFSEXP_SECINFO_FLAGS & (f->flags ^ exp->ex_flags))
+ return -EINVAL;
+ }
+ exp->ex_nflavors = listsize;
+ return 0;
+}
+
+#else /* CONFIG_NFSD_V4 */
+static inline int
+fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc){return 0;}
+static inline int
+secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
+#endif
+
+static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
+{
+ /* client path expiry [flags anonuid anongid fsid] */
+ char *buf;
+ int len;
+ int err;
+ struct auth_domain *dom = NULL;
+ struct svc_export exp = {}, *expp;
+ int an_int;
+
+ if (mesg[mlen-1] != '\n')
+ return -EINVAL;
+ mesg[mlen-1] = 0;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* client */
+ err = -EINVAL;
+ len = qword_get(&mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out;
+
+ err = -ENOENT;
+ dom = auth_domain_find(buf);
+ if (!dom)
+ goto out;
+
+ /* path */
+ err = -EINVAL;
+ if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ goto out1;
+
+ err = kern_path(buf, 0, &exp.ex_path);
+ if (err)
+ goto out1;
+
+ exp.ex_client = dom;
+
+ /* expiry */
+ err = -EINVAL;
+ exp.h.expiry_time = get_expiry(&mesg);
+ if (exp.h.expiry_time == 0)
+ goto out3;
+
+ /* flags */
+ err = get_int(&mesg, &an_int);
+ if (err == -ENOENT) {
+ err = 0;
+ set_bit(CACHE_NEGATIVE, &exp.h.flags);
+ } else {
+ if (err || an_int < 0)
+ goto out3;
+ exp.ex_flags= an_int;
+
+ /* anon uid */
+ err = get_int(&mesg, &an_int);
+ if (err)
+ goto out3;
+ exp.ex_anon_uid= an_int;
+
+ /* anon gid */
+ err = get_int(&mesg, &an_int);
+ if (err)
+ goto out3;
+ exp.ex_anon_gid= an_int;
+
+ /* fsid */
+ err = get_int(&mesg, &an_int);
+ if (err)
+ goto out3;
+ exp.ex_fsid = an_int;
+
+ while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
+ if (strcmp(buf, "fsloc") == 0)
+ err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
+ else if (strcmp(buf, "uuid") == 0) {
+ /* expect a 16 byte uuid encoded as \xXXXX... */
+ len = qword_get(&mesg, buf, PAGE_SIZE);
+ if (len != 16)
+ err = -EINVAL;
+ else {
+ exp.ex_uuid =
+ kmemdup(buf, 16, GFP_KERNEL);
+ if (exp.ex_uuid == NULL)
+ err = -ENOMEM;
+ }
+ } else if (strcmp(buf, "secinfo") == 0)
+ err = secinfo_parse(&mesg, buf, &exp);
+ else
+ /* quietly ignore unknown words and anything
+ * following. Newer user-space can try to set
+ * new values, then see what the result was.
+ */
+ break;
+ if (err)
+ goto out4;
+ }
+
+ err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags,
+ exp.ex_uuid);
+ if (err)
+ goto out4;
+ }
+
+ expp = svc_export_lookup(&exp);
+ if (expp)
+ expp = svc_export_update(&exp, expp);
+ else
+ err = -ENOMEM;
+ cache_flush();
+ if (expp == NULL)
+ err = -ENOMEM;
+ else
+ exp_put(expp);
+out4:
+ nfsd4_fslocs_free(&exp.ex_fslocs);
+ kfree(exp.ex_uuid);
+out3:
+ path_put(&exp.ex_path);
+out1:
+ auth_domain_put(dom);
+out:
+ kfree(buf);
+ return err;
+}
+
+static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs);
+static void show_secinfo(struct seq_file *m, struct svc_export *exp);
+
+static int svc_export_show(struct seq_file *m,
+ struct cache_detail *cd,
+ struct cache_head *h)
+{
+ struct svc_export *exp ;
+
+ if (h ==NULL) {
+ seq_puts(m, "#path domain(flags)\n");
+ return 0;
+ }
+ exp = container_of(h, struct svc_export, h);
+ seq_path(m, &exp->ex_path, " \t\n\\");
+ seq_putc(m, '\t');
+ seq_escape(m, exp->ex_client->name, " \t\n\\");
+ seq_putc(m, '(');
+ if (test_bit(CACHE_VALID, &h->flags) &&
+ !test_bit(CACHE_NEGATIVE, &h->flags)) {
+ exp_flags(m, exp->ex_flags, exp->ex_fsid,
+ exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
+ if (exp->ex_uuid) {
+ int i;
+ seq_puts(m, ",uuid=");
+ for (i=0; i<16; i++) {
+ if ((i&3) == 0 && i)
+ seq_putc(m, ':');
+ seq_printf(m, "%02x", exp->ex_uuid[i]);
+ }
+ }
+ show_secinfo(m, exp);
+ }
+ seq_puts(m, ")\n");
+ return 0;
+}
+static int svc_export_match(struct cache_head *a, struct cache_head *b)
+{
+ struct svc_export *orig = container_of(a, struct svc_export, h);
+ struct svc_export *new = container_of(b, struct svc_export, h);
+ return orig->ex_client == new->ex_client &&
+ orig->ex_path.dentry == new->ex_path.dentry &&
+ orig->ex_path.mnt == new->ex_path.mnt;
+}
+
+static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
+{
+ struct svc_export *new = container_of(cnew, struct svc_export, h);
+ struct svc_export *item = container_of(citem, struct svc_export, h);
+
+ kref_get(&item->ex_client->ref);
+ new->ex_client = item->ex_client;
+ new->ex_path.dentry = dget(item->ex_path.dentry);
+ new->ex_path.mnt = mntget(item->ex_path.mnt);
+ new->ex_fslocs.locations = NULL;
+ new->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = 0;
+}
+
+static void export_update(struct cache_head *cnew, struct cache_head *citem)
+{
+ struct svc_export *new = container_of(cnew, struct svc_export, h);
+ struct svc_export *item = container_of(citem, struct svc_export, h);
+ int i;
+
+ new->ex_flags = item->ex_flags;
+ new->ex_anon_uid = item->ex_anon_uid;
+ new->ex_anon_gid = item->ex_anon_gid;
+ new->ex_fsid = item->ex_fsid;
+ new->ex_uuid = item->ex_uuid;
+ item->ex_uuid = NULL;
+ new->ex_fslocs.locations = item->ex_fslocs.locations;
+ item->ex_fslocs.locations = NULL;
+ new->ex_fslocs.locations_count = item->ex_fslocs.locations_count;
+ item->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = item->ex_fslocs.migrated;
+ item->ex_fslocs.migrated = 0;
+ new->ex_nflavors = item->ex_nflavors;
+ for (i = 0; i < MAX_SECINFO_LIST; i++) {
+ new->ex_flavors[i] = item->ex_flavors[i];
+ }
+}
+
+static struct cache_head *svc_export_alloc(void)
+{
+ struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL);
+ if (i)
+ return &i->h;
+ else
+ return NULL;
+}
+
+struct cache_detail svc_export_cache = {
+ .owner = THIS_MODULE,
+ .hash_size = EXPORT_HASHMAX,
+ .hash_table = export_table,
+ .name = "nfsd.export",
+ .cache_put = svc_export_put,
+ .cache_upcall = svc_export_upcall,
+ .cache_parse = svc_export_parse,
+ .cache_show = svc_export_show,
+ .match = svc_export_match,
+ .init = svc_export_init,
+ .update = export_update,
+ .alloc = svc_export_alloc,
+};
+
+static int
+svc_export_hash(struct svc_export *exp)
+{
+ int hash;
+
+ hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
+ hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
+ hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
+ return hash;
+}
+
+static struct svc_export *
+svc_export_lookup(struct svc_export *exp)
+{
+ struct cache_head *ch;
+ int hash = svc_export_hash(exp);
+
+ ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
+ hash);
+ if (ch)
+ return container_of(ch, struct svc_export, h);
+ else
+ return NULL;
+}
+
+static struct svc_export *
+svc_export_update(struct svc_export *new, struct svc_export *old)
+{
+ struct cache_head *ch;
+ int hash = svc_export_hash(old);
+
+ ch = sunrpc_cache_update(&svc_export_cache, &new->h,
+ &old->h,
+ hash);
+ if (ch)
+ return container_of(ch, struct svc_export, h);
+ else
+ return NULL;
+}
+
+
+static struct svc_expkey *
+exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
+{
+ struct svc_expkey key, *ek;
+ int err;
+
+ if (!clp)
+ return ERR_PTR(-ENOENT);
+
+ key.ek_client = clp;
+ key.ek_fsidtype = fsid_type;
+ memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
+
+ ek = svc_expkey_lookup(&key);
+ if (ek == NULL)
+ return ERR_PTR(-ENOMEM);
+ err = cache_check(&svc_expkey_cache, &ek->h, reqp);
+ if (err)
+ return ERR_PTR(err);
+ return ek;
+}
+
+
+static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
+ struct cache_req *reqp)
+{
+ struct svc_export *exp, key;
+ int err;
+
+ if (!clp)
+ return ERR_PTR(-ENOENT);
+
+ key.ex_client = clp;
+ key.ex_path = *path;
+
+ exp = svc_export_lookup(&key);
+ if (exp == NULL)
+ return ERR_PTR(-ENOMEM);
+ err = cache_check(&svc_export_cache, &exp->h, reqp);
+ if (err)
+ return ERR_PTR(err);
+ return exp;
+}
+
+/*
+ * Find the export entry for a given dentry.
+ */
+static struct svc_export *exp_parent(svc_client *clp, struct path *path)
+{
+ struct dentry *saved = dget(path->dentry);
+ svc_export *exp = exp_get_by_name(clp, path, NULL);
+
+ while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+ struct dentry *parent = dget_parent(path->dentry);
+ dput(path->dentry);
+ path->dentry = parent;
+ exp = exp_get_by_name(clp, path, NULL);
+ }
+ dput(path->dentry);
+ path->dentry = saved;
+ return exp;
+}
+
+
+
+/*
+ * Obtain the root fh on behalf of a client.
+ * This could be done in user space, but I feel that it adds some safety
+ * since its harder to fool a kernel module than a user space program.
+ */
+int
+exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize)
+{
+ struct svc_export *exp;
+ struct path path;
+ struct inode *inode;
+ struct svc_fh fh;
+ int err;
+
+ err = -EPERM;
+ /* NB: we probably ought to check that it's NUL-terminated */
+ if (kern_path(name, 0, &path)) {
+ printk("nfsd: exp_rootfh path not found %s", name);
+ return err;
+ }
+ inode = path.dentry->d_inode;
+
+ dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
+ name, path.dentry, clp->name,
+ inode->i_sb->s_id, inode->i_ino);
+ exp = exp_parent(clp, &path);
+ if (IS_ERR(exp)) {
+ err = PTR_ERR(exp);
+ goto out;
+ }
+
+ /*
+ * fh must be initialized before calling fh_compose
+ */
+ fh_init(&fh, maxsize);
+ if (fh_compose(&fh, exp, path.dentry, NULL))
+ err = -EINVAL;
+ else
+ err = 0;
+ memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh));
+ fh_put(&fh);
+ exp_put(exp);
+out:
+ path_put(&path);
+ return err;
+}
+
+static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type,
+ u32 *fsidv, struct cache_req *reqp)
+{
+ struct svc_export *exp;
+ struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp);
+ if (IS_ERR(ek))
+ return ERR_CAST(ek);
+
+ exp = exp_get_by_name(clp, &ek->ek_path, reqp);
+ cache_put(&ek->h, &svc_expkey_cache);
+
+ if (IS_ERR(exp))
+ return ERR_CAST(exp);
+ return exp;
+}
+
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
+{
+ struct exp_flavor_info *f;
+ struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
+
+ /* legacy gss-only clients are always OK: */
+ if (exp->ex_client == rqstp->rq_gssclient)
+ return 0;
+ /* ip-address based client; check sec= export option: */
+ for (f = exp->ex_flavors; f < end; f++) {
+ if (f->pseudoflavor == rqstp->rq_flavor)
+ return 0;
+ }
+ /* defaults in absence of sec= options: */
+ if (exp->ex_nflavors == 0) {
+ if (rqstp->rq_flavor == RPC_AUTH_NULL ||
+ rqstp->rq_flavor == RPC_AUTH_UNIX)
+ return 0;
+ }
+ return nfserr_wrongsec;
+}
+
+/*
+ * Uses rq_client and rq_gssclient to find an export; uses rq_client (an
+ * auth_unix client) if it's available and has secinfo information;
+ * otherwise, will try to use rq_gssclient.
+ *
+ * Called from functions that handle requests; functions that do work on
+ * behalf of mountd are passed a single client name to use, and should
+ * use exp_get_by_name() or exp_find().
+ */
+struct svc_export *
+rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
+{
+ struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
+
+ if (rqstp->rq_client == NULL)
+ goto gss;
+
+ /* First try the auth_unix client: */
+ exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle);
+ if (PTR_ERR(exp) == -ENOENT)
+ goto gss;
+ if (IS_ERR(exp))
+ return exp;
+ /* If it has secinfo, assume there are no gss/... clients */
+ if (exp->ex_nflavors > 0)
+ return exp;
+gss:
+ /* Otherwise, try falling back on gss client */
+ if (rqstp->rq_gssclient == NULL)
+ return exp;
+ gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle);
+ if (PTR_ERR(gssexp) == -ENOENT)
+ return exp;
+ if (!IS_ERR(exp))
+ exp_put(exp);
+ return gssexp;
+}
+
+struct svc_export *
+rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv)
+{
+ struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
+
+ if (rqstp->rq_client == NULL)
+ goto gss;
+
+ /* First try the auth_unix client: */
+ exp = exp_find(rqstp->rq_client, fsid_type, fsidv, &rqstp->rq_chandle);
+ if (PTR_ERR(exp) == -ENOENT)
+ goto gss;
+ if (IS_ERR(exp))
+ return exp;
+ /* If it has secinfo, assume there are no gss/... clients */
+ if (exp->ex_nflavors > 0)
+ return exp;
+gss:
+ /* Otherwise, try falling back on gss client */
+ if (rqstp->rq_gssclient == NULL)
+ return exp;
+ gssexp = exp_find(rqstp->rq_gssclient, fsid_type, fsidv,
+ &rqstp->rq_chandle);
+ if (PTR_ERR(gssexp) == -ENOENT)
+ return exp;
+ if (!IS_ERR(exp))
+ exp_put(exp);
+ return gssexp;
+}
+
+struct svc_export *
+rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
+{
+ struct dentry *saved = dget(path->dentry);
+ struct svc_export *exp = rqst_exp_get_by_name(rqstp, path);
+
+ while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+ struct dentry *parent = dget_parent(path->dentry);
+ dput(path->dentry);
+ path->dentry = parent;
+ exp = rqst_exp_get_by_name(rqstp, path);
+ }
+ dput(path->dentry);
+ path->dentry = saved;
+ return exp;
+}
+
+struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp)
+{
+ u32 fsidv[2];
+
+ mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
+
+ return rqst_exp_find(rqstp, FSID_NUM, fsidv);
+}
+
+/*
+ * Called when we need the filehandle for the root of the pseudofs,
+ * for a given NFSv4 client. The root is defined to be the
+ * export point with fsid==0
+ */
+__be32
+exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
+{
+ struct svc_export *exp;
+ __be32 rv;
+
+ exp = rqst_find_fsidzero_export(rqstp);
+ if (IS_ERR(exp))
+ return nfserrno(PTR_ERR(exp));
+ rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
+ exp_put(exp);
+ return rv;
+}
+
+/* Iterator */
+
+static void *e_start(struct seq_file *m, loff_t *pos)
+ __acquires(svc_export_cache.hash_lock)
+{
+ loff_t n = *pos;
+ unsigned hash, export;
+ struct cache_head *ch;
+
+ read_lock(&svc_export_cache.hash_lock);
+ if (!n--)
+ return SEQ_START_TOKEN;
+ hash = n >> 32;
+ export = n & ((1LL<<32) - 1);
+
+
+ for (ch=export_table[hash]; ch; ch=ch->next)
+ if (!export--)
+ return ch;
+ n &= ~((1LL<<32) - 1);
+ do {
+ hash++;
+ n += 1LL<<32;
+ } while(hash < EXPORT_HASHMAX && export_table[hash]==NULL);
+ if (hash >= EXPORT_HASHMAX)
+ return NULL;
+ *pos = n+1;
+ return export_table[hash];
+}
+
+static void *e_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ struct cache_head *ch = p;
+ int hash = (*pos >> 32);
+
+ if (p == SEQ_START_TOKEN)
+ hash = 0;
+ else if (ch->next == NULL) {
+ hash++;
+ *pos += 1LL<<32;
+ } else {
+ ++*pos;
+ return ch->next;
+ }
+ *pos &= ~((1LL<<32) - 1);
+ while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) {
+ hash++;
+ *pos += 1LL<<32;
+ }
+ if (hash >= EXPORT_HASHMAX)
+ return NULL;
+ ++*pos;
+ return export_table[hash];
+}
+
+static void e_stop(struct seq_file *m, void *p)
+ __releases(svc_export_cache.hash_lock)
+{
+ read_unlock(&svc_export_cache.hash_lock);
+}
+
+static struct flags {
+ int flag;
+ char *name[2];
+} expflags[] = {
+ { NFSEXP_READONLY, {"ro", "rw"}},
+ { NFSEXP_INSECURE_PORT, {"insecure", ""}},
+ { NFSEXP_ROOTSQUASH, {"root_squash", "no_root_squash"}},
+ { NFSEXP_ALLSQUASH, {"all_squash", ""}},
+ { NFSEXP_ASYNC, {"async", "sync"}},
+ { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
+ { NFSEXP_NOHIDE, {"nohide", ""}},
+ { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
+ { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
+ { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
+ { NFSEXP_V4ROOT, {"v4root", ""}},
+ { 0, {"", ""}}
+};
+
+static void show_expflags(struct seq_file *m, int flags, int mask)
+{
+ struct flags *flg;
+ int state, first = 0;
+
+ for (flg = expflags; flg->flag; flg++) {
+ if (flg->flag & ~mask)
+ continue;
+ state = (flg->flag & flags) ? 0 : 1;
+ if (*flg->name[state])
+ seq_printf(m, "%s%s", first++?",":"", flg->name[state]);
+ }
+}
+
+static void show_secinfo_flags(struct seq_file *m, int flags)
+{
+ seq_printf(m, ",");
+ show_expflags(m, flags, NFSEXP_SECINFO_FLAGS);
+}
+
+static bool secinfo_flags_equal(int f, int g)
+{
+ f &= NFSEXP_SECINFO_FLAGS;
+ g &= NFSEXP_SECINFO_FLAGS;
+ return f == g;
+}
+
+static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end)
+{
+ int flags;
+
+ flags = (*fp)->flags;
+ seq_printf(m, ",sec=%d", (*fp)->pseudoflavor);
+ (*fp)++;
+ while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) {
+ seq_printf(m, ":%d", (*fp)->pseudoflavor);
+ (*fp)++;
+ }
+ return flags;
+}
+
+static void show_secinfo(struct seq_file *m, struct svc_export *exp)
+{
+ struct exp_flavor_info *f;
+ struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
+ int flags;
+
+ if (exp->ex_nflavors == 0)
+ return;
+ f = exp->ex_flavors;
+ flags = show_secinfo_run(m, &f, end);
+ if (!secinfo_flags_equal(flags, exp->ex_flags))
+ show_secinfo_flags(m, flags);
+ while (f != end) {
+ flags = show_secinfo_run(m, &f, end);
+ show_secinfo_flags(m, flags);
+ }
+}
+
+static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
+{
+ show_expflags(m, flag, NFSEXP_ALLFLAGS);
+ if (flag & NFSEXP_FSID)
+ seq_printf(m, ",fsid=%d", fsid);
+ if (anonu != (uid_t)-2 && anonu != (0x10000-2))
+ seq_printf(m, ",anonuid=%u", anonu);
+ if (anong != (gid_t)-2 && anong != (0x10000-2))
+ seq_printf(m, ",anongid=%u", anong);
+ if (fsloc && fsloc->locations_count > 0) {
+ char *loctype = (fsloc->migrated) ? "refer" : "replicas";
+ int i;
+
+ seq_printf(m, ",%s=", loctype);
+ seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\");
+ for (i = 1; i < fsloc->locations_count; i++) {
+ seq_putc(m, ';');
+ seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\");
+ }
+ }
+}
+
+static int e_show(struct seq_file *m, void *p)
+{
+ struct cache_head *cp = p;
+ struct svc_export *exp = container_of(cp, struct svc_export, h);
+
+ if (p == SEQ_START_TOKEN) {
+ seq_puts(m, "# Version 1.1\n");
+ seq_puts(m, "# Path Client(Flags) # IPs\n");
+ return 0;
+ }
+
+ cache_get(&exp->h);
+ if (cache_check(&svc_export_cache, &exp->h, NULL))
+ return 0;
+ cache_put(&exp->h, &svc_export_cache);
+ return svc_export_show(m, &svc_export_cache, cp);
+}
+
+const struct seq_operations nfs_exports_op = {
+ .start = e_start,
+ .next = e_next,
+ .stop = e_stop,
+ .show = e_show,
+};
+
+
+/*
+ * Initialize the exports module.
+ */
+int
+nfsd_export_init(void)
+{
+ int rv;
+ dprintk("nfsd: initializing export module.\n");
+
+ rv = cache_register_net(&svc_export_cache, &init_net);
+ if (rv)
+ return rv;
+ rv = cache_register_net(&svc_expkey_cache, &init_net);
+ if (rv)
+ cache_unregister_net(&svc_export_cache, &init_net);
+ return rv;
+
+}
+
+/*
+ * Flush exports table - called when last nfsd thread is killed
+ */
+void
+nfsd_export_flush(void)
+{
+ cache_purge(&svc_expkey_cache);
+ cache_purge(&svc_export_cache);
+}
+
+/*
+ * Shutdown the exports module.
+ */
+void
+nfsd_export_shutdown(void)
+{
+
+ dprintk("nfsd: shutting down export module.\n");
+
+ cache_unregister_net(&svc_expkey_cache, &init_net);
+ cache_unregister_net(&svc_export_cache, &init_net);
+ svcauth_unix_purge();
+
+ dprintk("nfsd: export shutdown complete.\n");
+}
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
new file mode 100644
index 00000000000..ce7f0758d84
--- /dev/null
+++ b/fs/nfsd/fault_inject.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+ *
+ * Uses debugfs to create fault injection points for client testing
+ */
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "state.h"
+#include "fault_inject.h"
+
+struct nfsd_fault_inject_op {
+ char *file;
+ void (*func)(u64);
+};
+
+static struct nfsd_fault_inject_op inject_ops[] = {
+ {
+ .file = "forget_clients",
+ .func = nfsd_forget_clients,
+ },
+ {
+ .file = "forget_locks",
+ .func = nfsd_forget_locks,
+ },
+ {
+ .file = "forget_openowners",
+ .func = nfsd_forget_openowners,
+ },
+ {
+ .file = "forget_delegations",
+ .func = nfsd_forget_delegations,
+ },
+ {
+ .file = "recall_delegations",
+ .func = nfsd_recall_delegations,
+ },
+};
+
+static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
+static struct dentry *debug_dir;
+
+static int nfsd_inject_set(void *op_ptr, u64 val)
+{
+ struct nfsd_fault_inject_op *op = op_ptr;
+
+ if (val == 0)
+ printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
+ else
+ printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
+
+ op->func(val);
+ return 0;
+}
+
+static int nfsd_inject_get(void *data, u64 *val)
+{
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n");
+
+void nfsd_fault_inject_cleanup(void)
+{
+ debugfs_remove_recursive(debug_dir);
+}
+
+int nfsd_fault_inject_init(void)
+{
+ unsigned int i;
+ struct nfsd_fault_inject_op *op;
+ mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+
+ debug_dir = debugfs_create_dir("nfsd", NULL);
+ if (!debug_dir)
+ goto fail;
+
+ for (i = 0; i < NUM_INJECT_OPS; i++) {
+ op = &inject_ops[i];
+ if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
+ goto fail;
+ }
+ return 0;
+
+fail:
+ nfsd_fault_inject_cleanup();
+ return -ENOMEM;
+}
diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h
new file mode 100644
index 00000000000..90bd0570956
--- /dev/null
+++ b/fs/nfsd/fault_inject.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+ *
+ * Function definitions for fault injection
+ */
+
+#ifndef LINUX_NFSD_FAULT_INJECT_H
+#define LINUX_NFSD_FAULT_INJECT_H
+
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+int nfsd_fault_inject_init(void);
+void nfsd_fault_inject_cleanup(void);
+void nfsd_forget_clients(u64);
+void nfsd_forget_locks(u64);
+void nfsd_forget_openowners(u64);
+void nfsd_forget_delegations(u64);
+void nfsd_recall_delegations(u64);
+#else /* CONFIG_NFSD_FAULT_INJECTION */
+static inline int nfsd_fault_inject_init(void) { return 0; }
+static inline void nfsd_fault_inject_cleanup(void) {}
+static inline void nfsd_forget_clients(u64 num) {}
+static inline void nfsd_forget_locks(u64 num) {}
+static inline void nfsd_forget_openowners(u64 num) {}
+static inline void nfsd_forget_delegations(u64 num) {}
+static inline void nfsd_recall_delegations(u64 num) {}
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
+#endif /* LINUX_NFSD_FAULT_INJECT_H */
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
new file mode 100644
index 00000000000..2f3be132153
--- /dev/null
+++ b/fs/nfsd/idmap.h
@@ -0,0 +1,62 @@
+/*
+ * Mapping of UID to name and vice versa.
+ *
+ * Copyright (c) 2002, 2003 The Regents of the University of
+ * Michigan. All rights reserved.
+> *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LINUX_NFSD_IDMAP_H
+#define LINUX_NFSD_IDMAP_H
+
+#include <linux/in.h>
+#include <linux/sunrpc/svc.h>
+
+/* XXX from linux/nfs_idmap.h */
+#define IDMAP_NAMESZ 128
+
+#ifdef CONFIG_NFSD_V4
+int nfsd_idmap_init(void);
+void nfsd_idmap_shutdown(void);
+#else
+static inline int nfsd_idmap_init(void)
+{
+ return 0;
+}
+static inline void nfsd_idmap_shutdown(void)
+{
+}
+#endif
+
+__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *);
+__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *);
+int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *);
+int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *);
+
+#endif /* LINUX_NFSD_IDMAP_H */
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
new file mode 100644
index 00000000000..77e7a5cca88
--- /dev/null
+++ b/fs/nfsd/lockd.c
@@ -0,0 +1,77 @@
+/*
+ * This file contains all the stubs needed when communicating with lockd.
+ * This level of indirection is necessary so we can run nfsd+lockd without
+ * requiring the nfs client to be compiled in/loaded, and vice versa.
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/file.h>
+#include <linux/lockd/bind.h>
+#include "nfsd.h"
+#include "vfs.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_LOCKD
+
+#ifdef CONFIG_LOCKD_V4
+#define nlm_stale_fh nlm4_stale_fh
+#define nlm_failed nlm4_failed
+#else
+#define nlm_stale_fh nlm_lck_denied_nolocks
+#define nlm_failed nlm_lck_denied_nolocks
+#endif
+/*
+ * Note: we hold the dentry use count while the file is open.
+ */
+static __be32
+nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
+{
+ __be32 nfserr;
+ struct svc_fh fh;
+
+ /* must initialize before using! but maxsize doesn't matter */
+ fh_init(&fh,0);
+ fh.fh_handle.fh_size = f->size;
+ memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
+ fh.fh_export = NULL;
+
+ nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
+ fh_put(&fh);
+ /* We return nlm error codes as nlm doesn't know
+ * about nfsd, but nfsd does know about nlm..
+ */
+ switch (nfserr) {
+ case nfs_ok:
+ return 0;
+ case nfserr_dropit:
+ return nlm_drop_reply;
+ case nfserr_stale:
+ return nlm_stale_fh;
+ default:
+ return nlm_failed;
+ }
+}
+
+static void
+nlm_fclose(struct file *filp)
+{
+ fput(filp);
+}
+
+static struct nlmsvc_binding nfsd_nlm_ops = {
+ .fopen = nlm_fopen, /* open file for locking */
+ .fclose = nlm_fclose, /* close file */
+};
+
+void
+nfsd_lockd_init(void)
+{
+ dprintk("nfsd: initializing lockd\n");
+ nlmsvc_ops = &nfsd_nlm_ops;
+}
+
+void
+nfsd_lockd_shutdown(void)
+{
+ nlmsvc_ops = NULL;
+}
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
new file mode 100644
index 00000000000..6aa5590c367
--- /dev/null
+++ b/fs/nfsd/nfs2acl.c
@@ -0,0 +1,356 @@
+/*
+ * Process version 2 NFSACL requests.
+ *
+ * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+#include "nfsd.h"
+/* FIXME: nfsacl.h is a broken header */
+#include <linux/nfsacl.h>
+#include <linux/gfp.h>
+#include "cache.h"
+#include "xdr3.h"
+#include "vfs.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+#define RETURN_STATUS(st) { resp->status = (st); return (st); }
+
+/*
+ * NULL call.
+ */
+static __be32
+nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ return nfs_ok;
+}
+
+/*
+ * Get the Access and/or Default ACL of a file.
+ */
+static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
+ struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+ svc_fh *fh;
+ struct posix_acl *acl;
+ __be32 nfserr = 0;
+
+ dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
+
+ fh = fh_copy(&resp->fh, &argp->fh);
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+ if (nfserr)
+ RETURN_STATUS(nfserr);
+
+ if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+ RETURN_STATUS(nfserr_inval);
+ resp->mask = argp->mask;
+
+ if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+ acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl)) {
+ int err = PTR_ERR(acl);
+
+ if (err == -ENODATA || err == -EOPNOTSUPP)
+ acl = NULL;
+ else {
+ nfserr = nfserrno(err);
+ goto fail;
+ }
+ }
+ if (acl == NULL) {
+ /* Solaris returns the inode's minimum ACL. */
+
+ struct inode *inode = fh->fh_dentry->d_inode;
+ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+ }
+ resp->acl_access = acl;
+ }
+ if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+ /* Check how Solaris handles requests for the Default ACL
+ of a non-directory! */
+
+ acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+ if (IS_ERR(acl)) {
+ int err = PTR_ERR(acl);
+
+ if (err == -ENODATA || err == -EOPNOTSUPP)
+ acl = NULL;
+ else {
+ nfserr = nfserrno(err);
+ goto fail;
+ }
+ }
+ resp->acl_default = acl;
+ }
+
+ /* resp->acl_{access,default} are released in nfssvc_release_getacl. */
+ RETURN_STATUS(0);
+
+fail:
+ posix_acl_release(resp->acl_access);
+ posix_acl_release(resp->acl_default);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ */
+static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
+ struct nfsd3_setaclargs *argp,
+ struct nfsd_attrstat *resp)
+{
+ svc_fh *fh;
+ __be32 nfserr = 0;
+
+ dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
+
+ fh = fh_copy(&resp->fh, &argp->fh);
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
+
+ if (!nfserr) {
+ nfserr = nfserrno( nfsd_set_posix_acl(
+ fh, ACL_TYPE_ACCESS, argp->acl_access) );
+ }
+ if (!nfserr) {
+ nfserr = nfserrno( nfsd_set_posix_acl(
+ fh, ACL_TYPE_DEFAULT, argp->acl_default) );
+ }
+
+ /* argp->acl_{access,default} may have been allocated in
+ nfssvc_decode_setaclargs. */
+ posix_acl_release(argp->acl_access);
+ posix_acl_release(argp->acl_default);
+ return nfserr;
+}
+
+/*
+ * Check file attributes
+ */
+static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
+ struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+{
+ dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
+
+ fh_copy(&resp->fh, &argp->fh);
+ return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+}
+
+/*
+ * Check file access
+ */
+static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
+ struct nfsd3_accessres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: ACCESS(2acl) %s 0x%x\n",
+ SVCFH_fmt(&argp->fh),
+ argp->access);
+
+ fh_copy(&resp->fh, &argp->fh);
+ resp->access = argp->access;
+ nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+ return nfserr;
+}
+
+/*
+ * XDR decode functions
+ */
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_getaclargs *argp)
+{
+ if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+ return 0;
+ argp->mask = ntohl(*p); p++;
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_setaclargs *argp)
+{
+ struct kvec *head = rqstp->rq_arg.head;
+ unsigned int base;
+ int n;
+
+ if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+ return 0;
+ argp->mask = ntohl(*p++);
+ if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+ !xdr_argsize_check(rqstp, p))
+ return 0;
+
+ base = (char *)p - (char *)head->iov_base;
+ n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+ (argp->mask & NFS_ACL) ?
+ &argp->acl_access : NULL);
+ if (n > 0)
+ n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+ (argp->mask & NFS_DFACL) ?
+ &argp->acl_default : NULL);
+ return (n > 0);
+}
+
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd_fhandle *argp)
+{
+ if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+ return 0;
+ return xdr_argsize_check(rqstp, p);
+}
+
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_accessargs *argp)
+{
+ if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+ return 0;
+ argp->access = ntohl(*p++);
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * XDR encode functions
+ */
+
+/*
+ * There must be an encoding function for void results so svc_process
+ * will work properly.
+ */
+int
+nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+{
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* GETACL */
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_getaclres *resp)
+{
+ struct dentry *dentry = resp->fh.fh_dentry;
+ struct inode *inode;
+ struct kvec *head = rqstp->rq_res.head;
+ unsigned int base;
+ int n;
+ int w;
+
+ /*
+ * Since this is version 2, the check for nfserr in
+ * nfsd_dispatch actually ensures the following cannot happen.
+ * However, it seems fragile to depend on that.
+ */
+ if (dentry == NULL || dentry->d_inode == NULL)
+ return 0;
+ inode = dentry->d_inode;
+
+ p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+ *p++ = htonl(resp->mask);
+ if (!xdr_ressize_check(rqstp, p))
+ return 0;
+ base = (char *)p - (char *)head->iov_base;
+
+ rqstp->rq_res.page_len = w = nfsacl_size(
+ (resp->mask & NFS_ACL) ? resp->acl_access : NULL,
+ (resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+ while (w > 0) {
+ if (!rqstp->rq_respages[rqstp->rq_resused++])
+ return 0;
+ w -= PAGE_SIZE;
+ }
+
+ n = nfsacl_encode(&rqstp->rq_res, base, inode,
+ resp->acl_access,
+ resp->mask & NFS_ACL, 0);
+ if (n > 0)
+ n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+ resp->acl_default,
+ resp->mask & NFS_DFACL,
+ NFS_ACL_DEFAULT);
+ if (n <= 0)
+ return 0;
+ return 1;
+}
+
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd_attrstat *resp)
+{
+ p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* ACCESS */
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_accessres *resp)
+{
+ p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+ *p++ = htonl(resp->access);
+ return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_getaclres *resp)
+{
+ fh_put(&resp->fh);
+ posix_acl_release(resp->acl_access);
+ posix_acl_release(resp->acl_default);
+ return 1;
+}
+
+static int nfsaclsvc_release_attrstat(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd_attrstat *resp)
+{
+ fh_put(&resp->fh);
+ return 1;
+}
+
+static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_accessres *resp)
+{
+ fh_put(&resp->fh);
+ return 1;
+}
+
+#define nfsaclsvc_decode_voidargs NULL
+#define nfsaclsvc_release_void NULL
+#define nfsd3_fhandleargs nfsd_fhandle
+#define nfsd3_attrstatres nfsd_attrstat
+#define nfsd3_voidres nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize) \
+ { (svc_procfunc) nfsacld_proc_##name, \
+ (kxdrproc_t) nfsaclsvc_decode_##argt##args, \
+ (kxdrproc_t) nfsaclsvc_encode_##rest##res, \
+ (kxdrproc_t) nfsaclsvc_release_##relt, \
+ sizeof(struct nfsd3_##argt##args), \
+ sizeof(struct nfsd3_##rest##res), \
+ 0, \
+ cache, \
+ respsize, \
+ }
+
+#define ST 1 /* status*/
+#define AT 21 /* attributes */
+#define pAT (1+AT) /* post attributes - conditional */
+#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */
+
+static struct svc_procedure nfsd_acl_procedures2[] = {
+ PROC(null, void, void, void, RC_NOCACHE, ST),
+ PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)),
+ PROC(setacl, setacl, attrstat, attrstat, RC_NOCACHE, ST+AT),
+ PROC(getattr, fhandle, attrstat, attrstat, RC_NOCACHE, ST+AT),
+ PROC(access, access, access, access, RC_NOCACHE, ST+AT+1),
+};
+
+struct svc_version nfsd_acl_version2 = {
+ .vs_vers = 2,
+ .vs_nproc = 5,
+ .vs_proc = nfsd_acl_procedures2,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS3_SVC_XDRSIZE,
+ .vs_hidden = 0,
+};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
new file mode 100644
index 00000000000..a596e9d987e
--- /dev/null
+++ b/fs/nfsd/nfs3acl.c
@@ -0,0 +1,267 @@
+/*
+ * Process version 3 NFSACL requests.
+ *
+ * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+#include "nfsd.h"
+/* FIXME: nfsacl.h is a broken header */
+#include <linux/nfsacl.h>
+#include <linux/gfp.h>
+#include "cache.h"
+#include "xdr3.h"
+#include "vfs.h"
+
+#define RETURN_STATUS(st) { resp->status = (st); return (st); }
+
+/*
+ * NULL call.
+ */
+static __be32
+nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ return nfs_ok;
+}
+
+/*
+ * Get the Access and/or Default ACL of a file.
+ */
+static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
+ struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+ svc_fh *fh;
+ struct posix_acl *acl;
+ __be32 nfserr = 0;
+
+ fh = fh_copy(&resp->fh, &argp->fh);
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+ if (nfserr)
+ RETURN_STATUS(nfserr);
+
+ if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+ RETURN_STATUS(nfserr_inval);
+ resp->mask = argp->mask;
+
+ if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+ acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl)) {
+ int err = PTR_ERR(acl);
+
+ if (err == -ENODATA || err == -EOPNOTSUPP)
+ acl = NULL;
+ else {
+ nfserr = nfserrno(err);
+ goto fail;
+ }
+ }
+ if (acl == NULL) {
+ /* Solaris returns the inode's minimum ACL. */
+
+ struct inode *inode = fh->fh_dentry->d_inode;
+ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+ }
+ resp->acl_access = acl;
+ }
+ if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+ /* Check how Solaris handles requests for the Default ACL
+ of a non-directory! */
+
+ acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+ if (IS_ERR(acl)) {
+ int err = PTR_ERR(acl);
+
+ if (err == -ENODATA || err == -EOPNOTSUPP)
+ acl = NULL;
+ else {
+ nfserr = nfserrno(err);
+ goto fail;
+ }
+ }
+ resp->acl_default = acl;
+ }
+
+ /* resp->acl_{access,default} are released in nfs3svc_release_getacl. */
+ RETURN_STATUS(0);
+
+fail:
+ posix_acl_release(resp->acl_access);
+ posix_acl_release(resp->acl_default);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ */
+static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
+ struct nfsd3_setaclargs *argp,
+ struct nfsd3_attrstat *resp)
+{
+ svc_fh *fh;
+ __be32 nfserr = 0;
+
+ fh = fh_copy(&resp->fh, &argp->fh);
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
+
+ if (!nfserr) {
+ nfserr = nfserrno( nfsd_set_posix_acl(
+ fh, ACL_TYPE_ACCESS, argp->acl_access) );
+ }
+ if (!nfserr) {
+ nfserr = nfserrno( nfsd_set_posix_acl(
+ fh, ACL_TYPE_DEFAULT, argp->acl_default) );
+ }
+
+ /* argp->acl_{access,default} may have been allocated in
+ nfs3svc_decode_setaclargs. */
+ posix_acl_release(argp->acl_access);
+ posix_acl_release(argp->acl_default);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * XDR decode functions
+ */
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_getaclargs *args)
+{
+ if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+ return 0;
+ args->mask = ntohl(*p); p++;
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_setaclargs *args)
+{
+ struct kvec *head = rqstp->rq_arg.head;
+ unsigned int base;
+ int n;
+
+ if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+ return 0;
+ args->mask = ntohl(*p++);
+ if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+ !xdr_argsize_check(rqstp, p))
+ return 0;
+
+ base = (char *)p - (char *)head->iov_base;
+ n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+ (args->mask & NFS_ACL) ?
+ &args->acl_access : NULL);
+ if (n > 0)
+ n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+ (args->mask & NFS_DFACL) ?
+ &args->acl_default : NULL);
+ return (n > 0);
+}
+
+/*
+ * XDR encode functions
+ */
+
+/* GETACL */
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_getaclres *resp)
+{
+ struct dentry *dentry = resp->fh.fh_dentry;
+
+ p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+ if (resp->status == 0 && dentry && dentry->d_inode) {
+ struct inode *inode = dentry->d_inode;
+ struct kvec *head = rqstp->rq_res.head;
+ unsigned int base;
+ int n;
+ int w;
+
+ *p++ = htonl(resp->mask);
+ if (!xdr_ressize_check(rqstp, p))
+ return 0;
+ base = (char *)p - (char *)head->iov_base;
+
+ rqstp->rq_res.page_len = w = nfsacl_size(
+ (resp->mask & NFS_ACL) ? resp->acl_access : NULL,
+ (resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+ while (w > 0) {
+ if (!rqstp->rq_respages[rqstp->rq_resused++])
+ return 0;
+ w -= PAGE_SIZE;
+ }
+
+ n = nfsacl_encode(&rqstp->rq_res, base, inode,
+ resp->acl_access,
+ resp->mask & NFS_ACL, 0);
+ if (n > 0)
+ n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+ resp->acl_default,
+ resp->mask & NFS_DFACL,
+ NFS_ACL_DEFAULT);
+ if (n <= 0)
+ return 0;
+ } else
+ if (!xdr_ressize_check(rqstp, p))
+ return 0;
+
+ return 1;
+}
+
+/* SETACL */
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_attrstat *resp)
+{
+ p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+
+ return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_getaclres *resp)
+{
+ fh_put(&resp->fh);
+ posix_acl_release(resp->acl_access);
+ posix_acl_release(resp->acl_default);
+ return 1;
+}
+
+#define nfs3svc_decode_voidargs NULL
+#define nfs3svc_release_void NULL
+#define nfsd3_setaclres nfsd3_attrstat
+#define nfsd3_voidres nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize) \
+ { (svc_procfunc) nfsd3_proc_##name, \
+ (kxdrproc_t) nfs3svc_decode_##argt##args, \
+ (kxdrproc_t) nfs3svc_encode_##rest##res, \
+ (kxdrproc_t) nfs3svc_release_##relt, \
+ sizeof(struct nfsd3_##argt##args), \
+ sizeof(struct nfsd3_##rest##res), \
+ 0, \
+ cache, \
+ respsize, \
+ }
+
+#define ST 1 /* status*/
+#define AT 21 /* attributes */
+#define pAT (1+AT) /* post attributes - conditional */
+#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */
+
+static struct svc_procedure nfsd_acl_procedures3[] = {
+ PROC(null, void, void, void, RC_NOCACHE, ST),
+ PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)),
+ PROC(setacl, setacl, setacl, fhandle, RC_NOCACHE, ST+pAT),
+};
+
+struct svc_version nfsd_acl_version3 = {
+ .vs_vers = 3,
+ .vs_nproc = 3,
+ .vs_proc = nfsd_acl_procedures3,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS3_SVC_XDRSIZE,
+ .vs_hidden = 0,
+};
+
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
new file mode 100644
index 00000000000..9095f3c21df
--- /dev/null
+++ b/fs/nfsd/nfs3proc.c
@@ -0,0 +1,896 @@
+/*
+ * Process version 3 NFS requests.
+ *
+ * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/magic.h>
+
+#include "cache.h"
+#include "xdr3.h"
+#include "vfs.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+#define RETURN_STATUS(st) { resp->status = (st); return (st); }
+
+static int nfs3_ftypes[] = {
+ 0, /* NF3NON */
+ S_IFREG, /* NF3REG */
+ S_IFDIR, /* NF3DIR */
+ S_IFBLK, /* NF3BLK */
+ S_IFCHR, /* NF3CHR */
+ S_IFLNK, /* NF3LNK */
+ S_IFSOCK, /* NF3SOCK */
+ S_IFIFO, /* NF3FIFO */
+};
+
+/*
+ * NULL call.
+ */
+static __be32
+nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ return nfs_ok;
+}
+
+/*
+ * Get a file's attributes
+ */
+static __be32
+nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
+ struct nfsd3_attrstat *resp)
+{
+ int err;
+ __be32 nfserr;
+
+ dprintk("nfsd: GETATTR(3) %s\n",
+ SVCFH_fmt(&argp->fh));
+
+ fh_copy(&resp->fh, &argp->fh);
+ nfserr = fh_verify(rqstp, &resp->fh, 0,
+ NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
+ if (nfserr)
+ RETURN_STATUS(nfserr);
+
+ err = vfs_getattr(resp->fh.fh_export->ex_path.mnt,
+ resp->fh.fh_dentry, &resp->stat);
+ nfserr = nfserrno(err);
+
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set a file's attributes
+ */
+static __be32
+nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
+ struct nfsd3_attrstat *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: SETATTR(3) %s\n",
+ SVCFH_fmt(&argp->fh));
+
+ fh_copy(&resp->fh, &argp->fh);
+ nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,
+ argp->check_guard, argp->guardtime);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Look up a path name component
+ */
+static __be32
+nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+ struct nfsd3_diropres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: LOOKUP(3) %s %.*s\n",
+ SVCFH_fmt(&argp->fh),
+ argp->len,
+ argp->name);
+
+ fh_copy(&resp->dirfh, &argp->fh);
+ fh_init(&resp->fh, NFS3_FHSIZE);
+
+ nfserr = nfsd_lookup(rqstp, &resp->dirfh,
+ argp->name,
+ argp->len,
+ &resp->fh);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Check file access
+ */
+static __be32
+nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
+ struct nfsd3_accessres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: ACCESS(3) %s 0x%x\n",
+ SVCFH_fmt(&argp->fh),
+ argp->access);
+
+ fh_copy(&resp->fh, &argp->fh);
+ resp->access = argp->access;
+ nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Read a symlink.
+ */
+static __be32
+nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
+ struct nfsd3_readlinkres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
+
+ /* Read the symlink. */
+ fh_copy(&resp->fh, &argp->fh);
+ resp->len = NFS3_MAXPATHLEN;
+ nfserr = nfsd_readlink(rqstp, &resp->fh, argp->buffer, &resp->len);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Read a portion of a file.
+ */
+static __be32
+nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
+ struct nfsd3_readres *resp)
+{
+ __be32 nfserr;
+ u32 max_blocksize = svc_max_payload(rqstp);
+
+ dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
+ SVCFH_fmt(&argp->fh),
+ (unsigned long) argp->count,
+ (unsigned long long) argp->offset);
+
+ /* Obtain buffer pointer for payload.
+ * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
+ * + 1 (xdr opaque byte count) = 26
+ */
+
+ resp->count = argp->count;
+ if (max_blocksize < resp->count)
+ resp->count = max_blocksize;
+
+ svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
+
+ fh_copy(&resp->fh, &argp->fh);
+ nfserr = nfsd_read(rqstp, &resp->fh,
+ argp->offset,
+ rqstp->rq_vec, argp->vlen,
+ &resp->count);
+ if (nfserr == 0) {
+ struct inode *inode = resp->fh.fh_dentry->d_inode;
+
+ resp->eof = (argp->offset + resp->count) >= inode->i_size;
+ }
+
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Write data to a file
+ */
+static __be32
+nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
+ struct nfsd3_writeres *resp)
+{
+ __be32 nfserr;
+ unsigned long cnt = argp->len;
+
+ dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n",
+ SVCFH_fmt(&argp->fh),
+ argp->len,
+ (unsigned long long) argp->offset,
+ argp->stable? " stable" : "");
+
+ fh_copy(&resp->fh, &argp->fh);
+ resp->committed = argp->stable;
+ nfserr = nfsd_write(rqstp, &resp->fh, NULL,
+ argp->offset,
+ rqstp->rq_vec, argp->vlen,
+ &cnt,
+ &resp->committed);
+ resp->count = cnt;
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * With NFSv3, CREATE processing is a lot easier than with NFSv2.
+ * At least in theory; we'll see how it fares in practice when the
+ * first reports about SunOS compatibility problems start to pour in...
+ */
+static __be32
+nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
+ struct nfsd3_diropres *resp)
+{
+ svc_fh *dirfhp, *newfhp = NULL;
+ struct iattr *attr;
+ __be32 nfserr;
+
+ dprintk("nfsd: CREATE(3) %s %.*s\n",
+ SVCFH_fmt(&argp->fh),
+ argp->len,
+ argp->name);
+
+ dirfhp = fh_copy(&resp->dirfh, &argp->fh);
+ newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+ attr = &argp->attrs;
+
+ /* Get the directory inode */
+ nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
+ if (nfserr)
+ RETURN_STATUS(nfserr);
+
+ /* Unfudge the mode bits */
+ attr->ia_mode &= ~S_IFMT;
+ if (!(attr->ia_valid & ATTR_MODE)) {
+ attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode = S_IFREG;
+ } else {
+ attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
+ }
+
+ /* Now create the file and set attributes */
+ nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
+ attr, newfhp,
+ argp->createmode, argp->verf, NULL, NULL);
+
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Make directory. This operation is not idempotent.
+ */
+static __be32
+nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
+ struct nfsd3_diropres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: MKDIR(3) %s %.*s\n",
+ SVCFH_fmt(&argp->fh),
+ argp->len,
+ argp->name);
+
+ argp->attrs.ia_valid &= ~ATTR_SIZE;
+ fh_copy(&resp->dirfh, &argp->fh);
+ fh_init(&resp->fh, NFS3_FHSIZE);
+ nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+ &argp->attrs, S_IFDIR, 0, &resp->fh);
+ fh_unlock(&resp->dirfh);
+ RETURN_STATUS(nfserr);
+}
+
+static __be32
+nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
+ struct nfsd3_diropres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n",
+ SVCFH_fmt(&argp->ffh),
+ argp->flen, argp->fname,
+ argp->tlen, argp->tname);
+
+ fh_copy(&resp->dirfh, &argp->ffh);
+ fh_init(&resp->fh, NFS3_FHSIZE);
+ nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
+ argp->tname, argp->tlen,
+ &resp->fh, &argp->attrs);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Make socket/fifo/device.
+ */
+static __be32
+nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
+ struct nfsd3_diropres *resp)
+{
+ __be32 nfserr;
+ int type;
+ dev_t rdev = 0;
+
+ dprintk("nfsd: MKNOD(3) %s %.*s\n",
+ SVCFH_fmt(&argp->fh),
+ argp->len,
+ argp->name);
+
+ fh_copy(&resp->dirfh, &argp->fh);
+ fh_init(&resp->fh, NFS3_FHSIZE);
+
+ if (argp->ftype == 0 || argp->ftype >= NF3BAD)
+ RETURN_STATUS(nfserr_inval);
+ if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) {
+ rdev = MKDEV(argp->major, argp->minor);
+ if (MAJOR(rdev) != argp->major ||
+ MINOR(rdev) != argp->minor)
+ RETURN_STATUS(nfserr_inval);
+ } else
+ if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO)
+ RETURN_STATUS(nfserr_inval);
+
+ type = nfs3_ftypes[argp->ftype];
+ nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+ &argp->attrs, type, rdev, &resp->fh);
+ fh_unlock(&resp->dirfh);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Remove file/fifo/socket etc.
+ */
+static __be32
+nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+ struct nfsd3_attrstat *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: REMOVE(3) %s %.*s\n",
+ SVCFH_fmt(&argp->fh),
+ argp->len,
+ argp->name);
+
+ /* Unlink. -S_IFDIR means file must not be a directory */
+ fh_copy(&resp->fh, &argp->fh);
+ nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len);
+ fh_unlock(&resp->fh);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Remove a directory
+ */
+static __be32
+nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+ struct nfsd3_attrstat *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: RMDIR(3) %s %.*s\n",
+ SVCFH_fmt(&argp->fh),
+ argp->len,
+ argp->name);
+
+ fh_copy(&resp->fh, &argp->fh);
+ nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len);
+ fh_unlock(&resp->fh);
+ RETURN_STATUS(nfserr);
+}
+
+static __be32
+nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
+ struct nfsd3_renameres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: RENAME(3) %s %.*s ->\n",
+ SVCFH_fmt(&argp->ffh),
+ argp->flen,
+ argp->fname);
+ dprintk("nfsd: -> %s %.*s\n",
+ SVCFH_fmt(&argp->tfh),
+ argp->tlen,
+ argp->tname);
+
+ fh_copy(&resp->ffh, &argp->ffh);
+ fh_copy(&resp->tfh, &argp->tfh);
+ nfserr = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen,
+ &resp->tfh, argp->tname, argp->tlen);
+ RETURN_STATUS(nfserr);
+}
+
+static __be32
+nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
+ struct nfsd3_linkres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: LINK(3) %s ->\n",
+ SVCFH_fmt(&argp->ffh));
+ dprintk("nfsd: -> %s %.*s\n",
+ SVCFH_fmt(&argp->tfh),
+ argp->tlen,
+ argp->tname);
+
+ fh_copy(&resp->fh, &argp->ffh);
+ fh_copy(&resp->tfh, &argp->tfh);
+ nfserr = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen,
+ &resp->fh);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Read a portion of a directory.
+ */
+static __be32
+nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
+ struct nfsd3_readdirres *resp)
+{
+ __be32 nfserr;
+ int count;
+
+ dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
+ SVCFH_fmt(&argp->fh),
+ argp->count, (u32) argp->cookie);
+
+ /* Make sure we've room for the NULL ptr & eof flag, and shrink to
+ * client read size */
+ count = (argp->count >> 2) - 2;
+
+ /* Read directory and encode entries on the fly */
+ fh_copy(&resp->fh, &argp->fh);
+
+ resp->buflen = count;
+ resp->common.err = nfs_ok;
+ resp->buffer = argp->buffer;
+ resp->rqstp = rqstp;
+ nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie,
+ &resp->common, nfs3svc_encode_entry);
+ memcpy(resp->verf, argp->verf, 8);
+ resp->count = resp->buffer - argp->buffer;
+ if (resp->offset)
+ xdr_encode_hyper(resp->offset, argp->cookie);
+
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Read a portion of a directory, including file handles and attrs.
+ * For now, we choose to ignore the dircount parameter.
+ */
+static __be32
+nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
+ struct nfsd3_readdirres *resp)
+{
+ __be32 nfserr;
+ int count = 0;
+ loff_t offset;
+ int i;
+ caddr_t page_addr = NULL;
+
+ dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
+ SVCFH_fmt(&argp->fh),
+ argp->count, (u32) argp->cookie);
+
+ /* Convert byte count to number of words (i.e. >> 2),
+ * and reserve room for the NULL ptr & eof flag (-2 words) */
+ resp->count = (argp->count >> 2) - 2;
+
+ /* Read directory and encode entries on the fly */
+ fh_copy(&resp->fh, &argp->fh);
+
+ resp->common.err = nfs_ok;
+ resp->buffer = argp->buffer;
+ resp->buflen = resp->count;
+ resp->rqstp = rqstp;
+ offset = argp->cookie;
+ nfserr = nfsd_readdir(rqstp, &resp->fh,
+ &offset,
+ &resp->common,
+ nfs3svc_encode_entry_plus);
+ memcpy(resp->verf, argp->verf, 8);
+ for (i=1; i<rqstp->rq_resused ; i++) {
+ page_addr = page_address(rqstp->rq_respages[i]);
+
+ if (((caddr_t)resp->buffer >= page_addr) &&
+ ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+ count += (caddr_t)resp->buffer - page_addr;
+ break;
+ }
+ count += PAGE_SIZE;
+ }
+ resp->count = count >> 2;
+ if (resp->offset) {
+ if (unlikely(resp->offset1)) {
+ /* we ended up with offset on a page boundary */
+ *resp->offset = htonl(offset >> 32);
+ *resp->offset1 = htonl(offset & 0xffffffff);
+ resp->offset1 = NULL;
+ } else {
+ xdr_encode_hyper(resp->offset, offset);
+ }
+ }
+
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Get file system stats
+ */
+static __be32
+nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
+ struct nfsd3_fsstatres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: FSSTAT(3) %s\n",
+ SVCFH_fmt(&argp->fh));
+
+ nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0);
+ fh_put(&argp->fh);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Get file system info
+ */
+static __be32
+nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
+ struct nfsd3_fsinfores *resp)
+{
+ __be32 nfserr;
+ u32 max_blocksize = svc_max_payload(rqstp);
+
+ dprintk("nfsd: FSINFO(3) %s\n",
+ SVCFH_fmt(&argp->fh));
+
+ resp->f_rtmax = max_blocksize;
+ resp->f_rtpref = max_blocksize;
+ resp->f_rtmult = PAGE_SIZE;
+ resp->f_wtmax = max_blocksize;
+ resp->f_wtpref = max_blocksize;
+ resp->f_wtmult = PAGE_SIZE;
+ resp->f_dtpref = PAGE_SIZE;
+ resp->f_maxfilesize = ~(u32) 0;
+ resp->f_properties = NFS3_FSF_DEFAULT;
+
+ nfserr = fh_verify(rqstp, &argp->fh, 0,
+ NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
+
+ /* Check special features of the file system. May request
+ * different read/write sizes for file systems known to have
+ * problems with large blocks */
+ if (nfserr == 0) {
+ struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+
+ /* Note that we don't care for remote fs's here */
+ if (sb->s_magic == MSDOS_SUPER_MAGIC) {
+ resp->f_properties = NFS3_FSF_BILLYBOY;
+ }
+ resp->f_maxfilesize = sb->s_maxbytes;
+ }
+
+ fh_put(&argp->fh);
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Get pathconf info for the specified file
+ */
+static __be32
+nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
+ struct nfsd3_pathconfres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: PATHCONF(3) %s\n",
+ SVCFH_fmt(&argp->fh));
+
+ /* Set default pathconf */
+ resp->p_link_max = 255; /* at least */
+ resp->p_name_max = 255; /* at least */
+ resp->p_no_trunc = 0;
+ resp->p_chown_restricted = 1;
+ resp->p_case_insensitive = 0;
+ resp->p_case_preserving = 1;
+
+ nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
+
+ if (nfserr == 0) {
+ struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+
+ /* Note that we don't care for remote fs's here */
+ switch (sb->s_magic) {
+ case EXT2_SUPER_MAGIC:
+ resp->p_link_max = EXT2_LINK_MAX;
+ resp->p_name_max = EXT2_NAME_LEN;
+ break;
+ case MSDOS_SUPER_MAGIC:
+ resp->p_case_insensitive = 1;
+ resp->p_case_preserving = 0;
+ break;
+ }
+ }
+
+ fh_put(&argp->fh);
+ RETURN_STATUS(nfserr);
+}
+
+
+/*
+ * Commit a file (range) to stable storage.
+ */
+static __be32
+nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
+ struct nfsd3_commitres *resp)
+{
+ __be32 nfserr;
+
+ dprintk("nfsd: COMMIT(3) %s %u@%Lu\n",
+ SVCFH_fmt(&argp->fh),
+ argp->count,
+ (unsigned long long) argp->offset);
+
+ if (argp->offset > NFS_OFFSET_MAX)
+ RETURN_STATUS(nfserr_inval);
+
+ fh_copy(&resp->fh, &argp->fh);
+ nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count);
+
+ RETURN_STATUS(nfserr);
+}
+
+
+/*
+ * NFSv3 Server procedures.
+ * Only the results of non-idempotent operations are cached.
+ */
+#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle
+#define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat
+#define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat
+#define nfsd3_mkdirargs nfsd3_createargs
+#define nfsd3_readdirplusargs nfsd3_readdirargs
+#define nfsd3_fhandleargs nfsd_fhandle
+#define nfsd3_fhandleres nfsd3_attrstat
+#define nfsd3_attrstatres nfsd3_attrstat
+#define nfsd3_wccstatres nfsd3_attrstat
+#define nfsd3_createres nfsd3_diropres
+#define nfsd3_voidres nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize) \
+ { (svc_procfunc) nfsd3_proc_##name, \
+ (kxdrproc_t) nfs3svc_decode_##argt##args, \
+ (kxdrproc_t) nfs3svc_encode_##rest##res, \
+ (kxdrproc_t) nfs3svc_release_##relt, \
+ sizeof(struct nfsd3_##argt##args), \
+ sizeof(struct nfsd3_##rest##res), \
+ 0, \
+ cache, \
+ respsize, \
+ }
+
+#define ST 1 /* status*/
+#define FH 17 /* filehandle with length */
+#define AT 21 /* attributes */
+#define pAT (1+AT) /* post attributes - conditional */
+#define WC (7+pAT) /* WCC attributes */
+
+static struct svc_procedure nfsd_procedures3[22] = {
+ [NFS3PROC_NULL] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_null,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd3_voidargs),
+ .pc_ressize = sizeof(struct nfsd3_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+ },
+ [NFS3PROC_GETATTR] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_getattr,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_ressize = sizeof(struct nfsd3_attrstatres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT,
+ },
+ [NFS3PROC_SETATTR] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_setattr,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_sattrargs),
+ .pc_ressize = sizeof(struct nfsd3_wccstatres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC,
+ },
+ [NFS3PROC_LOOKUP] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_lookup,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_ressize = sizeof(struct nfsd3_diropres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+FH+pAT+pAT,
+ },
+ [NFS3PROC_ACCESS] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_access,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_accessargs),
+ .pc_ressize = sizeof(struct nfsd3_accessres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+1,
+ },
+ [NFS3PROC_READLINK] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_readlink,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_readlinkargs),
+ .pc_ressize = sizeof(struct nfsd3_readlinkres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
+ },
+ [NFS3PROC_READ] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_read,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_readargs),
+ .pc_ressize = sizeof(struct nfsd3_readres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
+ },
+ [NFS3PROC_WRITE] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_write,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_writeargs),
+ .pc_ressize = sizeof(struct nfsd3_writeres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC+4,
+ },
+ [NFS3PROC_CREATE] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_create,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_createargs),
+ .pc_ressize = sizeof(struct nfsd3_createres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+(1+FH+pAT)+WC,
+ },
+ [NFS3PROC_MKDIR] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_mkdir,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_mkdirargs),
+ .pc_ressize = sizeof(struct nfsd3_createres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+(1+FH+pAT)+WC,
+ },
+ [NFS3PROC_SYMLINK] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_symlink,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_symlinkargs),
+ .pc_ressize = sizeof(struct nfsd3_createres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+(1+FH+pAT)+WC,
+ },
+ [NFS3PROC_MKNOD] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_mknod,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_mknodargs),
+ .pc_ressize = sizeof(struct nfsd3_createres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+(1+FH+pAT)+WC,
+ },
+ [NFS3PROC_REMOVE] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_remove,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_ressize = sizeof(struct nfsd3_wccstatres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC,
+ },
+ [NFS3PROC_RMDIR] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_rmdir,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_ressize = sizeof(struct nfsd3_wccstatres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC,
+ },
+ [NFS3PROC_RENAME] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_rename,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_renameargs),
+ .pc_ressize = sizeof(struct nfsd3_renameres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC+WC,
+ },
+ [NFS3PROC_LINK] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_link,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_linkargs),
+ .pc_ressize = sizeof(struct nfsd3_linkres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+pAT+WC,
+ },
+ [NFS3PROC_READDIR] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_readdir,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_readdirargs),
+ .pc_ressize = sizeof(struct nfsd3_readdirres),
+ .pc_cachetype = RC_NOCACHE,
+ },
+ [NFS3PROC_READDIRPLUS] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_readdirplus,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_readdirplusargs),
+ .pc_ressize = sizeof(struct nfsd3_readdirres),
+ .pc_cachetype = RC_NOCACHE,
+ },
+ [NFS3PROC_FSSTAT] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_fsstat,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
+ .pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_ressize = sizeof(struct nfsd3_fsstatres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+2*6+1,
+ },
+ [NFS3PROC_FSINFO] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_fsinfo,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
+ .pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_ressize = sizeof(struct nfsd3_fsinfores),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+12,
+ },
+ [NFS3PROC_PATHCONF] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_pathconf,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
+ .pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_ressize = sizeof(struct nfsd3_pathconfres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+6,
+ },
+ [NFS3PROC_COMMIT] = {
+ .pc_func = (svc_procfunc) nfsd3_proc_commit,
+ .pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
+ .pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
+ .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_commitargs),
+ .pc_ressize = sizeof(struct nfsd3_commitres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+WC+2,
+ },
+};
+
+struct svc_version nfsd_version3 = {
+ .vs_vers = 3,
+ .vs_nproc = 22,
+ .vs_proc = nfsd_procedures3,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS3_SVC_XDRSIZE,
+};
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
new file mode 100644
index 00000000000..08c6e36ab2e
--- /dev/null
+++ b/fs/nfsd/nfs3xdr.c
@@ -0,0 +1,1118 @@
+/*
+ * XDR support for nfsd/protocol version 3.
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ *
+ * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()!
+ */
+
+#include <linux/namei.h>
+#include "xdr3.h"
+#include "auth.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_XDR
+
+
+/*
+ * Mapping of S_IF* types to NFS file types
+ */
+static u32 nfs3_ftypes[] = {
+ NF3NON, NF3FIFO, NF3CHR, NF3BAD,
+ NF3DIR, NF3BAD, NF3BLK, NF3BAD,
+ NF3REG, NF3BAD, NF3LNK, NF3BAD,
+ NF3SOCK, NF3BAD, NF3LNK, NF3BAD,
+};
+
+/*
+ * XDR functions for basic NFS types
+ */
+static __be32 *
+encode_time3(__be32 *p, struct timespec *time)
+{
+ *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
+ return p;
+}
+
+static __be32 *
+decode_time3(__be32 *p, struct timespec *time)
+{
+ time->tv_sec = ntohl(*p++);
+ time->tv_nsec = ntohl(*p++);
+ return p;
+}
+
+static __be32 *
+decode_fh(__be32 *p, struct svc_fh *fhp)
+{
+ unsigned int size;
+ fh_init(fhp, NFS3_FHSIZE);
+ size = ntohl(*p++);
+ if (size > NFS3_FHSIZE)
+ return NULL;
+
+ memcpy(&fhp->fh_handle.fh_base, p, size);
+ fhp->fh_handle.fh_size = size;
+ return p + XDR_QUADLEN(size);
+}
+
+/* Helper function for NFSv3 ACL code */
+__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp)
+{
+ return decode_fh(p, fhp);
+}
+
+static __be32 *
+encode_fh(__be32 *p, struct svc_fh *fhp)
+{
+ unsigned int size = fhp->fh_handle.fh_size;
+ *p++ = htonl(size);
+ if (size) p[XDR_QUADLEN(size)-1]=0;
+ memcpy(p, &fhp->fh_handle.fh_base, size);
+ return p + XDR_QUADLEN(size);
+}
+
+/*
+ * Decode a file name and make sure that the path contains
+ * no slashes or null bytes.
+ */
+static __be32 *
+decode_filename(__be32 *p, char **namp, unsigned int *lenp)
+{
+ char *name;
+ unsigned int i;
+
+ if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) {
+ for (i = 0, name = *namp; i < *lenp; i++, name++) {
+ if (*name == '\0' || *name == '/')
+ return NULL;
+ }
+ }
+
+ return p;
+}
+
+static __be32 *
+decode_sattr3(__be32 *p, struct iattr *iap)
+{
+ u32 tmp;
+
+ iap->ia_valid = 0;
+
+ if (*p++) {
+ iap->ia_valid |= ATTR_MODE;
+ iap->ia_mode = ntohl(*p++);
+ }
+ if (*p++) {
+ iap->ia_valid |= ATTR_UID;
+ iap->ia_uid = ntohl(*p++);
+ }
+ if (*p++) {
+ iap->ia_valid |= ATTR_GID;
+ iap->ia_gid = ntohl(*p++);
+ }
+ if (*p++) {
+ u64 newsize;
+
+ iap->ia_valid |= ATTR_SIZE;
+ p = xdr_decode_hyper(p, &newsize);
+ if (newsize <= NFS_OFFSET_MAX)
+ iap->ia_size = newsize;
+ else
+ iap->ia_size = NFS_OFFSET_MAX;
+ }
+ if ((tmp = ntohl(*p++)) == 1) { /* set to server time */
+ iap->ia_valid |= ATTR_ATIME;
+ } else if (tmp == 2) { /* set to client time */
+ iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+ iap->ia_atime.tv_sec = ntohl(*p++);
+ iap->ia_atime.tv_nsec = ntohl(*p++);
+ }
+ if ((tmp = ntohl(*p++)) == 1) { /* set to server time */
+ iap->ia_valid |= ATTR_MTIME;
+ } else if (tmp == 2) { /* set to client time */
+ iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
+ iap->ia_mtime.tv_sec = ntohl(*p++);
+ iap->ia_mtime.tv_nsec = ntohl(*p++);
+ }
+ return p;
+}
+
+static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp)
+{
+ u64 f;
+ switch(fsid_source(fhp)) {
+ default:
+ case FSIDSOURCE_DEV:
+ p = xdr_encode_hyper(p, (u64)huge_encode_dev
+ (fhp->fh_dentry->d_inode->i_sb->s_dev));
+ break;
+ case FSIDSOURCE_FSID:
+ p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
+ break;
+ case FSIDSOURCE_UUID:
+ f = ((u64*)fhp->fh_export->ex_uuid)[0];
+ f ^= ((u64*)fhp->fh_export->ex_uuid)[1];
+ p = xdr_encode_hyper(p, f);
+ break;
+ }
+ return p;
+}
+
+static __be32 *
+encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
+ struct kstat *stat)
+{
+ *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
+ *p++ = htonl((u32) stat->mode);
+ *p++ = htonl((u32) stat->nlink);
+ *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
+ *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
+ if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
+ p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
+ } else {
+ p = xdr_encode_hyper(p, (u64) stat->size);
+ }
+ p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9);
+ *p++ = htonl((u32) MAJOR(stat->rdev));
+ *p++ = htonl((u32) MINOR(stat->rdev));
+ p = encode_fsid(p, fhp);
+ p = xdr_encode_hyper(p, stat->ino);
+ p = encode_time3(p, &stat->atime);
+ p = encode_time3(p, &stat->mtime);
+ p = encode_time3(p, &stat->ctime);
+
+ return p;
+}
+
+static __be32 *
+encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+{
+ /* Attributes to follow */
+ *p++ = xdr_one;
+ return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr);
+}
+
+/*
+ * Encode post-operation attributes.
+ * The inode may be NULL if the call failed because of a stale file
+ * handle. In this case, no attributes are returned.
+ */
+static __be32 *
+encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+{
+ struct dentry *dentry = fhp->fh_dentry;
+ if (dentry && dentry->d_inode) {
+ int err;
+ struct kstat stat;
+
+ err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat);
+ if (!err) {
+ *p++ = xdr_one; /* attributes follow */
+ lease_get_mtime(dentry->d_inode, &stat.mtime);
+ return encode_fattr3(rqstp, p, fhp, &stat);
+ }
+ }
+ *p++ = xdr_zero;
+ return p;
+}
+
+/* Helper for NFSv3 ACLs */
+__be32 *
+nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+{
+ return encode_post_op_attr(rqstp, p, fhp);
+}
+
+/*
+ * Enocde weak cache consistency data
+ */
+static __be32 *
+encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+{
+ struct dentry *dentry = fhp->fh_dentry;
+
+ if (dentry && dentry->d_inode && fhp->fh_post_saved) {
+ if (fhp->fh_pre_saved) {
+ *p++ = xdr_one;
+ p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size);
+ p = encode_time3(p, &fhp->fh_pre_mtime);
+ p = encode_time3(p, &fhp->fh_pre_ctime);
+ } else {
+ *p++ = xdr_zero;
+ }
+ return encode_saved_post_attr(rqstp, p, fhp);
+ }
+ /* no pre- or post-attrs */
+ *p++ = xdr_zero;
+ return encode_post_op_attr(rqstp, p, fhp);
+}
+
+/*
+ * Fill in the post_op attr for the wcc data
+ */
+void fill_post_wcc(struct svc_fh *fhp)
+{
+ int err;
+
+ if (fhp->fh_post_saved)
+ printk("nfsd: inode locked twice during operation.\n");
+
+ err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
+ &fhp->fh_post_attr);
+ fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
+ if (err) {
+ fhp->fh_post_saved = 0;
+ /* Grab the ctime anyway - set_change_info might use it */
+ fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime;
+ } else
+ fhp->fh_post_saved = 1;
+}
+
+/*
+ * XDR decode functions
+ */
+int
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+{
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_sattrargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ p = decode_sattr3(p, &args->attrs);
+
+ if ((args->check_guard = ntohl(*p++)) != 0) {
+ struct timespec time;
+ p = decode_time3(p, &time);
+ args->guardtime = time.tv_sec;
+ }
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_diropargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh))
+ || !(p = decode_filename(p, &args->name, &args->len)))
+ return 0;
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_accessargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ args->access = ntohl(*p++);
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_readargs *args)
+{
+ unsigned int len;
+ int v,pn;
+ u32 max_blocksize = svc_max_payload(rqstp);
+
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ p = xdr_decode_hyper(p, &args->offset);
+
+ len = args->count = ntohl(*p++);
+
+ if (len > max_blocksize)
+ len = max_blocksize;
+
+ /* set up the kvec */
+ v=0;
+ while (len > 0) {
+ pn = rqstp->rq_resused++;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
+ len -= rqstp->rq_vec[v].iov_len;
+ v++;
+ }
+ args->vlen = v;
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_writeargs *args)
+{
+ unsigned int len, v, hdr, dlen;
+ u32 max_blocksize = svc_max_payload(rqstp);
+
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ p = xdr_decode_hyper(p, &args->offset);
+
+ args->count = ntohl(*p++);
+ args->stable = ntohl(*p++);
+ len = args->len = ntohl(*p++);
+ /*
+ * The count must equal the amount of data passed.
+ */
+ if (args->count != args->len)
+ return 0;
+
+ /*
+ * Check to make sure that we got the right number of
+ * bytes.
+ */
+ hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
+ dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
+ - hdr;
+ /*
+ * Round the length of the data which was specified up to
+ * the next multiple of XDR units and then compare that
+ * against the length which was actually received.
+ * Note that when RPCSEC/GSS (for example) is used, the
+ * data buffer can be padded so dlen might be larger
+ * than required. It must never be smaller.
+ */
+ if (dlen < XDR_QUADLEN(len)*4)
+ return 0;
+
+ if (args->count > max_blocksize) {
+ args->count = max_blocksize;
+ len = args->len = max_blocksize;
+ }
+ rqstp->rq_vec[0].iov_base = (void*)p;
+ rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+ v = 0;
+ while (len > rqstp->rq_vec[v].iov_len) {
+ len -= rqstp->rq_vec[v].iov_len;
+ v++;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
+ rqstp->rq_vec[v].iov_len = PAGE_SIZE;
+ }
+ rqstp->rq_vec[v].iov_len = len;
+ args->vlen = v + 1;
+ return 1;
+}
+
+int
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_createargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh))
+ || !(p = decode_filename(p, &args->name, &args->len)))
+ return 0;
+
+ switch (args->createmode = ntohl(*p++)) {
+ case NFS3_CREATE_UNCHECKED:
+ case NFS3_CREATE_GUARDED:
+ p = decode_sattr3(p, &args->attrs);
+ break;
+ case NFS3_CREATE_EXCLUSIVE:
+ args->verf = p;
+ p += 2;
+ break;
+ default:
+ return 0;
+ }
+
+ return xdr_argsize_check(rqstp, p);
+}
+int
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_createargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh)) ||
+ !(p = decode_filename(p, &args->name, &args->len)))
+ return 0;
+ p = decode_sattr3(p, &args->attrs);
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_symlinkargs *args)
+{
+ unsigned int len, avail;
+ char *old, *new;
+ struct kvec *vec;
+
+ if (!(p = decode_fh(p, &args->ffh)) ||
+ !(p = decode_filename(p, &args->fname, &args->flen))
+ )
+ return 0;
+ p = decode_sattr3(p, &args->attrs);
+
+ /* now decode the pathname, which might be larger than the first page.
+ * As we have to check for nul's anyway, we copy it into a new page
+ * This page appears in the rq_res.pages list, but as pages_len is always
+ * 0, it won't get in the way
+ */
+ len = ntohl(*p++);
+ if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
+ return 0;
+ args->tname = new =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+ args->tlen = len;
+ /* first copy and check from the first page */
+ old = (char*)p;
+ vec = &rqstp->rq_arg.head[0];
+ avail = vec->iov_len - (old - (char*)vec->iov_base);
+ while (len && avail && *old) {
+ *new++ = *old++;
+ len--;
+ avail--;
+ }
+ /* now copy next page if there is one */
+ if (len && !avail && rqstp->rq_arg.page_len) {
+ avail = rqstp->rq_arg.page_len;
+ if (avail > PAGE_SIZE)
+ avail = PAGE_SIZE;
+ old = page_address(rqstp->rq_arg.pages[0]);
+ }
+ while (len && avail && *old) {
+ *new++ = *old++;
+ len--;
+ avail--;
+ }
+ *new = '\0';
+ if (len)
+ return 0;
+
+ return 1;
+}
+
+int
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_mknodargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh))
+ || !(p = decode_filename(p, &args->name, &args->len)))
+ return 0;
+
+ args->ftype = ntohl(*p++);
+
+ if (args->ftype == NF3BLK || args->ftype == NF3CHR
+ || args->ftype == NF3SOCK || args->ftype == NF3FIFO)
+ p = decode_sattr3(p, &args->attrs);
+
+ if (args->ftype == NF3BLK || args->ftype == NF3CHR) {
+ args->major = ntohl(*p++);
+ args->minor = ntohl(*p++);
+ }
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_renameargs *args)
+{
+ if (!(p = decode_fh(p, &args->ffh))
+ || !(p = decode_filename(p, &args->fname, &args->flen))
+ || !(p = decode_fh(p, &args->tfh))
+ || !(p = decode_filename(p, &args->tname, &args->tlen)))
+ return 0;
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_readlinkargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ args->buffer =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_linkargs *args)
+{
+ if (!(p = decode_fh(p, &args->ffh))
+ || !(p = decode_fh(p, &args->tfh))
+ || !(p = decode_filename(p, &args->tname, &args->tlen)))
+ return 0;
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_readdirargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ p = xdr_decode_hyper(p, &args->cookie);
+ args->verf = p; p += 2;
+ args->dircount = ~0;
+ args->count = ntohl(*p++);
+
+ if (args->count > PAGE_SIZE)
+ args->count = PAGE_SIZE;
+
+ args->buffer =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_readdirargs *args)
+{
+ int len, pn;
+ u32 max_blocksize = svc_max_payload(rqstp);
+
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ p = xdr_decode_hyper(p, &args->cookie);
+ args->verf = p; p += 2;
+ args->dircount = ntohl(*p++);
+ args->count = ntohl(*p++);
+
+ len = (args->count > max_blocksize) ? max_blocksize :
+ args->count;
+ args->count = len;
+
+ while (len > 0) {
+ pn = rqstp->rq_resused++;
+ if (!args->buffer)
+ args->buffer = page_address(rqstp->rq_respages[pn]);
+ len -= PAGE_SIZE;
+ }
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_commitargs *args)
+{
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+ p = xdr_decode_hyper(p, &args->offset);
+ args->count = ntohl(*p++);
+
+ return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * XDR encode functions
+ */
+/*
+ * There must be an encoding function for void results so svc_process
+ * will work properly.
+ */
+int
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+{
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* GETATTR */
+int
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_attrstat *resp)
+{
+ if (resp->status == 0) {
+ lease_get_mtime(resp->fh.fh_dentry->d_inode,
+ &resp->stat.mtime);
+ p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat);
+ }
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* SETATTR, REMOVE, RMDIR */
+int
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_attrstat *resp)
+{
+ p = encode_wcc_data(rqstp, p, &resp->fh);
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* LOOKUP */
+int
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_diropres *resp)
+{
+ if (resp->status == 0) {
+ p = encode_fh(p, &resp->fh);
+ p = encode_post_op_attr(rqstp, p, &resp->fh);
+ }
+ p = encode_post_op_attr(rqstp, p, &resp->dirfh);
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* ACCESS */
+int
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_accessres *resp)
+{
+ p = encode_post_op_attr(rqstp, p, &resp->fh);
+ if (resp->status == 0)
+ *p++ = htonl(resp->access);
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* READLINK */
+int
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_readlinkres *resp)
+{
+ p = encode_post_op_attr(rqstp, p, &resp->fh);
+ if (resp->status == 0) {
+ *p++ = htonl(resp->len);
+ xdr_ressize_check(rqstp, p);
+ rqstp->rq_res.page_len = resp->len;
+ if (resp->len & 3) {
+ /* need to pad the tail */
+ rqstp->rq_res.tail[0].iov_base = p;
+ *p = 0;
+ rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
+ }
+ return 1;
+ } else
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* READ */
+int
+nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_readres *resp)
+{
+ p = encode_post_op_attr(rqstp, p, &resp->fh);
+ if (resp->status == 0) {
+ *p++ = htonl(resp->count);
+ *p++ = htonl(resp->eof);
+ *p++ = htonl(resp->count); /* xdr opaque count */
+ xdr_ressize_check(rqstp, p);
+ /* now update rqstp->rq_res to reflect data as well */
+ rqstp->rq_res.page_len = resp->count;
+ if (resp->count & 3) {
+ /* need to pad the tail */
+ rqstp->rq_res.tail[0].iov_base = p;
+ *p = 0;
+ rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
+ }
+ return 1;
+ } else
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* WRITE */
+int
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_writeres *resp)
+{
+ p = encode_wcc_data(rqstp, p, &resp->fh);
+ if (resp->status == 0) {
+ *p++ = htonl(resp->count);
+ *p++ = htonl(resp->committed);
+ *p++ = htonl(nfssvc_boot.tv_sec);
+ *p++ = htonl(nfssvc_boot.tv_usec);
+ }
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* CREATE, MKDIR, SYMLINK, MKNOD */
+int
+nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_diropres *resp)
+{
+ if (resp->status == 0) {
+ *p++ = xdr_one;
+ p = encode_fh(p, &resp->fh);
+ p = encode_post_op_attr(rqstp, p, &resp->fh);
+ }
+ p = encode_wcc_data(rqstp, p, &resp->dirfh);
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* RENAME */
+int
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_renameres *resp)
+{
+ p = encode_wcc_data(rqstp, p, &resp->ffh);
+ p = encode_wcc_data(rqstp, p, &resp->tfh);
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* LINK */
+int
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_linkres *resp)
+{
+ p = encode_post_op_attr(rqstp, p, &resp->fh);
+ p = encode_wcc_data(rqstp, p, &resp->tfh);
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* READDIR */
+int
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_readdirres *resp)
+{
+ p = encode_post_op_attr(rqstp, p, &resp->fh);
+
+ if (resp->status == 0) {
+ /* stupid readdir cookie */
+ memcpy(p, resp->verf, 8); p += 2;
+ xdr_ressize_check(rqstp, p);
+ if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE)
+ return 1; /*No room for trailer */
+ rqstp->rq_res.page_len = (resp->count) << 2;
+
+ /* add the 'tail' to the end of the 'head' page - page 0. */
+ rqstp->rq_res.tail[0].iov_base = p;
+ *p++ = 0; /* no more entries */
+ *p++ = htonl(resp->common.err == nfserr_eof);
+ rqstp->rq_res.tail[0].iov_len = 2<<2;
+ return 1;
+ } else
+ return xdr_ressize_check(rqstp, p);
+}
+
+static __be32 *
+encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
+ int namlen, u64 ino)
+{
+ *p++ = xdr_one; /* mark entry present */
+ p = xdr_encode_hyper(p, ino); /* file id */
+ p = xdr_encode_array(p, name, namlen);/* name length & name */
+
+ cd->offset = p; /* remember pointer */
+ p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */
+
+ return p;
+}
+
+static int
+compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
+ const char *name, int namlen)
+{
+ struct svc_export *exp;
+ struct dentry *dparent, *dchild;
+ int rv = 0;
+
+ dparent = cd->fh.fh_dentry;
+ exp = cd->fh.fh_export;
+
+ if (isdotent(name, namlen)) {
+ if (namlen == 2) {
+ dchild = dget_parent(dparent);
+ if (dchild == dparent) {
+ /* filesystem root - cannot return filehandle for ".." */
+ dput(dchild);
+ return -ENOENT;
+ }
+ } else
+ dchild = dget(dparent);
+ } else
+ dchild = lookup_one_len(name, dparent, namlen);
+ if (IS_ERR(dchild))
+ return -ENOENT;
+ rv = -ENOENT;
+ if (d_mountpoint(dchild))
+ goto out;
+ rv = fh_compose(fhp, exp, dchild, &cd->fh);
+ if (rv)
+ goto out;
+ if (!dchild->d_inode)
+ goto out;
+ rv = 0;
+out:
+ dput(dchild);
+ return rv;
+}
+
+static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
+{
+ struct svc_fh fh;
+ int err;
+
+ fh_init(&fh, NFS3_FHSIZE);
+ err = compose_entry_fh(cd, &fh, name, namlen);
+ if (err) {
+ *p++ = 0;
+ *p++ = 0;
+ goto out;
+ }
+ p = encode_post_op_attr(cd->rqstp, p, &fh);
+ *p++ = xdr_one; /* yes, a file handle follows */
+ p = encode_fh(p, &fh);
+out:
+ fh_put(&fh);
+ return p;
+}
+
+/*
+ * Encode a directory entry. This one works for both normal readdir
+ * and readdirplus.
+ * The normal readdir reply requires 2 (fileid) + 1 (stringlen)
+ * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen.
+ *
+ * The readdirplus baggage is 1+21 words for post_op_attr, plus the
+ * file handle.
+ */
+
+#define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1)
+#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2))
+static int
+encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type, int plus)
+{
+ struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres,
+ common);
+ __be32 *p = cd->buffer;
+ caddr_t curr_page_addr = NULL;
+ int pn; /* current page number */
+ int slen; /* string (name) length */
+ int elen; /* estimated entry length in words */
+ int num_entry_words = 0; /* actual number of words */
+
+ if (cd->offset) {
+ u64 offset64 = offset;
+
+ if (unlikely(cd->offset1)) {
+ /* we ended up with offset on a page boundary */
+ *cd->offset = htonl(offset64 >> 32);
+ *cd->offset1 = htonl(offset64 & 0xffffffff);
+ cd->offset1 = NULL;
+ } else {
+ xdr_encode_hyper(cd->offset, offset64);
+ }
+ }
+
+ /*
+ dprintk("encode_entry(%.*s @%ld%s)\n",
+ namlen, name, (long) offset, plus? " plus" : "");
+ */
+
+ /* truncate filename if too long */
+ if (namlen > NFS3_MAXNAMLEN)
+ namlen = NFS3_MAXNAMLEN;
+
+ slen = XDR_QUADLEN(namlen);
+ elen = slen + NFS3_ENTRY_BAGGAGE
+ + (plus? NFS3_ENTRYPLUS_BAGGAGE : 0);
+
+ if (cd->buflen < elen) {
+ cd->common.err = nfserr_toosmall;
+ return -EINVAL;
+ }
+
+ /* determine which page in rq_respages[] we are currently filling */
+ for (pn=1; pn < cd->rqstp->rq_resused; pn++) {
+ curr_page_addr = page_address(cd->rqstp->rq_respages[pn]);
+
+ if (((caddr_t)cd->buffer >= curr_page_addr) &&
+ ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE))
+ break;
+ }
+
+ if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) {
+ /* encode entry in current page */
+
+ p = encode_entry_baggage(cd, p, name, namlen, ino);
+
+ if (plus)
+ p = encode_entryplus_baggage(cd, p, name, namlen);
+ num_entry_words = p - cd->buffer;
+ } else if (cd->rqstp->rq_respages[pn+1] != NULL) {
+ /* temporarily encode entry into next page, then move back to
+ * current and next page in rq_respages[] */
+ __be32 *p1, *tmp;
+ int len1, len2;
+
+ /* grab next page for temporary storage of entry */
+ p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]);
+
+ p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
+
+ if (plus)
+ p1 = encode_entryplus_baggage(cd, p1, name, namlen);
+
+ /* determine entry word length and lengths to go in pages */
+ num_entry_words = p1 - tmp;
+ len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer;
+ if ((num_entry_words << 2) < len1) {
+ /* the actual number of words in the entry is less
+ * than elen and can still fit in the current page
+ */
+ memmove(p, tmp, num_entry_words << 2);
+ p += num_entry_words;
+
+ /* update offset */
+ cd->offset = cd->buffer + (cd->offset - tmp);
+ } else {
+ unsigned int offset_r = (cd->offset - tmp) << 2;
+
+ /* update pointer to offset location.
+ * This is a 64bit quantity, so we need to
+ * deal with 3 cases:
+ * - entirely in first page
+ * - entirely in second page
+ * - 4 bytes in each page
+ */
+ if (offset_r + 8 <= len1) {
+ cd->offset = p + (cd->offset - tmp);
+ } else if (offset_r >= len1) {
+ cd->offset -= len1 >> 2;
+ } else {
+ /* sitting on the fence */
+ BUG_ON(offset_r != len1 - 4);
+ cd->offset = p + (cd->offset - tmp);
+ cd->offset1 = tmp;
+ }
+
+ len2 = (num_entry_words << 2) - len1;
+
+ /* move from temp page to current and next pages */
+ memmove(p, tmp, len1);
+ memmove(tmp, (caddr_t)tmp+len1, len2);
+
+ p = tmp + (len2 >> 2);
+ }
+ }
+ else {
+ cd->common.err = nfserr_toosmall;
+ return -EINVAL;
+ }
+
+ cd->buflen -= num_entry_words;
+ cd->buffer = p;
+ cd->common.err = nfs_ok;
+ return 0;
+
+}
+
+int
+nfs3svc_encode_entry(void *cd, const char *name,
+ int namlen, loff_t offset, u64 ino, unsigned int d_type)
+{
+ return encode_entry(cd, name, namlen, offset, ino, d_type, 0);
+}
+
+int
+nfs3svc_encode_entry_plus(void *cd, const char *name,
+ int namlen, loff_t offset, u64 ino,
+ unsigned int d_type)
+{
+ return encode_entry(cd, name, namlen, offset, ino, d_type, 1);
+}
+
+/* FSSTAT */
+int
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_fsstatres *resp)
+{
+ struct kstatfs *s = &resp->stats;
+ u64 bs = s->f_bsize;
+
+ *p++ = xdr_zero; /* no post_op_attr */
+
+ if (resp->status == 0) {
+ p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */
+ p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */
+ p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */
+ p = xdr_encode_hyper(p, s->f_files); /* total inodes */
+ p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */
+ p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */
+ *p++ = htonl(resp->invarsec); /* mean unchanged time */
+ }
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* FSINFO */
+int
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_fsinfores *resp)
+{
+ *p++ = xdr_zero; /* no post_op_attr */
+
+ if (resp->status == 0) {
+ *p++ = htonl(resp->f_rtmax);
+ *p++ = htonl(resp->f_rtpref);
+ *p++ = htonl(resp->f_rtmult);
+ *p++ = htonl(resp->f_wtmax);
+ *p++ = htonl(resp->f_wtpref);
+ *p++ = htonl(resp->f_wtmult);
+ *p++ = htonl(resp->f_dtpref);
+ p = xdr_encode_hyper(p, resp->f_maxfilesize);
+ *p++ = xdr_one;
+ *p++ = xdr_zero;
+ *p++ = htonl(resp->f_properties);
+ }
+
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* PATHCONF */
+int
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_pathconfres *resp)
+{
+ *p++ = xdr_zero; /* no post_op_attr */
+
+ if (resp->status == 0) {
+ *p++ = htonl(resp->p_link_max);
+ *p++ = htonl(resp->p_name_max);
+ *p++ = htonl(resp->p_no_trunc);
+ *p++ = htonl(resp->p_chown_restricted);
+ *p++ = htonl(resp->p_case_insensitive);
+ *p++ = htonl(resp->p_case_preserving);
+ }
+
+ return xdr_ressize_check(rqstp, p);
+}
+
+/* COMMIT */
+int
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_commitres *resp)
+{
+ p = encode_wcc_data(rqstp, p, &resp->fh);
+ /* Write verifier */
+ if (resp->status == 0) {
+ *p++ = htonl(nfssvc_boot.tv_sec);
+ *p++ = htonl(nfssvc_boot.tv_usec);
+ }
+ return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+int
+nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_attrstat *resp)
+{
+ fh_put(&resp->fh);
+ return 1;
+}
+
+int
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_fhandle_pair *resp)
+{
+ fh_put(&resp->fh1);
+ fh_put(&resp->fh2);
+ return 1;
+}
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
new file mode 100644
index 00000000000..9c51aff02ae
--- /dev/null
+++ b/fs/nfsd/nfs4acl.c
@@ -0,0 +1,839 @@
+/*
+ * Common NFSv4 ACL handling code.
+ *
+ * Copyright (c) 2002, 2003 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ * Jeff Sedlak <jsedlak@umich.edu>
+ * J. Bruce Fields <bfields@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/slab.h>
+#include <linux/nfs_fs.h>
+#include <linux/export.h>
+#include "acl.h"
+
+
+/* mode bit translations: */
+#define NFS4_READ_MODE (NFS4_ACE_READ_DATA)
+#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_APPEND_DATA)
+#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE
+#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE)
+#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
+
+/* We don't support these bits; insist they be neither allowed nor denied */
+#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \
+ | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS)
+
+/* flags used to simulate posix default ACLs */
+#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
+ | NFS4_ACE_DIRECTORY_INHERIT_ACE)
+
+#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS \
+ | NFS4_ACE_INHERIT_ONLY_ACE \
+ | NFS4_ACE_IDENTIFIER_GROUP)
+
+#define MASK_EQUAL(mask1, mask2) \
+ ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
+
+static u32
+mask_from_posix(unsigned short perm, unsigned int flags)
+{
+ int mask = NFS4_ANYONE_MODE;
+
+ if (flags & NFS4_ACL_OWNER)
+ mask |= NFS4_OWNER_MODE;
+ if (perm & ACL_READ)
+ mask |= NFS4_READ_MODE;
+ if (perm & ACL_WRITE)
+ mask |= NFS4_WRITE_MODE;
+ if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR))
+ mask |= NFS4_ACE_DELETE_CHILD;
+ if (perm & ACL_EXECUTE)
+ mask |= NFS4_EXECUTE_MODE;
+ return mask;
+}
+
+static u32
+deny_mask_from_posix(unsigned short perm, u32 flags)
+{
+ u32 mask = 0;
+
+ if (perm & ACL_READ)
+ mask |= NFS4_READ_MODE;
+ if (perm & ACL_WRITE)
+ mask |= NFS4_WRITE_MODE;
+ if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR))
+ mask |= NFS4_ACE_DELETE_CHILD;
+ if (perm & ACL_EXECUTE)
+ mask |= NFS4_EXECUTE_MODE;
+ return mask;
+}
+
+/* XXX: modify functions to return NFS errors; they're only ever
+ * used by nfs code, after all.... */
+
+/* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the
+ * side of being more restrictive, so the mode bit mapping below is
+ * pessimistic. An optimistic version would be needed to handle DENY's,
+ * but we espect to coalesce all ALLOWs and DENYs before mapping to mode
+ * bits. */
+
+static void
+low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
+{
+ u32 write_mode = NFS4_WRITE_MODE;
+
+ if (flags & NFS4_ACL_DIR)
+ write_mode |= NFS4_ACE_DELETE_CHILD;
+ *mode = 0;
+ if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
+ *mode |= ACL_READ;
+ if ((perm & write_mode) == write_mode)
+ *mode |= ACL_WRITE;
+ if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
+ *mode |= ACL_EXECUTE;
+}
+
+struct ace_container {
+ struct nfs4_ace *ace;
+ struct list_head ace_l;
+};
+
+static short ace2type(struct nfs4_ace *);
+static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
+ unsigned int);
+
+struct nfs4_acl *
+nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
+ unsigned int flags)
+{
+ struct nfs4_acl *acl;
+ int size = 0;
+
+ if (pacl) {
+ if (posix_acl_valid(pacl) < 0)
+ return ERR_PTR(-EINVAL);
+ size += 2*pacl->a_count;
+ }
+ if (dpacl) {
+ if (posix_acl_valid(dpacl) < 0)
+ return ERR_PTR(-EINVAL);
+ size += 2*dpacl->a_count;
+ }
+
+ /* Allocate for worst case: one (deny, allow) pair each: */
+ acl = nfs4_acl_new(size);
+ if (acl == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ if (pacl)
+ _posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT);
+
+ if (dpacl)
+ _posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT);
+
+ return acl;
+}
+
+struct posix_acl_summary {
+ unsigned short owner;
+ unsigned short users;
+ unsigned short group;
+ unsigned short groups;
+ unsigned short other;
+ unsigned short mask;
+};
+
+static void
+summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas)
+{
+ struct posix_acl_entry *pa, *pe;
+
+ /*
+ * Only pas.users and pas.groups need initialization; previous
+ * posix_acl_valid() calls ensure that the other fields will be
+ * initialized in the following loop. But, just to placate gcc:
+ */
+ memset(pas, 0, sizeof(*pas));
+ pas->mask = 07;
+
+ pe = acl->a_entries + acl->a_count;
+
+ FOREACH_ACL_ENTRY(pa, acl, pe) {
+ switch (pa->e_tag) {
+ case ACL_USER_OBJ:
+ pas->owner = pa->e_perm;
+ break;
+ case ACL_GROUP_OBJ:
+ pas->group = pa->e_perm;
+ break;
+ case ACL_USER:
+ pas->users |= pa->e_perm;
+ break;
+ case ACL_GROUP:
+ pas->groups |= pa->e_perm;
+ break;
+ case ACL_OTHER:
+ pas->other = pa->e_perm;
+ break;
+ case ACL_MASK:
+ pas->mask = pa->e_perm;
+ break;
+ }
+ }
+ /* We'll only care about effective permissions: */
+ pas->users &= pas->mask;
+ pas->group &= pas->mask;
+ pas->groups &= pas->mask;
+}
+
+/* We assume the acl has been verified with posix_acl_valid. */
+static void
+_posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
+ unsigned int flags)
+{
+ struct posix_acl_entry *pa, *group_owner_entry;
+ struct nfs4_ace *ace;
+ struct posix_acl_summary pas;
+ unsigned short deny;
+ int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ?
+ NFS4_INHERITANCE_FLAGS | NFS4_ACE_INHERIT_ONLY_ACE : 0);
+
+ BUG_ON(pacl->a_count < 3);
+ summarize_posix_acl(pacl, &pas);
+
+ pa = pacl->a_entries;
+ ace = acl->aces + acl->naces;
+
+ /* We could deny everything not granted by the owner: */
+ deny = ~pas.owner;
+ /*
+ * but it is equivalent (and simpler) to deny only what is not
+ * granted by later entries:
+ */
+ deny &= pas.users | pas.group | pas.groups | pas.other;
+ if (deny) {
+ ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = deny_mask_from_posix(deny, flags);
+ ace->whotype = NFS4_ACL_WHO_OWNER;
+ ace++;
+ acl->naces++;
+ }
+
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER);
+ ace->whotype = NFS4_ACL_WHO_OWNER;
+ ace++;
+ acl->naces++;
+ pa++;
+
+ while (pa->e_tag == ACL_USER) {
+ deny = ~(pa->e_perm & pas.mask);
+ deny &= pas.groups | pas.group | pas.other;
+ if (deny) {
+ ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = deny_mask_from_posix(deny, flags);
+ ace->whotype = NFS4_ACL_WHO_NAMED;
+ ace->who = pa->e_id;
+ ace++;
+ acl->naces++;
+ }
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
+ flags);
+ ace->whotype = NFS4_ACL_WHO_NAMED;
+ ace->who = pa->e_id;
+ ace++;
+ acl->naces++;
+ pa++;
+ }
+
+ /* In the case of groups, we apply allow ACEs first, then deny ACEs,
+ * since a user can be in more than one group. */
+
+ /* allow ACEs */
+
+ group_owner_entry = pa;
+
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = mask_from_posix(pas.group, flags);
+ ace->whotype = NFS4_ACL_WHO_GROUP;
+ ace++;
+ acl->naces++;
+ pa++;
+
+ while (pa->e_tag == ACL_GROUP) {
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
+ ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
+ flags);
+ ace->whotype = NFS4_ACL_WHO_NAMED;
+ ace->who = pa->e_id;
+ ace++;
+ acl->naces++;
+ pa++;
+ }
+
+ /* deny ACEs */
+
+ pa = group_owner_entry;
+
+ deny = ~pas.group & pas.other;
+ if (deny) {
+ ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = deny_mask_from_posix(deny, flags);
+ ace->whotype = NFS4_ACL_WHO_GROUP;
+ ace++;
+ acl->naces++;
+ }
+ pa++;
+
+ while (pa->e_tag == ACL_GROUP) {
+ deny = ~(pa->e_perm & pas.mask);
+ deny &= pas.other;
+ if (deny) {
+ ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
+ ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
+ ace->access_mask = deny_mask_from_posix(deny, flags);
+ ace->whotype = NFS4_ACL_WHO_NAMED;
+ ace->who = pa->e_id;
+ ace++;
+ acl->naces++;
+ }
+ pa++;
+ }
+
+ if (pa->e_tag == ACL_MASK)
+ pa++;
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = mask_from_posix(pa->e_perm, flags);
+ ace->whotype = NFS4_ACL_WHO_EVERYONE;
+ acl->naces++;
+}
+
+static void
+sort_pacl_range(struct posix_acl *pacl, int start, int end) {
+ int sorted = 0, i;
+ struct posix_acl_entry tmp;
+
+ /* We just do a bubble sort; easy to do in place, and we're not
+ * expecting acl's to be long enough to justify anything more. */
+ while (!sorted) {
+ sorted = 1;
+ for (i = start; i < end; i++) {
+ if (pacl->a_entries[i].e_id
+ > pacl->a_entries[i+1].e_id) {
+ sorted = 0;
+ tmp = pacl->a_entries[i];
+ pacl->a_entries[i] = pacl->a_entries[i+1];
+ pacl->a_entries[i+1] = tmp;
+ }
+ }
+ }
+}
+
+static void
+sort_pacl(struct posix_acl *pacl)
+{
+ /* posix_acl_valid requires that users and groups be in order
+ * by uid/gid. */
+ int i, j;
+
+ if (pacl->a_count <= 4)
+ return; /* no users or groups */
+ i = 1;
+ while (pacl->a_entries[i].e_tag == ACL_USER)
+ i++;
+ sort_pacl_range(pacl, 1, i-1);
+
+ BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
+ j = ++i;
+ while (pacl->a_entries[j].e_tag == ACL_GROUP)
+ j++;
+ sort_pacl_range(pacl, i, j-1);
+ return;
+}
+
+/*
+ * While processing the NFSv4 ACE, this maintains bitmasks representing
+ * which permission bits have been allowed and which denied to a given
+ * entity: */
+struct posix_ace_state {
+ u32 allow;
+ u32 deny;
+};
+
+struct posix_user_ace_state {
+ uid_t uid;
+ struct posix_ace_state perms;
+};
+
+struct posix_ace_state_array {
+ int n;
+ struct posix_user_ace_state aces[];
+};
+
+/*
+ * While processing the NFSv4 ACE, this maintains the partial permissions
+ * calculated so far: */
+
+struct posix_acl_state {
+ int empty;
+ struct posix_ace_state owner;
+ struct posix_ace_state group;
+ struct posix_ace_state other;
+ struct posix_ace_state everyone;
+ struct posix_ace_state mask; /* Deny unused in this case */
+ struct posix_ace_state_array *users;
+ struct posix_ace_state_array *groups;
+};
+
+static int
+init_state(struct posix_acl_state *state, int cnt)
+{
+ int alloc;
+
+ memset(state, 0, sizeof(struct posix_acl_state));
+ state->empty = 1;
+ /*
+ * In the worst case, each individual acl could be for a distinct
+ * named user or group, but we don't no which, so we allocate
+ * enough space for either:
+ */
+ alloc = sizeof(struct posix_ace_state_array)
+ + cnt*sizeof(struct posix_user_ace_state);
+ state->users = kzalloc(alloc, GFP_KERNEL);
+ if (!state->users)
+ return -ENOMEM;
+ state->groups = kzalloc(alloc, GFP_KERNEL);
+ if (!state->groups) {
+ kfree(state->users);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void
+free_state(struct posix_acl_state *state) {
+ kfree(state->users);
+ kfree(state->groups);
+}
+
+static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate)
+{
+ state->mask.allow |= astate->allow;
+}
+
+/*
+ * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
+ * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
+ * to traditional read/write/execute permissions.
+ *
+ * It's problematic to reject acls that use certain mode bits, because it
+ * places the burden on users to learn the rules about which bits one
+ * particular server sets, without giving the user a lot of help--we return an
+ * error that could mean any number of different things. To make matters
+ * worse, the problematic bits might be introduced by some application that's
+ * automatically mapping from some other acl model.
+ *
+ * So wherever possible we accept anything, possibly erring on the side of
+ * denying more permissions than necessary.
+ *
+ * However we do reject *explicit* DENY's of a few bits representing
+ * permissions we could never deny:
+ */
+
+static inline int check_deny(u32 mask, int isowner)
+{
+ if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
+ return -EINVAL;
+ if (!isowner)
+ return 0;
+ if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
+ return -EINVAL;
+ return 0;
+}
+
+static struct posix_acl *
+posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
+{
+ struct posix_acl_entry *pace;
+ struct posix_acl *pacl;
+ int nace;
+ int i, error = 0;
+
+ /*
+ * ACLs with no ACEs are treated differently in the inheritable
+ * and effective cases: when there are no inheritable ACEs, we
+ * set a zero-length default posix acl:
+ */
+ if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) {
+ pacl = posix_acl_alloc(0, GFP_KERNEL);
+ return pacl ? pacl : ERR_PTR(-ENOMEM);
+ }
+ /*
+ * When there are no effective ACEs, the following will end
+ * up setting a 3-element effective posix ACL with all
+ * permissions zero.
+ */
+ nace = 4 + state->users->n + state->groups->n;
+ pacl = posix_acl_alloc(nace, GFP_KERNEL);
+ if (!pacl)
+ return ERR_PTR(-ENOMEM);
+
+ pace = pacl->a_entries;
+ pace->e_tag = ACL_USER_OBJ;
+ error = check_deny(state->owner.deny, 1);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+
+ for (i=0; i < state->users->n; i++) {
+ pace++;
+ pace->e_tag = ACL_USER;
+ error = check_deny(state->users->aces[i].perms.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->users->aces[i].perms.allow,
+ &pace->e_perm, flags);
+ pace->e_id = state->users->aces[i].uid;
+ add_to_mask(state, &state->users->aces[i].perms);
+ }
+
+ pace++;
+ pace->e_tag = ACL_GROUP_OBJ;
+ error = check_deny(state->group.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+ add_to_mask(state, &state->group);
+
+ for (i=0; i < state->groups->n; i++) {
+ pace++;
+ pace->e_tag = ACL_GROUP;
+ error = check_deny(state->groups->aces[i].perms.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->groups->aces[i].perms.allow,
+ &pace->e_perm, flags);
+ pace->e_id = state->groups->aces[i].uid;
+ add_to_mask(state, &state->groups->aces[i].perms);
+ }
+
+ pace++;
+ pace->e_tag = ACL_MASK;
+ low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+
+ pace++;
+ pace->e_tag = ACL_OTHER;
+ error = check_deny(state->other.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+
+ return pacl;
+out_err:
+ posix_acl_release(pacl);
+ return ERR_PTR(error);
+}
+
+static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
+{
+ /* Allow all bits in the mask not already denied: */
+ astate->allow |= mask & ~astate->deny;
+}
+
+static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
+{
+ /* Deny all bits in the mask not already allowed: */
+ astate->deny |= mask & ~astate->allow;
+}
+
+static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid)
+{
+ int i;
+
+ for (i = 0; i < a->n; i++)
+ if (a->aces[i].uid == uid)
+ return i;
+ /* Not found: */
+ a->n++;
+ a->aces[i].uid = uid;
+ a->aces[i].perms.allow = state->everyone.allow;
+ a->aces[i].perms.deny = state->everyone.deny;
+
+ return i;
+}
+
+static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
+{
+ int i;
+
+ for (i=0; i < a->n; i++)
+ deny_bits(&a->aces[i].perms, mask);
+}
+
+static void allow_bits_array(struct posix_ace_state_array *a, u32 mask)
+{
+ int i;
+
+ for (i=0; i < a->n; i++)
+ allow_bits(&a->aces[i].perms, mask);
+}
+
+static void process_one_v4_ace(struct posix_acl_state *state,
+ struct nfs4_ace *ace)
+{
+ u32 mask = ace->access_mask;
+ int i;
+
+ state->empty = 0;
+
+ switch (ace2type(ace)) {
+ case ACL_USER_OBJ:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->owner, mask);
+ } else {
+ deny_bits(&state->owner, mask);
+ }
+ break;
+ case ACL_USER:
+ i = find_uid(state, state->users, ace->who);
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->users->aces[i].perms, mask);
+ } else {
+ deny_bits(&state->users->aces[i].perms, mask);
+ mask = state->users->aces[i].perms.deny;
+ deny_bits(&state->owner, mask);
+ }
+ break;
+ case ACL_GROUP_OBJ:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->group, mask);
+ } else {
+ deny_bits(&state->group, mask);
+ mask = state->group.deny;
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ break;
+ case ACL_GROUP:
+ i = find_uid(state, state->groups, ace->who);
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->groups->aces[i].perms, mask);
+ } else {
+ deny_bits(&state->groups->aces[i].perms, mask);
+ mask = state->groups->aces[i].perms.deny;
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->group, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ break;
+ case ACL_OTHER:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->owner, mask);
+ allow_bits(&state->group, mask);
+ allow_bits(&state->other, mask);
+ allow_bits(&state->everyone, mask);
+ allow_bits_array(state->users, mask);
+ allow_bits_array(state->groups, mask);
+ } else {
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->group, mask);
+ deny_bits(&state->other, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ }
+}
+
+int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
+ struct posix_acl **dpacl, unsigned int flags)
+{
+ struct posix_acl_state effective_acl_state, default_acl_state;
+ struct nfs4_ace *ace;
+ int ret;
+
+ ret = init_state(&effective_acl_state, acl->naces);
+ if (ret)
+ return ret;
+ ret = init_state(&default_acl_state, acl->naces);
+ if (ret)
+ goto out_estate;
+ ret = -EINVAL;
+ for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) {
+ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
+ ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+ goto out_dstate;
+ if (ace->flag & ~NFS4_SUPPORTED_FLAGS)
+ goto out_dstate;
+ if ((ace->flag & NFS4_INHERITANCE_FLAGS) == 0) {
+ process_one_v4_ace(&effective_acl_state, ace);
+ continue;
+ }
+ if (!(flags & NFS4_ACL_DIR))
+ goto out_dstate;
+ /*
+ * Note that when only one of FILE_INHERIT or DIRECTORY_INHERIT
+ * is set, we're effectively turning on the other. That's OK,
+ * according to rfc 3530.
+ */
+ process_one_v4_ace(&default_acl_state, ace);
+
+ if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE))
+ process_one_v4_ace(&effective_acl_state, ace);
+ }
+ *pacl = posix_state_to_acl(&effective_acl_state, flags);
+ if (IS_ERR(*pacl)) {
+ ret = PTR_ERR(*pacl);
+ *pacl = NULL;
+ goto out_dstate;
+ }
+ *dpacl = posix_state_to_acl(&default_acl_state,
+ flags | NFS4_ACL_TYPE_DEFAULT);
+ if (IS_ERR(*dpacl)) {
+ ret = PTR_ERR(*dpacl);
+ *dpacl = NULL;
+ posix_acl_release(*pacl);
+ *pacl = NULL;
+ goto out_dstate;
+ }
+ sort_pacl(*pacl);
+ sort_pacl(*dpacl);
+ ret = 0;
+out_dstate:
+ free_state(&default_acl_state);
+out_estate:
+ free_state(&effective_acl_state);
+ return ret;
+}
+
+static short
+ace2type(struct nfs4_ace *ace)
+{
+ switch (ace->whotype) {
+ case NFS4_ACL_WHO_NAMED:
+ return (ace->flag & NFS4_ACE_IDENTIFIER_GROUP ?
+ ACL_GROUP : ACL_USER);
+ case NFS4_ACL_WHO_OWNER:
+ return ACL_USER_OBJ;
+ case NFS4_ACL_WHO_GROUP:
+ return ACL_GROUP_OBJ;
+ case NFS4_ACL_WHO_EVERYONE:
+ return ACL_OTHER;
+ }
+ BUG();
+ return -1;
+}
+
+EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4);
+EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix);
+
+struct nfs4_acl *
+nfs4_acl_new(int n)
+{
+ struct nfs4_acl *acl;
+
+ acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL);
+ if (acl == NULL)
+ return NULL;
+ acl->naces = 0;
+ return acl;
+}
+
+static struct {
+ char *string;
+ int stringlen;
+ int type;
+} s2t_map[] = {
+ {
+ .string = "OWNER@",
+ .stringlen = sizeof("OWNER@") - 1,
+ .type = NFS4_ACL_WHO_OWNER,
+ },
+ {
+ .string = "GROUP@",
+ .stringlen = sizeof("GROUP@") - 1,
+ .type = NFS4_ACL_WHO_GROUP,
+ },
+ {
+ .string = "EVERYONE@",
+ .stringlen = sizeof("EVERYONE@") - 1,
+ .type = NFS4_ACL_WHO_EVERYONE,
+ },
+};
+
+int
+nfs4_acl_get_whotype(char *p, u32 len)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
+ if (s2t_map[i].stringlen == len &&
+ 0 == memcmp(s2t_map[i].string, p, len))
+ return s2t_map[i].type;
+ }
+ return NFS4_ACL_WHO_NAMED;
+}
+
+int
+nfs4_acl_write_who(int who, char *p)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
+ if (s2t_map[i].type == who) {
+ memcpy(p, s2t_map[i].string, s2t_map[i].stringlen);
+ return s2t_map[i].stringlen;
+ }
+ }
+ BUG();
+ return -1;
+}
+
+EXPORT_SYMBOL(nfs4_acl_new);
+EXPORT_SYMBOL(nfs4_acl_get_whotype);
+EXPORT_SYMBOL(nfs4_acl_write_who);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
new file mode 100644
index 00000000000..6f3ebb48b12
--- /dev/null
+++ b/fs/nfsd/nfs4callback.c
@@ -0,0 +1,1036 @@
+/*
+ * Copyright (c) 2001 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc_xprt.h>
+#include <linux/slab.h>
+#include "nfsd.h"
+#include "state.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
+
+#define NFSPROC4_CB_NULL 0
+#define NFSPROC4_CB_COMPOUND 1
+
+/* Index of predefined Linux callback client operations */
+
+enum {
+ NFSPROC4_CLNT_CB_NULL = 0,
+ NFSPROC4_CLNT_CB_RECALL,
+ NFSPROC4_CLNT_CB_SEQUENCE,
+};
+
+#define NFS4_MAXTAGLEN 20
+
+#define NFS4_enc_cb_null_sz 0
+#define NFS4_dec_cb_null_sz 0
+#define cb_compound_enc_hdr_sz 4
+#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
+#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2)
+#define cb_sequence_enc_sz (sessionid_sz + 4 + \
+ 1 /* no referring calls list yet */)
+#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4)
+
+#define op_enc_sz 1
+#define op_dec_sz 2
+#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2))
+#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2)
+#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + enc_stateid_sz + \
+ enc_nfs4_fh_sz)
+
+#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
+
+struct nfs4_cb_compound_hdr {
+ /* args */
+ u32 ident; /* minorversion 0 only */
+ u32 nops;
+ __be32 *nops_p;
+ u32 minorversion;
+ /* res */
+ int status;
+};
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("NFS: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
+
+static __be32 *xdr_encode_empty_array(__be32 *p)
+{
+ *p++ = xdr_zero;
+ return p;
+}
+
+/*
+ * Encode/decode NFSv4 CB basic data types
+ *
+ * Basic NFSv4 callback data types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section
+ * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version
+ * 1 Protocol"
+ */
+
+/*
+ * nfs_cb_opnum4
+ *
+ * enum nfs_cb_opnum4 {
+ * OP_CB_GETATTR = 3,
+ * ...
+ * };
+ */
+enum nfs_cb_opnum4 {
+ OP_CB_GETATTR = 3,
+ OP_CB_RECALL = 4,
+ OP_CB_LAYOUTRECALL = 5,
+ OP_CB_NOTIFY = 6,
+ OP_CB_PUSH_DELEG = 7,
+ OP_CB_RECALL_ANY = 8,
+ OP_CB_RECALLABLE_OBJ_AVAIL = 9,
+ OP_CB_RECALL_SLOT = 10,
+ OP_CB_SEQUENCE = 11,
+ OP_CB_WANTS_CANCELLED = 12,
+ OP_CB_NOTIFY_LOCK = 13,
+ OP_CB_NOTIFY_DEVICEID = 14,
+ OP_CB_ILLEGAL = 10044
+};
+
+static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(op);
+}
+
+/*
+ * nfs_fh4
+ *
+ * typedef opaque nfs_fh4<NFS4_FHSIZE>;
+ */
+static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
+{
+ u32 length = fh->fh_size;
+ __be32 *p;
+
+ BUG_ON(length > NFS4_FHSIZE);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, &fh->fh_base, length);
+}
+
+/*
+ * stateid4
+ *
+ * struct stateid4 {
+ * uint32_t seqid;
+ * opaque other[12];
+ * };
+ */
+static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(sid->si_generation);
+ xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE);
+}
+
+/*
+ * sessionid4
+ *
+ * typedef opaque sessionid4[NFS4_SESSIONID_SIZE];
+ */
+static void encode_sessionid4(struct xdr_stream *xdr,
+ const struct nfsd4_session *session)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
+ xdr_encode_opaque_fixed(p, session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+}
+
+/*
+ * nfsstat4
+ */
+static const struct {
+ int stat;
+ int errno;
+} nfs_cb_errtbl[] = {
+ { NFS4_OK, 0 },
+ { NFS4ERR_PERM, -EPERM },
+ { NFS4ERR_NOENT, -ENOENT },
+ { NFS4ERR_IO, -EIO },
+ { NFS4ERR_NXIO, -ENXIO },
+ { NFS4ERR_ACCESS, -EACCES },
+ { NFS4ERR_EXIST, -EEXIST },
+ { NFS4ERR_XDEV, -EXDEV },
+ { NFS4ERR_NOTDIR, -ENOTDIR },
+ { NFS4ERR_ISDIR, -EISDIR },
+ { NFS4ERR_INVAL, -EINVAL },
+ { NFS4ERR_FBIG, -EFBIG },
+ { NFS4ERR_NOSPC, -ENOSPC },
+ { NFS4ERR_ROFS, -EROFS },
+ { NFS4ERR_MLINK, -EMLINK },
+ { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
+ { NFS4ERR_DQUOT, -EDQUOT },
+ { NFS4ERR_STALE, -ESTALE },
+ { NFS4ERR_BADHANDLE, -EBADHANDLE },
+ { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFS4ERR_NOTSUPP, -ENOTSUPP },
+ { NFS4ERR_TOOSMALL, -ETOOSMALL },
+ { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
+ { NFS4ERR_BADTYPE, -EBADTYPE },
+ { NFS4ERR_LOCKED, -EAGAIN },
+ { NFS4ERR_RESOURCE, -EREMOTEIO },
+ { NFS4ERR_SYMLINK, -ELOOP },
+ { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
+ { NFS4ERR_DEADLOCK, -EDEADLK },
+ { -1, -EIO }
+};
+
+/*
+ * If we cannot translate the error, the recovery routines should
+ * handle it.
+ *
+ * Note: remaining NFSv4 error codes have values > 10000, so should
+ * not conflict with native Linux error codes.
+ */
+static int nfs_cb_stat_to_errno(int status)
+{
+ int i;
+
+ for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
+ if (nfs_cb_errtbl[i].stat == status)
+ return nfs_cb_errtbl[i].errno;
+ }
+
+ dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status);
+ return -status;
+}
+
+static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+ enum nfsstat4 *status)
+{
+ __be32 *p;
+ u32 op;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ op = be32_to_cpup(p++);
+ if (unlikely(op != expected))
+ goto out_unexpected;
+ *status = be32_to_cpup(p);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+out_unexpected:
+ dprintk("NFSD: Callback server returned operation %d but "
+ "we issued a request for %d\n", op, expected);
+ return -EIO;
+}
+
+/*
+ * CB_COMPOUND4args
+ *
+ * struct CB_COMPOUND4args {
+ * utf8str_cs tag;
+ * uint32_t minorversion;
+ * uint32_t callback_ident;
+ * nfs_cb_argop4 argarray<>;
+ * };
+*/
+static void encode_cb_compound4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ __be32 * p;
+
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
+ p = xdr_encode_empty_array(p); /* empty tag */
+ *p++ = cpu_to_be32(hdr->minorversion);
+ *p++ = cpu_to_be32(hdr->ident);
+
+ hdr->nops_p = p;
+ *p = cpu_to_be32(hdr->nops); /* argarray element count */
+}
+
+/*
+ * Update argarray element count
+ */
+static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
+{
+ BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS);
+ *hdr->nops_p = cpu_to_be32(hdr->nops);
+}
+
+/*
+ * CB_COMPOUND4res
+ *
+ * struct CB_COMPOUND4res {
+ * nfsstat4 status;
+ * utf8str_cs tag;
+ * nfs_cb_resop4 resarray<>;
+ * };
+ */
+static int decode_cb_compound4res(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ hdr->status = be32_to_cpup(p++);
+ /* Ignore the tag */
+ length = be32_to_cpup(p++);
+ p = xdr_inline_decode(xdr, length + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ hdr->nops = be32_to_cpup(p);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * CB_RECALL4args
+ *
+ * struct CB_RECALL4args {
+ * stateid4 stateid;
+ * bool truncate;
+ * nfs_fh4 fh;
+ * };
+ */
+static void encode_cb_recall4args(struct xdr_stream *xdr,
+ const struct nfs4_delegation *dp,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
+ encode_stateid4(xdr, &dp->dl_stid.sc_stateid);
+
+ p = xdr_reserve_space(xdr, 4);
+ *p++ = xdr_zero; /* truncate */
+
+ encode_nfs_fh4(xdr, &dp->dl_fh);
+
+ hdr->nops++;
+}
+
+/*
+ * CB_SEQUENCE4args
+ *
+ * struct CB_SEQUENCE4args {
+ * sessionid4 csa_sessionid;
+ * sequenceid4 csa_sequenceid;
+ * slotid4 csa_slotid;
+ * slotid4 csa_highest_slotid;
+ * bool csa_cachethis;
+ * referring_call_list4 csa_referring_call_lists<>;
+ * };
+ */
+static void encode_cb_sequence4args(struct xdr_stream *xdr,
+ const struct nfsd4_callback *cb,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ __be32 *p;
+
+ if (hdr->minorversion == 0)
+ return;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
+ encode_sessionid4(xdr, session);
+
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
+ *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
+ *p++ = xdr_zero; /* csa_slotid */
+ *p++ = xdr_zero; /* csa_highest_slotid */
+ *p++ = xdr_zero; /* csa_cachethis */
+ xdr_encode_empty_array(p); /* csa_referring_call_lists */
+
+ hdr->nops++;
+}
+
+/*
+ * CB_SEQUENCE4resok
+ *
+ * struct CB_SEQUENCE4resok {
+ * sessionid4 csr_sessionid;
+ * sequenceid4 csr_sequenceid;
+ * slotid4 csr_slotid;
+ * slotid4 csr_highest_slotid;
+ * slotid4 csr_target_highest_slotid;
+ * };
+ *
+ * union CB_SEQUENCE4res switch (nfsstat4 csr_status) {
+ * case NFS4_OK:
+ * CB_SEQUENCE4resok csr_resok4;
+ * default:
+ * void;
+ * };
+ *
+ * Our current back channel implmentation supports a single backchannel
+ * with a single slot.
+ */
+static int decode_cb_sequence4resok(struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
+{
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ struct nfs4_sessionid id;
+ int status;
+ __be32 *p;
+ u32 dummy;
+
+ status = -ESERVERFAULT;
+
+ /*
+ * If the server returns different values for sessionID, slotID or
+ * sequence number, the server is looney tunes.
+ */
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
+ if (memcmp(id.data, session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN) != 0) {
+ dprintk("NFS: %s Invalid session id\n", __func__);
+ goto out;
+ }
+ p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
+
+ dummy = be32_to_cpup(p++);
+ if (dummy != session->se_cb_seq_nr) {
+ dprintk("NFS: %s Invalid sequence number\n", __func__);
+ goto out;
+ }
+
+ dummy = be32_to_cpup(p++);
+ if (dummy != 0) {
+ dprintk("NFS: %s Invalid slotid\n", __func__);
+ goto out;
+ }
+
+ /*
+ * FIXME: process highest slotid and target highest slotid
+ */
+ status = 0;
+out:
+ if (status)
+ nfsd4_mark_cb_fault(cb->cb_clp, status);
+ return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_cb_sequence4res(struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
+{
+ enum nfsstat4 nfserr;
+ int status;
+
+ if (cb->cb_minorversion == 0)
+ return 0;
+
+ status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
+ if (unlikely(status))
+ goto out;
+ if (unlikely(nfserr != NFS4_OK))
+ goto out_default;
+ status = decode_cb_sequence4resok(xdr, cb);
+out:
+ return status;
+out_default:
+ return nfs_cb_stat_to_errno(nfserr);
+}
+
+/*
+ * NFSv4.0 and NFSv4.1 XDR encode functions
+ *
+ * NFSv4.0 callback argument types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section 20
+ * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
+ * Protocol".
+ */
+
+/*
+ * NB: Without this zero space reservation, callbacks over krb5p fail
+ */
+static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *__unused)
+{
+ xdr_reserve_space(xdr, 0);
+}
+
+/*
+ * 20.2. Operation 4: CB_RECALL - Recall a Delegation
+ */
+static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfsd4_callback *cb)
+{
+ const struct nfs4_delegation *args = cb->cb_op;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recall4args(xdr, args, &hdr);
+ encode_cb_nops(&hdr);
+}
+
+
+/*
+ * NFSv4.0 and NFSv4.1 XDR decode functions
+ *
+ * NFSv4.0 callback result types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section 20
+ * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
+ * Protocol".
+ */
+
+static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *__unused)
+{
+ return 0;
+}
+
+/*
+ * 20.2. Operation 4: CB_RECALL - Recall a Delegation
+ */
+static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_compound_hdr hdr;
+ enum nfsstat4 nfserr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ goto out;
+
+ if (cb != NULL) {
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status))
+ goto out;
+ }
+
+ status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
+ if (unlikely(status))
+ goto out;
+ if (unlikely(nfserr != NFS4_OK))
+ status = nfs_cb_stat_to_errno(nfserr);
+out:
+ return status;
+}
+
+/*
+ * RPC procedure tables
+ */
+#define PROC(proc, call, argtype, restype) \
+[NFSPROC4_CLNT_##proc] = { \
+ .p_proc = NFSPROC4_CB_##call, \
+ .p_encode = (kxdreproc_t)nfs4_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nfs4_xdr_dec_##restype, \
+ .p_arglen = NFS4_enc_##argtype##_sz, \
+ .p_replen = NFS4_dec_##restype##_sz, \
+ .p_statidx = NFSPROC4_CB_##call, \
+ .p_name = #proc, \
+}
+
+static struct rpc_procinfo nfs4_cb_procedures[] = {
+ PROC(CB_NULL, NULL, cb_null, cb_null),
+ PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall),
+};
+
+static struct rpc_version nfs_cb_version4 = {
+/*
+ * Note on the callback rpc program version number: despite language in rfc
+ * 5661 section 18.36.3 requiring servers to use 4 in this field, the
+ * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
+ * in practice that appears to be what implementations use. The section
+ * 18.36.3 language is expected to be fixed in an erratum.
+ */
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
+ .procs = nfs4_cb_procedures
+};
+
+static struct rpc_version *nfs_cb_version[] = {
+ &nfs_cb_version4,
+};
+
+static struct rpc_program cb_program;
+
+static struct rpc_stat cb_stats = {
+ .program = &cb_program
+};
+
+#define NFS4_CALLBACK 0x40000000
+static struct rpc_program cb_program = {
+ .name = "nfs4_cb",
+ .number = NFS4_CALLBACK,
+ .nrvers = ARRAY_SIZE(nfs_cb_version),
+ .version = nfs_cb_version,
+ .stats = &cb_stats,
+ .pipe_dir_name = "/nfsd4_cb",
+};
+
+static int max_cb_time(void)
+{
+ return max(nfsd4_lease/10, (time_t)1) * HZ;
+}
+
+
+static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
+{
+ struct rpc_timeout timeparms = {
+ .to_initval = max_cb_time(),
+ .to_retries = 0,
+ };
+ struct rpc_create_args args = {
+ .net = &init_net,
+ .address = (struct sockaddr *) &conn->cb_addr,
+ .addrsize = conn->cb_addrlen,
+ .saddress = (struct sockaddr *) &conn->cb_saddr,
+ .timeout = &timeparms,
+ .program = &cb_program,
+ .version = 0,
+ .authflavor = clp->cl_flavor,
+ .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+ };
+ struct rpc_clnt *client;
+
+ if (clp->cl_minorversion == 0) {
+ if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+ return -EINVAL;
+ args.client_name = clp->cl_principal;
+ args.prognumber = conn->cb_prog,
+ args.protocol = XPRT_TRANSPORT_TCP;
+ clp->cl_cb_ident = conn->cb_ident;
+ } else {
+ if (!conn->cb_xprt)
+ return -EINVAL;
+ clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
+ clp->cl_cb_session = ses;
+ args.bc_xprt = conn->cb_xprt;
+ args.prognumber = clp->cl_cb_session->se_cb_prog;
+ args.protocol = XPRT_TRANSPORT_BC_TCP;
+ }
+ /* Create RPC client */
+ client = rpc_create(&args);
+ if (IS_ERR(client)) {
+ dprintk("NFSD: couldn't create callback client: %ld\n",
+ PTR_ERR(client));
+ return PTR_ERR(client);
+ }
+ clp->cl_cb_client = client;
+ return 0;
+
+}
+
+static void warn_no_callback_path(struct nfs4_client *clp, int reason)
+{
+ dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
+ (int)clp->cl_name.len, clp->cl_name.data, reason);
+}
+
+static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
+{
+ clp->cl_cb_state = NFSD4_CB_DOWN;
+ warn_no_callback_path(clp, reason);
+}
+
+static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
+{
+ clp->cl_cb_state = NFSD4_CB_FAULT;
+ warn_no_callback_path(clp, reason);
+}
+
+static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
+
+ if (task->tk_status)
+ nfsd4_mark_cb_down(clp, task->tk_status);
+ else
+ clp->cl_cb_state = NFSD4_CB_UP;
+}
+
+static const struct rpc_call_ops nfsd4_cb_probe_ops = {
+ /* XXX: release method to ensure we set the cb channel down if
+ * necessary on early failure? */
+ .rpc_call_done = nfsd4_cb_probe_done,
+};
+
+static struct rpc_cred *callback_cred;
+
+int set_callback_cred(void)
+{
+ if (callback_cred)
+ return 0;
+ callback_cred = rpc_lookup_machine_cred("nfs");
+ if (!callback_cred)
+ return -ENOMEM;
+ return 0;
+}
+
+static struct workqueue_struct *callback_wq;
+
+static void run_nfsd4_cb(struct nfsd4_callback *cb)
+{
+ queue_work(callback_wq, &cb->cb_work);
+}
+
+static void do_probe_callback(struct nfs4_client *clp)
+{
+ struct nfsd4_callback *cb = &clp->cl_cb_null;
+
+ cb->cb_op = NULL;
+ cb->cb_clp = clp;
+
+ cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
+ cb->cb_msg.rpc_argp = NULL;
+ cb->cb_msg.rpc_resp = NULL;
+ cb->cb_msg.rpc_cred = callback_cred;
+
+ cb->cb_ops = &nfsd4_cb_probe_ops;
+
+ run_nfsd4_cb(cb);
+}
+
+/*
+ * Poke the callback thread to process any updates to the callback
+ * parameters, and send a null probe.
+ */
+void nfsd4_probe_callback(struct nfs4_client *clp)
+{
+ /* XXX: atomicity? Also, should we be using cl_cb_flags? */
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
+ do_probe_callback(clp);
+}
+
+void nfsd4_probe_callback_sync(struct nfs4_client *clp)
+{
+ nfsd4_probe_callback(clp);
+ flush_workqueue(callback_wq);
+}
+
+void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
+{
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ spin_lock(&clp->cl_lock);
+ memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
+ spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * There's currently a single callback channel slot.
+ * If the slot is available, then mark it busy. Otherwise, set the
+ * thread for sleeping on the callback RPC wait queue.
+ */
+static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
+{
+ if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
+ dprintk("%s slot is busy\n", __func__);
+ return false;
+ }
+ return true;
+}
+
+/*
+ * TODO: cb_sequence should support referring call lists, cachethis, multiple
+ * slots, and mark callback channel down on communication errors.
+ */
+static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfsd4_callback *cb = calldata;
+ struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+ u32 minorversion = clp->cl_minorversion;
+
+ cb->cb_minorversion = minorversion;
+ if (minorversion) {
+ if (!nfsd41_cb_get_slot(clp, task))
+ return;
+ }
+ spin_lock(&clp->cl_lock);
+ if (list_empty(&cb->cb_per_client)) {
+ /* This is the first call, not a restart */
+ cb->cb_done = false;
+ list_add(&cb->cb_per_client, &clp->cl_callbacks);
+ }
+ spin_unlock(&clp->cl_lock);
+ rpc_call_start(task);
+}
+
+static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
+{
+ struct nfsd4_callback *cb = calldata;
+ struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+
+ dprintk("%s: minorversion=%d\n", __func__,
+ clp->cl_minorversion);
+
+ if (clp->cl_minorversion) {
+ /* No need for lock, access serialized in nfsd4_cb_prepare */
+ ++clp->cl_cb_session->se_cb_seq_nr;
+ clear_bit(0, &clp->cl_cb_slot_busy);
+ rpc_wake_up_next(&clp->cl_cb_waitq);
+ dprintk("%s: freed slot, new seqid=%d\n", __func__,
+ clp->cl_cb_session->se_cb_seq_nr);
+
+ /* We're done looking into the sequence information */
+ task->tk_msg.rpc_resp = NULL;
+ }
+}
+
+
+static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
+{
+ struct nfsd4_callback *cb = calldata;
+ struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+ struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+
+ nfsd4_cb_done(task, calldata);
+
+ if (current_rpc_client != task->tk_client) {
+ /* We're shutting down or changing cl_cb_client; leave
+ * it to nfsd4_process_cb_update to restart the call if
+ * necessary. */
+ return;
+ }
+
+ if (cb->cb_done)
+ return;
+ switch (task->tk_status) {
+ case 0:
+ cb->cb_done = true;
+ return;
+ case -EBADHANDLE:
+ case -NFS4ERR_BAD_STATEID:
+ /* Race: client probably got cb_recall
+ * before open reply granting delegation */
+ break;
+ default:
+ /* Network partition? */
+ nfsd4_mark_cb_down(clp, task->tk_status);
+ }
+ if (dp->dl_retries--) {
+ rpc_delay(task, 2*HZ);
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
+ return;
+ }
+ nfsd4_mark_cb_down(clp, task->tk_status);
+ cb->cb_done = true;
+}
+
+static void nfsd4_cb_recall_release(void *calldata)
+{
+ struct nfsd4_callback *cb = calldata;
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
+
+ if (cb->cb_done) {
+ spin_lock(&clp->cl_lock);
+ list_del(&cb->cb_per_client);
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_delegation(dp);
+ }
+}
+
+static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+ .rpc_call_prepare = nfsd4_cb_prepare,
+ .rpc_call_done = nfsd4_cb_recall_done,
+ .rpc_release = nfsd4_cb_recall_release,
+};
+
+int nfsd4_create_callback_queue(void)
+{
+ callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
+ if (!callback_wq)
+ return -ENOMEM;
+ return 0;
+}
+
+void nfsd4_destroy_callback_queue(void)
+{
+ destroy_workqueue(callback_wq);
+}
+
+/* must be called under the state lock */
+void nfsd4_shutdown_callback(struct nfs4_client *clp)
+{
+ set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags);
+ /*
+ * Note this won't actually result in a null callback;
+ * instead, nfsd4_do_callback_rpc() will detect the killed
+ * client, destroy the rpc client, and stop:
+ */
+ do_probe_callback(clp);
+ flush_workqueue(callback_wq);
+}
+
+static void nfsd4_release_cb(struct nfsd4_callback *cb)
+{
+ if (cb->cb_ops->rpc_release)
+ cb->cb_ops->rpc_release(cb);
+}
+
+/* requires cl_lock: */
+static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
+{
+ struct nfsd4_session *s;
+ struct nfsd4_conn *c;
+
+ list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
+ list_for_each_entry(c, &s->se_conns, cn_persession) {
+ if (c->cn_flags & NFS4_CDFC4_BACK)
+ return c;
+ }
+ }
+ return NULL;
+}
+
+static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_conn conn;
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd4_session *ses = NULL;
+ struct nfsd4_conn *c;
+ int err;
+
+ /*
+ * This is either an update, or the client dying; in either case,
+ * kill the old client:
+ */
+ if (clp->cl_cb_client) {
+ rpc_shutdown_client(clp->cl_cb_client);
+ clp->cl_cb_client = NULL;
+ }
+ if (clp->cl_cb_conn.cb_xprt) {
+ svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+ clp->cl_cb_conn.cb_xprt = NULL;
+ }
+ if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags))
+ return;
+ spin_lock(&clp->cl_lock);
+ /*
+ * Only serialized callback code is allowed to clear these
+ * flags; main nfsd code can only set them:
+ */
+ BUG_ON(!clp->cl_cb_flags);
+ clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
+ memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
+ c = __nfsd4_find_backchannel(clp);
+ if (c) {
+ svc_xprt_get(c->cn_xprt);
+ conn.cb_xprt = c->cn_xprt;
+ ses = c->cn_session;
+ }
+ spin_unlock(&clp->cl_lock);
+
+ err = setup_callback_client(clp, &conn, ses);
+ if (err) {
+ warn_no_callback_path(clp, err);
+ return;
+ }
+ /* Yay, the callback channel's back! Restart any callbacks: */
+ list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
+ run_nfsd4_cb(cb);
+}
+
+void nfsd4_do_callback_rpc(struct work_struct *w)
+{
+ struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
+ struct nfs4_client *clp = cb->cb_clp;
+ struct rpc_clnt *clnt;
+
+ if (clp->cl_cb_flags)
+ nfsd4_process_cb_update(cb);
+
+ clnt = clp->cl_cb_client;
+ if (!clnt) {
+ /* Callback channel broken, or client killed; give up: */
+ nfsd4_release_cb(cb);
+ return;
+ }
+ rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ cb->cb_ops, cb);
+}
+
+void nfsd4_cb_recall(struct nfs4_delegation *dp)
+{
+ struct nfsd4_callback *cb = &dp->dl_recall;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+
+ dp->dl_retries = 1;
+ cb->cb_op = dp;
+ cb->cb_clp = clp;
+ cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
+ cb->cb_msg.rpc_argp = cb;
+ cb->cb_msg.rpc_resp = cb;
+ cb->cb_msg.rpc_cred = callback_cred;
+
+ cb->cb_ops = &nfsd4_cb_recall_ops;
+ dp->dl_retries = 1;
+
+ INIT_LIST_HEAD(&cb->cb_per_client);
+ cb->cb_done = true;
+
+ run_nfsd4_cb(&dp->dl_recall);
+}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
new file mode 100644
index 00000000000..94096273cd6
--- /dev/null
+++ b/fs/nfsd/nfs4idmap.c
@@ -0,0 +1,588 @@
+/*
+ * Mapping of UID/GIDs to name and vice versa.
+ *
+ * Copyright (c) 2002, 2003 The Regents of the University of
+ * Michigan. All rights reserved.
+ *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <net/net_namespace.h>
+#include "idmap.h"
+#include "nfsd.h"
+
+/*
+ * Cache entry
+ */
+
+/*
+ * XXX we know that IDMAP_NAMESZ < PAGE_SIZE, but it's ugly to rely on
+ * that.
+ */
+
+#define IDMAP_TYPE_USER 0
+#define IDMAP_TYPE_GROUP 1
+
+struct ent {
+ struct cache_head h;
+ int type; /* User / Group */
+ uid_t id;
+ char name[IDMAP_NAMESZ];
+ char authname[IDMAP_NAMESZ];
+};
+
+/* Common entry handling */
+
+#define ENT_HASHBITS 8
+#define ENT_HASHMAX (1 << ENT_HASHBITS)
+
+static void
+ent_init(struct cache_head *cnew, struct cache_head *citm)
+{
+ struct ent *new = container_of(cnew, struct ent, h);
+ struct ent *itm = container_of(citm, struct ent, h);
+
+ new->id = itm->id;
+ new->type = itm->type;
+
+ strlcpy(new->name, itm->name, sizeof(new->name));
+ strlcpy(new->authname, itm->authname, sizeof(new->name));
+}
+
+static void
+ent_put(struct kref *ref)
+{
+ struct ent *map = container_of(ref, struct ent, h.ref);
+ kfree(map);
+}
+
+static struct cache_head *
+ent_alloc(void)
+{
+ struct ent *e = kmalloc(sizeof(*e), GFP_KERNEL);
+ if (e)
+ return &e->h;
+ else
+ return NULL;
+}
+
+/*
+ * ID -> Name cache
+ */
+
+static struct cache_head *idtoname_table[ENT_HASHMAX];
+
+static uint32_t
+idtoname_hash(struct ent *ent)
+{
+ uint32_t hash;
+
+ hash = hash_str(ent->authname, ENT_HASHBITS);
+ hash = hash_long(hash ^ ent->id, ENT_HASHBITS);
+
+ /* Flip LSB for user/group */
+ if (ent->type == IDMAP_TYPE_GROUP)
+ hash ^= 1;
+
+ return hash;
+}
+
+static void
+idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
+ int *blen)
+{
+ struct ent *ent = container_of(ch, struct ent, h);
+ char idstr[11];
+
+ qword_add(bpp, blen, ent->authname);
+ snprintf(idstr, sizeof(idstr), "%u", ent->id);
+ qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user");
+ qword_add(bpp, blen, idstr);
+
+ (*bpp)[-1] = '\n';
+}
+
+static int
+idtoname_upcall(struct cache_detail *cd, struct cache_head *ch)
+{
+ return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request);
+}
+
+static int
+idtoname_match(struct cache_head *ca, struct cache_head *cb)
+{
+ struct ent *a = container_of(ca, struct ent, h);
+ struct ent *b = container_of(cb, struct ent, h);
+
+ return (a->id == b->id && a->type == b->type &&
+ strcmp(a->authname, b->authname) == 0);
+}
+
+static int
+idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
+{
+ struct ent *ent;
+
+ if (h == NULL) {
+ seq_puts(m, "#domain type id [name]\n");
+ return 0;
+ }
+ ent = container_of(h, struct ent, h);
+ seq_printf(m, "%s %s %u", ent->authname,
+ ent->type == IDMAP_TYPE_GROUP ? "group" : "user",
+ ent->id);
+ if (test_bit(CACHE_VALID, &h->flags))
+ seq_printf(m, " %s", ent->name);
+ seq_printf(m, "\n");
+ return 0;
+}
+
+static void
+warn_no_idmapd(struct cache_detail *detail, int has_died)
+{
+ printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n",
+ has_died ? "died" : "not been started");
+}
+
+
+static int idtoname_parse(struct cache_detail *, char *, int);
+static struct ent *idtoname_lookup(struct ent *);
+static struct ent *idtoname_update(struct ent *, struct ent *);
+
+static struct cache_detail idtoname_cache = {
+ .owner = THIS_MODULE,
+ .hash_size = ENT_HASHMAX,
+ .hash_table = idtoname_table,
+ .name = "nfs4.idtoname",
+ .cache_put = ent_put,
+ .cache_upcall = idtoname_upcall,
+ .cache_parse = idtoname_parse,
+ .cache_show = idtoname_show,
+ .warn_no_listener = warn_no_idmapd,
+ .match = idtoname_match,
+ .init = ent_init,
+ .update = ent_init,
+ .alloc = ent_alloc,
+};
+
+static int
+idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
+{
+ struct ent ent, *res;
+ char *buf1, *bp;
+ int len;
+ int error = -EINVAL;
+
+ if (buf[buflen - 1] != '\n')
+ return (-EINVAL);
+ buf[buflen - 1]= '\0';
+
+ buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (buf1 == NULL)
+ return (-ENOMEM);
+
+ memset(&ent, 0, sizeof(ent));
+
+ /* Authentication name */
+ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+ goto out;
+ memcpy(ent.authname, buf1, sizeof(ent.authname));
+
+ /* Type */
+ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+ goto out;
+ ent.type = strcmp(buf1, "user") == 0 ?
+ IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
+
+ /* ID */
+ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+ goto out;
+ ent.id = simple_strtoul(buf1, &bp, 10);
+ if (bp == buf1)
+ goto out;
+
+ /* expiry */
+ ent.h.expiry_time = get_expiry(&buf);
+ if (ent.h.expiry_time == 0)
+ goto out;
+
+ error = -ENOMEM;
+ res = idtoname_lookup(&ent);
+ if (!res)
+ goto out;
+
+ /* Name */
+ error = -EINVAL;
+ len = qword_get(&buf, buf1, PAGE_SIZE);
+ if (len < 0)
+ goto out;
+ if (len == 0)
+ set_bit(CACHE_NEGATIVE, &ent.h.flags);
+ else if (len >= IDMAP_NAMESZ)
+ goto out;
+ else
+ memcpy(ent.name, buf1, sizeof(ent.name));
+ error = -ENOMEM;
+ res = idtoname_update(&ent, res);
+ if (res == NULL)
+ goto out;
+
+ cache_put(&res->h, &idtoname_cache);
+
+ error = 0;
+out:
+ kfree(buf1);
+
+ return error;
+}
+
+
+static struct ent *
+idtoname_lookup(struct ent *item)
+{
+ struct cache_head *ch = sunrpc_cache_lookup(&idtoname_cache,
+ &item->h,
+ idtoname_hash(item));
+ if (ch)
+ return container_of(ch, struct ent, h);
+ else
+ return NULL;
+}
+
+static struct ent *
+idtoname_update(struct ent *new, struct ent *old)
+{
+ struct cache_head *ch = sunrpc_cache_update(&idtoname_cache,
+ &new->h, &old->h,
+ idtoname_hash(new));
+ if (ch)
+ return container_of(ch, struct ent, h);
+ else
+ return NULL;
+}
+
+
+/*
+ * Name -> ID cache
+ */
+
+static struct cache_head *nametoid_table[ENT_HASHMAX];
+
+static inline int
+nametoid_hash(struct ent *ent)
+{
+ return hash_str(ent->name, ENT_HASHBITS);
+}
+
+static void
+nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
+ int *blen)
+{
+ struct ent *ent = container_of(ch, struct ent, h);
+
+ qword_add(bpp, blen, ent->authname);
+ qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user");
+ qword_add(bpp, blen, ent->name);
+
+ (*bpp)[-1] = '\n';
+}
+
+static int
+nametoid_upcall(struct cache_detail *cd, struct cache_head *ch)
+{
+ return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request);
+}
+
+static int
+nametoid_match(struct cache_head *ca, struct cache_head *cb)
+{
+ struct ent *a = container_of(ca, struct ent, h);
+ struct ent *b = container_of(cb, struct ent, h);
+
+ return (a->type == b->type && strcmp(a->name, b->name) == 0 &&
+ strcmp(a->authname, b->authname) == 0);
+}
+
+static int
+nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
+{
+ struct ent *ent;
+
+ if (h == NULL) {
+ seq_puts(m, "#domain type name [id]\n");
+ return 0;
+ }
+ ent = container_of(h, struct ent, h);
+ seq_printf(m, "%s %s %s", ent->authname,
+ ent->type == IDMAP_TYPE_GROUP ? "group" : "user",
+ ent->name);
+ if (test_bit(CACHE_VALID, &h->flags))
+ seq_printf(m, " %u", ent->id);
+ seq_printf(m, "\n");
+ return 0;
+}
+
+static struct ent *nametoid_lookup(struct ent *);
+static struct ent *nametoid_update(struct ent *, struct ent *);
+static int nametoid_parse(struct cache_detail *, char *, int);
+
+static struct cache_detail nametoid_cache = {
+ .owner = THIS_MODULE,
+ .hash_size = ENT_HASHMAX,
+ .hash_table = nametoid_table,
+ .name = "nfs4.nametoid",
+ .cache_put = ent_put,
+ .cache_upcall = nametoid_upcall,
+ .cache_parse = nametoid_parse,
+ .cache_show = nametoid_show,
+ .warn_no_listener = warn_no_idmapd,
+ .match = nametoid_match,
+ .init = ent_init,
+ .update = ent_init,
+ .alloc = ent_alloc,
+};
+
+static int
+nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
+{
+ struct ent ent, *res;
+ char *buf1;
+ int error = -EINVAL;
+
+ if (buf[buflen - 1] != '\n')
+ return (-EINVAL);
+ buf[buflen - 1]= '\0';
+
+ buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (buf1 == NULL)
+ return (-ENOMEM);
+
+ memset(&ent, 0, sizeof(ent));
+
+ /* Authentication name */
+ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+ goto out;
+ memcpy(ent.authname, buf1, sizeof(ent.authname));
+
+ /* Type */
+ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+ goto out;
+ ent.type = strcmp(buf1, "user") == 0 ?
+ IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
+
+ /* Name */
+ error = qword_get(&buf, buf1, PAGE_SIZE);
+ if (error <= 0 || error >= IDMAP_NAMESZ)
+ goto out;
+ memcpy(ent.name, buf1, sizeof(ent.name));
+
+ /* expiry */
+ ent.h.expiry_time = get_expiry(&buf);
+ if (ent.h.expiry_time == 0)
+ goto out;
+
+ /* ID */
+ error = get_int(&buf, &ent.id);
+ if (error == -EINVAL)
+ goto out;
+ if (error == -ENOENT)
+ set_bit(CACHE_NEGATIVE, &ent.h.flags);
+
+ error = -ENOMEM;
+ res = nametoid_lookup(&ent);
+ if (res == NULL)
+ goto out;
+ res = nametoid_update(&ent, res);
+ if (res == NULL)
+ goto out;
+
+ cache_put(&res->h, &nametoid_cache);
+ error = 0;
+out:
+ kfree(buf1);
+
+ return (error);
+}
+
+
+static struct ent *
+nametoid_lookup(struct ent *item)
+{
+ struct cache_head *ch = sunrpc_cache_lookup(&nametoid_cache,
+ &item->h,
+ nametoid_hash(item));
+ if (ch)
+ return container_of(ch, struct ent, h);
+ else
+ return NULL;
+}
+
+static struct ent *
+nametoid_update(struct ent *new, struct ent *old)
+{
+ struct cache_head *ch = sunrpc_cache_update(&nametoid_cache,
+ &new->h, &old->h,
+ nametoid_hash(new));
+ if (ch)
+ return container_of(ch, struct ent, h);
+ else
+ return NULL;
+}
+
+/*
+ * Exported API
+ */
+
+int
+nfsd_idmap_init(void)
+{
+ int rv;
+
+ rv = cache_register_net(&idtoname_cache, &init_net);
+ if (rv)
+ return rv;
+ rv = cache_register_net(&nametoid_cache, &init_net);
+ if (rv)
+ cache_unregister_net(&idtoname_cache, &init_net);
+ return rv;
+}
+
+void
+nfsd_idmap_shutdown(void)
+{
+ cache_unregister_net(&idtoname_cache, &init_net);
+ cache_unregister_net(&nametoid_cache, &init_net);
+}
+
+static int
+idmap_lookup(struct svc_rqst *rqstp,
+ struct ent *(*lookup_fn)(struct ent *), struct ent *key,
+ struct cache_detail *detail, struct ent **item)
+{
+ int ret;
+
+ *item = lookup_fn(key);
+ if (!*item)
+ return -ENOMEM;
+ retry:
+ ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle);
+
+ if (ret == -ETIMEDOUT) {
+ struct ent *prev_item = *item;
+ *item = lookup_fn(key);
+ if (*item != prev_item)
+ goto retry;
+ cache_put(&(*item)->h, detail);
+ }
+ return ret;
+}
+
+static char *
+rqst_authname(struct svc_rqst *rqstp)
+{
+ struct auth_domain *clp;
+
+ clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client;
+ return clp->name;
+}
+
+static __be32
+idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
+ uid_t *id)
+{
+ struct ent *item, key = {
+ .type = type,
+ };
+ int ret;
+
+ if (namelen + 1 > sizeof(key.name))
+ return nfserr_badowner;
+ memcpy(key.name, name, namelen);
+ key.name[namelen] = '\0';
+ strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item);
+ if (ret == -ENOENT)
+ return nfserr_badowner;
+ if (ret)
+ return nfserrno(ret);
+ *id = item->id;
+ cache_put(&item->h, &nametoid_cache);
+ return 0;
+}
+
+static int
+idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
+{
+ struct ent *item, key = {
+ .id = id,
+ .type = type,
+ };
+ int ret;
+
+ strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item);
+ if (ret == -ENOENT)
+ return sprintf(name, "%u", id);
+ if (ret)
+ return ret;
+ ret = strlen(item->name);
+ BUG_ON(ret > IDMAP_NAMESZ);
+ memcpy(name, item->name, ret);
+ cache_put(&item->h, &idtoname_cache);
+ return ret;
+}
+
+__be32
+nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
+ __u32 *id)
+{
+ return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id);
+}
+
+__be32
+nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
+ __u32 *id)
+{
+ return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id);
+}
+
+int
+nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
+{
+ return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
+}
+
+int
+nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
+{
+ return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
+}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
new file mode 100644
index 00000000000..896da74ec56
--- /dev/null
+++ b/fs/nfsd/nfs4proc.c
@@ -0,0 +1,1720 @@
+/*
+ * Server-side procedures for NFSv4.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <linux/file.h>
+#include <linux/slab.h>
+
+#include "idmap.h"
+#include "cache.h"
+#include "xdr4.h"
+#include "vfs.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+static u32 nfsd_attrmask[] = {
+ NFSD_WRITEABLE_ATTRS_WORD0,
+ NFSD_WRITEABLE_ATTRS_WORD1,
+ NFSD_WRITEABLE_ATTRS_WORD2
+};
+
+static u32 nfsd41_ex_attrmask[] = {
+ NFSD_SUPPATTR_EXCLCREAT_WORD0,
+ NFSD_SUPPATTR_EXCLCREAT_WORD1,
+ NFSD_SUPPATTR_EXCLCREAT_WORD2
+};
+
+static __be32
+check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ u32 *bmval, u32 *writable)
+{
+ struct dentry *dentry = cstate->current_fh.fh_dentry;
+
+ /*
+ * Check about attributes are supported by the NFSv4 server or not.
+ * According to spec, unsupported attributes return ERR_ATTRNOTSUPP.
+ */
+ if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) ||
+ (bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) ||
+ (bmval[2] & ~nfsd_suppattrs2(cstate->minorversion)))
+ return nfserr_attrnotsupp;
+
+ /*
+ * Check FATTR4_WORD0_ACL can be supported
+ * in current environment or not.
+ */
+ if (bmval[0] & FATTR4_WORD0_ACL) {
+ if (!IS_POSIXACL(dentry->d_inode))
+ return nfserr_attrnotsupp;
+ }
+
+ /*
+ * According to spec, read-only attributes return ERR_INVAL.
+ */
+ if (writable) {
+ if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) ||
+ (bmval[2] & ~writable[2]))
+ return nfserr_inval;
+ }
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_check_open_attributes(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+{
+ __be32 status = nfs_ok;
+
+ if (open->op_create == NFS4_OPEN_CREATE) {
+ if (open->op_createmode == NFS4_CREATE_UNCHECKED
+ || open->op_createmode == NFS4_CREATE_GUARDED)
+ status = check_attr_support(rqstp, cstate,
+ open->op_bmval, nfsd_attrmask);
+ else if (open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1)
+ status = check_attr_support(rqstp, cstate,
+ open->op_bmval, nfsd41_ex_attrmask);
+ }
+
+ return status;
+}
+
+static int
+is_create_with_attrs(struct nfsd4_open *open)
+{
+ return open->op_create == NFS4_OPEN_CREATE
+ && (open->op_createmode == NFS4_CREATE_UNCHECKED
+ || open->op_createmode == NFS4_CREATE_GUARDED
+ || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
+}
+
+/*
+ * if error occurs when setting the acl, just clear the acl bit
+ * in the returned attr bitmap.
+ */
+static void
+do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfs4_acl *acl, u32 *bmval)
+{
+ __be32 status;
+
+ status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
+ if (status)
+ /*
+ * We should probably fail the whole open at this point,
+ * but we've already created the file, so it's too late;
+ * So this seems the least of evils:
+ */
+ bmval[0] &= ~FATTR4_WORD0_ACL;
+}
+
+static inline void
+fh_dup2(struct svc_fh *dst, struct svc_fh *src)
+{
+ fh_put(dst);
+ dget(src->fh_dentry);
+ if (src->fh_export)
+ cache_get(&src->fh_export->h);
+ *dst = *src;
+}
+
+static __be32
+do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode)
+{
+ __be32 status;
+
+ if (open->op_truncate &&
+ !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+ return nfserr_inval;
+
+ accmode |= NFSD_MAY_READ_IF_EXEC;
+
+ if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
+ accmode |= NFSD_MAY_READ;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
+ if (open->op_share_deny & NFS4_SHARE_DENY_READ)
+ accmode |= NFSD_MAY_WRITE;
+
+ status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
+
+ return status;
+}
+
+static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
+{
+ umode_t mode = fh->fh_dentry->d_inode->i_mode;
+
+ if (S_ISREG(mode))
+ return nfs_ok;
+ if (S_ISDIR(mode))
+ return nfserr_isdir;
+ /*
+ * Using err_symlink as our catch-all case may look odd; but
+ * there's no other obvious error for this case in 4.0, and we
+ * happen to know that it will cause the linux v4 client to do
+ * the right thing on attempts to open something other than a
+ * regular file.
+ */
+ return nfserr_symlink;
+}
+
+static __be32
+do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+ struct svc_fh resfh;
+ __be32 status;
+
+ fh_init(&resfh, NFS4_FHSIZE);
+ open->op_truncate = 0;
+
+ if (open->op_create) {
+ /* FIXME: check session persistence and pnfs flags.
+ * The nfsv4.1 spec requires the following semantics:
+ *
+ * Persistent | pNFS | Server REQUIRED | Client Allowed
+ * Reply Cache | server | |
+ * -------------+--------+-----------------+--------------------
+ * no | no | EXCLUSIVE4_1 | EXCLUSIVE4_1
+ * | | | (SHOULD)
+ * | | and EXCLUSIVE4 | or EXCLUSIVE4
+ * | | | (SHOULD NOT)
+ * no | yes | EXCLUSIVE4_1 | EXCLUSIVE4_1
+ * yes | no | GUARDED4 | GUARDED4
+ * yes | yes | GUARDED4 | GUARDED4
+ */
+
+ /*
+ * Note: create modes (UNCHECKED,GUARDED...) are the same
+ * in NFSv4 as in v3 except EXCLUSIVE4_1.
+ */
+ status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
+ open->op_fname.len, &open->op_iattr,
+ &resfh, open->op_createmode,
+ (u32 *)open->op_verf.data,
+ &open->op_truncate, &open->op_created);
+
+ /*
+ * Following rfc 3530 14.2.16, use the returned bitmask
+ * to indicate which attributes we used to store the
+ * verifier:
+ */
+ if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)
+ open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_MODIFY);
+ } else {
+ status = nfsd_lookup(rqstp, current_fh,
+ open->op_fname.data, open->op_fname.len, &resfh);
+ fh_unlock(current_fh);
+ if (status)
+ goto out;
+ status = nfsd_check_obj_isreg(&resfh);
+ }
+ if (status)
+ goto out;
+
+ if (is_create_with_attrs(open) && open->op_acl != NULL)
+ do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval);
+
+ set_change_info(&open->op_cinfo, current_fh);
+ fh_dup2(current_fh, &resfh);
+
+ /* set reply cache */
+ fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
+ &resfh.fh_handle);
+ if (!open->op_created)
+ status = do_open_permission(rqstp, current_fh, open,
+ NFSD_MAY_NOP);
+
+out:
+ fh_put(&resfh);
+ return status;
+}
+
+static __be32
+do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+ __be32 status;
+
+ /* We don't know the target directory, and therefore can not
+ * set the change info
+ */
+
+ memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info));
+
+ /* set replay cache */
+ fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
+ &current_fh->fh_handle);
+
+ open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
+ (open->op_iattr.ia_size == 0);
+
+ status = do_open_permission(rqstp, current_fh, open,
+ NFSD_MAY_OWNER_OVERRIDE);
+
+ return status;
+}
+
+static void
+copy_clientid(clientid_t *clid, struct nfsd4_session *session)
+{
+ struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)session->se_sessionid.data;
+
+ clid->cl_boot = sid->clientid.cl_boot;
+ clid->cl_id = sid->clientid.cl_id;
+}
+
+static __be32
+nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_open *open)
+{
+ __be32 status;
+ struct nfsd4_compoundres *resp;
+
+ dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
+ (int)open->op_fname.len, open->op_fname.data,
+ open->op_openowner);
+
+ /* This check required by spec. */
+ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
+ return nfserr_inval;
+
+ /* We don't yet support WANT bits: */
+ open->op_share_access &= NFS4_SHARE_ACCESS_MASK;
+
+ open->op_created = 0;
+ /*
+ * RFC5661 18.51.3
+ * Before RECLAIM_COMPLETE done, server should deny new lock
+ */
+ if (nfsd4_has_session(cstate) &&
+ !cstate->session->se_client->cl_firststate &&
+ open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+ return nfserr_grace;
+
+ if (nfsd4_has_session(cstate))
+ copy_clientid(&open->op_clientid, cstate->session);
+
+ nfs4_lock_state();
+
+ /* check seqid for replay. set nfs4_owner */
+ resp = rqstp->rq_resp;
+ status = nfsd4_process_open1(&resp->cstate, open);
+ if (status == nfserr_replay_me) {
+ struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay;
+ fh_put(&cstate->current_fh);
+ fh_copy_shallow(&cstate->current_fh.fh_handle,
+ &rp->rp_openfh);
+ status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
+ if (status)
+ dprintk("nfsd4_open: replay failed"
+ " restoring previous filehandle\n");
+ else
+ status = nfserr_replay_me;
+ }
+ if (status)
+ goto out;
+
+ status = nfsd4_check_open_attributes(rqstp, cstate, open);
+ if (status)
+ goto out;
+
+ /* Openowner is now set, so sequence id will get bumped. Now we need
+ * these checks before we do any creates: */
+ status = nfserr_grace;
+ if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+ goto out;
+ status = nfserr_no_grace;
+ if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+ goto out;
+
+ switch (open->op_claim_type) {
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ case NFS4_OPEN_CLAIM_NULL:
+ status = do_open_lookup(rqstp, &cstate->current_fh,
+ open);
+ if (status)
+ goto out;
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ status = nfs4_check_open_reclaim(&open->op_clientid);
+ if (status)
+ goto out;
+ case NFS4_OPEN_CLAIM_FH:
+ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+ status = do_open_fhandle(rqstp, &cstate->current_fh,
+ open);
+ if (status)
+ goto out;
+ break;
+ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ dprintk("NFSD: unsupported OPEN claim type %d\n",
+ open->op_claim_type);
+ status = nfserr_notsupp;
+ goto out;
+ default:
+ dprintk("NFSD: Invalid OPEN claim type %d\n",
+ open->op_claim_type);
+ status = nfserr_inval;
+ goto out;
+ }
+ /*
+ * nfsd4_process_open2() does the actual opening of the file. If
+ * successful, it (1) truncates the file if open->op_truncate was
+ * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+ */
+ status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
+ WARN_ON(status && open->op_created);
+out:
+ nfsd4_cleanup_open_state(open, status);
+ if (open->op_openowner)
+ cstate->replay_owner = &open->op_openowner->oo_owner;
+ else
+ nfs4_unlock_state();
+ return status;
+}
+
+/*
+ * filehandle-manipulating ops.
+ */
+static __be32
+nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct svc_fh **getfh)
+{
+ if (!cstate->current_fh.fh_dentry)
+ return nfserr_nofilehandle;
+
+ *getfh = &cstate->current_fh;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_putfh *putfh)
+{
+ fh_put(&cstate->current_fh);
+ cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
+ memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
+ putfh->pf_fhlen);
+ return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+}
+
+static __be32
+nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ void *arg)
+{
+ __be32 status;
+
+ fh_put(&cstate->current_fh);
+ status = exp_pseudoroot(rqstp, &cstate->current_fh);
+ return status;
+}
+
+static __be32
+nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ void *arg)
+{
+ if (!cstate->save_fh.fh_dentry)
+ return nfserr_restorefh;
+
+ fh_dup2(&cstate->current_fh, &cstate->save_fh);
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ void *arg)
+{
+ if (!cstate->current_fh.fh_dentry)
+ return nfserr_nofilehandle;
+
+ fh_dup2(&cstate->save_fh, &cstate->current_fh);
+ return nfs_ok;
+}
+
+/*
+ * misc nfsv4 ops
+ */
+static __be32
+nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_access *access)
+{
+ if (access->ac_req_access & ~NFS3_ACCESS_FULL)
+ return nfserr_inval;
+
+ access->ac_resp_access = access->ac_req_access;
+ return nfsd_access(rqstp, &cstate->current_fh, &access->ac_resp_access,
+ &access->ac_supported);
+}
+
+static __be32
+nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_commit *commit)
+{
+ u32 *p = (u32 *)commit->co_verf.data;
+ *p++ = nfssvc_boot.tv_sec;
+ *p++ = nfssvc_boot.tv_usec;
+
+ return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
+ commit->co_count);
+}
+
+static __be32
+nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_create *create)
+{
+ struct svc_fh resfh;
+ __be32 status;
+ dev_t rdev;
+
+ fh_init(&resfh, NFS4_FHSIZE);
+
+ status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
+ NFSD_MAY_CREATE);
+ if (status)
+ return status;
+
+ status = check_attr_support(rqstp, cstate, create->cr_bmval,
+ nfsd_attrmask);
+ if (status)
+ return status;
+
+ switch (create->cr_type) {
+ case NF4LNK:
+ /* ugh! we have to null-terminate the linktext, or
+ * vfs_symlink() will choke. it is always safe to
+ * null-terminate by brute force, since at worst we
+ * will overwrite the first byte of the create namelen
+ * in the XDR buffer, which has already been extracted
+ * during XDR decode.
+ */
+ create->cr_linkname[create->cr_linklen] = 0;
+
+ status = nfsd_symlink(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+ create->cr_linkname, create->cr_linklen,
+ &resfh, &create->cr_iattr);
+ break;
+
+ case NF4BLK:
+ rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
+ if (MAJOR(rdev) != create->cr_specdata1 ||
+ MINOR(rdev) != create->cr_specdata2)
+ return nfserr_inval;
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+ &create->cr_iattr, S_IFBLK, rdev, &resfh);
+ break;
+
+ case NF4CHR:
+ rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
+ if (MAJOR(rdev) != create->cr_specdata1 ||
+ MINOR(rdev) != create->cr_specdata2)
+ return nfserr_inval;
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+ &create->cr_iattr,S_IFCHR, rdev, &resfh);
+ break;
+
+ case NF4SOCK:
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+ &create->cr_iattr, S_IFSOCK, 0, &resfh);
+ break;
+
+ case NF4FIFO:
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+ &create->cr_iattr, S_IFIFO, 0, &resfh);
+ break;
+
+ case NF4DIR:
+ create->cr_iattr.ia_valid &= ~ATTR_SIZE;
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+ &create->cr_iattr, S_IFDIR, 0, &resfh);
+ break;
+
+ default:
+ status = nfserr_badtype;
+ }
+
+ if (status)
+ goto out;
+
+ if (create->cr_acl != NULL)
+ do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
+ create->cr_bmval);
+
+ fh_unlock(&cstate->current_fh);
+ set_change_info(&create->cr_cinfo, &cstate->current_fh);
+ fh_dup2(&cstate->current_fh, &resfh);
+out:
+ fh_put(&resfh);
+ return status;
+}
+
+static __be32
+nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_getattr *getattr)
+{
+ __be32 status;
+
+ status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
+ if (status)
+ return status;
+
+ if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
+ return nfserr_inval;
+
+ getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion);
+ getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
+ getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
+
+ getattr->ga_fhp = &cstate->current_fh;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_link *link)
+{
+ __be32 status = nfserr_nofilehandle;
+
+ if (!cstate->save_fh.fh_dentry)
+ return status;
+ status = nfsd_link(rqstp, &cstate->current_fh,
+ link->li_name, link->li_namelen, &cstate->save_fh);
+ if (!status)
+ set_change_info(&link->li_cinfo, &cstate->current_fh);
+ return status;
+}
+
+static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
+{
+ struct svc_fh tmp_fh;
+ __be32 ret;
+
+ fh_init(&tmp_fh, NFS4_FHSIZE);
+ ret = exp_pseudoroot(rqstp, &tmp_fh);
+ if (ret)
+ return ret;
+ if (tmp_fh.fh_dentry == fh->fh_dentry) {
+ fh_put(&tmp_fh);
+ return nfserr_noent;
+ }
+ fh_put(&tmp_fh);
+ return nfsd_lookup(rqstp, fh, "..", 2, fh);
+}
+
+static __be32
+nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ void *arg)
+{
+ return nfsd4_do_lookupp(rqstp, &cstate->current_fh);
+}
+
+static __be32
+nfsd4_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_lookup *lookup)
+{
+ return nfsd_lookup(rqstp, &cstate->current_fh,
+ lookup->lo_name, lookup->lo_len,
+ &cstate->current_fh);
+}
+
+static __be32
+nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_read *read)
+{
+ __be32 status;
+
+ /* no need to check permission - this will be done in nfsd_read() */
+
+ read->rd_filp = NULL;
+ if (read->rd_offset >= OFFSET_MAX)
+ return nfserr_inval;
+
+ nfs4_lock_state();
+ /* check stateid */
+ if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid,
+ RD_STATE, &read->rd_filp))) {
+ dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
+ goto out;
+ }
+ if (read->rd_filp)
+ get_file(read->rd_filp);
+ status = nfs_ok;
+out:
+ nfs4_unlock_state();
+ read->rd_rqstp = rqstp;
+ read->rd_fhp = &cstate->current_fh;
+ return status;
+}
+
+static __be32
+nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_readdir *readdir)
+{
+ u64 cookie = readdir->rd_cookie;
+ static const nfs4_verifier zeroverf;
+
+ /* no need to check permission - this will be done in nfsd_readdir() */
+
+ if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
+ return nfserr_inval;
+
+ readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion);
+ readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
+ readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
+
+ if ((cookie == 1) || (cookie == 2) ||
+ (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE)))
+ return nfserr_bad_cookie;
+
+ readdir->rd_rqstp = rqstp;
+ readdir->rd_fhp = &cstate->current_fh;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_readlink(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_readlink *readlink)
+{
+ readlink->rl_rqstp = rqstp;
+ readlink->rl_fhp = &cstate->current_fh;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_remove *remove)
+{
+ __be32 status;
+
+ if (locks_in_grace())
+ return nfserr_grace;
+ status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
+ remove->rm_name, remove->rm_namelen);
+ if (!status) {
+ fh_unlock(&cstate->current_fh);
+ set_change_info(&remove->rm_cinfo, &cstate->current_fh);
+ }
+ return status;
+}
+
+static __be32
+nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_rename *rename)
+{
+ __be32 status = nfserr_nofilehandle;
+
+ if (!cstate->save_fh.fh_dentry)
+ return status;
+ if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags
+ & NFSEXP_NOSUBTREECHECK))
+ return nfserr_grace;
+ status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
+ rename->rn_snamelen, &cstate->current_fh,
+ rename->rn_tname, rename->rn_tnamelen);
+
+ /* the underlying filesystem returns different error's than required
+ * by NFSv4. both save_fh and current_fh have been verified.. */
+ if (status == nfserr_isdir)
+ status = nfserr_exist;
+ else if ((status == nfserr_notdir) &&
+ (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) &&
+ S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode)))
+ status = nfserr_exist;
+
+ if (!status) {
+ set_change_info(&rename->rn_sinfo, &cstate->current_fh);
+ set_change_info(&rename->rn_tinfo, &cstate->save_fh);
+ }
+ return status;
+}
+
+static __be32
+nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_secinfo *secinfo)
+{
+ struct svc_fh resfh;
+ struct svc_export *exp;
+ struct dentry *dentry;
+ __be32 err;
+
+ fh_init(&resfh, NFS4_FHSIZE);
+ err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC);
+ if (err)
+ return err;
+ err = nfsd_lookup_dentry(rqstp, &cstate->current_fh,
+ secinfo->si_name, secinfo->si_namelen,
+ &exp, &dentry);
+ if (err)
+ return err;
+ if (dentry->d_inode == NULL) {
+ exp_put(exp);
+ err = nfserr_noent;
+ } else
+ secinfo->si_exp = exp;
+ dput(dentry);
+ if (cstate->minorversion)
+ /* See rfc 5661 section 2.6.3.1.1.8 */
+ fh_put(&cstate->current_fh);
+ return err;
+}
+
+static __be32
+nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_secinfo_no_name *sin)
+{
+ __be32 err;
+
+ switch (sin->sin_style) {
+ case NFS4_SECINFO_STYLE4_CURRENT_FH:
+ break;
+ case NFS4_SECINFO_STYLE4_PARENT:
+ err = nfsd4_do_lookupp(rqstp, &cstate->current_fh);
+ if (err)
+ return err;
+ break;
+ default:
+ return nfserr_inval;
+ }
+ exp_get(cstate->current_fh.fh_export);
+ sin->sin_exp = cstate->current_fh.fh_export;
+ fh_put(&cstate->current_fh);
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_setattr *setattr)
+{
+ __be32 status = nfs_ok;
+
+ if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+ nfs4_lock_state();
+ status = nfs4_preprocess_stateid_op(cstate,
+ &setattr->sa_stateid, WR_STATE, NULL);
+ nfs4_unlock_state();
+ if (status) {
+ dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
+ return status;
+ }
+ }
+ status = fh_want_write(&cstate->current_fh);
+ if (status)
+ return status;
+ status = nfs_ok;
+
+ status = check_attr_support(rqstp, cstate, setattr->sa_bmval,
+ nfsd_attrmask);
+ if (status)
+ goto out;
+
+ if (setattr->sa_acl != NULL)
+ status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
+ setattr->sa_acl);
+ if (status)
+ goto out;
+ status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
+ 0, (time_t)0);
+out:
+ fh_drop_write(&cstate->current_fh);
+ return status;
+}
+
+static __be32
+nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_write *write)
+{
+ stateid_t *stateid = &write->wr_stateid;
+ struct file *filp = NULL;
+ u32 *p;
+ __be32 status = nfs_ok;
+ unsigned long cnt;
+
+ /* no need to check permission - this will be done in nfsd_write() */
+
+ if (write->wr_offset >= OFFSET_MAX)
+ return nfserr_inval;
+
+ nfs4_lock_state();
+ status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp);
+ if (filp)
+ get_file(filp);
+ nfs4_unlock_state();
+
+ if (status) {
+ dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
+ return status;
+ }
+
+ cnt = write->wr_buflen;
+ write->wr_how_written = write->wr_stable_how;
+ p = (u32 *)write->wr_verifier.data;
+ *p++ = nfssvc_boot.tv_sec;
+ *p++ = nfssvc_boot.tv_usec;
+
+ status = nfsd_write(rqstp, &cstate->current_fh, filp,
+ write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+ &cnt, &write->wr_how_written);
+ if (filp)
+ fput(filp);
+
+ write->wr_bytes_written = cnt;
+
+ return status;
+}
+
+/* This routine never returns NFS_OK! If there are no other errors, it
+ * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
+ * attributes matched. VERIFY is implemented by mapping NFSERR_SAME
+ * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK.
+ */
+static __be32
+_nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_verify *verify)
+{
+ __be32 *buf, *p;
+ int count;
+ __be32 status;
+
+ status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
+ if (status)
+ return status;
+
+ status = check_attr_support(rqstp, cstate, verify->ve_bmval, NULL);
+ if (status)
+ return status;
+
+ if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)
+ || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1))
+ return nfserr_inval;
+ if (verify->ve_attrlen & 3)
+ return nfserr_inval;
+
+ /* count in words:
+ * bitmap_len(1) + bitmap(2) + attr_len(1) = 4
+ */
+ count = 4 + (verify->ve_attrlen >> 2);
+ buf = kmalloc(count << 2, GFP_KERNEL);
+ if (!buf)
+ return nfserr_jukebox;
+
+ status = nfsd4_encode_fattr(&cstate->current_fh,
+ cstate->current_fh.fh_export,
+ cstate->current_fh.fh_dentry, buf,
+ &count, verify->ve_bmval,
+ rqstp, 0);
+
+ /* this means that nfsd4_encode_fattr() ran out of space */
+ if (status == nfserr_resource && count == 0)
+ status = nfserr_not_same;
+ if (status)
+ goto out_kfree;
+
+ /* skip bitmap */
+ p = buf + 1 + ntohl(buf[0]);
+ status = nfserr_not_same;
+ if (ntohl(*p++) != verify->ve_attrlen)
+ goto out_kfree;
+ if (!memcmp(p, verify->ve_attrval, verify->ve_attrlen))
+ status = nfserr_same;
+
+out_kfree:
+ kfree(buf);
+ return status;
+}
+
+static __be32
+nfsd4_nverify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_verify *verify)
+{
+ __be32 status;
+
+ status = _nfsd4_verify(rqstp, cstate, verify);
+ return status == nfserr_not_same ? nfs_ok : status;
+}
+
+static __be32
+nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_verify *verify)
+{
+ __be32 status;
+
+ status = _nfsd4_verify(rqstp, cstate, verify);
+ return status == nfserr_same ? nfs_ok : status;
+}
+
+/*
+ * NULL call.
+ */
+static __be32
+nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ return nfs_ok;
+}
+
+static inline void nfsd4_increment_op_stats(u32 opnum)
+{
+ if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
+ nfsdstats.nfs4_opcount[opnum]++;
+}
+
+typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
+ void *);
+typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
+
+enum nfsd4_op_flags {
+ ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
+ ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
+ ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
+ /* For rfc 5661 section 2.6.3.1.1: */
+ OP_HANDLES_WRONGSEC = 1 << 3,
+ OP_IS_PUTFH_LIKE = 1 << 4,
+ /*
+ * These are the ops whose result size we estimate before
+ * encoding, to avoid performing an op then not being able to
+ * respond or cache a response. This includes writes and setattrs
+ * as well as the operations usually called "nonidempotent":
+ */
+ OP_MODIFIES_SOMETHING = 1 << 5,
+ /*
+ * Cache compounds containing these ops in the xid-based drc:
+ * We use the DRC for compounds containing non-idempotent
+ * operations, *except* those that are 4.1-specific (since
+ * sessions provide their own EOS), and except for stateful
+ * operations other than setclientid and setclientid_confirm
+ * (since sequence numbers provide EOS for open, lock, etc in
+ * the v4.0 case).
+ */
+ OP_CACHEME = 1 << 6,
+};
+
+struct nfsd4_operation {
+ nfsd4op_func op_func;
+ u32 op_flags;
+ char *op_name;
+ /* Try to get response size before operation */
+ nfsd4op_rsize op_rsize_bop;
+};
+
+static struct nfsd4_operation nfsd4_ops[];
+
+static const char *nfsd4_op_name(unsigned opnum);
+
+/*
+ * Enforce NFSv4.1 COMPOUND ordering rules:
+ *
+ * Also note, enforced elsewhere:
+ * - SEQUENCE other than as first op results in
+ * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
+ * - BIND_CONN_TO_SESSION must be the only op in its compound.
+ * (Enforced in nfsd4_bind_conn_to_session().)
+ * - DESTROY_SESSION must be the final operation in a compound, if
+ * sessionid's in SEQUENCE and DESTROY_SESSION are the same.
+ * (Enforced in nfsd4_destroy_session().)
+ */
+static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
+{
+ struct nfsd4_op *op = &args->ops[0];
+
+ /* These ordering requirements don't apply to NFSv4.0: */
+ if (args->minorversion == 0)
+ return nfs_ok;
+ /* This is weird, but OK, not our problem: */
+ if (args->opcnt == 0)
+ return nfs_ok;
+ if (op->status == nfserr_op_illegal)
+ return nfs_ok;
+ if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
+ return nfserr_op_not_in_session;
+ if (op->opnum == OP_SEQUENCE)
+ return nfs_ok;
+ if (args->opcnt != 1)
+ return nfserr_not_only_op;
+ return nfs_ok;
+}
+
+static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
+{
+ return &nfsd4_ops[op->opnum];
+}
+
+bool nfsd4_cache_this_op(struct nfsd4_op *op)
+{
+ return OPDESC(op)->op_flags & OP_CACHEME;
+}
+
+static bool need_wrongsec_check(struct svc_rqst *rqstp)
+{
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+ struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
+ struct nfsd4_op *next = &argp->ops[resp->opcnt];
+ struct nfsd4_operation *thisd;
+ struct nfsd4_operation *nextd;
+
+ thisd = OPDESC(this);
+ /*
+ * Most ops check wronsec on our own; only the putfh-like ops
+ * have special rules.
+ */
+ if (!(thisd->op_flags & OP_IS_PUTFH_LIKE))
+ return false;
+ /*
+ * rfc 5661 2.6.3.1.1.6: don't bother erroring out a
+ * put-filehandle operation if we're not going to use the
+ * result:
+ */
+ if (argp->opcnt == resp->opcnt)
+ return false;
+
+ nextd = OPDESC(next);
+ /*
+ * Rest of 2.6.3.1.1: certain operations will return WRONGSEC
+ * errors themselves as necessary; others should check for them
+ * now:
+ */
+ return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
+}
+
+/*
+ * COMPOUND call.
+ */
+static __be32
+nfsd4_proc_compound(struct svc_rqst *rqstp,
+ struct nfsd4_compoundargs *args,
+ struct nfsd4_compoundres *resp)
+{
+ struct nfsd4_op *op;
+ struct nfsd4_operation *opdesc;
+ struct nfsd4_compound_state *cstate = &resp->cstate;
+ int slack_bytes;
+ u32 plen = 0;
+ __be32 status;
+
+ resp->xbuf = &rqstp->rq_res;
+ resp->p = rqstp->rq_res.head[0].iov_base +
+ rqstp->rq_res.head[0].iov_len;
+ resp->tagp = resp->p;
+ /* reserve space for: taglen, tag, and opcnt */
+ resp->p += 2 + XDR_QUADLEN(args->taglen);
+ resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE;
+ resp->taglen = args->taglen;
+ resp->tag = args->tag;
+ resp->opcnt = 0;
+ resp->rqstp = rqstp;
+ resp->cstate.minorversion = args->minorversion;
+ resp->cstate.replay_owner = NULL;
+ resp->cstate.session = NULL;
+ fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
+ fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
+ /*
+ * Don't use the deferral mechanism for NFSv4; compounds make it
+ * too hard to avoid non-idempotency problems.
+ */
+ rqstp->rq_usedeferral = 0;
+
+ /*
+ * According to RFC3010, this takes precedence over all other errors.
+ */
+ status = nfserr_minor_vers_mismatch;
+ if (args->minorversion > nfsd_supported_minorversion)
+ goto out;
+
+ status = nfs41_check_op_ordering(args);
+ if (status) {
+ op = &args->ops[0];
+ op->status = status;
+ goto encode_op;
+ }
+
+ while (!status && resp->opcnt < args->opcnt) {
+ op = &args->ops[resp->opcnt++];
+
+ dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
+ resp->opcnt, args->opcnt, op->opnum,
+ nfsd4_op_name(op->opnum));
+ /*
+ * The XDR decode routines may have pre-set op->status;
+ * for example, if there is a miscellaneous XDR error
+ * it will be set to nfserr_bad_xdr.
+ */
+ if (op->status)
+ goto encode_op;
+
+ /* We must be able to encode a successful response to
+ * this operation, with enough room left over to encode a
+ * failed response to the next operation. If we don't
+ * have enough room, fail with ERR_RESOURCE.
+ */
+ slack_bytes = (char *)resp->end - (char *)resp->p;
+ if (slack_bytes < COMPOUND_SLACK_SPACE
+ + COMPOUND_ERR_SLACK_SPACE) {
+ BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE);
+ op->status = nfserr_resource;
+ goto encode_op;
+ }
+
+ opdesc = OPDESC(op);
+
+ if (!cstate->current_fh.fh_dentry) {
+ if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
+ op->status = nfserr_nofilehandle;
+ goto encode_op;
+ }
+ } else if (cstate->current_fh.fh_export->ex_fslocs.migrated &&
+ !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
+ op->status = nfserr_moved;
+ goto encode_op;
+ }
+
+ /* If op is non-idempotent */
+ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
+ plen = opdesc->op_rsize_bop(rqstp, op);
+ op->status = nfsd4_check_resp_size(resp, plen);
+ }
+
+ if (op->status)
+ goto encode_op;
+
+ if (opdesc->op_func)
+ op->status = opdesc->op_func(rqstp, cstate, &op->u);
+ else
+ BUG_ON(op->status == nfs_ok);
+
+ if (!op->status && need_wrongsec_check(rqstp))
+ op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
+
+encode_op:
+ /* Only from SEQUENCE */
+ if (resp->cstate.status == nfserr_replay_cache) {
+ dprintk("%s NFS4.1 replay from cache\n", __func__);
+ status = op->status;
+ goto out;
+ }
+ if (op->status == nfserr_replay_me) {
+ op->replay = &cstate->replay_owner->so_replay;
+ nfsd4_encode_replay(resp, op);
+ status = op->status = op->replay->rp_status;
+ } else {
+ nfsd4_encode_operation(resp, op);
+ status = op->status;
+ }
+
+ dprintk("nfsv4 compound op %p opcnt %d #%d: %d: status %d\n",
+ args->ops, args->opcnt, resp->opcnt, op->opnum,
+ be32_to_cpu(status));
+
+ if (cstate->replay_owner) {
+ nfs4_unlock_state();
+ cstate->replay_owner = NULL;
+ }
+ /* XXX Ugh, we need to get rid of this kind of special case: */
+ if (op->opnum == OP_READ && op->u.read.rd_filp)
+ fput(op->u.read.rd_filp);
+
+ nfsd4_increment_op_stats(op->opnum);
+ }
+
+ resp->cstate.status = status;
+ fh_put(&resp->cstate.current_fh);
+ fh_put(&resp->cstate.save_fh);
+ BUG_ON(resp->cstate.replay_owner);
+out:
+ /* Reset deferral mechanism for RPC deferrals */
+ rqstp->rq_usedeferral = 1;
+ dprintk("nfsv4 compound returned %d\n", ntohl(status));
+ return status;
+}
+
+#define op_encode_hdr_size (2)
+#define op_encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define op_encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE))
+#define op_encode_change_info_maxsz (5)
+#define nfs4_fattr_bitmap_maxsz (4)
+
+#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz)
+
+#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+
+#define op_encode_ace_maxsz (3 + nfs4_owner_maxsz)
+#define op_encode_delegation_maxsz (1 + op_encode_stateid_maxsz + 1 + \
+ op_encode_ace_maxsz)
+
+#define op_encode_channel_attrs_maxsz (6 + 1 + 1)
+
+static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
+}
+
+static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz
+ + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+}
+
+static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_lock_denied_maxsz)
+ * sizeof(__be32);
+}
+
+static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_stateid_maxsz
+ + op_encode_change_info_maxsz + 1
+ + nfs4_fattr_bitmap_maxsz
+ + op_encode_delegation_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 maxcount = 0, rlen = 0;
+
+ maxcount = svc_max_payload(rqstp);
+ rlen = op->u.read.rd_length;
+
+ if (rlen > maxcount)
+ rlen = maxcount;
+
+ return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen;
+}
+
+static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 rlen = op->u.readdir.rd_maxcount;
+
+ if (rlen > PAGE_SIZE)
+ rlen = PAGE_SIZE;
+
+ return (op_encode_hdr_size + op_encode_verifier_maxsz)
+ * sizeof(__be32) + rlen;
+}
+
+static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+}
+
+static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz
+ + op_encode_change_info_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
+ 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\
+ 2 + /*eir_server_owner.so_minor_id */\
+ /* eir_server_owner.so_major_id<> */\
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
+ /* eir_server_scope<> */\
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
+ 1 + /* eir_server_impl_id array length */\
+ 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\
+ 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\
+ 2 + /* csr_sequence, csr_flags */\
+ op_encode_channel_attrs_maxsz + \
+ op_encode_channel_attrs_maxsz) * sizeof(__be32);
+}
+
+static struct nfsd4_operation nfsd4_ops[] = {
+ [OP_ACCESS] = {
+ .op_func = (nfsd4op_func)nfsd4_access,
+ .op_name = "OP_ACCESS",
+ },
+ [OP_CLOSE] = {
+ .op_func = (nfsd4op_func)nfsd4_close,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_CLOSE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+ },
+ [OP_COMMIT] = {
+ .op_func = (nfsd4op_func)nfsd4_commit,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_COMMIT",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize,
+ },
+ [OP_CREATE] = {
+ .op_func = (nfsd4op_func)nfsd4_create,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_CREATE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
+ },
+ [OP_DELEGRETURN] = {
+ .op_func = (nfsd4op_func)nfsd4_delegreturn,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_DELEGRETURN",
+ .op_rsize_bop = nfsd4_only_status_rsize,
+ },
+ [OP_GETATTR] = {
+ .op_func = (nfsd4op_func)nfsd4_getattr,
+ .op_flags = ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_GETATTR",
+ },
+ [OP_GETFH] = {
+ .op_func = (nfsd4op_func)nfsd4_getfh,
+ .op_name = "OP_GETFH",
+ },
+ [OP_LINK] = {
+ .op_func = (nfsd4op_func)nfsd4_link,
+ .op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING
+ | OP_CACHEME,
+ .op_name = "OP_LINK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize,
+ },
+ [OP_LOCK] = {
+ .op_func = (nfsd4op_func)nfsd4_lock,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_LOCK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
+ },
+ [OP_LOCKT] = {
+ .op_func = (nfsd4op_func)nfsd4_lockt,
+ .op_name = "OP_LOCKT",
+ },
+ [OP_LOCKU] = {
+ .op_func = (nfsd4op_func)nfsd4_locku,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_LOCKU",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+ },
+ [OP_LOOKUP] = {
+ .op_func = (nfsd4op_func)nfsd4_lookup,
+ .op_flags = OP_HANDLES_WRONGSEC,
+ .op_name = "OP_LOOKUP",
+ },
+ [OP_LOOKUPP] = {
+ .op_func = (nfsd4op_func)nfsd4_lookupp,
+ .op_flags = OP_HANDLES_WRONGSEC,
+ .op_name = "OP_LOOKUPP",
+ },
+ [OP_NVERIFY] = {
+ .op_func = (nfsd4op_func)nfsd4_nverify,
+ .op_name = "OP_NVERIFY",
+ },
+ [OP_OPEN] = {
+ .op_func = (nfsd4op_func)nfsd4_open,
+ .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_OPEN",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
+ },
+ [OP_OPEN_CONFIRM] = {
+ .op_func = (nfsd4op_func)nfsd4_open_confirm,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_OPEN_CONFIRM",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+ },
+ [OP_OPEN_DOWNGRADE] = {
+ .op_func = (nfsd4op_func)nfsd4_open_downgrade,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_OPEN_DOWNGRADE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+ },
+ [OP_PUTFH] = {
+ .op_func = (nfsd4op_func)nfsd4_putfh,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_PUTFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_PUTPUBFH] = {
+ .op_func = (nfsd4op_func)nfsd4_putrootfh,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_PUTPUBFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_PUTROOTFH] = {
+ .op_func = (nfsd4op_func)nfsd4_putrootfh,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_PUTROOTFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_READ] = {
+ .op_func = (nfsd4op_func)nfsd4_read,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_READ",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
+ },
+ [OP_READDIR] = {
+ .op_func = (nfsd4op_func)nfsd4_readdir,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_READDIR",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
+ },
+ [OP_READLINK] = {
+ .op_func = (nfsd4op_func)nfsd4_readlink,
+ .op_name = "OP_READLINK",
+ },
+ [OP_REMOVE] = {
+ .op_func = (nfsd4op_func)nfsd4_remove,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_REMOVE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize,
+ },
+ [OP_RENAME] = {
+ .op_func = (nfsd4op_func)nfsd4_rename,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_RENAME",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize,
+ },
+ [OP_RENEW] = {
+ .op_func = (nfsd4op_func)nfsd4_renew,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_RENEW",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+
+ },
+ [OP_RESTOREFH] = {
+ .op_func = (nfsd4op_func)nfsd4_restorefh,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_RESTOREFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_SAVEFH] = {
+ .op_func = (nfsd4op_func)nfsd4_savefh,
+ .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_SAVEFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_SECINFO] = {
+ .op_func = (nfsd4op_func)nfsd4_secinfo,
+ .op_flags = OP_HANDLES_WRONGSEC,
+ .op_name = "OP_SECINFO",
+ },
+ [OP_SETATTR] = {
+ .op_func = (nfsd4op_func)nfsd4_setattr,
+ .op_name = "OP_SETATTR",
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
+ },
+ [OP_SETCLIENTID] = {
+ .op_func = (nfsd4op_func)nfsd4_setclientid,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_SETCLIENTID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize,
+ },
+ [OP_SETCLIENTID_CONFIRM] = {
+ .op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_SETCLIENTID_CONFIRM",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_VERIFY] = {
+ .op_func = (nfsd4op_func)nfsd4_verify,
+ .op_name = "OP_VERIFY",
+ },
+ [OP_WRITE] = {
+ .op_func = (nfsd4op_func)nfsd4_write,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_WRITE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+ },
+ [OP_RELEASE_LOCKOWNER] = {
+ .op_func = (nfsd4op_func)nfsd4_release_lockowner,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_RELEASE_LOCKOWNER",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+
+ /* NFSv4.1 operations */
+ [OP_EXCHANGE_ID] = {
+ .op_func = (nfsd4op_func)nfsd4_exchange_id,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_EXCHANGE_ID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
+ },
+ [OP_BIND_CONN_TO_SESSION] = {
+ .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_BIND_CONN_TO_SESSION",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize,
+ },
+ [OP_CREATE_SESSION] = {
+ .op_func = (nfsd4op_func)nfsd4_create_session,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_CREATE_SESSION",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize,
+ },
+ [OP_DESTROY_SESSION] = {
+ .op_func = (nfsd4op_func)nfsd4_destroy_session,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_DESTROY_SESSION",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_SEQUENCE] = {
+ .op_func = (nfsd4op_func)nfsd4_sequence,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ .op_name = "OP_SEQUENCE",
+ },
+ [OP_DESTROY_CLIENTID] = {
+ .op_func = (nfsd4op_func)nfsd4_destroy_clientid,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_DESTROY_CLIENTID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_RECLAIM_COMPLETE] = {
+ .op_func = (nfsd4op_func)nfsd4_reclaim_complete,
+ .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_RECLAIM_COMPLETE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+ [OP_SECINFO_NO_NAME] = {
+ .op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
+ .op_flags = OP_HANDLES_WRONGSEC,
+ .op_name = "OP_SECINFO_NO_NAME",
+ },
+ [OP_TEST_STATEID] = {
+ .op_func = (nfsd4op_func)nfsd4_test_stateid,
+ .op_flags = ALLOWED_WITHOUT_FH,
+ .op_name = "OP_TEST_STATEID",
+ },
+ [OP_FREE_STATEID] = {
+ .op_func = (nfsd4op_func)nfsd4_free_stateid,
+ .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
+ .op_name = "OP_FREE_STATEID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
+};
+
+static const char *nfsd4_op_name(unsigned opnum)
+{
+ if (opnum < ARRAY_SIZE(nfsd4_ops))
+ return nfsd4_ops[opnum].op_name;
+ return "unknown_operation";
+}
+
+#define nfsd4_voidres nfsd4_voidargs
+struct nfsd4_voidargs { int dummy; };
+
+static struct svc_procedure nfsd_procedures4[2] = {
+ [NFSPROC4_NULL] = {
+ .pc_func = (svc_procfunc) nfsd4_proc_null,
+ .pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd4_voidargs),
+ .pc_ressize = sizeof(struct nfsd4_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 1,
+ },
+ [NFSPROC4_COMPOUND] = {
+ .pc_func = (svc_procfunc) nfsd4_proc_compound,
+ .pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
+ .pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
+ .pc_argsize = sizeof(struct nfsd4_compoundargs),
+ .pc_ressize = sizeof(struct nfsd4_compoundres),
+ .pc_release = nfsd4_release_compoundargs,
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = NFSD_BUFSIZE/4,
+ },
+};
+
+struct svc_version nfsd_version4 = {
+ .vs_vers = 4,
+ .vs_nproc = 2,
+ .vs_proc = nfsd_procedures4,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS4_SVC_XDRSIZE,
+};
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
new file mode 100644
index 00000000000..0b3e875d1ab
--- /dev/null
+++ b/fs/nfsd/nfs4recover.c
@@ -0,0 +1,427 @@
+/*
+* Copyright (c) 2004 The Regents of the University of Michigan.
+* All rights reserved.
+*
+* Andy Adamson <andros@citi.umich.edu>
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* 1. Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* 2. Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution.
+* 3. Neither the name of the University nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/crypto.h>
+#include <linux/sched.h>
+
+#include "nfsd.h"
+#include "state.h"
+#include "vfs.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+/* Globals */
+static struct file *rec_file;
+static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+
+static int
+nfs4_save_creds(const struct cred **original_creds)
+{
+ struct cred *new;
+
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ new->fsuid = 0;
+ new->fsgid = 0;
+ *original_creds = override_creds(new);
+ put_cred(new);
+ return 0;
+}
+
+static void
+nfs4_reset_creds(const struct cred *original)
+{
+ revert_creds(original);
+}
+
+static void
+md5_to_hex(char *out, char *md5)
+{
+ int i;
+
+ for (i=0; i<16; i++) {
+ unsigned char c = md5[i];
+
+ *out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
+ *out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
+ }
+ *out = '\0';
+}
+
+__be32
+nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
+{
+ struct xdr_netobj cksum;
+ struct hash_desc desc;
+ struct scatterlist sg;
+ __be32 status = nfserr_jukebox;
+
+ dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
+ clname->len, clname->data);
+ desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(desc.tfm))
+ goto out_no_tfm;
+ cksum.len = crypto_hash_digestsize(desc.tfm);
+ cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+ if (cksum.data == NULL)
+ goto out;
+
+ sg_init_one(&sg, clname->data, clname->len);
+
+ if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data))
+ goto out;
+
+ md5_to_hex(dname, cksum.data);
+
+ status = nfs_ok;
+out:
+ kfree(cksum.data);
+ crypto_free_hash(desc.tfm);
+out_no_tfm:
+ return status;
+}
+
+void nfsd4_create_clid_dir(struct nfs4_client *clp)
+{
+ const struct cred *original_cred;
+ char *dname = clp->cl_recdir;
+ struct dentry *dir, *dentry;
+ int status;
+
+ dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
+
+ if (clp->cl_firststate)
+ return;
+ clp->cl_firststate = 1;
+ if (!rec_file)
+ return;
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ return;
+
+ dir = rec_file->f_path.dentry;
+ /* lock the parent */
+ mutex_lock(&dir->d_inode->i_mutex);
+
+ dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
+ if (IS_ERR(dentry)) {
+ status = PTR_ERR(dentry);
+ goto out_unlock;
+ }
+ if (dentry->d_inode)
+ /*
+ * In the 4.1 case, where we're called from
+ * reclaim_complete(), records from the previous reboot
+ * may still be left, so this is OK.
+ *
+ * In the 4.0 case, we should never get here; but we may
+ * as well be forgiving and just succeed silently.
+ */
+ goto out_put;
+ status = mnt_want_write_file(rec_file);
+ if (status)
+ goto out_put;
+ status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
+ mnt_drop_write_file(rec_file);
+out_put:
+ dput(dentry);
+out_unlock:
+ mutex_unlock(&dir->d_inode->i_mutex);
+ if (status == 0)
+ vfs_fsync(rec_file, 0);
+ else
+ printk(KERN_ERR "NFSD: failed to write recovery record"
+ " (err %d); please check that %s exists"
+ " and is writeable", status,
+ user_recovery_dirname);
+ nfs4_reset_creds(original_cred);
+}
+
+typedef int (recdir_func)(struct dentry *, struct dentry *);
+
+struct name_list {
+ char name[HEXDIR_LEN];
+ struct list_head list;
+};
+
+static int
+nfsd4_build_namelist(void *arg, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct list_head *names = arg;
+ struct name_list *entry;
+
+ if (namlen != HEXDIR_LEN - 1)
+ return 0;
+ entry = kmalloc(sizeof(struct name_list), GFP_KERNEL);
+ if (entry == NULL)
+ return -ENOMEM;
+ memcpy(entry->name, name, HEXDIR_LEN - 1);
+ entry->name[HEXDIR_LEN - 1] = '\0';
+ list_add(&entry->list, names);
+ return 0;
+}
+
+static int
+nfsd4_list_rec_dir(recdir_func *f)
+{
+ const struct cred *original_cred;
+ struct dentry *dir = rec_file->f_path.dentry;
+ LIST_HEAD(names);
+ int status;
+
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ return status;
+
+ status = vfs_llseek(rec_file, 0, SEEK_SET);
+ if (status < 0) {
+ nfs4_reset_creds(original_cred);
+ return status;
+ }
+
+ status = vfs_readdir(rec_file, nfsd4_build_namelist, &names);
+ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+ while (!list_empty(&names)) {
+ struct name_list *entry;
+ entry = list_entry(names.next, struct name_list, list);
+ if (!status) {
+ struct dentry *dentry;
+ dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
+ if (IS_ERR(dentry)) {
+ status = PTR_ERR(dentry);
+ break;
+ }
+ status = f(dir, dentry);
+ dput(dentry);
+ }
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ mutex_unlock(&dir->d_inode->i_mutex);
+ nfs4_reset_creds(original_cred);
+ return status;
+}
+
+static int
+nfsd4_unlink_clid_dir(char *name, int namlen)
+{
+ struct dentry *dir, *dentry;
+ int status;
+
+ dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+
+ dir = rec_file->f_path.dentry;
+ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+ dentry = lookup_one_len(name, dir, namlen);
+ if (IS_ERR(dentry)) {
+ status = PTR_ERR(dentry);
+ goto out_unlock;
+ }
+ status = -ENOENT;
+ if (!dentry->d_inode)
+ goto out;
+ status = vfs_rmdir(dir->d_inode, dentry);
+out:
+ dput(dentry);
+out_unlock:
+ mutex_unlock(&dir->d_inode->i_mutex);
+ return status;
+}
+
+void
+nfsd4_remove_clid_dir(struct nfs4_client *clp)
+{
+ const struct cred *original_cred;
+ int status;
+
+ if (!rec_file || !clp->cl_firststate)
+ return;
+
+ status = mnt_want_write_file(rec_file);
+ if (status)
+ goto out;
+ clp->cl_firststate = 0;
+
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ goto out;
+
+ status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
+ nfs4_reset_creds(original_cred);
+ if (status == 0)
+ vfs_fsync(rec_file, 0);
+ mnt_drop_write_file(rec_file);
+out:
+ if (status)
+ printk("NFSD: Failed to remove expired client state directory"
+ " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
+ return;
+}
+
+static int
+purge_old(struct dentry *parent, struct dentry *child)
+{
+ int status;
+
+ if (nfs4_has_reclaimed_state(child->d_name.name, false))
+ return 0;
+
+ status = vfs_rmdir(parent->d_inode, child);
+ if (status)
+ printk("failed to remove client recovery directory %s\n",
+ child->d_name.name);
+ /* Keep trying, success or failure: */
+ return 0;
+}
+
+void
+nfsd4_recdir_purge_old(void) {
+ int status;
+
+ if (!rec_file)
+ return;
+ status = mnt_want_write_file(rec_file);
+ if (status)
+ goto out;
+ status = nfsd4_list_rec_dir(purge_old);
+ if (status == 0)
+ vfs_fsync(rec_file, 0);
+ mnt_drop_write_file(rec_file);
+out:
+ if (status)
+ printk("nfsd4: failed to purge old clients from recovery"
+ " directory %s\n", rec_file->f_path.dentry->d_name.name);
+}
+
+static int
+load_recdir(struct dentry *parent, struct dentry *child)
+{
+ if (child->d_name.len != HEXDIR_LEN - 1) {
+ printk("nfsd4: illegal name %s in recovery directory\n",
+ child->d_name.name);
+ /* Keep trying; maybe the others are OK: */
+ return 0;
+ }
+ nfs4_client_to_reclaim(child->d_name.name);
+ return 0;
+}
+
+int
+nfsd4_recdir_load(void) {
+ int status;
+
+ if (!rec_file)
+ return 0;
+
+ status = nfsd4_list_rec_dir(load_recdir);
+ if (status)
+ printk("nfsd4: failed loading clients from recovery"
+ " directory %s\n", rec_file->f_path.dentry->d_name.name);
+ return status;
+}
+
+/*
+ * Hold reference to the recovery directory.
+ */
+
+void
+nfsd4_init_recdir()
+{
+ const struct cred *original_cred;
+ int status;
+
+ printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
+ user_recovery_dirname);
+
+ BUG_ON(rec_file);
+
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0) {
+ printk("NFSD: Unable to change credentials to find recovery"
+ " directory: error %d\n",
+ status);
+ return;
+ }
+
+ rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
+ if (IS_ERR(rec_file)) {
+ printk("NFSD: unable to find recovery directory %s\n",
+ user_recovery_dirname);
+ rec_file = NULL;
+ }
+
+ nfs4_reset_creds(original_cred);
+}
+
+void
+nfsd4_shutdown_recdir(void)
+{
+ if (!rec_file)
+ return;
+ fput(rec_file);
+ rec_file = NULL;
+}
+
+/*
+ * Change the NFSv4 recovery directory to recdir.
+ */
+int
+nfs4_reset_recoverydir(char *recdir)
+{
+ int status;
+ struct path path;
+
+ status = kern_path(recdir, LOOKUP_FOLLOW, &path);
+ if (status)
+ return status;
+ status = -ENOTDIR;
+ if (S_ISDIR(path.dentry->d_inode->i_mode)) {
+ strcpy(user_recovery_dirname, recdir);
+ status = 0;
+ }
+ path_put(&path);
+ return status;
+}
+
+char *
+nfs4_recoverydir(void)
+{
+ return user_recovery_dirname;
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
new file mode 100644
index 00000000000..e8c98f00967
--- /dev/null
+++ b/fs/nfsd/nfs4state.c
@@ -0,0 +1,4693 @@
+/*
+* Copyright (c) 2001 The Regents of the University of Michigan.
+* All rights reserved.
+*
+* Kendrick Smith <kmsmith@umich.edu>
+* Andy Adamson <kandros@umich.edu>
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* 1. Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* 2. Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution.
+* 3. Neither the name of the University nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/clnt.h>
+#include "xdr4.h"
+#include "vfs.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+/* Globals */
+time_t nfsd4_lease = 90; /* default lease time */
+time_t nfsd4_grace = 90;
+static time_t boot_time;
+
+#define all_ones {{~0,~0},~0}
+static const stateid_t one_stateid = {
+ .si_generation = ~0,
+ .si_opaque = all_ones,
+};
+static const stateid_t zero_stateid = {
+ /* all fields zero */
+};
+
+static u64 current_sessionid = 1;
+
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
+#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
+
+/* forward declarations */
+static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
+
+/* Locking: */
+
+/* Currently used for almost all code touching nfsv4 state: */
+static DEFINE_MUTEX(client_mutex);
+
+/*
+ * Currently used for the del_recall_lru and file hash table. In an
+ * effort to decrease the scope of the client_mutex, this spinlock may
+ * eventually cover more:
+ */
+static DEFINE_SPINLOCK(recall_lock);
+
+static struct kmem_cache *openowner_slab = NULL;
+static struct kmem_cache *lockowner_slab = NULL;
+static struct kmem_cache *file_slab = NULL;
+static struct kmem_cache *stateid_slab = NULL;
+static struct kmem_cache *deleg_slab = NULL;
+
+void
+nfs4_lock_state(void)
+{
+ mutex_lock(&client_mutex);
+}
+
+void
+nfs4_unlock_state(void)
+{
+ mutex_unlock(&client_mutex);
+}
+
+static inline u32
+opaque_hashval(const void *ptr, int nbytes)
+{
+ unsigned char *cptr = (unsigned char *) ptr;
+
+ u32 x = 0;
+ while (nbytes--) {
+ x *= 37;
+ x += *cptr++;
+ }
+ return x;
+}
+
+static struct list_head del_recall_lru;
+
+static void nfsd4_free_file(struct nfs4_file *f)
+{
+ kmem_cache_free(file_slab, f);
+}
+
+static inline void
+put_nfs4_file(struct nfs4_file *fi)
+{
+ if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+ list_del(&fi->fi_hash);
+ spin_unlock(&recall_lock);
+ iput(fi->fi_inode);
+ nfsd4_free_file(fi);
+ }
+}
+
+static inline void
+get_nfs4_file(struct nfs4_file *fi)
+{
+ atomic_inc(&fi->fi_ref);
+}
+
+static int num_delegations;
+unsigned int max_delegations;
+
+/*
+ * Open owner state (share locks)
+ */
+
+/* hash tables for lock and open owners */
+#define OWNER_HASH_BITS 8
+#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
+#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
+
+static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
+{
+ unsigned int ret;
+
+ ret = opaque_hashval(ownername->data, ownername->len);
+ ret += clientid;
+ return ret & OWNER_HASH_MASK;
+}
+
+static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
+
+/* hash table for nfs4_file */
+#define FILE_HASH_BITS 8
+#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
+
+static unsigned int file_hashval(struct inode *ino)
+{
+ /* XXX: why are we hashing on inode pointer, anyway? */
+ return hash_ptr(ino, FILE_HASH_BITS);
+}
+
+static struct list_head file_hashtbl[FILE_HASH_SIZE];
+
+static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag)
+{
+ BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR]));
+ atomic_inc(&fp->fi_access[oflag]);
+}
+
+static void nfs4_file_get_access(struct nfs4_file *fp, int oflag)
+{
+ if (oflag == O_RDWR) {
+ __nfs4_file_get_access(fp, O_RDONLY);
+ __nfs4_file_get_access(fp, O_WRONLY);
+ } else
+ __nfs4_file_get_access(fp, oflag);
+}
+
+static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
+{
+ if (fp->fi_fds[oflag]) {
+ fput(fp->fi_fds[oflag]);
+ fp->fi_fds[oflag] = NULL;
+ }
+}
+
+static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
+{
+ if (atomic_dec_and_test(&fp->fi_access[oflag])) {
+ nfs4_file_put_fd(fp, oflag);
+ /*
+ * It's also safe to get rid of the RDWR open *if*
+ * we no longer have need of the other kind of access
+ * or if we already have the other kind of open:
+ */
+ if (fp->fi_fds[1-oflag]
+ || atomic_read(&fp->fi_access[1 - oflag]) == 0)
+ nfs4_file_put_fd(fp, O_RDWR);
+ }
+}
+
+static void nfs4_file_put_access(struct nfs4_file *fp, int oflag)
+{
+ if (oflag == O_RDWR) {
+ __nfs4_file_put_access(fp, O_RDONLY);
+ __nfs4_file_put_access(fp, O_WRONLY);
+ } else
+ __nfs4_file_put_access(fp, oflag);
+}
+
+static inline int get_new_stid(struct nfs4_stid *stid)
+{
+ static int min_stateid = 0;
+ struct idr *stateids = &stid->sc_client->cl_stateids;
+ int new_stid;
+ int error;
+
+ error = idr_get_new_above(stateids, stid, min_stateid, &new_stid);
+ /*
+ * Note: the necessary preallocation was done in
+ * nfs4_alloc_stateid(). The idr code caps the number of
+ * preallocations that can exist at a time, but the state lock
+ * prevents anyone from using ours before we get here:
+ */
+ BUG_ON(error);
+ /*
+ * It shouldn't be a problem to reuse an opaque stateid value.
+ * I don't think it is for 4.1. But with 4.0 I worry that, for
+ * example, a stray write retransmission could be accepted by
+ * the server when it should have been rejected. Therefore,
+ * adopt a trick from the sctp code to attempt to maximize the
+ * amount of time until an id is reused, by ensuring they always
+ * "increase" (mod INT_MAX):
+ */
+
+ min_stateid = new_stid+1;
+ if (min_stateid == INT_MAX)
+ min_stateid = 0;
+ return new_stid;
+}
+
+static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type)
+{
+ stateid_t *s = &stid->sc_stateid;
+ int new_id;
+
+ stid->sc_type = type;
+ stid->sc_client = cl;
+ s->si_opaque.so_clid = cl->cl_clientid;
+ new_id = get_new_stid(stid);
+ s->si_opaque.so_id = (u32)new_id;
+ /* Will be incremented before return to client: */
+ s->si_generation = 0;
+}
+
+static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab)
+{
+ struct idr *stateids = &cl->cl_stateids;
+
+ if (!idr_pre_get(stateids, GFP_KERNEL))
+ return NULL;
+ /*
+ * Note: if we fail here (or any time between now and the time
+ * we actually get the new idr), we won't need to undo the idr
+ * preallocation, since the idr code caps the number of
+ * preallocated entries.
+ */
+ return kmem_cache_alloc(slab, GFP_KERNEL);
+}
+
+static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
+{
+ return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
+}
+
+static struct nfs4_delegation *
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type)
+{
+ struct nfs4_delegation *dp;
+ struct nfs4_file *fp = stp->st_file;
+
+ dprintk("NFSD alloc_init_deleg\n");
+ /*
+ * Major work on the lease subsystem (for example, to support
+ * calbacks on stat) will be required before we can support
+ * write delegations properly.
+ */
+ if (type != NFS4_OPEN_DELEGATE_READ)
+ return NULL;
+ if (fp->fi_had_conflict)
+ return NULL;
+ if (num_delegations > max_delegations)
+ return NULL;
+ dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
+ if (dp == NULL)
+ return dp;
+ init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID);
+ /*
+ * delegation seqid's are never incremented. The 4.1 special
+ * meaning of seqid 0 isn't meaningful, really, but let's avoid
+ * 0 anyway just for consistency and use 1:
+ */
+ dp->dl_stid.sc_stateid.si_generation = 1;
+ num_delegations++;
+ INIT_LIST_HEAD(&dp->dl_perfile);
+ INIT_LIST_HEAD(&dp->dl_perclnt);
+ INIT_LIST_HEAD(&dp->dl_recall_lru);
+ get_nfs4_file(fp);
+ dp->dl_file = fp;
+ dp->dl_type = type;
+ fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
+ dp->dl_time = 0;
+ atomic_set(&dp->dl_count, 1);
+ INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
+ return dp;
+}
+
+void
+nfs4_put_delegation(struct nfs4_delegation *dp)
+{
+ if (atomic_dec_and_test(&dp->dl_count)) {
+ dprintk("NFSD: freeing dp %p\n",dp);
+ put_nfs4_file(dp->dl_file);
+ kmem_cache_free(deleg_slab, dp);
+ num_delegations--;
+ }
+}
+
+static void nfs4_put_deleg_lease(struct nfs4_file *fp)
+{
+ if (atomic_dec_and_test(&fp->fi_delegees)) {
+ vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
+ fp->fi_lease = NULL;
+ fput(fp->fi_deleg_file);
+ fp->fi_deleg_file = NULL;
+ }
+}
+
+static void unhash_stid(struct nfs4_stid *s)
+{
+ struct idr *stateids = &s->sc_client->cl_stateids;
+
+ idr_remove(stateids, s->sc_stateid.si_opaque.so_id);
+}
+
+/* Called under the state lock. */
+static void
+unhash_delegation(struct nfs4_delegation *dp)
+{
+ unhash_stid(&dp->dl_stid);
+ list_del_init(&dp->dl_perclnt);
+ spin_lock(&recall_lock);
+ list_del_init(&dp->dl_perfile);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&recall_lock);
+ nfs4_put_deleg_lease(dp->dl_file);
+ nfs4_put_delegation(dp);
+}
+
+/*
+ * SETCLIENTID state
+ */
+
+/* client_lock protects the client lru list and session hash table */
+static DEFINE_SPINLOCK(client_lock);
+
+/* Hash tables for nfs4_clientid state */
+#define CLIENT_HASH_BITS 4
+#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
+#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1)
+
+static unsigned int clientid_hashval(u32 id)
+{
+ return id & CLIENT_HASH_MASK;
+}
+
+static unsigned int clientstr_hashval(const char *name)
+{
+ return opaque_hashval(name, 8) & CLIENT_HASH_MASK;
+}
+
+/*
+ * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+ * used in reboot/reset lease grace period processing
+ *
+ * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+ * setclientid_confirmed info.
+ *
+ * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed
+ * setclientid info.
+ *
+ * client_lru holds client queue ordered by nfs4_client.cl_time
+ * for lease renewal.
+ *
+ * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
+ * for last close replay.
+ */
+static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE];
+static int reclaim_str_hashtbl_size = 0;
+static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head client_lru;
+static struct list_head close_lru;
+
+/*
+ * We store the NONE, READ, WRITE, and BOTH bits separately in the
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used. This allows us
+ * to enforce the recommendation of rfc 3530 14.2.19 that the server
+ * return an error if the client attempt to downgrade to a combination
+ * of share bits not explicable by closing some of its previous opens.
+ *
+ * XXX: This enforcement is actually incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ * OPEN allow read, deny write
+ * OPEN allow both, deny none
+ * DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
+ */
+static void
+set_access(unsigned int *access, unsigned long bmap) {
+ int i;
+
+ *access = 0;
+ for (i = 1; i < 4; i++) {
+ if (test_bit(i, &bmap))
+ *access |= i;
+ }
+}
+
+static void
+set_deny(unsigned int *deny, unsigned long bmap) {
+ int i;
+
+ *deny = 0;
+ for (i = 0; i < 4; i++) {
+ if (test_bit(i, &bmap))
+ *deny |= i ;
+ }
+}
+
+static int
+test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
+ unsigned int access, deny;
+
+ set_access(&access, stp->st_access_bmap);
+ set_deny(&deny, stp->st_deny_bmap);
+ if ((access & open->op_share_deny) || (deny & open->op_share_access))
+ return 0;
+ return 1;
+}
+
+static int nfs4_access_to_omode(u32 access)
+{
+ switch (access & NFS4_SHARE_ACCESS_BOTH) {
+ case NFS4_SHARE_ACCESS_READ:
+ return O_RDONLY;
+ case NFS4_SHARE_ACCESS_WRITE:
+ return O_WRONLY;
+ case NFS4_SHARE_ACCESS_BOTH:
+ return O_RDWR;
+ }
+ BUG();
+}
+
+static void unhash_generic_stateid(struct nfs4_ol_stateid *stp)
+{
+ list_del(&stp->st_perfile);
+ list_del(&stp->st_perstateowner);
+}
+
+static void close_generic_stateid(struct nfs4_ol_stateid *stp)
+{
+ int i;
+
+ if (stp->st_access_bmap) {
+ for (i = 1; i < 4; i++) {
+ if (test_bit(i, &stp->st_access_bmap))
+ nfs4_file_put_access(stp->st_file,
+ nfs4_access_to_omode(i));
+ __clear_bit(i, &stp->st_access_bmap);
+ }
+ }
+ put_nfs4_file(stp->st_file);
+ stp->st_file = NULL;
+}
+
+static void free_generic_stateid(struct nfs4_ol_stateid *stp)
+{
+ kmem_cache_free(stateid_slab, stp);
+}
+
+static void release_lock_stateid(struct nfs4_ol_stateid *stp)
+{
+ struct file *file;
+
+ unhash_generic_stateid(stp);
+ unhash_stid(&stp->st_stid);
+ file = find_any_file(stp->st_file);
+ if (file)
+ locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner));
+ close_generic_stateid(stp);
+ free_generic_stateid(stp);
+}
+
+static void unhash_lockowner(struct nfs4_lockowner *lo)
+{
+ struct nfs4_ol_stateid *stp;
+
+ list_del(&lo->lo_owner.so_strhash);
+ list_del(&lo->lo_perstateid);
+ list_del(&lo->lo_owner_ino_hash);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
+ stp = list_first_entry(&lo->lo_owner.so_stateids,
+ struct nfs4_ol_stateid, st_perstateowner);
+ release_lock_stateid(stp);
+ }
+}
+
+static void release_lockowner(struct nfs4_lockowner *lo)
+{
+ unhash_lockowner(lo);
+ nfs4_free_lockowner(lo);
+}
+
+static void
+release_stateid_lockowners(struct nfs4_ol_stateid *open_stp)
+{
+ struct nfs4_lockowner *lo;
+
+ while (!list_empty(&open_stp->st_lockowners)) {
+ lo = list_entry(open_stp->st_lockowners.next,
+ struct nfs4_lockowner, lo_perstateid);
+ release_lockowner(lo);
+ }
+}
+
+static void unhash_open_stateid(struct nfs4_ol_stateid *stp)
+{
+ unhash_generic_stateid(stp);
+ release_stateid_lockowners(stp);
+ close_generic_stateid(stp);
+}
+
+static void release_open_stateid(struct nfs4_ol_stateid *stp)
+{
+ unhash_open_stateid(stp);
+ unhash_stid(&stp->st_stid);
+ free_generic_stateid(stp);
+}
+
+static void unhash_openowner(struct nfs4_openowner *oo)
+{
+ struct nfs4_ol_stateid *stp;
+
+ list_del(&oo->oo_owner.so_strhash);
+ list_del(&oo->oo_perclient);
+ while (!list_empty(&oo->oo_owner.so_stateids)) {
+ stp = list_first_entry(&oo->oo_owner.so_stateids,
+ struct nfs4_ol_stateid, st_perstateowner);
+ release_open_stateid(stp);
+ }
+}
+
+static void release_last_closed_stateid(struct nfs4_openowner *oo)
+{
+ struct nfs4_ol_stateid *s = oo->oo_last_closed_stid;
+
+ if (s) {
+ unhash_stid(&s->st_stid);
+ free_generic_stateid(s);
+ oo->oo_last_closed_stid = NULL;
+ }
+}
+
+static void release_openowner(struct nfs4_openowner *oo)
+{
+ unhash_openowner(oo);
+ list_del(&oo->oo_close_lru);
+ release_last_closed_stateid(oo);
+ nfs4_free_openowner(oo);
+}
+
+#define SESSION_HASH_SIZE 512
+static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
+
+static inline int
+hash_sessionid(struct nfs4_sessionid *sessionid)
+{
+ struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;
+
+ return sid->sequence % SESSION_HASH_SIZE;
+}
+
+static inline void
+dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
+{
+ u32 *ptr = (u32 *)(&sessionid->data[0]);
+ dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
+}
+
+static void
+gen_sessionid(struct nfsd4_session *ses)
+{
+ struct nfs4_client *clp = ses->se_client;
+ struct nfsd4_sessionid *sid;
+
+ sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
+ sid->clientid = clp->cl_clientid;
+ sid->sequence = current_sessionid++;
+ sid->reserved = 0;
+}
+
+/*
+ * The protocol defines ca_maxresponssize_cached to include the size of
+ * the rpc header, but all we need to cache is the data starting after
+ * the end of the initial SEQUENCE operation--the rest we regenerate
+ * each time. Therefore we can advertise a ca_maxresponssize_cached
+ * value that is the number of bytes in our cache plus a few additional
+ * bytes. In order to stay on the safe side, and not promise more than
+ * we can cache, those additional bytes must be the minimum possible: 24
+ * bytes of rpc header (xid through accept state, with AUTH_NULL
+ * verifier), 12 for the compound header (with zero-length tag), and 44
+ * for the SEQUENCE op response:
+ */
+#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
+
+static void
+free_session_slots(struct nfsd4_session *ses)
+{
+ int i;
+
+ for (i = 0; i < ses->se_fchannel.maxreqs; i++)
+ kfree(ses->se_slots[i]);
+}
+
+/*
+ * We don't actually need to cache the rpc and session headers, so we
+ * can allocate a little less for each slot:
+ */
+static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
+{
+ return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
+}
+
+static int nfsd4_sanitize_slot_size(u32 size)
+{
+ size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */
+ size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE);
+
+ return size;
+}
+
+/*
+ * XXX: If we run out of reserved DRC memory we could (up to a point)
+ * re-negotiate active sessions and reduce their slot usage to make
+ * room for new connections. For now we just fail the create session.
+ */
+static int nfsd4_get_drc_mem(int slotsize, u32 num)
+{
+ int avail;
+
+ num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
+
+ spin_lock(&nfsd_drc_lock);
+ avail = min_t(int, NFSD_MAX_MEM_PER_SESSION,
+ nfsd_drc_max_mem - nfsd_drc_mem_used);
+ num = min_t(int, num, avail / slotsize);
+ nfsd_drc_mem_used += num * slotsize;
+ spin_unlock(&nfsd_drc_lock);
+
+ return num;
+}
+
+static void nfsd4_put_drc_mem(int slotsize, int num)
+{
+ spin_lock(&nfsd_drc_lock);
+ nfsd_drc_mem_used -= slotsize * num;
+ spin_unlock(&nfsd_drc_lock);
+}
+
+static struct nfsd4_session *alloc_session(int slotsize, int numslots)
+{
+ struct nfsd4_session *new;
+ int mem, i;
+
+ BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
+ + sizeof(struct nfsd4_session) > PAGE_SIZE);
+ mem = numslots * sizeof(struct nfsd4_slot *);
+
+ new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ /* allocate each struct nfsd4_slot and data cache in one piece */
+ for (i = 0; i < numslots; i++) {
+ mem = sizeof(struct nfsd4_slot) + slotsize;
+ new->se_slots[i] = kzalloc(mem, GFP_KERNEL);
+ if (!new->se_slots[i])
+ goto out_free;
+ }
+ return new;
+out_free:
+ while (i--)
+ kfree(new->se_slots[i]);
+ kfree(new);
+ return NULL;
+}
+
+static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize)
+{
+ u32 maxrpc = nfsd_serv->sv_max_mesg;
+
+ new->maxreqs = numslots;
+ new->maxresp_cached = min_t(u32, req->maxresp_cached,
+ slotsize + NFSD_MIN_HDR_SEQ_SZ);
+ new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
+ new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
+ new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
+}
+
+static void free_conn(struct nfsd4_conn *c)
+{
+ svc_xprt_put(c->cn_xprt);
+ kfree(c);
+}
+
+static void nfsd4_conn_lost(struct svc_xpt_user *u)
+{
+ struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
+ struct nfs4_client *clp = c->cn_session->se_client;
+
+ spin_lock(&clp->cl_lock);
+ if (!list_empty(&c->cn_persession)) {
+ list_del(&c->cn_persession);
+ free_conn(c);
+ }
+ spin_unlock(&clp->cl_lock);
+ nfsd4_probe_callback(clp);
+}
+
+static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
+{
+ struct nfsd4_conn *conn;
+
+ conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
+ if (!conn)
+ return NULL;
+ svc_xprt_get(rqstp->rq_xprt);
+ conn->cn_xprt = rqstp->rq_xprt;
+ conn->cn_flags = flags;
+ INIT_LIST_HEAD(&conn->cn_xpt_user.list);
+ return conn;
+}
+
+static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
+{
+ conn->cn_session = ses;
+ list_add(&conn->cn_persession, &ses->se_conns);
+}
+
+static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
+{
+ struct nfs4_client *clp = ses->se_client;
+
+ spin_lock(&clp->cl_lock);
+ __nfsd4_hash_conn(conn, ses);
+ spin_unlock(&clp->cl_lock);
+}
+
+static int nfsd4_register_conn(struct nfsd4_conn *conn)
+{
+ conn->cn_xpt_user.callback = nfsd4_conn_lost;
+ return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
+}
+
+static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses, u32 dir)
+{
+ struct nfsd4_conn *conn;
+ int ret;
+
+ conn = alloc_conn(rqstp, dir);
+ if (!conn)
+ return nfserr_jukebox;
+ nfsd4_hash_conn(conn, ses);
+ ret = nfsd4_register_conn(conn);
+ if (ret)
+ /* oops; xprt is already down: */
+ nfsd4_conn_lost(&conn->cn_xpt_user);
+ return nfs_ok;
+}
+
+static __be32 nfsd4_new_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_session *ses)
+{
+ u32 dir = NFS4_CDFC4_FORE;
+
+ if (ses->se_flags & SESSION4_BACK_CHAN)
+ dir |= NFS4_CDFC4_BACK;
+
+ return nfsd4_new_conn(rqstp, ses, dir);
+}
+
+/* must be called under client_lock */
+static void nfsd4_del_conns(struct nfsd4_session *s)
+{
+ struct nfs4_client *clp = s->se_client;
+ struct nfsd4_conn *c;
+
+ spin_lock(&clp->cl_lock);
+ while (!list_empty(&s->se_conns)) {
+ c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
+ list_del_init(&c->cn_persession);
+ spin_unlock(&clp->cl_lock);
+
+ unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
+ free_conn(c);
+
+ spin_lock(&clp->cl_lock);
+ }
+ spin_unlock(&clp->cl_lock);
+}
+
+void free_session(struct kref *kref)
+{
+ struct nfsd4_session *ses;
+ int mem;
+
+ ses = container_of(kref, struct nfsd4_session, se_ref);
+ nfsd4_del_conns(ses);
+ spin_lock(&nfsd_drc_lock);
+ mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
+ nfsd_drc_mem_used -= mem;
+ spin_unlock(&nfsd_drc_lock);
+ free_session_slots(ses);
+ kfree(ses);
+}
+
+static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
+{
+ struct nfsd4_session *new;
+ struct nfsd4_channel_attrs *fchan = &cses->fore_channel;
+ int numslots, slotsize;
+ int status;
+ int idx;
+
+ /*
+ * Note decreasing slot size below client's request may
+ * make it difficult for client to function correctly, whereas
+ * decreasing the number of slots will (just?) affect
+ * performance. When short on memory we therefore prefer to
+ * decrease number of slots instead of their size.
+ */
+ slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached);
+ numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs);
+ if (numslots < 1)
+ return NULL;
+
+ new = alloc_session(slotsize, numslots);
+ if (!new) {
+ nfsd4_put_drc_mem(slotsize, fchan->maxreqs);
+ return NULL;
+ }
+ init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize);
+
+ new->se_client = clp;
+ gen_sessionid(new);
+
+ INIT_LIST_HEAD(&new->se_conns);
+
+ new->se_cb_seq_nr = 1;
+ new->se_flags = cses->flags;
+ new->se_cb_prog = cses->callback_prog;
+ kref_init(&new->se_ref);
+ idx = hash_sessionid(&new->se_sessionid);
+ spin_lock(&client_lock);
+ list_add(&new->se_hash, &sessionid_hashtbl[idx]);
+ spin_lock(&clp->cl_lock);
+ list_add(&new->se_perclnt, &clp->cl_sessions);
+ spin_unlock(&clp->cl_lock);
+ spin_unlock(&client_lock);
+
+ status = nfsd4_new_conn_from_crses(rqstp, new);
+ /* whoops: benny points out, status is ignored! (err, or bogus) */
+ if (status) {
+ free_session(&new->se_ref);
+ return NULL;
+ }
+ if (cses->flags & SESSION4_BACK_CHAN) {
+ struct sockaddr *sa = svc_addr(rqstp);
+ /*
+ * This is a little silly; with sessions there's no real
+ * use for the callback address. Use the peer address
+ * as a reasonable default for now, but consider fixing
+ * the rpc client not to require an address in the
+ * future:
+ */
+ rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
+ clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+ }
+ nfsd4_probe_callback(clp);
+ return new;
+}
+
+/* caller must hold client_lock */
+static struct nfsd4_session *
+find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
+{
+ struct nfsd4_session *elem;
+ int idx;
+
+ dump_sessionid(__func__, sessionid);
+ idx = hash_sessionid(sessionid);
+ /* Search in the appropriate list */
+ list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) {
+ if (!memcmp(elem->se_sessionid.data, sessionid->data,
+ NFS4_MAX_SESSIONID_LEN)) {
+ return elem;
+ }
+ }
+
+ dprintk("%s: session not found\n", __func__);
+ return NULL;
+}
+
+/* caller must hold client_lock */
+static void
+unhash_session(struct nfsd4_session *ses)
+{
+ list_del(&ses->se_hash);
+ spin_lock(&ses->se_client->cl_lock);
+ list_del(&ses->se_perclnt);
+ spin_unlock(&ses->se_client->cl_lock);
+}
+
+/* must be called under the client_lock */
+static inline void
+renew_client_locked(struct nfs4_client *clp)
+{
+ if (is_client_expired(clp)) {
+ dprintk("%s: client (clientid %08x/%08x) already expired\n",
+ __func__,
+ clp->cl_clientid.cl_boot,
+ clp->cl_clientid.cl_id);
+ return;
+ }
+
+ dprintk("renewing client (clientid %08x/%08x)\n",
+ clp->cl_clientid.cl_boot,
+ clp->cl_clientid.cl_id);
+ list_move_tail(&clp->cl_lru, &client_lru);
+ clp->cl_time = get_seconds();
+}
+
+static inline void
+renew_client(struct nfs4_client *clp)
+{
+ spin_lock(&client_lock);
+ renew_client_locked(clp);
+ spin_unlock(&client_lock);
+}
+
+/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
+static int
+STALE_CLIENTID(clientid_t *clid)
+{
+ if (clid->cl_boot == boot_time)
+ return 0;
+ dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
+ clid->cl_boot, clid->cl_id, boot_time);
+ return 1;
+}
+
+/*
+ * XXX Should we use a slab cache ?
+ * This type of memory management is somewhat inefficient, but we use it
+ * anyway since SETCLIENTID is not a common operation.
+ */
+static struct nfs4_client *alloc_client(struct xdr_netobj name)
+{
+ struct nfs4_client *clp;
+
+ clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
+ if (clp == NULL)
+ return NULL;
+ clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+ if (clp->cl_name.data == NULL) {
+ kfree(clp);
+ return NULL;
+ }
+ clp->cl_name.len = name.len;
+ return clp;
+}
+
+static inline void
+free_client(struct nfs4_client *clp)
+{
+ while (!list_empty(&clp->cl_sessions)) {
+ struct nfsd4_session *ses;
+ ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
+ se_perclnt);
+ list_del(&ses->se_perclnt);
+ nfsd4_put_session(ses);
+ }
+ if (clp->cl_cred.cr_group_info)
+ put_group_info(clp->cl_cred.cr_group_info);
+ kfree(clp->cl_principal);
+ kfree(clp->cl_name.data);
+ kfree(clp);
+}
+
+void
+release_session_client(struct nfsd4_session *session)
+{
+ struct nfs4_client *clp = session->se_client;
+
+ if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
+ return;
+ if (is_client_expired(clp)) {
+ free_client(clp);
+ session->se_client = NULL;
+ } else
+ renew_client_locked(clp);
+ spin_unlock(&client_lock);
+}
+
+/* must be called under the client_lock */
+static inline void
+unhash_client_locked(struct nfs4_client *clp)
+{
+ struct nfsd4_session *ses;
+
+ mark_client_expired(clp);
+ list_del(&clp->cl_lru);
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ list_del_init(&ses->se_hash);
+ spin_unlock(&clp->cl_lock);
+}
+
+static void
+expire_client(struct nfs4_client *clp)
+{
+ struct nfs4_openowner *oo;
+ struct nfs4_delegation *dp;
+ struct list_head reaplist;
+
+ INIT_LIST_HEAD(&reaplist);
+ spin_lock(&recall_lock);
+ while (!list_empty(&clp->cl_delegations)) {
+ dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
+ list_del_init(&dp->dl_perclnt);
+ list_move(&dp->dl_recall_lru, &reaplist);
+ }
+ spin_unlock(&recall_lock);
+ while (!list_empty(&reaplist)) {
+ dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
+ unhash_delegation(dp);
+ }
+ while (!list_empty(&clp->cl_openowners)) {
+ oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
+ release_openowner(oo);
+ }
+ nfsd4_shutdown_callback(clp);
+ if (clp->cl_cb_conn.cb_xprt)
+ svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+ list_del(&clp->cl_idhash);
+ list_del(&clp->cl_strhash);
+ spin_lock(&client_lock);
+ unhash_client_locked(clp);
+ if (atomic_read(&clp->cl_refcount) == 0)
+ free_client(clp);
+ spin_unlock(&client_lock);
+}
+
+static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+{
+ memcpy(target->cl_verifier.data, source->data,
+ sizeof(target->cl_verifier.data));
+}
+
+static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
+{
+ target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
+ target->cl_clientid.cl_id = source->cl_clientid.cl_id;
+}
+
+static void copy_cred(struct svc_cred *target, struct svc_cred *source)
+{
+ target->cr_uid = source->cr_uid;
+ target->cr_gid = source->cr_gid;
+ target->cr_group_info = source->cr_group_info;
+ get_group_info(target->cr_group_info);
+}
+
+static int same_name(const char *n1, const char *n2)
+{
+ return 0 == memcmp(n1, n2, HEXDIR_LEN);
+}
+
+static int
+same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
+{
+ return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
+}
+
+static int
+same_clid(clientid_t *cl1, clientid_t *cl2)
+{
+ return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
+}
+
+/* XXX what about NGROUP */
+static int
+same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
+{
+ return cr1->cr_uid == cr2->cr_uid;
+}
+
+static void gen_clid(struct nfs4_client *clp)
+{
+ static u32 current_clientid = 1;
+
+ clp->cl_clientid.cl_boot = boot_time;
+ clp->cl_clientid.cl_id = current_clientid++;
+}
+
+static void gen_confirm(struct nfs4_client *clp)
+{
+ static u32 i;
+ u32 *p;
+
+ p = (u32 *)clp->cl_confirm.data;
+ *p++ = get_seconds();
+ *p++ = i++;
+}
+
+static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
+{
+ return idr_find(&cl->cl_stateids, t->si_opaque.so_id);
+}
+
+static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+{
+ struct nfs4_stid *s;
+
+ s = find_stateid(cl, t);
+ if (!s)
+ return NULL;
+ if (typemask & s->sc_type)
+ return s;
+ return NULL;
+}
+
+static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
+ struct svc_rqst *rqstp, nfs4_verifier *verf)
+{
+ struct nfs4_client *clp;
+ struct sockaddr *sa = svc_addr(rqstp);
+ char *princ;
+
+ clp = alloc_client(name);
+ if (clp == NULL)
+ return NULL;
+
+ INIT_LIST_HEAD(&clp->cl_sessions);
+
+ princ = svc_gss_principal(rqstp);
+ if (princ) {
+ clp->cl_principal = kstrdup(princ, GFP_KERNEL);
+ if (clp->cl_principal == NULL) {
+ free_client(clp);
+ return NULL;
+ }
+ }
+
+ idr_init(&clp->cl_stateids);
+ memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
+ atomic_set(&clp->cl_refcount, 0);
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ INIT_LIST_HEAD(&clp->cl_idhash);
+ INIT_LIST_HEAD(&clp->cl_strhash);
+ INIT_LIST_HEAD(&clp->cl_openowners);
+ INIT_LIST_HEAD(&clp->cl_delegations);
+ INIT_LIST_HEAD(&clp->cl_lru);
+ INIT_LIST_HEAD(&clp->cl_callbacks);
+ spin_lock_init(&clp->cl_lock);
+ INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
+ clp->cl_time = get_seconds();
+ clear_bit(0, &clp->cl_cb_slot_busy);
+ rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
+ copy_verf(clp, verf);
+ rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
+ clp->cl_flavor = rqstp->rq_flavor;
+ copy_cred(&clp->cl_cred, &rqstp->rq_cred);
+ gen_confirm(clp);
+ clp->cl_cb_session = NULL;
+ return clp;
+}
+
+static void
+add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
+{
+ unsigned int idhashval;
+
+ list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+ idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+ list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+ renew_client(clp);
+}
+
+static void
+move_to_confirmed(struct nfs4_client *clp)
+{
+ unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+ unsigned int strhashval;
+
+ dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
+ list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
+ strhashval = clientstr_hashval(clp->cl_recdir);
+ list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+ renew_client(clp);
+}
+
+static struct nfs4_client *
+find_confirmed_client(clientid_t *clid)
+{
+ struct nfs4_client *clp;
+ unsigned int idhashval = clientid_hashval(clid->cl_id);
+
+ list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
+ if (same_clid(&clp->cl_clientid, clid)) {
+ renew_client(clp);
+ return clp;
+ }
+ }
+ return NULL;
+}
+
+static struct nfs4_client *
+find_unconfirmed_client(clientid_t *clid)
+{
+ struct nfs4_client *clp;
+ unsigned int idhashval = clientid_hashval(clid->cl_id);
+
+ list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
+ if (same_clid(&clp->cl_clientid, clid))
+ return clp;
+ }
+ return NULL;
+}
+
+static bool clp_used_exchangeid(struct nfs4_client *clp)
+{
+ return clp->cl_exchange_flags != 0;
+}
+
+static struct nfs4_client *
+find_confirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+ struct nfs4_client *clp;
+
+ list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
+ if (same_name(clp->cl_recdir, dname))
+ return clp;
+ }
+ return NULL;
+}
+
+static struct nfs4_client *
+find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+ struct nfs4_client *clp;
+
+ list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
+ if (same_name(clp->cl_recdir, dname))
+ return clp;
+ }
+ return NULL;
+}
+
+static void
+gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp)
+{
+ struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
+ struct sockaddr *sa = svc_addr(rqstp);
+ u32 scopeid = rpc_get_scope_id(sa);
+ unsigned short expected_family;
+
+ /* Currently, we only support tcp and tcp6 for the callback channel */
+ if (se->se_callback_netid_len == 3 &&
+ !memcmp(se->se_callback_netid_val, "tcp", 3))
+ expected_family = AF_INET;
+ else if (se->se_callback_netid_len == 4 &&
+ !memcmp(se->se_callback_netid_val, "tcp6", 4))
+ expected_family = AF_INET6;
+ else
+ goto out_err;
+
+ conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
+ se->se_callback_addr_len,
+ (struct sockaddr *)&conn->cb_addr,
+ sizeof(conn->cb_addr));
+
+ if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
+ goto out_err;
+
+ if (conn->cb_addr.ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
+
+ conn->cb_prog = se->se_callback_prog;
+ conn->cb_ident = se->se_callback_ident;
+ memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
+ return;
+out_err:
+ conn->cb_addr.ss_family = AF_UNSPEC;
+ conn->cb_addrlen = 0;
+ dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+ "will not receive delegations\n",
+ clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+
+ return;
+}
+
+/*
+ * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
+ */
+void
+nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
+{
+ struct nfsd4_slot *slot = resp->cstate.slot;
+ unsigned int base;
+
+ dprintk("--> %s slot %p\n", __func__, slot);
+
+ slot->sl_opcnt = resp->opcnt;
+ slot->sl_status = resp->cstate.status;
+
+ if (nfsd4_not_cached(resp)) {
+ slot->sl_datalen = 0;
+ return;
+ }
+ slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap;
+ base = (char *)resp->cstate.datap -
+ (char *)resp->xbuf->head[0].iov_base;
+ if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
+ slot->sl_datalen))
+ WARN("%s: sessions DRC could not cache compound\n", __func__);
+ return;
+}
+
+/*
+ * Encode the replay sequence operation from the slot values.
+ * If cachethis is FALSE encode the uncached rep error on the next
+ * operation which sets resp->p and increments resp->opcnt for
+ * nfs4svc_encode_compoundres.
+ *
+ */
+static __be32
+nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
+ struct nfsd4_compoundres *resp)
+{
+ struct nfsd4_op *op;
+ struct nfsd4_slot *slot = resp->cstate.slot;
+
+ dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
+ resp->opcnt, resp->cstate.slot->sl_cachethis);
+
+ /* Encode the replayed sequence operation */
+ op = &args->ops[resp->opcnt - 1];
+ nfsd4_encode_operation(resp, op);
+
+ /* Return nfserr_retry_uncached_rep in next operation. */
+ if (args->opcnt > 1 && slot->sl_cachethis == 0) {
+ op = &args->ops[resp->opcnt++];
+ op->status = nfserr_retry_uncached_rep;
+ nfsd4_encode_operation(resp, op);
+ }
+ return op->status;
+}
+
+/*
+ * The sequence operation is not cached because we can use the slot and
+ * session values.
+ */
+__be32
+nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
+ struct nfsd4_sequence *seq)
+{
+ struct nfsd4_slot *slot = resp->cstate.slot;
+ __be32 status;
+
+ dprintk("--> %s slot %p\n", __func__, slot);
+
+ /* Either returns 0 or nfserr_retry_uncached */
+ status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
+ if (status == nfserr_retry_uncached_rep)
+ return status;
+
+ /* The sequence operation has been encoded, cstate->datap set. */
+ memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen);
+
+ resp->opcnt = slot->sl_opcnt;
+ resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen);
+ status = slot->sl_status;
+
+ return status;
+}
+
+/*
+ * Set the exchange_id flags returned by the server.
+ */
+static void
+nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
+{
+ /* pNFS is not supported */
+ new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
+
+ /* Referrals are supported, Migration is not. */
+ new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
+
+ /* set the wire flags to return to client. */
+ clid->flags = new->cl_exchange_flags;
+}
+
+__be32
+nfsd4_exchange_id(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_exchange_id *exid)
+{
+ struct nfs4_client *unconf, *conf, *new;
+ int status;
+ unsigned int strhashval;
+ char dname[HEXDIR_LEN];
+ char addr_str[INET6_ADDRSTRLEN];
+ nfs4_verifier verf = exid->verifier;
+ struct sockaddr *sa = svc_addr(rqstp);
+
+ rpc_ntop(sa, addr_str, sizeof(addr_str));
+ dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
+ "ip_addr=%s flags %x, spa_how %d\n",
+ __func__, rqstp, exid, exid->clname.len, exid->clname.data,
+ addr_str, exid->flags, exid->spa_how);
+
+ if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
+ return nfserr_inval;
+
+ /* Currently only support SP4_NONE */
+ switch (exid->spa_how) {
+ case SP4_NONE:
+ break;
+ case SP4_SSV:
+ return nfserr_serverfault;
+ default:
+ BUG(); /* checked by xdr code */
+ case SP4_MACH_CRED:
+ return nfserr_serverfault; /* no excuse :-/ */
+ }
+
+ status = nfs4_make_rec_clidname(dname, &exid->clname);
+
+ if (status)
+ goto error;
+
+ strhashval = clientstr_hashval(dname);
+
+ nfs4_lock_state();
+ status = nfs_ok;
+
+ conf = find_confirmed_client_by_str(dname, strhashval);
+ if (conf) {
+ if (!clp_used_exchangeid(conf)) {
+ status = nfserr_clid_inuse; /* XXX: ? */
+ goto out;
+ }
+ if (!same_verf(&verf, &conf->cl_verifier)) {
+ /* 18.35.4 case 8 */
+ if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
+ status = nfserr_not_same;
+ goto out;
+ }
+ /* Client reboot: destroy old state */
+ expire_client(conf);
+ goto out_new;
+ }
+ if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
+ /* 18.35.4 case 9 */
+ if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
+ status = nfserr_perm;
+ goto out;
+ }
+ expire_client(conf);
+ goto out_new;
+ }
+ /*
+ * Set bit when the owner id and verifier map to an already
+ * confirmed client id (18.35.3).
+ */
+ exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
+
+ /*
+ * Falling into 18.35.4 case 2, possible router replay.
+ * Leave confirmed record intact and return same result.
+ */
+ copy_verf(conf, &verf);
+ new = conf;
+ goto out_copy;
+ }
+
+ /* 18.35.4 case 7 */
+ if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
+ status = nfserr_noent;
+ goto out;
+ }
+
+ unconf = find_unconfirmed_client_by_str(dname, strhashval);
+ if (unconf) {
+ /*
+ * Possible retry or client restart. Per 18.35.4 case 4,
+ * a new unconfirmed record should be generated regardless
+ * of whether any properties have changed.
+ */
+ expire_client(unconf);
+ }
+
+out_new:
+ /* Normal case */
+ new = create_client(exid->clname, dname, rqstp, &verf);
+ if (new == NULL) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+
+ gen_clid(new);
+ add_to_unconfirmed(new, strhashval);
+out_copy:
+ exid->clientid.cl_boot = new->cl_clientid.cl_boot;
+ exid->clientid.cl_id = new->cl_clientid.cl_id;
+
+ exid->seqid = 1;
+ nfsd4_set_ex_flags(new, exid);
+
+ dprintk("nfsd4_exchange_id seqid %d flags %x\n",
+ new->cl_cs_slot.sl_seqid, new->cl_exchange_flags);
+ status = nfs_ok;
+
+out:
+ nfs4_unlock_state();
+error:
+ dprintk("nfsd4_exchange_id returns %d\n", ntohl(status));
+ return status;
+}
+
+static int
+check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
+{
+ dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
+ slot_seqid);
+
+ /* The slot is in use, and no response has been sent. */
+ if (slot_inuse) {
+ if (seqid == slot_seqid)
+ return nfserr_jukebox;
+ else
+ return nfserr_seq_misordered;
+ }
+ /* Normal */
+ if (likely(seqid == slot_seqid + 1))
+ return nfs_ok;
+ /* Replay */
+ if (seqid == slot_seqid)
+ return nfserr_replay_cache;
+ /* Wraparound */
+ if (seqid == 1 && (slot_seqid + 1) == 0)
+ return nfs_ok;
+ /* Misordered replay or misordered new request */
+ return nfserr_seq_misordered;
+}
+
+/*
+ * Cache the create session result into the create session single DRC
+ * slot cache by saving the xdr structure. sl_seqid has been set.
+ * Do this for solo or embedded create session operations.
+ */
+static void
+nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
+ struct nfsd4_clid_slot *slot, int nfserr)
+{
+ slot->sl_status = nfserr;
+ memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
+}
+
+static __be32
+nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
+ struct nfsd4_clid_slot *slot)
+{
+ memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
+ return slot->sl_status;
+}
+
+#define NFSD_MIN_REQ_HDR_SEQ_SZ ((\
+ 2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \
+ 1 + /* MIN tag is length with zero, only length */ \
+ 3 + /* version, opcount, opcode */ \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+ /* seqid, slotID, slotID, cache */ \
+ 4 ) * sizeof(__be32))
+
+#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\
+ 2 + /* verifier: AUTH_NULL, length 0 */\
+ 1 + /* status */ \
+ 1 + /* MIN tag is length with zero, only length */ \
+ 3 + /* opcount, opcode, opstatus*/ \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+ /* seqid, slotID, slotID, slotID, status */ \
+ 5 ) * sizeof(__be32))
+
+static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs fchannel)
+{
+ return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ
+ || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ;
+}
+
+__be32
+nfsd4_create_session(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_create_session *cr_ses)
+{
+ struct sockaddr *sa = svc_addr(rqstp);
+ struct nfs4_client *conf, *unconf;
+ struct nfsd4_session *new;
+ struct nfsd4_clid_slot *cs_slot = NULL;
+ bool confirm_me = false;
+ int status = 0;
+
+ if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
+ return nfserr_inval;
+
+ nfs4_lock_state();
+ unconf = find_unconfirmed_client(&cr_ses->clientid);
+ conf = find_confirmed_client(&cr_ses->clientid);
+
+ if (conf) {
+ cs_slot = &conf->cl_cs_slot;
+ status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
+ if (status == nfserr_replay_cache) {
+ dprintk("Got a create_session replay! seqid= %d\n",
+ cs_slot->sl_seqid);
+ /* Return the cached reply status */
+ status = nfsd4_replay_create_session(cr_ses, cs_slot);
+ goto out;
+ } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
+ status = nfserr_seq_misordered;
+ dprintk("Sequence misordered!\n");
+ dprintk("Expected seqid= %d but got seqid= %d\n",
+ cs_slot->sl_seqid, cr_ses->seqid);
+ goto out;
+ }
+ } else if (unconf) {
+ if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
+ !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
+ status = nfserr_clid_inuse;
+ goto out;
+ }
+
+ cs_slot = &unconf->cl_cs_slot;
+ status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
+ if (status) {
+ /* an unconfirmed replay returns misordered */
+ status = nfserr_seq_misordered;
+ goto out;
+ }
+
+ confirm_me = true;
+ conf = unconf;
+ } else {
+ status = nfserr_stale_clientid;
+ goto out;
+ }
+
+ /*
+ * XXX: we should probably set this at creation time, and check
+ * for consistent minorversion use throughout:
+ */
+ conf->cl_minorversion = 1;
+ /*
+ * We do not support RDMA or persistent sessions
+ */
+ cr_ses->flags &= ~SESSION4_PERSIST;
+ cr_ses->flags &= ~SESSION4_RDMA;
+
+ status = nfserr_toosmall;
+ if (check_forechannel_attrs(cr_ses->fore_channel))
+ goto out;
+
+ status = nfserr_jukebox;
+ new = alloc_init_session(rqstp, conf, cr_ses);
+ if (!new)
+ goto out;
+ status = nfs_ok;
+ memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ memcpy(&cr_ses->fore_channel, &new->se_fchannel,
+ sizeof(struct nfsd4_channel_attrs));
+ cs_slot->sl_seqid++;
+ cr_ses->seqid = cs_slot->sl_seqid;
+
+ /* cache solo and embedded create sessions under the state lock */
+ nfsd4_cache_create_session(cr_ses, cs_slot, status);
+ if (confirm_me)
+ move_to_confirmed(conf);
+out:
+ nfs4_unlock_state();
+ dprintk("%s returns %d\n", __func__, ntohl(status));
+ return status;
+}
+
+static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
+{
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+ return argp->opcnt == resp->opcnt;
+}
+
+static __be32 nfsd4_map_bcts_dir(u32 *dir)
+{
+ switch (*dir) {
+ case NFS4_CDFC4_FORE:
+ case NFS4_CDFC4_BACK:
+ return nfs_ok;
+ case NFS4_CDFC4_FORE_OR_BOTH:
+ case NFS4_CDFC4_BACK_OR_BOTH:
+ *dir = NFS4_CDFC4_BOTH;
+ return nfs_ok;
+ };
+ return nfserr_inval;
+}
+
+__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_bind_conn_to_session *bcts)
+{
+ __be32 status;
+
+ if (!nfsd4_last_compound_op(rqstp))
+ return nfserr_not_only_op;
+ spin_lock(&client_lock);
+ cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid);
+ /* Sorta weird: we only need the refcnt'ing because new_conn acquires
+ * client_lock iself: */
+ if (cstate->session) {
+ nfsd4_get_session(cstate->session);
+ atomic_inc(&cstate->session->se_client->cl_refcount);
+ }
+ spin_unlock(&client_lock);
+ if (!cstate->session)
+ return nfserr_badsession;
+
+ status = nfsd4_map_bcts_dir(&bcts->dir);
+ if (!status)
+ nfsd4_new_conn(rqstp, cstate->session, bcts->dir);
+ return status;
+}
+
+static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
+{
+ if (!session)
+ return 0;
+ return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
+}
+
+__be32
+nfsd4_destroy_session(struct svc_rqst *r,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_destroy_session *sessionid)
+{
+ struct nfsd4_session *ses;
+ u32 status = nfserr_badsession;
+
+ /* Notes:
+ * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid
+ * - Should we return nfserr_back_chan_busy if waiting for
+ * callbacks on to-be-destroyed session?
+ * - Do we need to clear any callback info from previous session?
+ */
+
+ if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
+ if (!nfsd4_last_compound_op(r))
+ return nfserr_not_only_op;
+ }
+ dump_sessionid(__func__, &sessionid->sessionid);
+ spin_lock(&client_lock);
+ ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
+ if (!ses) {
+ spin_unlock(&client_lock);
+ goto out;
+ }
+
+ unhash_session(ses);
+ spin_unlock(&client_lock);
+
+ nfs4_lock_state();
+ nfsd4_probe_callback_sync(ses->se_client);
+ nfs4_unlock_state();
+
+ nfsd4_del_conns(ses);
+
+ nfsd4_put_session(ses);
+ status = nfs_ok;
+out:
+ dprintk("%s returns %d\n", __func__, ntohl(status));
+ return status;
+}
+
+static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
+{
+ struct nfsd4_conn *c;
+
+ list_for_each_entry(c, &s->se_conns, cn_persession) {
+ if (c->cn_xprt == xpt) {
+ return c;
+ }
+ }
+ return NULL;
+}
+
+static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
+{
+ struct nfs4_client *clp = ses->se_client;
+ struct nfsd4_conn *c;
+ int ret;
+
+ spin_lock(&clp->cl_lock);
+ c = __nfsd4_find_conn(new->cn_xprt, ses);
+ if (c) {
+ spin_unlock(&clp->cl_lock);
+ free_conn(new);
+ return;
+ }
+ __nfsd4_hash_conn(new, ses);
+ spin_unlock(&clp->cl_lock);
+ ret = nfsd4_register_conn(new);
+ if (ret)
+ /* oops; xprt is already down: */
+ nfsd4_conn_lost(&new->cn_xpt_user);
+ return;
+}
+
+static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session)
+{
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
+ return args->opcnt > session->se_fchannel.maxops;
+}
+
+static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
+ struct nfsd4_session *session)
+{
+ struct xdr_buf *xb = &rqstp->rq_arg;
+
+ return xb->len > session->se_fchannel.maxreq_sz;
+}
+
+__be32
+nfsd4_sequence(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_sequence *seq)
+{
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfsd4_session *session;
+ struct nfsd4_slot *slot;
+ struct nfsd4_conn *conn;
+ int status;
+
+ if (resp->opcnt != 1)
+ return nfserr_sequence_pos;
+
+ /*
+ * Will be either used or freed by nfsd4_sequence_check_conn
+ * below.
+ */
+ conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
+ if (!conn)
+ return nfserr_jukebox;
+
+ spin_lock(&client_lock);
+ status = nfserr_badsession;
+ session = find_in_sessionid_hashtbl(&seq->sessionid);
+ if (!session)
+ goto out;
+
+ status = nfserr_too_many_ops;
+ if (nfsd4_session_too_many_ops(rqstp, session))
+ goto out;
+
+ status = nfserr_req_too_big;
+ if (nfsd4_request_too_big(rqstp, session))
+ goto out;
+
+ status = nfserr_badslot;
+ if (seq->slotid >= session->se_fchannel.maxreqs)
+ goto out;
+
+ slot = session->se_slots[seq->slotid];
+ dprintk("%s: slotid %d\n", __func__, seq->slotid);
+
+ /* We do not negotiate the number of slots yet, so set the
+ * maxslots to the session maxreqs which is used to encode
+ * sr_highest_slotid and the sr_target_slot id to maxslots */
+ seq->maxslots = session->se_fchannel.maxreqs;
+
+ status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
+ if (status == nfserr_replay_cache) {
+ cstate->slot = slot;
+ cstate->session = session;
+ /* Return the cached reply status and set cstate->status
+ * for nfsd4_proc_compound processing */
+ status = nfsd4_replay_cache_entry(resp, seq);
+ cstate->status = nfserr_replay_cache;
+ goto out;
+ }
+ if (status)
+ goto out;
+
+ nfsd4_sequence_check_conn(conn, session);
+ conn = NULL;
+
+ /* Success! bump slot seqid */
+ slot->sl_inuse = true;
+ slot->sl_seqid = seq->seqid;
+ slot->sl_cachethis = seq->cachethis;
+
+ cstate->slot = slot;
+ cstate->session = session;
+
+out:
+ /* Hold a session reference until done processing the compound. */
+ if (cstate->session) {
+ struct nfs4_client *clp = session->se_client;
+
+ nfsd4_get_session(cstate->session);
+ atomic_inc(&clp->cl_refcount);
+ switch (clp->cl_cb_state) {
+ case NFSD4_CB_DOWN:
+ seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
+ break;
+ case NFSD4_CB_FAULT:
+ seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
+ break;
+ default:
+ seq->status_flags = 0;
+ }
+ }
+ kfree(conn);
+ spin_unlock(&client_lock);
+ dprintk("%s: return %d\n", __func__, ntohl(status));
+ return status;
+}
+
+static inline bool has_resources(struct nfs4_client *clp)
+{
+ return !list_empty(&clp->cl_openowners)
+ || !list_empty(&clp->cl_delegations)
+ || !list_empty(&clp->cl_sessions);
+}
+
+__be32
+nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
+{
+ struct nfs4_client *conf, *unconf, *clp;
+ int status = 0;
+
+ nfs4_lock_state();
+ unconf = find_unconfirmed_client(&dc->clientid);
+ conf = find_confirmed_client(&dc->clientid);
+
+ if (conf) {
+ clp = conf;
+
+ if (!is_client_expired(conf) && has_resources(conf)) {
+ status = nfserr_clientid_busy;
+ goto out;
+ }
+
+ /* rfc5661 18.50.3 */
+ if (cstate->session && conf == cstate->session->se_client) {
+ status = nfserr_clientid_busy;
+ goto out;
+ }
+ } else if (unconf)
+ clp = unconf;
+ else {
+ status = nfserr_stale_clientid;
+ goto out;
+ }
+
+ expire_client(clp);
+out:
+ nfs4_unlock_state();
+ dprintk("%s return %d\n", __func__, ntohl(status));
+ return status;
+}
+
+__be32
+nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
+{
+ int status = 0;
+
+ if (rc->rca_one_fs) {
+ if (!cstate->current_fh.fh_dentry)
+ return nfserr_nofilehandle;
+ /*
+ * We don't take advantage of the rca_one_fs case.
+ * That's OK, it's optional, we can safely ignore it.
+ */
+ return nfs_ok;
+ }
+
+ nfs4_lock_state();
+ status = nfserr_complete_already;
+ if (cstate->session->se_client->cl_firststate)
+ goto out;
+
+ status = nfserr_stale_clientid;
+ if (is_client_expired(cstate->session->se_client))
+ /*
+ * The following error isn't really legal.
+ * But we only get here if the client just explicitly
+ * destroyed the client. Surely it no longer cares what
+ * error it gets back on an operation for the dead
+ * client.
+ */
+ goto out;
+
+ status = nfs_ok;
+ nfsd4_create_clid_dir(cstate->session->se_client);
+out:
+ nfs4_unlock_state();
+ return status;
+}
+
+__be32
+nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_setclientid *setclid)
+{
+ struct xdr_netobj clname = setclid->se_name;
+ nfs4_verifier clverifier = setclid->se_verf;
+ unsigned int strhashval;
+ struct nfs4_client *conf, *unconf, *new;
+ __be32 status;
+ char dname[HEXDIR_LEN];
+
+ status = nfs4_make_rec_clidname(dname, &clname);
+ if (status)
+ return status;
+
+ /*
+ * XXX The Duplicate Request Cache (DRC) has been checked (??)
+ * We get here on a DRC miss.
+ */
+
+ strhashval = clientstr_hashval(dname);
+
+ nfs4_lock_state();
+ conf = find_confirmed_client_by_str(dname, strhashval);
+ if (conf) {
+ /* RFC 3530 14.2.33 CASE 0: */
+ status = nfserr_clid_inuse;
+ if (clp_used_exchangeid(conf))
+ goto out;
+ if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
+ char addr_str[INET6_ADDRSTRLEN];
+ rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
+ sizeof(addr_str));
+ dprintk("NFSD: setclientid: string in use by client "
+ "at %s\n", addr_str);
+ goto out;
+ }
+ }
+ /*
+ * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION")
+ * has a description of SETCLIENTID request processing consisting
+ * of 5 bullet points, labeled as CASE0 - CASE4 below.
+ */
+ unconf = find_unconfirmed_client_by_str(dname, strhashval);
+ status = nfserr_jukebox;
+ if (!conf) {
+ /*
+ * RFC 3530 14.2.33 CASE 4:
+ * placed first, because it is the normal case
+ */
+ if (unconf)
+ expire_client(unconf);
+ new = create_client(clname, dname, rqstp, &clverifier);
+ if (new == NULL)
+ goto out;
+ gen_clid(new);
+ } else if (same_verf(&conf->cl_verifier, &clverifier)) {
+ /*
+ * RFC 3530 14.2.33 CASE 1:
+ * probable callback update
+ */
+ if (unconf) {
+ /* Note this is removing unconfirmed {*x***},
+ * which is stronger than RFC recommended {vxc**}.
+ * This has the advantage that there is at most
+ * one {*x***} in either list at any time.
+ */
+ expire_client(unconf);
+ }
+ new = create_client(clname, dname, rqstp, &clverifier);
+ if (new == NULL)
+ goto out;
+ copy_clid(new, conf);
+ } else if (!unconf) {
+ /*
+ * RFC 3530 14.2.33 CASE 2:
+ * probable client reboot; state will be removed if
+ * confirmed.
+ */
+ new = create_client(clname, dname, rqstp, &clverifier);
+ if (new == NULL)
+ goto out;
+ gen_clid(new);
+ } else {
+ /*
+ * RFC 3530 14.2.33 CASE 3:
+ * probable client reboot; state will be removed if
+ * confirmed.
+ */
+ expire_client(unconf);
+ new = create_client(clname, dname, rqstp, &clverifier);
+ if (new == NULL)
+ goto out;
+ gen_clid(new);
+ }
+ /*
+ * XXX: we should probably set this at creation time, and check
+ * for consistent minorversion use throughout:
+ */
+ new->cl_minorversion = 0;
+ gen_callback(new, setclid, rqstp);
+ add_to_unconfirmed(new, strhashval);
+ setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
+ setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
+ memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
+ status = nfs_ok;
+out:
+ nfs4_unlock_state();
+ return status;
+}
+
+
+/*
+ * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has
+ * a description of SETCLIENTID_CONFIRM request processing consisting of 4
+ * bullets, labeled as CASE1 - CASE4 below.
+ */
+__be32
+nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_setclientid_confirm *setclientid_confirm)
+{
+ struct sockaddr *sa = svc_addr(rqstp);
+ struct nfs4_client *conf, *unconf;
+ nfs4_verifier confirm = setclientid_confirm->sc_confirm;
+ clientid_t * clid = &setclientid_confirm->sc_clientid;
+ __be32 status;
+
+ if (STALE_CLIENTID(clid))
+ return nfserr_stale_clientid;
+ /*
+ * XXX The Duplicate Request Cache (DRC) has been checked (??)
+ * We get here on a DRC miss.
+ */
+
+ nfs4_lock_state();
+
+ conf = find_confirmed_client(clid);
+ unconf = find_unconfirmed_client(clid);
+
+ status = nfserr_clid_inuse;
+ if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa))
+ goto out;
+ if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa))
+ goto out;
+
+ /*
+ * section 14.2.34 of RFC 3530 has a description of
+ * SETCLIENTID_CONFIRM request processing consisting
+ * of 4 bullet points, labeled as CASE1 - CASE4 below.
+ */
+ if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) {
+ /*
+ * RFC 3530 14.2.34 CASE 1:
+ * callback update
+ */
+ if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
+ status = nfserr_clid_inuse;
+ else {
+ nfsd4_change_callback(conf, &unconf->cl_cb_conn);
+ nfsd4_probe_callback(conf);
+ expire_client(unconf);
+ status = nfs_ok;
+
+ }
+ } else if (conf && !unconf) {
+ /*
+ * RFC 3530 14.2.34 CASE 2:
+ * probable retransmitted request; play it safe and
+ * do nothing.
+ */
+ if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
+ status = nfserr_clid_inuse;
+ else
+ status = nfs_ok;
+ } else if (!conf && unconf
+ && same_verf(&unconf->cl_confirm, &confirm)) {
+ /*
+ * RFC 3530 14.2.34 CASE 3:
+ * Normal case; new or rebooted client:
+ */
+ if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
+ status = nfserr_clid_inuse;
+ } else {
+ unsigned int hash =
+ clientstr_hashval(unconf->cl_recdir);
+ conf = find_confirmed_client_by_str(unconf->cl_recdir,
+ hash);
+ if (conf) {
+ nfsd4_remove_clid_dir(conf);
+ expire_client(conf);
+ }
+ move_to_confirmed(unconf);
+ conf = unconf;
+ nfsd4_probe_callback(conf);
+ status = nfs_ok;
+ }
+ } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
+ && (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
+ &confirm)))) {
+ /*
+ * RFC 3530 14.2.34 CASE 4:
+ * Client probably hasn't noticed that we rebooted yet.
+ */
+ status = nfserr_stale_clientid;
+ } else {
+ /* check that we have hit one of the cases...*/
+ status = nfserr_clid_inuse;
+ }
+out:
+ nfs4_unlock_state();
+ return status;
+}
+
+static struct nfs4_file *nfsd4_alloc_file(void)
+{
+ return kmem_cache_alloc(file_slab, GFP_KERNEL);
+}
+
+/* OPEN Share state helper functions */
+static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
+{
+ unsigned int hashval = file_hashval(ino);
+
+ atomic_set(&fp->fi_ref, 1);
+ INIT_LIST_HEAD(&fp->fi_hash);
+ INIT_LIST_HEAD(&fp->fi_stateids);
+ INIT_LIST_HEAD(&fp->fi_delegations);
+ fp->fi_inode = igrab(ino);
+ fp->fi_had_conflict = false;
+ fp->fi_lease = NULL;
+ memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
+ memset(fp->fi_access, 0, sizeof(fp->fi_access));
+ spin_lock(&recall_lock);
+ list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+ spin_unlock(&recall_lock);
+}
+
+static void
+nfsd4_free_slab(struct kmem_cache **slab)
+{
+ if (*slab == NULL)
+ return;
+ kmem_cache_destroy(*slab);
+ *slab = NULL;
+}
+
+void
+nfsd4_free_slabs(void)
+{
+ nfsd4_free_slab(&openowner_slab);
+ nfsd4_free_slab(&lockowner_slab);
+ nfsd4_free_slab(&file_slab);
+ nfsd4_free_slab(&stateid_slab);
+ nfsd4_free_slab(&deleg_slab);
+}
+
+int
+nfsd4_init_slabs(void)
+{
+ openowner_slab = kmem_cache_create("nfsd4_openowners",
+ sizeof(struct nfs4_openowner), 0, 0, NULL);
+ if (openowner_slab == NULL)
+ goto out_nomem;
+ lockowner_slab = kmem_cache_create("nfsd4_lockowners",
+ sizeof(struct nfs4_openowner), 0, 0, NULL);
+ if (lockowner_slab == NULL)
+ goto out_nomem;
+ file_slab = kmem_cache_create("nfsd4_files",
+ sizeof(struct nfs4_file), 0, 0, NULL);
+ if (file_slab == NULL)
+ goto out_nomem;
+ stateid_slab = kmem_cache_create("nfsd4_stateids",
+ sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
+ if (stateid_slab == NULL)
+ goto out_nomem;
+ deleg_slab = kmem_cache_create("nfsd4_delegations",
+ sizeof(struct nfs4_delegation), 0, 0, NULL);
+ if (deleg_slab == NULL)
+ goto out_nomem;
+ return 0;
+out_nomem:
+ nfsd4_free_slabs();
+ dprintk("nfsd4: out of memory while initializing nfsv4\n");
+ return -ENOMEM;
+}
+
+void nfs4_free_openowner(struct nfs4_openowner *oo)
+{
+ kfree(oo->oo_owner.so_owner.data);
+ kmem_cache_free(openowner_slab, oo);
+}
+
+void nfs4_free_lockowner(struct nfs4_lockowner *lo)
+{
+ kfree(lo->lo_owner.so_owner.data);
+ kmem_cache_free(lockowner_slab, lo);
+}
+
+static void init_nfs4_replay(struct nfs4_replay *rp)
+{
+ rp->rp_status = nfserr_serverfault;
+ rp->rp_buflen = 0;
+ rp->rp_buf = rp->rp_ibuf;
+}
+
+static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp)
+{
+ struct nfs4_stateowner *sop;
+
+ sop = kmem_cache_alloc(slab, GFP_KERNEL);
+ if (!sop)
+ return NULL;
+
+ sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
+ if (!sop->so_owner.data) {
+ kmem_cache_free(slab, sop);
+ return NULL;
+ }
+ sop->so_owner.len = owner->len;
+
+ INIT_LIST_HEAD(&sop->so_stateids);
+ sop->so_client = clp;
+ init_nfs4_replay(&sop->so_replay);
+ return sop;
+}
+
+static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
+{
+ list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+ list_add(&oo->oo_perclient, &clp->cl_openowners);
+}
+
+static struct nfs4_openowner *
+alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) {
+ struct nfs4_openowner *oo;
+
+ oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
+ if (!oo)
+ return NULL;
+ oo->oo_owner.so_is_open_owner = 1;
+ oo->oo_owner.so_seqid = open->op_seqid;
+ oo->oo_flags = NFS4_OO_NEW;
+ oo->oo_time = 0;
+ oo->oo_last_closed_stid = NULL;
+ INIT_LIST_HEAD(&oo->oo_close_lru);
+ hash_openowner(oo, clp, strhashval);
+ return oo;
+}
+
+static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
+ struct nfs4_openowner *oo = open->op_openowner;
+ struct nfs4_client *clp = oo->oo_owner.so_client;
+
+ init_stid(&stp->st_stid, clp, NFS4_OPEN_STID);
+ INIT_LIST_HEAD(&stp->st_lockowners);
+ list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
+ list_add(&stp->st_perfile, &fp->fi_stateids);
+ stp->st_stateowner = &oo->oo_owner;
+ get_nfs4_file(fp);
+ stp->st_file = fp;
+ stp->st_access_bmap = 0;
+ stp->st_deny_bmap = 0;
+ __set_bit(open->op_share_access, &stp->st_access_bmap);
+ __set_bit(open->op_share_deny, &stp->st_deny_bmap);
+ stp->st_openstp = NULL;
+}
+
+static void
+move_to_close_lru(struct nfs4_openowner *oo)
+{
+ dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
+
+ list_move_tail(&oo->oo_close_lru, &close_lru);
+ oo->oo_time = get_seconds();
+}
+
+static int
+same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
+ clientid_t *clid)
+{
+ return (sop->so_owner.len == owner->len) &&
+ 0 == memcmp(sop->so_owner.data, owner->data, owner->len) &&
+ (sop->so_client->cl_clientid.cl_id == clid->cl_id);
+}
+
+static struct nfs4_openowner *
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
+{
+ struct nfs4_stateowner *so;
+ struct nfs4_openowner *oo;
+
+ list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+ if (!so->so_is_open_owner)
+ continue;
+ if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
+ oo = openowner(so);
+ renew_client(oo->oo_owner.so_client);
+ return oo;
+ }
+ }
+ return NULL;
+}
+
+/* search file_hashtbl[] for file */
+static struct nfs4_file *
+find_file(struct inode *ino)
+{
+ unsigned int hashval = file_hashval(ino);
+ struct nfs4_file *fp;
+
+ spin_lock(&recall_lock);
+ list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
+ if (fp->fi_inode == ino) {
+ get_nfs4_file(fp);
+ spin_unlock(&recall_lock);
+ return fp;
+ }
+ }
+ spin_unlock(&recall_lock);
+ return NULL;
+}
+
+/*
+ * Called to check deny when READ with all zero stateid or
+ * WRITE with all zero or all one stateid
+ */
+static __be32
+nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+{
+ struct inode *ino = current_fh->fh_dentry->d_inode;
+ struct nfs4_file *fp;
+ struct nfs4_ol_stateid *stp;
+ __be32 ret;
+
+ dprintk("NFSD: nfs4_share_conflict\n");
+
+ fp = find_file(ino);
+ if (!fp)
+ return nfs_ok;
+ ret = nfserr_locked;
+ /* Search for conflicting share reservations */
+ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
+ if (test_bit(deny_type, &stp->st_deny_bmap) ||
+ test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
+ goto out;
+ }
+ ret = nfs_ok;
+out:
+ put_nfs4_file(fp);
+ return ret;
+}
+
+static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
+{
+ /* We're assuming the state code never drops its reference
+ * without first removing the lease. Since we're in this lease
+ * callback (and since the lease code is serialized by the kernel
+ * lock) we know the server hasn't removed the lease yet, we know
+ * it's safe to take a reference: */
+ atomic_inc(&dp->dl_count);
+
+ list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+
+ /* only place dl_time is set. protected by lock_flocks*/
+ dp->dl_time = get_seconds();
+
+ nfsd4_cb_recall(dp);
+}
+
+/* Called from break_lease() with lock_flocks() held. */
+static void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+ struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
+ struct nfs4_delegation *dp;
+
+ BUG_ON(!fp);
+ /* We assume break_lease is only called once per lease: */
+ BUG_ON(fp->fi_had_conflict);
+ /*
+ * We don't want the locks code to timeout the lease for us;
+ * we'll remove it ourself if a delegation isn't returned
+ * in time:
+ */
+ fl->fl_break_time = 0;
+
+ spin_lock(&recall_lock);
+ fp->fi_had_conflict = true;
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ nfsd_break_one_deleg(dp);
+ spin_unlock(&recall_lock);
+}
+
+static
+int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
+{
+ if (arg & F_UNLCK)
+ return lease_modify(onlist, arg);
+ else
+ return -EAGAIN;
+}
+
+static const struct lock_manager_operations nfsd_lease_mng_ops = {
+ .lm_break = nfsd_break_deleg_cb,
+ .lm_change = nfsd_change_deleg_cb,
+};
+
+static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid)
+{
+ if (nfsd4_has_session(cstate))
+ return nfs_ok;
+ if (seqid == so->so_seqid - 1)
+ return nfserr_replay_me;
+ if (seqid == so->so_seqid)
+ return nfs_ok;
+ return nfserr_bad_seqid;
+}
+
+__be32
+nfsd4_process_open1(struct nfsd4_compound_state *cstate,
+ struct nfsd4_open *open)
+{
+ clientid_t *clientid = &open->op_clientid;
+ struct nfs4_client *clp = NULL;
+ unsigned int strhashval;
+ struct nfs4_openowner *oo = NULL;
+ __be32 status;
+
+ if (STALE_CLIENTID(&open->op_clientid))
+ return nfserr_stale_clientid;
+ /*
+ * In case we need it later, after we've already created the
+ * file and don't want to risk a further failure:
+ */
+ open->op_file = nfsd4_alloc_file();
+ if (open->op_file == NULL)
+ return nfserr_jukebox;
+
+ strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
+ oo = find_openstateowner_str(strhashval, open);
+ open->op_openowner = oo;
+ if (!oo) {
+ clp = find_confirmed_client(clientid);
+ if (clp == NULL)
+ return nfserr_expired;
+ goto new_owner;
+ }
+ if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ /* Replace unconfirmed owners without checking for replay. */
+ clp = oo->oo_owner.so_client;
+ release_openowner(oo);
+ open->op_openowner = NULL;
+ goto new_owner;
+ }
+ status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
+ if (status)
+ return status;
+ clp = oo->oo_owner.so_client;
+ goto alloc_stateid;
+new_owner:
+ oo = alloc_init_open_stateowner(strhashval, clp, open);
+ if (oo == NULL)
+ return nfserr_jukebox;
+ open->op_openowner = oo;
+alloc_stateid:
+ open->op_stp = nfs4_alloc_stateid(clp);
+ if (!open->op_stp)
+ return nfserr_jukebox;
+ return nfs_ok;
+}
+
+static inline __be32
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+ if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+ return nfserr_openmode;
+ else
+ return nfs_ok;
+}
+
+static int share_access_to_flags(u32 share_access)
+{
+ share_access &= ~NFS4_SHARE_WANT_MASK;
+
+ return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
+}
+
+static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
+{
+ struct nfs4_stid *ret;
+
+ ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
+ if (!ret)
+ return NULL;
+ return delegstateid(ret);
+}
+
+static bool nfsd4_is_deleg_cur(struct nfsd4_open *open)
+{
+ return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
+ open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH;
+}
+
+static __be32
+nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open,
+ struct nfs4_delegation **dp)
+{
+ int flags;
+ __be32 status = nfserr_bad_stateid;
+
+ *dp = find_deleg_stateid(cl, &open->op_delegate_stateid);
+ if (*dp == NULL)
+ goto out;
+ flags = share_access_to_flags(open->op_share_access);
+ status = nfs4_check_delegmode(*dp, flags);
+ if (status)
+ *dp = NULL;
+out:
+ if (!nfsd4_is_deleg_cur(open))
+ return nfs_ok;
+ if (status)
+ return status;
+ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ return nfs_ok;
+}
+
+static __be32
+nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp)
+{
+ struct nfs4_ol_stateid *local;
+ struct nfs4_openowner *oo = open->op_openowner;
+
+ list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
+ /* ignore lock owners */
+ if (local->st_stateowner->so_is_open_owner == 0)
+ continue;
+ /* remember if we have seen this open owner */
+ if (local->st_stateowner == &oo->oo_owner)
+ *stpp = local;
+ /* check for conflicting share reservations */
+ if (!test_share(local, open))
+ return nfserr_share_denied;
+ }
+ return nfs_ok;
+}
+
+static void nfs4_free_stateid(struct nfs4_ol_stateid *s)
+{
+ kmem_cache_free(stateid_slab, s);
+}
+
+static inline int nfs4_access_to_access(u32 nfs4_access)
+{
+ int flags = 0;
+
+ if (nfs4_access & NFS4_SHARE_ACCESS_READ)
+ flags |= NFSD_MAY_READ;
+ if (nfs4_access & NFS4_SHARE_ACCESS_WRITE)
+ flags |= NFSD_MAY_WRITE;
+ return flags;
+}
+
+static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ struct svc_fh *cur_fh, struct nfsd4_open *open)
+{
+ __be32 status;
+ int oflag = nfs4_access_to_omode(open->op_share_access);
+ int access = nfs4_access_to_access(open->op_share_access);
+
+ if (!fp->fi_fds[oflag]) {
+ status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
+ &fp->fi_fds[oflag]);
+ if (status)
+ return status;
+ }
+ nfs4_file_get_access(fp, oflag);
+
+ return nfs_ok;
+}
+
+static inline __be32
+nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
+ struct nfsd4_open *open)
+{
+ struct iattr iattr = {
+ .ia_valid = ATTR_SIZE,
+ .ia_size = 0,
+ };
+ if (!open->op_truncate)
+ return 0;
+ if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+ return nfserr_inval;
+ return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+}
+
+static __be32
+nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
+{
+ u32 op_share_access = open->op_share_access;
+ bool new_access;
+ __be32 status;
+
+ new_access = !test_bit(op_share_access, &stp->st_access_bmap);
+ if (new_access) {
+ status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open);
+ if (status)
+ return status;
+ }
+ status = nfsd4_truncate(rqstp, cur_fh, open);
+ if (status) {
+ if (new_access) {
+ int oflag = nfs4_access_to_omode(op_share_access);
+ nfs4_file_put_access(fp, oflag);
+ }
+ return status;
+ }
+ /* remember the open */
+ __set_bit(op_share_access, &stp->st_access_bmap);
+ __set_bit(open->op_share_deny, &stp->st_deny_bmap);
+
+ return nfs_ok;
+}
+
+
+static void
+nfs4_set_claim_prev(struct nfsd4_open *open)
+{
+ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ open->op_openowner->oo_owner.so_client->cl_firststate = 1;
+}
+
+/* Should we give out recallable state?: */
+static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
+{
+ if (clp->cl_cb_state == NFSD4_CB_UP)
+ return true;
+ /*
+ * In the sessions case, since we don't have to establish a
+ * separate connection for callbacks, we assume it's OK
+ * until we hear otherwise:
+ */
+ return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
+}
+
+static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
+{
+ struct file_lock *fl;
+
+ fl = locks_alloc_lock();
+ if (!fl)
+ return NULL;
+ locks_init_lock(fl);
+ fl->fl_lmops = &nfsd_lease_mng_ops;
+ fl->fl_flags = FL_LEASE;
+ fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+ fl->fl_end = OFFSET_MAX;
+ fl->fl_owner = (fl_owner_t)(dp->dl_file);
+ fl->fl_pid = current->tgid;
+ return fl;
+}
+
+static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
+{
+ struct nfs4_file *fp = dp->dl_file;
+ struct file_lock *fl;
+ int status;
+
+ fl = nfs4_alloc_init_lease(dp, flag);
+ if (!fl)
+ return -ENOMEM;
+ fl->fl_file = find_readable_file(fp);
+ list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+ status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
+ if (status) {
+ list_del_init(&dp->dl_perclnt);
+ locks_free_lock(fl);
+ return -ENOMEM;
+ }
+ fp->fi_lease = fl;
+ fp->fi_deleg_file = fl->fl_file;
+ get_file(fp->fi_deleg_file);
+ atomic_set(&fp->fi_delegees, 1);
+ list_add(&dp->dl_perfile, &fp->fi_delegations);
+ return 0;
+}
+
+static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
+{
+ struct nfs4_file *fp = dp->dl_file;
+
+ if (!fp->fi_lease)
+ return nfs4_setlease(dp, flag);
+ spin_lock(&recall_lock);
+ if (fp->fi_had_conflict) {
+ spin_unlock(&recall_lock);
+ return -EAGAIN;
+ }
+ atomic_inc(&fp->fi_delegees);
+ list_add(&dp->dl_perfile, &fp->fi_delegations);
+ spin_unlock(&recall_lock);
+ list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+ return 0;
+}
+
+/*
+ * Attempt to hand out a delegation.
+ */
+static void
+nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
+{
+ struct nfs4_delegation *dp;
+ struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner);
+ int cb_up;
+ int status, flag = 0;
+
+ cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
+ flag = NFS4_OPEN_DELEGATE_NONE;
+ open->op_recall = 0;
+ switch (open->op_claim_type) {
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ if (!cb_up)
+ open->op_recall = 1;
+ flag = open->op_delegate_type;
+ if (flag == NFS4_OPEN_DELEGATE_NONE)
+ goto out;
+ break;
+ case NFS4_OPEN_CLAIM_NULL:
+ /* Let's not give out any delegations till everyone's
+ * had the chance to reclaim theirs.... */
+ if (locks_in_grace())
+ goto out;
+ if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
+ goto out;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ flag = NFS4_OPEN_DELEGATE_WRITE;
+ else
+ flag = NFS4_OPEN_DELEGATE_READ;
+ break;
+ default:
+ goto out;
+ }
+
+ dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag);
+ if (dp == NULL)
+ goto out_no_deleg;
+ status = nfs4_set_delegation(dp, flag);
+ if (status)
+ goto out_free;
+
+ memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
+
+ dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
+ STATEID_VAL(&dp->dl_stid.sc_stateid));
+out:
+ if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
+ && flag == NFS4_OPEN_DELEGATE_NONE
+ && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+ dprintk("NFSD: WARNING: refusing delegation reclaim\n");
+ open->op_delegate_type = flag;
+ return;
+out_free:
+ nfs4_put_delegation(dp);
+out_no_deleg:
+ flag = NFS4_OPEN_DELEGATE_NONE;
+ goto out;
+}
+
+/*
+ * called with nfs4_lock_state() held.
+ */
+__be32
+nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
+ struct nfs4_file *fp = NULL;
+ struct inode *ino = current_fh->fh_dentry->d_inode;
+ struct nfs4_ol_stateid *stp = NULL;
+ struct nfs4_delegation *dp = NULL;
+ __be32 status;
+
+ /*
+ * Lookup file; if found, lookup stateid and check open request,
+ * and check for delegations in the process of being recalled.
+ * If not found, create the nfs4_file struct
+ */
+ fp = find_file(ino);
+ if (fp) {
+ if ((status = nfs4_check_open(fp, open, &stp)))
+ goto out;
+ status = nfs4_check_deleg(cl, fp, open, &dp);
+ if (status)
+ goto out;
+ } else {
+ status = nfserr_bad_stateid;
+ if (nfsd4_is_deleg_cur(open))
+ goto out;
+ status = nfserr_jukebox;
+ fp = open->op_file;
+ open->op_file = NULL;
+ nfsd4_init_file(fp, ino);
+ }
+
+ /*
+ * OPEN the file, or upgrade an existing OPEN.
+ * If truncate fails, the OPEN fails.
+ */
+ if (stp) {
+ /* Stateid was found, this is an OPEN upgrade */
+ status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
+ if (status)
+ goto out;
+ } else {
+ status = nfs4_get_vfs_file(rqstp, fp, current_fh, open);
+ if (status)
+ goto out;
+ stp = open->op_stp;
+ open->op_stp = NULL;
+ init_open_stateid(stp, fp, open);
+ status = nfsd4_truncate(rqstp, current_fh, open);
+ if (status) {
+ release_open_stateid(stp);
+ goto out;
+ }
+ }
+ update_stateid(&stp->st_stid.sc_stateid);
+ memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+
+ if (nfsd4_has_session(&resp->cstate))
+ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+
+ /*
+ * Attempt to hand out a delegation. No error return, because the
+ * OPEN succeeds even if we fail.
+ */
+ nfs4_open_delegation(current_fh, open, stp);
+
+ status = nfs_ok;
+
+ dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
+ STATEID_VAL(&stp->st_stid.sc_stateid));
+out:
+ if (fp)
+ put_nfs4_file(fp);
+ if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+ nfs4_set_claim_prev(open);
+ /*
+ * To finish the open response, we just need to set the rflags.
+ */
+ open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
+ if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
+ !nfsd4_has_session(&resp->cstate))
+ open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
+
+ return status;
+}
+
+void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status)
+{
+ if (open->op_openowner) {
+ struct nfs4_openowner *oo = open->op_openowner;
+
+ if (!list_empty(&oo->oo_owner.so_stateids))
+ list_del_init(&oo->oo_close_lru);
+ if (oo->oo_flags & NFS4_OO_NEW) {
+ if (status) {
+ release_openowner(oo);
+ open->op_openowner = NULL;
+ } else
+ oo->oo_flags &= ~NFS4_OO_NEW;
+ }
+ }
+ if (open->op_file)
+ nfsd4_free_file(open->op_file);
+ if (open->op_stp)
+ nfs4_free_stateid(open->op_stp);
+}
+
+__be32
+nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ clientid_t *clid)
+{
+ struct nfs4_client *clp;
+ __be32 status;
+
+ nfs4_lock_state();
+ dprintk("process_renew(%08x/%08x): starting\n",
+ clid->cl_boot, clid->cl_id);
+ status = nfserr_stale_clientid;
+ if (STALE_CLIENTID(clid))
+ goto out;
+ clp = find_confirmed_client(clid);
+ status = nfserr_expired;
+ if (clp == NULL) {
+ /* We assume the client took too long to RENEW. */
+ dprintk("nfsd4_renew: clientid not found!\n");
+ goto out;
+ }
+ status = nfserr_cb_path_down;
+ if (!list_empty(&clp->cl_delegations)
+ && clp->cl_cb_state != NFSD4_CB_UP)
+ goto out;
+ status = nfs_ok;
+out:
+ nfs4_unlock_state();
+ return status;
+}
+
+static struct lock_manager nfsd4_manager = {
+};
+
+static void
+nfsd4_end_grace(void)
+{
+ dprintk("NFSD: end of grace period\n");
+ nfsd4_recdir_purge_old();
+ locks_end_grace(&nfsd4_manager);
+ /*
+ * Now that every NFSv4 client has had the chance to recover and
+ * to see the (possibly new, possibly shorter) lease time, we
+ * can safely set the next grace time to the current lease time:
+ */
+ nfsd4_grace = nfsd4_lease;
+}
+
+static time_t
+nfs4_laundromat(void)
+{
+ struct nfs4_client *clp;
+ struct nfs4_openowner *oo;
+ struct nfs4_delegation *dp;
+ struct list_head *pos, *next, reaplist;
+ time_t cutoff = get_seconds() - nfsd4_lease;
+ time_t t, clientid_val = nfsd4_lease;
+ time_t u, test_val = nfsd4_lease;
+
+ nfs4_lock_state();
+
+ dprintk("NFSD: laundromat service - starting\n");
+ if (locks_in_grace())
+ nfsd4_end_grace();
+ INIT_LIST_HEAD(&reaplist);
+ spin_lock(&client_lock);
+ list_for_each_safe(pos, next, &client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+ t = clp->cl_time - cutoff;
+ if (clientid_val > t)
+ clientid_val = t;
+ break;
+ }
+ if (atomic_read(&clp->cl_refcount)) {
+ dprintk("NFSD: client in use (clientid %08x)\n",
+ clp->cl_clientid.cl_id);
+ continue;
+ }
+ unhash_client_locked(clp);
+ list_add(&clp->cl_lru, &reaplist);
+ }
+ spin_unlock(&client_lock);
+ list_for_each_safe(pos, next, &reaplist) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ dprintk("NFSD: purging unused client (clientid %08x)\n",
+ clp->cl_clientid.cl_id);
+ nfsd4_remove_clid_dir(clp);
+ expire_client(clp);
+ }
+ spin_lock(&recall_lock);
+ list_for_each_safe(pos, next, &del_recall_lru) {
+ dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+ if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
+ u = dp->dl_time - cutoff;
+ if (test_val > u)
+ test_val = u;
+ break;
+ }
+ list_move(&dp->dl_recall_lru, &reaplist);
+ }
+ spin_unlock(&recall_lock);
+ list_for_each_safe(pos, next, &reaplist) {
+ dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+ unhash_delegation(dp);
+ }
+ test_val = nfsd4_lease;
+ list_for_each_safe(pos, next, &close_lru) {
+ oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
+ if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
+ u = oo->oo_time - cutoff;
+ if (test_val > u)
+ test_val = u;
+ break;
+ }
+ release_openowner(oo);
+ }
+ if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
+ clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
+ nfs4_unlock_state();
+ return clientid_val;
+}
+
+static struct workqueue_struct *laundry_wq;
+static void laundromat_main(struct work_struct *);
+static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
+
+static void
+laundromat_main(struct work_struct *not_used)
+{
+ time_t t;
+
+ t = nfs4_laundromat();
+ dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
+ queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
+}
+
+static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
+{
+ if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode)
+ return nfserr_bad_stateid;
+ return nfs_ok;
+}
+
+static int
+STALE_STATEID(stateid_t *stateid)
+{
+ if (stateid->si_opaque.so_clid.cl_boot == boot_time)
+ return 0;
+ dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+ STATEID_VAL(stateid));
+ return 1;
+}
+
+static inline int
+access_permit_read(unsigned long access_bmap)
+{
+ return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
+ test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) ||
+ test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap);
+}
+
+static inline int
+access_permit_write(unsigned long access_bmap)
+{
+ return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
+ test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
+}
+
+static
+__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
+{
+ __be32 status = nfserr_openmode;
+
+ /* For lock stateid's, we test the parent open, not the lock: */
+ if (stp->st_openstp)
+ stp = stp->st_openstp;
+ if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
+ goto out;
+ if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
+ goto out;
+ status = nfs_ok;
+out:
+ return status;
+}
+
+static inline __be32
+check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
+{
+ if (ONE_STATEID(stateid) && (flags & RD_STATE))
+ return nfs_ok;
+ else if (locks_in_grace()) {
+ /* Answer in remaining cases depends on existence of
+ * conflicting state; so we must wait out the grace period. */
+ return nfserr_grace;
+ } else if (flags & WR_STATE)
+ return nfs4_share_conflict(current_fh,
+ NFS4_SHARE_DENY_WRITE);
+ else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
+ return nfs4_share_conflict(current_fh,
+ NFS4_SHARE_DENY_READ);
+}
+
+/*
+ * Allow READ/WRITE during grace period on recovered state only for files
+ * that are not able to provide mandatory locking.
+ */
+static inline int
+grace_disallows_io(struct inode *inode)
+{
+ return locks_in_grace() && mandatory_lock(inode);
+}
+
+/* Returns true iff a is later than b: */
+static bool stateid_generation_after(stateid_t *a, stateid_t *b)
+{
+ return (s32)a->si_generation - (s32)b->si_generation > 0;
+}
+
+static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
+{
+ /*
+ * When sessions are used the stateid generation number is ignored
+ * when it is zero.
+ */
+ if (has_session && in->si_generation == 0)
+ return nfs_ok;
+
+ if (in->si_generation == ref->si_generation)
+ return nfs_ok;
+
+ /* If the client sends us a stateid from the future, it's buggy: */
+ if (stateid_generation_after(in, ref))
+ return nfserr_bad_stateid;
+ /*
+ * However, we could see a stateid from the past, even from a
+ * non-buggy client. For example, if the client sends a lock
+ * while some IO is outstanding, the lock may bump si_generation
+ * while the IO is still in flight. The client could avoid that
+ * situation by waiting for responses on all the IO requests,
+ * but better performance may result in retrying IO that
+ * receives an old_stateid error if requests are rarely
+ * reordered in flight:
+ */
+ return nfserr_old_stateid;
+}
+
+__be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
+{
+ struct nfs4_stid *s;
+ struct nfs4_ol_stateid *ols;
+ __be32 status;
+
+ if (STALE_STATEID(stateid))
+ return nfserr_stale_stateid;
+
+ s = find_stateid(cl, stateid);
+ if (!s)
+ return nfserr_stale_stateid;
+ status = check_stateid_generation(stateid, &s->sc_stateid, 1);
+ if (status)
+ return status;
+ if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID)))
+ return nfs_ok;
+ ols = openlockstateid(s);
+ if (ols->st_stateowner->so_is_open_owner
+ && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+ return nfserr_bad_stateid;
+ return nfs_ok;
+}
+
+static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s)
+{
+ struct nfs4_client *cl;
+
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ return nfserr_bad_stateid;
+ if (STALE_STATEID(stateid))
+ return nfserr_stale_stateid;
+ cl = find_confirmed_client(&stateid->si_opaque.so_clid);
+ if (!cl)
+ return nfserr_expired;
+ *s = find_stateid_by_type(cl, stateid, typemask);
+ if (!*s)
+ return nfserr_bad_stateid;
+ return nfs_ok;
+
+}
+
+/*
+* Checks for stateid operations
+*/
+__be32
+nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
+ stateid_t *stateid, int flags, struct file **filpp)
+{
+ struct nfs4_stid *s;
+ struct nfs4_ol_stateid *stp = NULL;
+ struct nfs4_delegation *dp = NULL;
+ struct svc_fh *current_fh = &cstate->current_fh;
+ struct inode *ino = current_fh->fh_dentry->d_inode;
+ __be32 status;
+
+ if (filpp)
+ *filpp = NULL;
+
+ if (grace_disallows_io(ino))
+ return nfserr_grace;
+
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ return check_special_stateids(current_fh, stateid, flags);
+
+ status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s);
+ if (status)
+ return status;
+ status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
+ if (status)
+ goto out;
+ switch (s->sc_type) {
+ case NFS4_DELEG_STID:
+ dp = delegstateid(s);
+ status = nfs4_check_delegmode(dp, flags);
+ if (status)
+ goto out;
+ if (filpp) {
+ *filpp = dp->dl_file->fi_deleg_file;
+ BUG_ON(!*filpp);
+ }
+ break;
+ case NFS4_OPEN_STID:
+ case NFS4_LOCK_STID:
+ stp = openlockstateid(s);
+ status = nfs4_check_fh(current_fh, stp);
+ if (status)
+ goto out;
+ if (stp->st_stateowner->so_is_open_owner
+ && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+ goto out;
+ status = nfs4_check_openmode(stp, flags);
+ if (status)
+ goto out;
+ if (filpp) {
+ if (flags & RD_STATE)
+ *filpp = find_readable_file(stp->st_file);
+ else
+ *filpp = find_writeable_file(stp->st_file);
+ }
+ break;
+ default:
+ return nfserr_bad_stateid;
+ }
+ status = nfs_ok;
+out:
+ return status;
+}
+
+static __be32
+nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
+{
+ if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner)))
+ return nfserr_locks_held;
+ release_lock_stateid(stp);
+ return nfs_ok;
+}
+
+/*
+ * Test if the stateid is valid
+ */
+__be32
+nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_test_stateid *test_stateid)
+{
+ /* real work is done during encoding */
+ return nfs_ok;
+}
+
+__be32
+nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_free_stateid *free_stateid)
+{
+ stateid_t *stateid = &free_stateid->fr_stateid;
+ struct nfs4_stid *s;
+ struct nfs4_client *cl = cstate->session->se_client;
+ __be32 ret = nfserr_bad_stateid;
+
+ nfs4_lock_state();
+ s = find_stateid(cl, stateid);
+ if (!s)
+ goto out;
+ switch (s->sc_type) {
+ case NFS4_DELEG_STID:
+ ret = nfserr_locks_held;
+ goto out;
+ case NFS4_OPEN_STID:
+ case NFS4_LOCK_STID:
+ ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+ if (ret)
+ goto out;
+ if (s->sc_type == NFS4_LOCK_STID)
+ ret = nfsd4_free_lock_stateid(openlockstateid(s));
+ else
+ ret = nfserr_locks_held;
+ break;
+ default:
+ ret = nfserr_bad_stateid;
+ }
+out:
+ nfs4_unlock_state();
+ return ret;
+}
+
+static inline int
+setlkflg (int type)
+{
+ return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
+ RD_STATE : WR_STATE;
+}
+
+static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp)
+{
+ struct svc_fh *current_fh = &cstate->current_fh;
+ struct nfs4_stateowner *sop = stp->st_stateowner;
+ __be32 status;
+
+ status = nfsd4_check_seqid(cstate, sop, seqid);
+ if (status)
+ return status;
+ if (stp->st_stid.sc_type == NFS4_CLOSED_STID)
+ /*
+ * "Closed" stateid's exist *only* to return
+ * nfserr_replay_me from the previous step.
+ */
+ return nfserr_bad_stateid;
+ status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
+ if (status)
+ return status;
+ return nfs4_check_fh(current_fh, stp);
+}
+
+/*
+ * Checks for sequence id mutating operations.
+ */
+static __be32
+nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
+ stateid_t *stateid, char typemask,
+ struct nfs4_ol_stateid **stpp)
+{
+ __be32 status;
+ struct nfs4_stid *s;
+
+ dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
+ seqid, STATEID_VAL(stateid));
+
+ *stpp = NULL;
+ status = nfsd4_lookup_stateid(stateid, typemask, &s);
+ if (status)
+ return status;
+ *stpp = openlockstateid(s);
+ cstate->replay_owner = (*stpp)->st_stateowner;
+
+ return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp);
+}
+
+static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp)
+{
+ __be32 status;
+ struct nfs4_openowner *oo;
+
+ status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
+ NFS4_OPEN_STID, stpp);
+ if (status)
+ return status;
+ oo = openowner((*stpp)->st_stateowner);
+ if (!(oo->oo_flags & NFS4_OO_CONFIRMED))
+ return nfserr_bad_stateid;
+ return nfs_ok;
+}
+
+__be32
+nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_open_confirm *oc)
+{
+ __be32 status;
+ struct nfs4_openowner *oo;
+ struct nfs4_ol_stateid *stp;
+
+ dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
+ (int)cstate->current_fh.fh_dentry->d_name.len,
+ cstate->current_fh.fh_dentry->d_name.name);
+
+ status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
+ if (status)
+ return status;
+
+ nfs4_lock_state();
+
+ status = nfs4_preprocess_seqid_op(cstate,
+ oc->oc_seqid, &oc->oc_req_stateid,
+ NFS4_OPEN_STID, &stp);
+ if (status)
+ goto out;
+ oo = openowner(stp->st_stateowner);
+ status = nfserr_bad_stateid;
+ if (oo->oo_flags & NFS4_OO_CONFIRMED)
+ goto out;
+ oo->oo_flags |= NFS4_OO_CONFIRMED;
+ update_stateid(&stp->st_stid.sc_stateid);
+ memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
+ __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
+
+ nfsd4_create_clid_dir(oo->oo_owner.so_client);
+ status = nfs_ok;
+out:
+ if (!cstate->replay_owner)
+ nfs4_unlock_state();
+ return status;
+}
+
+static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access)
+{
+ if (!test_bit(access, &stp->st_access_bmap))
+ return;
+ nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access));
+ __clear_bit(access, &stp->st_access_bmap);
+}
+
+static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access)
+{
+ switch (to_access) {
+ case NFS4_SHARE_ACCESS_READ:
+ nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE);
+ nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
+ break;
+ case NFS4_SHARE_ACCESS_WRITE:
+ nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ);
+ nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
+ break;
+ case NFS4_SHARE_ACCESS_BOTH:
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void
+reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
+{
+ int i;
+ for (i = 0; i < 4; i++) {
+ if ((i & deny) != i)
+ __clear_bit(i, bmap);
+ }
+}
+
+__be32
+nfsd4_open_downgrade(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_open_downgrade *od)
+{
+ __be32 status;
+ struct nfs4_ol_stateid *stp;
+
+ dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",
+ (int)cstate->current_fh.fh_dentry->d_name.len,
+ cstate->current_fh.fh_dentry->d_name.name);
+
+ /* We don't yet support WANT bits: */
+ od->od_share_access &= NFS4_SHARE_ACCESS_MASK;
+
+ nfs4_lock_state();
+ status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
+ &od->od_stateid, &stp);
+ if (status)
+ goto out;
+ status = nfserr_inval;
+ if (!test_bit(od->od_share_access, &stp->st_access_bmap)) {
+ dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n",
+ stp->st_access_bmap, od->od_share_access);
+ goto out;
+ }
+ if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) {
+ dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
+ stp->st_deny_bmap, od->od_share_deny);
+ goto out;
+ }
+ nfs4_stateid_downgrade(stp, od->od_share_access);
+
+ reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
+
+ update_stateid(&stp->st_stid.sc_stateid);
+ memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ status = nfs_ok;
+out:
+ if (!cstate->replay_owner)
+ nfs4_unlock_state();
+ return status;
+}
+
+void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so)
+{
+ struct nfs4_openowner *oo;
+ struct nfs4_ol_stateid *s;
+
+ if (!so->so_is_open_owner)
+ return;
+ oo = openowner(so);
+ s = oo->oo_last_closed_stid;
+ if (!s)
+ return;
+ if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) {
+ /* Release the last_closed_stid on the next seqid bump: */
+ oo->oo_flags |= NFS4_OO_PURGE_CLOSE;
+ return;
+ }
+ oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE;
+ release_last_closed_stateid(oo);
+}
+
+static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+{
+ unhash_open_stateid(s);
+ s->st_stid.sc_type = NFS4_CLOSED_STID;
+}
+
+/*
+ * nfs4_unlock_state() called after encode
+ */
+__be32
+nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_close *close)
+{
+ __be32 status;
+ struct nfs4_openowner *oo;
+ struct nfs4_ol_stateid *stp;
+
+ dprintk("NFSD: nfsd4_close on file %.*s\n",
+ (int)cstate->current_fh.fh_dentry->d_name.len,
+ cstate->current_fh.fh_dentry->d_name.name);
+
+ nfs4_lock_state();
+ status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
+ &close->cl_stateid,
+ NFS4_OPEN_STID|NFS4_CLOSED_STID,
+ &stp);
+ if (status)
+ goto out;
+ oo = openowner(stp->st_stateowner);
+ status = nfs_ok;
+ update_stateid(&stp->st_stid.sc_stateid);
+ memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+
+ nfsd4_close_open_stateid(stp);
+ oo->oo_last_closed_stid = stp;
+
+ /* place unused nfs4_stateowners on so_close_lru list to be
+ * released by the laundromat service after the lease period
+ * to enable us to handle CLOSE replay
+ */
+ if (list_empty(&oo->oo_owner.so_stateids))
+ move_to_close_lru(oo);
+out:
+ if (!cstate->replay_owner)
+ nfs4_unlock_state();
+ return status;
+}
+
+__be32
+nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_delegreturn *dr)
+{
+ struct nfs4_delegation *dp;
+ stateid_t *stateid = &dr->dr_stateid;
+ struct nfs4_stid *s;
+ struct inode *inode;
+ __be32 status;
+
+ if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
+ return status;
+ inode = cstate->current_fh.fh_dentry->d_inode;
+
+ nfs4_lock_state();
+ status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s);
+ if (status)
+ goto out;
+ dp = delegstateid(s);
+ status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate));
+ if (status)
+ goto out;
+
+ unhash_delegation(dp);
+out:
+ nfs4_unlock_state();
+
+ return status;
+}
+
+
+#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start))
+
+#define LOCKOWNER_INO_HASH_BITS 8
+#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
+#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
+
+static inline u64
+end_offset(u64 start, u64 len)
+{
+ u64 end;
+
+ end = start + len;
+ return end >= start ? end: NFS4_MAX_UINT64;
+}
+
+/* last octet in a range */
+static inline u64
+last_byte_offset(u64 start, u64 len)
+{
+ u64 end;
+
+ BUG_ON(!len);
+ end = start + len;
+ return end > start ? end - 1: NFS4_MAX_UINT64;
+}
+
+static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
+{
+ return (file_hashval(inode) + cl_id
+ + opaque_hashval(ownername->data, ownername->len))
+ & LOCKOWNER_INO_HASH_MASK;
+}
+
+static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];
+
+/*
+ * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
+ * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
+ * byte, because of sign extension problems. Since NFSv4 calls for 64-bit
+ * locking, this prevents us from being completely protocol-compliant. The
+ * real solution to this problem is to start using unsigned file offsets in
+ * the VFS, but this is a very deep change!
+ */
+static inline void
+nfs4_transform_lock_offset(struct file_lock *lock)
+{
+ if (lock->fl_start < 0)
+ lock->fl_start = OFFSET_MAX;
+ if (lock->fl_end < 0)
+ lock->fl_end = OFFSET_MAX;
+}
+
+/* Hack!: For now, we're defining this just so we can use a pointer to it
+ * as a unique cookie to identify our (NFSv4's) posix locks. */
+static const struct lock_manager_operations nfsd_posix_mng_ops = {
+};
+
+static inline void
+nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
+{
+ struct nfs4_lockowner *lo;
+
+ if (fl->fl_lmops == &nfsd_posix_mng_ops) {
+ lo = (struct nfs4_lockowner *) fl->fl_owner;
+ deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
+ lo->lo_owner.so_owner.len, GFP_KERNEL);
+ if (!deny->ld_owner.data)
+ /* We just don't care that much */
+ goto nevermind;
+ deny->ld_owner.len = lo->lo_owner.so_owner.len;
+ deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
+ } else {
+nevermind:
+ deny->ld_owner.len = 0;
+ deny->ld_owner.data = NULL;
+ deny->ld_clientid.cl_boot = 0;
+ deny->ld_clientid.cl_id = 0;
+ }
+ deny->ld_start = fl->fl_start;
+ deny->ld_length = NFS4_MAX_UINT64;
+ if (fl->fl_end != NFS4_MAX_UINT64)
+ deny->ld_length = fl->fl_end - fl->fl_start + 1;
+ deny->ld_type = NFS4_READ_LT;
+ if (fl->fl_type != F_RDLCK)
+ deny->ld_type = NFS4_WRITE_LT;
+}
+
+static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner)
+{
+ struct nfs4_ol_stateid *lst;
+
+ if (!same_owner_str(&lo->lo_owner, owner, clid))
+ return false;
+ lst = list_first_entry(&lo->lo_owner.so_stateids,
+ struct nfs4_ol_stateid, st_perstateowner);
+ return lst->st_file->fi_inode == inode;
+}
+
+static struct nfs4_lockowner *
+find_lockowner_str(struct inode *inode, clientid_t *clid,
+ struct xdr_netobj *owner)
+{
+ unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
+ struct nfs4_lockowner *lo;
+
+ list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
+ if (same_lockowner_ino(lo, inode, clid, owner))
+ return lo;
+ }
+ return NULL;
+}
+
+static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)
+{
+ struct inode *inode = open_stp->st_file->fi_inode;
+ unsigned int inohash = lockowner_ino_hashval(inode,
+ clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
+
+ list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+ list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
+ list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
+}
+
+/*
+ * Alloc a lock owner structure.
+ * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
+ * occurred.
+ *
+ * strhashval = ownerstr_hashval
+ */
+
+static struct nfs4_lockowner *
+alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) {
+ struct nfs4_lockowner *lo;
+
+ lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
+ if (!lo)
+ return NULL;
+ INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
+ lo->lo_owner.so_is_open_owner = 0;
+ /* It is the openowner seqid that will be incremented in encode in the
+ * case of new lockowners; so increment the lock seqid manually: */
+ lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1;
+ hash_lockowner(lo, strhashval, clp, open_stp);
+ return lo;
+}
+
+static struct nfs4_ol_stateid *
+alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp)
+{
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+
+ stp = nfs4_alloc_stateid(clp);
+ if (stp == NULL)
+ return NULL;
+ init_stid(&stp->st_stid, clp, NFS4_LOCK_STID);
+ list_add(&stp->st_perfile, &fp->fi_stateids);
+ list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
+ stp->st_stateowner = &lo->lo_owner;
+ get_nfs4_file(fp);
+ stp->st_file = fp;
+ stp->st_access_bmap = 0;
+ stp->st_deny_bmap = open_stp->st_deny_bmap;
+ stp->st_openstp = open_stp;
+ return stp;
+}
+
+static int
+check_lock_length(u64 offset, u64 length)
+{
+ return ((length == 0) || ((length != NFS4_MAX_UINT64) &&
+ LOFF_OVERFLOW(offset, length)));
+}
+
+static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
+{
+ struct nfs4_file *fp = lock_stp->st_file;
+ int oflag = nfs4_access_to_omode(access);
+
+ if (test_bit(access, &lock_stp->st_access_bmap))
+ return;
+ nfs4_file_get_access(fp, oflag);
+ __set_bit(access, &lock_stp->st_access_bmap);
+}
+
+__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new)
+{
+ struct nfs4_file *fi = ost->st_file;
+ struct nfs4_openowner *oo = openowner(ost->st_stateowner);
+ struct nfs4_client *cl = oo->oo_owner.so_client;
+ struct nfs4_lockowner *lo;
+ unsigned int strhashval;
+
+ lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner);
+ if (lo) {
+ if (!cstate->minorversion)
+ return nfserr_bad_seqid;
+ /* XXX: a lockowner always has exactly one stateid: */
+ *lst = list_first_entry(&lo->lo_owner.so_stateids,
+ struct nfs4_ol_stateid, st_perstateowner);
+ return nfs_ok;
+ }
+ strhashval = ownerstr_hashval(cl->cl_clientid.cl_id,
+ &lock->v.new.owner);
+ lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
+ if (lo == NULL)
+ return nfserr_jukebox;
+ *lst = alloc_init_lock_stateid(lo, fi, ost);
+ if (*lst == NULL) {
+ release_lockowner(lo);
+ return nfserr_jukebox;
+ }
+ *new = true;
+ return nfs_ok;
+}
+
+/*
+ * LOCK operation
+ */
+__be32
+nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_lock *lock)
+{
+ struct nfs4_openowner *open_sop = NULL;
+ struct nfs4_lockowner *lock_sop = NULL;
+ struct nfs4_ol_stateid *lock_stp;
+ struct nfs4_file *fp;
+ struct file *filp = NULL;
+ struct file_lock file_lock;
+ struct file_lock conflock;
+ __be32 status = 0;
+ bool new_state = false;
+ int lkflg;
+ int err;
+
+ dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
+ (long long) lock->lk_offset,
+ (long long) lock->lk_length);
+
+ if (check_lock_length(lock->lk_offset, lock->lk_length))
+ return nfserr_inval;
+
+ if ((status = fh_verify(rqstp, &cstate->current_fh,
+ S_IFREG, NFSD_MAY_LOCK))) {
+ dprintk("NFSD: nfsd4_lock: permission denied!\n");
+ return status;
+ }
+
+ nfs4_lock_state();
+
+ if (lock->lk_is_new) {
+ /*
+ * Client indicates that this is a new lockowner.
+ * Use open owner and open stateid to create lock owner and
+ * lock stateid.
+ */
+ struct nfs4_ol_stateid *open_stp = NULL;
+
+ if (nfsd4_has_session(cstate))
+ /* See rfc 5661 18.10.3: given clientid is ignored: */
+ memcpy(&lock->v.new.clientid,
+ &cstate->session->se_client->cl_clientid,
+ sizeof(clientid_t));
+
+ status = nfserr_stale_clientid;
+ if (STALE_CLIENTID(&lock->lk_new_clientid))
+ goto out;
+
+ /* validate and update open stateid and open seqid */
+ status = nfs4_preprocess_confirmed_seqid_op(cstate,
+ lock->lk_new_open_seqid,
+ &lock->lk_new_open_stateid,
+ &open_stp);
+ if (status)
+ goto out;
+ open_sop = openowner(open_stp->st_stateowner);
+ status = nfserr_bad_stateid;
+ if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
+ &lock->v.new.clientid))
+ goto out;
+ status = lookup_or_create_lock_state(cstate, open_stp, lock,
+ &lock_stp, &new_state);
+ if (status)
+ goto out;
+ } else {
+ /* lock (lock owner + lock stateid) already exists */
+ status = nfs4_preprocess_seqid_op(cstate,
+ lock->lk_old_lock_seqid,
+ &lock->lk_old_lock_stateid,
+ NFS4_LOCK_STID, &lock_stp);
+ if (status)
+ goto out;
+ }
+ lock_sop = lockowner(lock_stp->st_stateowner);
+ fp = lock_stp->st_file;
+
+ lkflg = setlkflg(lock->lk_type);
+ status = nfs4_check_openmode(lock_stp, lkflg);
+ if (status)
+ goto out;
+
+ status = nfserr_grace;
+ if (locks_in_grace() && !lock->lk_reclaim)
+ goto out;
+ status = nfserr_no_grace;
+ if (!locks_in_grace() && lock->lk_reclaim)
+ goto out;
+
+ locks_init_lock(&file_lock);
+ switch (lock->lk_type) {
+ case NFS4_READ_LT:
+ case NFS4_READW_LT:
+ filp = find_readable_file(lock_stp->st_file);
+ if (filp)
+ get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
+ file_lock.fl_type = F_RDLCK;
+ break;
+ case NFS4_WRITE_LT:
+ case NFS4_WRITEW_LT:
+ filp = find_writeable_file(lock_stp->st_file);
+ if (filp)
+ get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
+ file_lock.fl_type = F_WRLCK;
+ break;
+ default:
+ status = nfserr_inval;
+ goto out;
+ }
+ if (!filp) {
+ status = nfserr_openmode;
+ goto out;
+ }
+ file_lock.fl_owner = (fl_owner_t)lock_sop;
+ file_lock.fl_pid = current->tgid;
+ file_lock.fl_file = filp;
+ file_lock.fl_flags = FL_POSIX;
+ file_lock.fl_lmops = &nfsd_posix_mng_ops;
+
+ file_lock.fl_start = lock->lk_offset;
+ file_lock.fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
+ nfs4_transform_lock_offset(&file_lock);
+
+ /*
+ * Try to lock the file in the VFS.
+ * Note: locks.c uses the BKL to protect the inode's lock list.
+ */
+
+ err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock);
+ switch (-err) {
+ case 0: /* success! */
+ update_stateid(&lock_stp->st_stid.sc_stateid);
+ memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid,
+ sizeof(stateid_t));
+ status = 0;
+ break;
+ case (EAGAIN): /* conflock holds conflicting lock */
+ status = nfserr_denied;
+ dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
+ nfs4_set_lock_denied(&conflock, &lock->lk_denied);
+ break;
+ case (EDEADLK):
+ status = nfserr_deadlock;
+ break;
+ default:
+ dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
+ status = nfserrno(err);
+ break;
+ }
+out:
+ if (status && new_state)
+ release_lockowner(lock_sop);
+ if (!cstate->replay_owner)
+ nfs4_unlock_state();
+ return status;
+}
+
+/*
+ * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
+ * so we do a temporary open here just to get an open file to pass to
+ * vfs_test_lock. (Arguably perhaps test_lock should be done with an
+ * inode operation.)
+ */
+static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
+{
+ struct file *file;
+ int err;
+
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ if (err)
+ return err;
+ err = vfs_test_lock(file, lock);
+ nfsd_close(file);
+ return err;
+}
+
+/*
+ * LOCKT operation
+ */
+__be32
+nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_lockt *lockt)
+{
+ struct inode *inode;
+ struct file_lock file_lock;
+ struct nfs4_lockowner *lo;
+ int error;
+ __be32 status;
+
+ if (locks_in_grace())
+ return nfserr_grace;
+
+ if (check_lock_length(lockt->lt_offset, lockt->lt_length))
+ return nfserr_inval;
+
+ nfs4_lock_state();
+
+ status = nfserr_stale_clientid;
+ if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid))
+ goto out;
+
+ if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
+ goto out;
+
+ inode = cstate->current_fh.fh_dentry->d_inode;
+ locks_init_lock(&file_lock);
+ switch (lockt->lt_type) {
+ case NFS4_READ_LT:
+ case NFS4_READW_LT:
+ file_lock.fl_type = F_RDLCK;
+ break;
+ case NFS4_WRITE_LT:
+ case NFS4_WRITEW_LT:
+ file_lock.fl_type = F_WRLCK;
+ break;
+ default:
+ dprintk("NFSD: nfs4_lockt: bad lock type!\n");
+ status = nfserr_inval;
+ goto out;
+ }
+
+ lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner);
+ if (lo)
+ file_lock.fl_owner = (fl_owner_t)lo;
+ file_lock.fl_pid = current->tgid;
+ file_lock.fl_flags = FL_POSIX;
+
+ file_lock.fl_start = lockt->lt_offset;
+ file_lock.fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);
+
+ nfs4_transform_lock_offset(&file_lock);
+
+ status = nfs_ok;
+ error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
+ if (error) {
+ status = nfserrno(error);
+ goto out;
+ }
+ if (file_lock.fl_type != F_UNLCK) {
+ status = nfserr_denied;
+ nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
+ }
+out:
+ nfs4_unlock_state();
+ return status;
+}
+
+__be32
+nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_locku *locku)
+{
+ struct nfs4_ol_stateid *stp;
+ struct file *filp = NULL;
+ struct file_lock file_lock;
+ __be32 status;
+ int err;
+
+ dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
+ (long long) locku->lu_offset,
+ (long long) locku->lu_length);
+
+ if (check_lock_length(locku->lu_offset, locku->lu_length))
+ return nfserr_inval;
+
+ nfs4_lock_state();
+
+ status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
+ &locku->lu_stateid, NFS4_LOCK_STID, &stp);
+ if (status)
+ goto out;
+ filp = find_any_file(stp->st_file);
+ if (!filp) {
+ status = nfserr_lock_range;
+ goto out;
+ }
+ BUG_ON(!filp);
+ locks_init_lock(&file_lock);
+ file_lock.fl_type = F_UNLCK;
+ file_lock.fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
+ file_lock.fl_pid = current->tgid;
+ file_lock.fl_file = filp;
+ file_lock.fl_flags = FL_POSIX;
+ file_lock.fl_lmops = &nfsd_posix_mng_ops;
+ file_lock.fl_start = locku->lu_offset;
+
+ file_lock.fl_end = last_byte_offset(locku->lu_offset, locku->lu_length);
+ nfs4_transform_lock_offset(&file_lock);
+
+ /*
+ * Try to unlock the file in the VFS.
+ */
+ err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL);
+ if (err) {
+ dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
+ goto out_nfserr;
+ }
+ /*
+ * OK, unlock succeeded; the only thing left to do is update the stateid.
+ */
+ update_stateid(&stp->st_stid.sc_stateid);
+ memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+
+out:
+ if (!cstate->replay_owner)
+ nfs4_unlock_state();
+ return status;
+
+out_nfserr:
+ status = nfserrno(err);
+ goto out;
+}
+
+/*
+ * returns
+ * 1: locks held by lockowner
+ * 0: no locks held by lockowner
+ */
+static int
+check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
+{
+ struct file_lock **flpp;
+ struct inode *inode = filp->fi_inode;
+ int status = 0;
+
+ lock_flocks();
+ for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
+ if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
+ status = 1;
+ goto out;
+ }
+ }
+out:
+ unlock_flocks();
+ return status;
+}
+
+__be32
+nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_release_lockowner *rlockowner)
+{
+ clientid_t *clid = &rlockowner->rl_clientid;
+ struct nfs4_stateowner *sop;
+ struct nfs4_lockowner *lo;
+ struct nfs4_ol_stateid *stp;
+ struct xdr_netobj *owner = &rlockowner->rl_owner;
+ struct list_head matches;
+ unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
+ __be32 status;
+
+ dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
+ clid->cl_boot, clid->cl_id);
+
+ /* XXX check for lease expiration */
+
+ status = nfserr_stale_clientid;
+ if (STALE_CLIENTID(clid))
+ return status;
+
+ nfs4_lock_state();
+
+ status = nfserr_locks_held;
+ INIT_LIST_HEAD(&matches);
+
+ list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) {
+ if (sop->so_is_open_owner)
+ continue;
+ if (!same_owner_str(sop, owner, clid))
+ continue;
+ list_for_each_entry(stp, &sop->so_stateids,
+ st_perstateowner) {
+ lo = lockowner(sop);
+ if (check_for_locks(stp->st_file, lo))
+ goto out;
+ list_add(&lo->lo_list, &matches);
+ }
+ }
+ /* Clients probably won't expect us to return with some (but not all)
+ * of the lockowner state released; so don't release any until all
+ * have been checked. */
+ status = nfs_ok;
+ while (!list_empty(&matches)) {
+ lo = list_entry(matches.next, struct nfs4_lockowner,
+ lo_list);
+ /* unhash_stateowner deletes so_perclient only
+ * for openowners. */
+ list_del(&lo->lo_list);
+ release_lockowner(lo);
+ }
+out:
+ nfs4_unlock_state();
+ return status;
+}
+
+static inline struct nfs4_client_reclaim *
+alloc_reclaim(void)
+{
+ return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
+}
+
+int
+nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
+{
+ unsigned int strhashval = clientstr_hashval(name);
+ struct nfs4_client *clp;
+
+ clp = find_confirmed_client_by_str(name, strhashval);
+ return clp ? 1 : 0;
+}
+
+/*
+ * failure => all reset bets are off, nfserr_no_grace...
+ */
+int
+nfs4_client_to_reclaim(const char *name)
+{
+ unsigned int strhashval;
+ struct nfs4_client_reclaim *crp = NULL;
+
+ dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
+ crp = alloc_reclaim();
+ if (!crp)
+ return 0;
+ strhashval = clientstr_hashval(name);
+ INIT_LIST_HEAD(&crp->cr_strhash);
+ list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
+ memcpy(crp->cr_recdir, name, HEXDIR_LEN);
+ reclaim_str_hashtbl_size++;
+ return 1;
+}
+
+static void
+nfs4_release_reclaim(void)
+{
+ struct nfs4_client_reclaim *crp = NULL;
+ int i;
+
+ for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+ while (!list_empty(&reclaim_str_hashtbl[i])) {
+ crp = list_entry(reclaim_str_hashtbl[i].next,
+ struct nfs4_client_reclaim, cr_strhash);
+ list_del(&crp->cr_strhash);
+ kfree(crp);
+ reclaim_str_hashtbl_size--;
+ }
+ }
+ BUG_ON(reclaim_str_hashtbl_size);
+}
+
+/*
+ * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
+static struct nfs4_client_reclaim *
+nfs4_find_reclaim_client(clientid_t *clid)
+{
+ unsigned int strhashval;
+ struct nfs4_client *clp;
+ struct nfs4_client_reclaim *crp = NULL;
+
+
+ /* find clientid in conf_id_hashtbl */
+ clp = find_confirmed_client(clid);
+ if (clp == NULL)
+ return NULL;
+
+ dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
+ clp->cl_name.len, clp->cl_name.data,
+ clp->cl_recdir);
+
+ /* find clp->cl_name in reclaim_str_hashtbl */
+ strhashval = clientstr_hashval(clp->cl_recdir);
+ list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
+ if (same_name(crp->cr_recdir, clp->cl_recdir)) {
+ return crp;
+ }
+ }
+ return NULL;
+}
+
+/*
+* Called from OPEN. Look for clientid in reclaim list.
+*/
+__be32
+nfs4_check_open_reclaim(clientid_t *clid)
+{
+ return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
+}
+
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+
+void nfsd_forget_clients(u64 num)
+{
+ struct nfs4_client *clp, *next;
+ int count = 0;
+
+ nfs4_lock_state();
+ list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
+ nfsd4_remove_clid_dir(clp);
+ expire_client(clp);
+ if (++count == num)
+ break;
+ }
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Forgot %d clients", count);
+}
+
+static void release_lockowner_sop(struct nfs4_stateowner *sop)
+{
+ release_lockowner(lockowner(sop));
+}
+
+static void release_openowner_sop(struct nfs4_stateowner *sop)
+{
+ release_openowner(openowner(sop));
+}
+
+static int nfsd_release_n_owners(u64 num, bool is_open_owner,
+ void (*release_sop)(struct nfs4_stateowner *))
+{
+ int i, count = 0;
+ struct nfs4_stateowner *sop, *next;
+
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) {
+ if (sop->so_is_open_owner != is_open_owner)
+ continue;
+ release_sop(sop);
+ if (++count == num)
+ return count;
+ }
+ }
+ return count;
+}
+
+void nfsd_forget_locks(u64 num)
+{
+ int count;
+
+ nfs4_lock_state();
+ count = nfsd_release_n_owners(num, false, release_lockowner_sop);
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Forgot %d locks", count);
+}
+
+void nfsd_forget_openowners(u64 num)
+{
+ int count;
+
+ nfs4_lock_state();
+ count = nfsd_release_n_owners(num, true, release_openowner_sop);
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Forgot %d open owners", count);
+}
+
+int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *))
+{
+ int i, count = 0;
+ struct nfs4_file *fp, *fnext;
+ struct nfs4_delegation *dp, *dnext;
+
+ for (i = 0; i < FILE_HASH_SIZE; i++) {
+ list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
+ list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
+ deleg_func(dp);
+ if (++count == num)
+ return count;
+ }
+ }
+ }
+
+ return count;
+}
+
+void nfsd_forget_delegations(u64 num)
+{
+ unsigned int count;
+
+ nfs4_lock_state();
+ count = nfsd_process_n_delegations(num, unhash_delegation);
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Forgot %d delegations", count);
+}
+
+void nfsd_recall_delegations(u64 num)
+{
+ unsigned int count;
+
+ nfs4_lock_state();
+ spin_lock(&recall_lock);
+ count = nfsd_process_n_delegations(num, nfsd_break_one_deleg);
+ spin_unlock(&recall_lock);
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Recalled %d delegations", count);
+}
+
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
+/* initialization to perform at module load time: */
+
+void
+nfs4_state_init(void)
+{
+ int i;
+
+ for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+ INIT_LIST_HEAD(&conf_id_hashtbl[i]);
+ INIT_LIST_HEAD(&conf_str_hashtbl[i]);
+ INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
+ INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
+ INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
+ }
+ for (i = 0; i < SESSION_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&sessionid_hashtbl[i]);
+ for (i = 0; i < FILE_HASH_SIZE; i++) {
+ INIT_LIST_HEAD(&file_hashtbl[i]);
+ }
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
+ }
+ for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
+ INIT_LIST_HEAD(&close_lru);
+ INIT_LIST_HEAD(&client_lru);
+ INIT_LIST_HEAD(&del_recall_lru);
+ reclaim_str_hashtbl_size = 0;
+}
+
+static void
+nfsd4_load_reboot_recovery_data(void)
+{
+ int status;
+
+ nfs4_lock_state();
+ nfsd4_init_recdir();
+ status = nfsd4_recdir_load();
+ nfs4_unlock_state();
+ if (status)
+ printk("NFSD: Failure reading reboot recovery data\n");
+}
+
+/*
+ * Since the lifetime of a delegation isn't limited to that of an open, a
+ * client may quite reasonably hang on to a delegation as long as it has
+ * the inode cached. This becomes an obvious problem the first time a
+ * client's inode cache approaches the size of the server's total memory.
+ *
+ * For now we avoid this problem by imposing a hard limit on the number
+ * of delegations, which varies according to the server's memory size.
+ */
+static void
+set_max_delegations(void)
+{
+ /*
+ * Allow at most 4 delegations per megabyte of RAM. Quick
+ * estimates suggest that in the worst case (where every delegation
+ * is for a different inode), a delegation could take about 1.5K,
+ * giving a worst case usage of about 6% of memory.
+ */
+ max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
+}
+
+/* initialization to perform when the nfsd service is started: */
+
+static int
+__nfs4_state_start(void)
+{
+ int ret;
+
+ boot_time = get_seconds();
+ locks_start_grace(&nfsd4_manager);
+ printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
+ nfsd4_grace);
+ ret = set_callback_cred();
+ if (ret)
+ return -ENOMEM;
+ laundry_wq = create_singlethread_workqueue("nfsd4");
+ if (laundry_wq == NULL)
+ return -ENOMEM;
+ ret = nfsd4_create_callback_queue();
+ if (ret)
+ goto out_free_laundry;
+ queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
+ set_max_delegations();
+ return 0;
+out_free_laundry:
+ destroy_workqueue(laundry_wq);
+ return ret;
+}
+
+int
+nfs4_state_start(void)
+{
+ nfsd4_load_reboot_recovery_data();
+ return __nfs4_state_start();
+}
+
+static void
+__nfs4_state_shutdown(void)
+{
+ int i;
+ struct nfs4_client *clp = NULL;
+ struct nfs4_delegation *dp = NULL;
+ struct list_head *pos, *next, reaplist;
+
+ for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+ while (!list_empty(&conf_id_hashtbl[i])) {
+ clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
+ expire_client(clp);
+ }
+ while (!list_empty(&unconf_str_hashtbl[i])) {
+ clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash);
+ expire_client(clp);
+ }
+ }
+ INIT_LIST_HEAD(&reaplist);
+ spin_lock(&recall_lock);
+ list_for_each_safe(pos, next, &del_recall_lru) {
+ dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+ list_move(&dp->dl_recall_lru, &reaplist);
+ }
+ spin_unlock(&recall_lock);
+ list_for_each_safe(pos, next, &reaplist) {
+ dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+ unhash_delegation(dp);
+ }
+
+ nfsd4_shutdown_recdir();
+}
+
+void
+nfs4_state_shutdown(void)
+{
+ cancel_delayed_work_sync(&laundromat_work);
+ destroy_workqueue(laundry_wq);
+ locks_end_grace(&nfsd4_manager);
+ nfs4_lock_state();
+ nfs4_release_reclaim();
+ __nfs4_state_shutdown();
+ nfs4_unlock_state();
+ nfsd4_destroy_callback_queue();
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
new file mode 100644
index 00000000000..0ec5a1b9700
--- /dev/null
+++ b/fs/nfsd/nfs4xdr.c
@@ -0,0 +1,3673 @@
+/*
+ * Server-side XDR for NFSv4
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * TODO: Neil Brown made the following observation: We currently
+ * initially reserve NFSD_BUFSIZE space on the transmit queue and
+ * never release any of that until the request is complete.
+ * It would be good to calculate a new maximum response size while
+ * decoding the COMPOUND, and call svc_reserve with this number
+ * at the end of nfs4svc_decode_compoundargs.
+ */
+
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/statfs.h>
+#include <linux/utsname.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/svcauth_gss.h>
+
+#include "idmap.h"
+#include "acl.h"
+#include "xdr4.h"
+#include "vfs.h"
+#include "state.h"
+#include "cache.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_XDR
+
+/*
+ * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing
+ * directory in order to indicate to the client that a filesystem boundary is present
+ * We use a fixed fsid for a referral
+ */
+#define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL
+#define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL
+
+static __be32
+check_filename(char *str, int len, __be32 err)
+{
+ int i;
+
+ if (len == 0)
+ return nfserr_inval;
+ if (isdotent(str, len))
+ return err;
+ for (i = 0; i < len; i++)
+ if (str[i] == '/')
+ return err;
+ return 0;
+}
+
+#define DECODE_HEAD \
+ __be32 *p; \
+ __be32 status
+#define DECODE_TAIL \
+ status = 0; \
+out: \
+ return status; \
+xdr_error: \
+ dprintk("NFSD: xdr error (%s:%d)\n", \
+ __FILE__, __LINE__); \
+ status = nfserr_bad_xdr; \
+ goto out
+
+#define READ32(x) (x) = ntohl(*p++)
+#define READ64(x) do { \
+ (x) = (u64)ntohl(*p++) << 32; \
+ (x) |= ntohl(*p++); \
+} while (0)
+#define READTIME(x) do { \
+ p++; \
+ (x) = ntohl(*p++); \
+ p++; \
+} while (0)
+#define READMEM(x,nbytes) do { \
+ x = (char *)p; \
+ p += XDR_QUADLEN(nbytes); \
+} while (0)
+#define SAVEMEM(x,nbytes) do { \
+ if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
+ savemem(argp, p, nbytes) : \
+ (char *)p)) { \
+ dprintk("NFSD: xdr error (%s:%d)\n", \
+ __FILE__, __LINE__); \
+ goto xdr_error; \
+ } \
+ p += XDR_QUADLEN(nbytes); \
+} while (0)
+#define COPYMEM(x,nbytes) do { \
+ memcpy((x), p, nbytes); \
+ p += XDR_QUADLEN(nbytes); \
+} while (0)
+
+/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */
+#define READ_BUF(nbytes) do { \
+ if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \
+ p = argp->p; \
+ argp->p += XDR_QUADLEN(nbytes); \
+ } else if (!(p = read_buf(argp, nbytes))) { \
+ dprintk("NFSD: xdr error (%s:%d)\n", \
+ __FILE__, __LINE__); \
+ goto xdr_error; \
+ } \
+} while (0)
+
+static void save_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep)
+{
+ savep->p = argp->p;
+ savep->end = argp->end;
+ savep->pagelen = argp->pagelen;
+ savep->pagelist = argp->pagelist;
+}
+
+static void restore_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep)
+{
+ argp->p = savep->p;
+ argp->end = savep->end;
+ argp->pagelen = savep->pagelen;
+ argp->pagelist = savep->pagelist;
+}
+
+static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
+{
+ /* We want more bytes than seem to be available.
+ * Maybe we need a new page, maybe we have just run out
+ */
+ unsigned int avail = (char *)argp->end - (char *)argp->p;
+ __be32 *p;
+ if (avail + argp->pagelen < nbytes)
+ return NULL;
+ if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
+ return NULL;
+ /* ok, we can do it with the current plus the next page */
+ if (nbytes <= sizeof(argp->tmp))
+ p = argp->tmp;
+ else {
+ kfree(argp->tmpp);
+ p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL);
+ if (!p)
+ return NULL;
+
+ }
+ /*
+ * The following memcpy is safe because read_buf is always
+ * called with nbytes > avail, and the two cases above both
+ * guarantee p points to at least nbytes bytes.
+ */
+ memcpy(p, argp->p, avail);
+ /* step to next page */
+ argp->p = page_address(argp->pagelist[0]);
+ argp->pagelist++;
+ if (argp->pagelen < PAGE_SIZE) {
+ argp->end = argp->p + (argp->pagelen>>2);
+ argp->pagelen = 0;
+ } else {
+ argp->end = argp->p + (PAGE_SIZE>>2);
+ argp->pagelen -= PAGE_SIZE;
+ }
+ memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
+ argp->p += XDR_QUADLEN(nbytes - avail);
+ return p;
+}
+
+static int zero_clientid(clientid_t *clid)
+{
+ return (clid->cl_boot == 0) && (clid->cl_id == 0);
+}
+
+static int
+defer_free(struct nfsd4_compoundargs *argp,
+ void (*release)(const void *), void *p)
+{
+ struct tmpbuf *tb;
+
+ tb = kmalloc(sizeof(*tb), GFP_KERNEL);
+ if (!tb)
+ return -ENOMEM;
+ tb->buf = p;
+ tb->release = release;
+ tb->next = argp->to_free;
+ argp->to_free = tb;
+ return 0;
+}
+
+static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
+{
+ if (p == argp->tmp) {
+ p = kmemdup(argp->tmp, nbytes, GFP_KERNEL);
+ if (!p)
+ return NULL;
+ } else {
+ BUG_ON(p != argp->tmpp);
+ argp->tmpp = NULL;
+ }
+ if (defer_free(argp, kfree, p)) {
+ kfree(p);
+ return NULL;
+ } else
+ return (char *)p;
+}
+
+static __be32
+nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
+{
+ u32 bmlen;
+ DECODE_HEAD;
+
+ bmval[0] = 0;
+ bmval[1] = 0;
+ bmval[2] = 0;
+
+ READ_BUF(4);
+ READ32(bmlen);
+ if (bmlen > 1000)
+ goto xdr_error;
+
+ READ_BUF(bmlen << 2);
+ if (bmlen > 0)
+ READ32(bmval[0]);
+ if (bmlen > 1)
+ READ32(bmval[1]);
+ if (bmlen > 2)
+ READ32(bmval[2]);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
+ struct iattr *iattr, struct nfs4_acl **acl)
+{
+ int expected_len, len = 0;
+ u32 dummy32;
+ char *buf;
+ int host_err;
+
+ DECODE_HEAD;
+ iattr->ia_valid = 0;
+ if ((status = nfsd4_decode_bitmap(argp, bmval)))
+ return status;
+
+ READ_BUF(4);
+ READ32(expected_len);
+
+ if (bmval[0] & FATTR4_WORD0_SIZE) {
+ READ_BUF(8);
+ len += 8;
+ READ64(iattr->ia_size);
+ iattr->ia_valid |= ATTR_SIZE;
+ }
+ if (bmval[0] & FATTR4_WORD0_ACL) {
+ int nace;
+ struct nfs4_ace *ace;
+
+ READ_BUF(4); len += 4;
+ READ32(nace);
+
+ if (nace > NFS4_ACL_MAX)
+ return nfserr_resource;
+
+ *acl = nfs4_acl_new(nace);
+ if (*acl == NULL) {
+ host_err = -ENOMEM;
+ goto out_nfserr;
+ }
+ defer_free(argp, kfree, *acl);
+
+ (*acl)->naces = nace;
+ for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
+ READ_BUF(16); len += 16;
+ READ32(ace->type);
+ READ32(ace->flag);
+ READ32(ace->access_mask);
+ READ32(dummy32);
+ READ_BUF(dummy32);
+ len += XDR_QUADLEN(dummy32) << 2;
+ READMEM(buf, dummy32);
+ ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
+ status = nfs_ok;
+ if (ace->whotype != NFS4_ACL_WHO_NAMED)
+ ace->who = 0;
+ else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
+ status = nfsd_map_name_to_gid(argp->rqstp,
+ buf, dummy32, &ace->who);
+ else
+ status = nfsd_map_name_to_uid(argp->rqstp,
+ buf, dummy32, &ace->who);
+ if (status)
+ return status;
+ }
+ } else
+ *acl = NULL;
+ if (bmval[1] & FATTR4_WORD1_MODE) {
+ READ_BUF(4);
+ len += 4;
+ READ32(iattr->ia_mode);
+ iattr->ia_mode &= (S_IFMT | S_IALLUGO);
+ iattr->ia_valid |= ATTR_MODE;
+ }
+ if (bmval[1] & FATTR4_WORD1_OWNER) {
+ READ_BUF(4);
+ len += 4;
+ READ32(dummy32);
+ READ_BUF(dummy32);
+ len += (XDR_QUADLEN(dummy32) << 2);
+ READMEM(buf, dummy32);
+ if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+ return status;
+ iattr->ia_valid |= ATTR_UID;
+ }
+ if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
+ READ_BUF(4);
+ len += 4;
+ READ32(dummy32);
+ READ_BUF(dummy32);
+ len += (XDR_QUADLEN(dummy32) << 2);
+ READMEM(buf, dummy32);
+ if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+ return status;
+ iattr->ia_valid |= ATTR_GID;
+ }
+ if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
+ READ_BUF(4);
+ len += 4;
+ READ32(dummy32);
+ switch (dummy32) {
+ case NFS4_SET_TO_CLIENT_TIME:
+ /* We require the high 32 bits of 'seconds' to be 0, and we ignore
+ all 32 bits of 'nseconds'. */
+ READ_BUF(12);
+ len += 12;
+ READ32(dummy32);
+ if (dummy32)
+ return nfserr_inval;
+ READ32(iattr->ia_atime.tv_sec);
+ READ32(iattr->ia_atime.tv_nsec);
+ if (iattr->ia_atime.tv_nsec >= (u32)1000000000)
+ return nfserr_inval;
+ iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
+ break;
+ case NFS4_SET_TO_SERVER_TIME:
+ iattr->ia_valid |= ATTR_ATIME;
+ break;
+ default:
+ goto xdr_error;
+ }
+ }
+ if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+ READ_BUF(4);
+ len += 4;
+ READ32(dummy32);
+ switch (dummy32) {
+ case NFS4_SET_TO_CLIENT_TIME:
+ /* We require the high 32 bits of 'seconds' to be 0, and we ignore
+ all 32 bits of 'nseconds'. */
+ READ_BUF(12);
+ len += 12;
+ READ32(dummy32);
+ if (dummy32)
+ return nfserr_inval;
+ READ32(iattr->ia_mtime.tv_sec);
+ READ32(iattr->ia_mtime.tv_nsec);
+ if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)
+ return nfserr_inval;
+ iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
+ break;
+ case NFS4_SET_TO_SERVER_TIME:
+ iattr->ia_valid |= ATTR_MTIME;
+ break;
+ default:
+ goto xdr_error;
+ }
+ }
+ if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
+ || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
+ || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2)
+ READ_BUF(expected_len - len);
+ else if (len != expected_len)
+ goto xdr_error;
+
+ DECODE_TAIL;
+
+out_nfserr:
+ status = nfserrno(host_err);
+ goto out;
+}
+
+static __be32
+nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid)
+{
+ DECODE_HEAD;
+
+ READ_BUF(sizeof(stateid_t));
+ READ32(sid->si_generation);
+ COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(access->ac_req_access);
+
+ DECODE_TAIL;
+}
+
+static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
+{
+ DECODE_HEAD;
+
+ READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
+ COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ READ32(bcts->dir);
+ /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker
+ * could help us figure out we should be using it. */
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(close->cl_seqid);
+ return nfsd4_decode_stateid(argp, &close->cl_stateid);
+
+ DECODE_TAIL;
+}
+
+
+static __be32
+nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
+{
+ DECODE_HEAD;
+
+ READ_BUF(12);
+ READ64(commit->co_offset);
+ READ32(commit->co_count);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(create->cr_type);
+ switch (create->cr_type) {
+ case NF4LNK:
+ READ_BUF(4);
+ READ32(create->cr_linklen);
+ READ_BUF(create->cr_linklen);
+ SAVEMEM(create->cr_linkname, create->cr_linklen);
+ break;
+ case NF4BLK:
+ case NF4CHR:
+ READ_BUF(8);
+ READ32(create->cr_specdata1);
+ READ32(create->cr_specdata2);
+ break;
+ case NF4SOCK:
+ case NF4FIFO:
+ case NF4DIR:
+ default:
+ break;
+ }
+
+ READ_BUF(4);
+ READ32(create->cr_namelen);
+ READ_BUF(create->cr_namelen);
+ SAVEMEM(create->cr_name, create->cr_namelen);
+ if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
+ return status;
+
+ status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
+ &create->cr_acl);
+ if (status)
+ goto out;
+
+ DECODE_TAIL;
+}
+
+static inline __be32
+nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
+{
+ return nfsd4_decode_stateid(argp, &dr->dr_stateid);
+}
+
+static inline __be32
+nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
+{
+ return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
+}
+
+static __be32
+nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(link->li_namelen);
+ READ_BUF(link->li_namelen);
+ SAVEMEM(link->li_name, link->li_namelen);
+ if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval)))
+ return status;
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+{
+ DECODE_HEAD;
+
+ /*
+ * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
+ */
+ READ_BUF(28);
+ READ32(lock->lk_type);
+ if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
+ goto xdr_error;
+ READ32(lock->lk_reclaim);
+ READ64(lock->lk_offset);
+ READ64(lock->lk_length);
+ READ32(lock->lk_is_new);
+
+ if (lock->lk_is_new) {
+ READ_BUF(4);
+ READ32(lock->lk_new_open_seqid);
+ status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid);
+ if (status)
+ return status;
+ READ_BUF(8 + sizeof(clientid_t));
+ READ32(lock->lk_new_lock_seqid);
+ COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
+ READ32(lock->lk_new_owner.len);
+ READ_BUF(lock->lk_new_owner.len);
+ READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
+ } else {
+ status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid);
+ if (status)
+ return status;
+ READ_BUF(4);
+ READ32(lock->lk_old_lock_seqid);
+ }
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
+{
+ DECODE_HEAD;
+
+ READ_BUF(32);
+ READ32(lockt->lt_type);
+ if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
+ goto xdr_error;
+ READ64(lockt->lt_offset);
+ READ64(lockt->lt_length);
+ COPYMEM(&lockt->lt_clientid, 8);
+ READ32(lockt->lt_owner.len);
+ READ_BUF(lockt->lt_owner.len);
+ READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
+{
+ DECODE_HEAD;
+
+ READ_BUF(8);
+ READ32(locku->lu_type);
+ if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
+ goto xdr_error;
+ READ32(locku->lu_seqid);
+ status = nfsd4_decode_stateid(argp, &locku->lu_stateid);
+ if (status)
+ return status;
+ READ_BUF(16);
+ READ64(locku->lu_offset);
+ READ64(locku->lu_length);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(lookup->lo_len);
+ READ_BUF(lookup->lo_len);
+ SAVEMEM(lookup->lo_name, lookup->lo_len);
+ if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent)))
+ return status;
+
+ DECODE_TAIL;
+}
+
+static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x)
+{
+ __be32 *p;
+ u32 w;
+
+ READ_BUF(4);
+ READ32(w);
+ *x = w;
+ switch (w & NFS4_SHARE_ACCESS_MASK) {
+ case NFS4_SHARE_ACCESS_READ:
+ case NFS4_SHARE_ACCESS_WRITE:
+ case NFS4_SHARE_ACCESS_BOTH:
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
+ w &= ~NFS4_SHARE_ACCESS_MASK;
+ if (!w)
+ return nfs_ok;
+ if (!argp->minorversion)
+ return nfserr_bad_xdr;
+ switch (w & NFS4_SHARE_WANT_MASK) {
+ case NFS4_SHARE_WANT_NO_PREFERENCE:
+ case NFS4_SHARE_WANT_READ_DELEG:
+ case NFS4_SHARE_WANT_WRITE_DELEG:
+ case NFS4_SHARE_WANT_ANY_DELEG:
+ case NFS4_SHARE_WANT_NO_DELEG:
+ case NFS4_SHARE_WANT_CANCEL:
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
+ w &= ~NFS4_SHARE_WANT_MASK;
+ if (!w)
+ return nfs_ok;
+ switch (w) {
+ case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL:
+ case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED:
+ case (NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL |
+ NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED):
+ return nfs_ok;
+ }
+xdr_error:
+ return nfserr_bad_xdr;
+}
+
+static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x)
+{
+ __be32 *p;
+
+ READ_BUF(4);
+ READ32(*x);
+ /* Note: unlinke access bits, deny bits may be zero. */
+ if (*x & ~NFS4_SHARE_DENY_BOTH)
+ return nfserr_bad_xdr;
+ return nfs_ok;
+xdr_error:
+ return nfserr_bad_xdr;
+}
+
+static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
+{
+ __be32 *p;
+
+ READ_BUF(4);
+ READ32(o->len);
+
+ if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT)
+ return nfserr_bad_xdr;
+
+ READ_BUF(o->len);
+ SAVEMEM(o->data, o->len);
+ return nfs_ok;
+xdr_error:
+ return nfserr_bad_xdr;
+}
+
+static __be32
+nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+{
+ DECODE_HEAD;
+
+ memset(open->op_bmval, 0, sizeof(open->op_bmval));
+ open->op_iattr.ia_valid = 0;
+ open->op_openowner = NULL;
+
+ /* seqid, share_access, share_deny, clientid, ownerlen */
+ READ_BUF(4);
+ READ32(open->op_seqid);
+ status = nfsd4_decode_share_access(argp, &open->op_share_access);
+ if (status)
+ goto xdr_error;
+ status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
+ if (status)
+ goto xdr_error;
+ READ_BUF(sizeof(clientid_t));
+ COPYMEM(&open->op_clientid, sizeof(clientid_t));
+ status = nfsd4_decode_opaque(argp, &open->op_owner);
+ if (status)
+ goto xdr_error;
+ READ_BUF(4);
+ READ32(open->op_create);
+ switch (open->op_create) {
+ case NFS4_OPEN_NOCREATE:
+ break;
+ case NFS4_OPEN_CREATE:
+ READ_BUF(4);
+ READ32(open->op_createmode);
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ case NFS4_CREATE_GUARDED:
+ status = nfsd4_decode_fattr(argp, open->op_bmval,
+ &open->op_iattr, &open->op_acl);
+ if (status)
+ goto out;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+ READ_BUF(8);
+ COPYMEM(open->op_verf.data, 8);
+ break;
+ case NFS4_CREATE_EXCLUSIVE4_1:
+ if (argp->minorversion < 1)
+ goto xdr_error;
+ READ_BUF(8);
+ COPYMEM(open->op_verf.data, 8);
+ status = nfsd4_decode_fattr(argp, open->op_bmval,
+ &open->op_iattr, &open->op_acl);
+ if (status)
+ goto out;
+ break;
+ default:
+ goto xdr_error;
+ }
+ break;
+ default:
+ goto xdr_error;
+ }
+
+ /* open_claim */
+ READ_BUF(4);
+ READ32(open->op_claim_type);
+ switch (open->op_claim_type) {
+ case NFS4_OPEN_CLAIM_NULL:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ READ_BUF(4);
+ READ32(open->op_fname.len);
+ READ_BUF(open->op_fname.len);
+ SAVEMEM(open->op_fname.data, open->op_fname.len);
+ if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+ return status;
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ READ_BUF(4);
+ READ32(open->op_delegate_type);
+ break;
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
+ if (status)
+ return status;
+ READ_BUF(4);
+ READ32(open->op_fname.len);
+ READ_BUF(open->op_fname.len);
+ SAVEMEM(open->op_fname.data, open->op_fname.len);
+ if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+ return status;
+ break;
+ case NFS4_OPEN_CLAIM_FH:
+ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+ if (argp->minorversion < 1)
+ goto xdr_error;
+ /* void */
+ break;
+ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+ if (argp->minorversion < 1)
+ goto xdr_error;
+ status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
+ if (status)
+ return status;
+ break;
+ default:
+ goto xdr_error;
+ }
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
+{
+ DECODE_HEAD;
+
+ status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid);
+ if (status)
+ return status;
+ READ_BUF(4);
+ READ32(open_conf->oc_seqid);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
+{
+ DECODE_HEAD;
+
+ status = nfsd4_decode_stateid(argp, &open_down->od_stateid);
+ if (status)
+ return status;
+ READ_BUF(4);
+ READ32(open_down->od_seqid);
+ status = nfsd4_decode_share_access(argp, &open_down->od_share_access);
+ if (status)
+ return status;
+ status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
+ if (status)
+ return status;
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(putfh->pf_fhlen);
+ if (putfh->pf_fhlen > NFS4_FHSIZE)
+ goto xdr_error;
+ READ_BUF(putfh->pf_fhlen);
+ SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
+{
+ DECODE_HEAD;
+
+ status = nfsd4_decode_stateid(argp, &read->rd_stateid);
+ if (status)
+ return status;
+ READ_BUF(12);
+ READ64(read->rd_offset);
+ READ32(read->rd_length);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
+{
+ DECODE_HEAD;
+
+ READ_BUF(24);
+ READ64(readdir->rd_cookie);
+ COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data));
+ READ32(readdir->rd_dircount); /* just in case you needed a useless field... */
+ READ32(readdir->rd_maxcount);
+ if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))
+ goto out;
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(remove->rm_namelen);
+ READ_BUF(remove->rm_namelen);
+ SAVEMEM(remove->rm_name, remove->rm_namelen);
+ if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent)))
+ return status;
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(rename->rn_snamelen);
+ READ_BUF(rename->rn_snamelen + 4);
+ SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+ READ32(rename->rn_tnamelen);
+ READ_BUF(rename->rn_tnamelen);
+ SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
+ if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent)))
+ return status;
+ if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval)))
+ return status;
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+{
+ DECODE_HEAD;
+
+ READ_BUF(sizeof(clientid_t));
+ COPYMEM(clientid, sizeof(clientid_t));
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
+ struct nfsd4_secinfo *secinfo)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(secinfo->si_namelen);
+ READ_BUF(secinfo->si_namelen);
+ SAVEMEM(secinfo->si_name, secinfo->si_namelen);
+ status = check_filename(secinfo->si_name, secinfo->si_namelen,
+ nfserr_noent);
+ if (status)
+ return status;
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+ struct nfsd4_secinfo_no_name *sin)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(sin->sin_style);
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
+{
+ __be32 status;
+
+ status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
+ if (status)
+ return status;
+ return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
+ &setattr->sa_acl);
+}
+
+static __be32
+nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
+{
+ DECODE_HEAD;
+
+ READ_BUF(8);
+ COPYMEM(setclientid->se_verf.data, 8);
+
+ status = nfsd4_decode_opaque(argp, &setclientid->se_name);
+ if (status)
+ return nfserr_bad_xdr;
+ READ_BUF(8);
+ READ32(setclientid->se_callback_prog);
+ READ32(setclientid->se_callback_netid_len);
+
+ READ_BUF(setclientid->se_callback_netid_len + 4);
+ SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+ READ32(setclientid->se_callback_addr_len);
+
+ READ_BUF(setclientid->se_callback_addr_len + 4);
+ SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+ READ32(setclientid->se_callback_ident);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
+{
+ DECODE_HEAD;
+
+ READ_BUF(8 + sizeof(nfs4_verifier));
+ COPYMEM(&scd_c->sc_clientid, 8);
+ COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier));
+
+ DECODE_TAIL;
+}
+
+/* Also used for NVERIFY */
+static __be32
+nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
+{
+#if 0
+ struct nfsd4_compoundargs save = {
+ .p = argp->p,
+ .end = argp->end,
+ .rqstp = argp->rqstp,
+ };
+ u32 ve_bmval[2];
+ struct iattr ve_iattr; /* request */
+ struct nfs4_acl *ve_acl; /* request */
+#endif
+ DECODE_HEAD;
+
+ if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval)))
+ goto out;
+
+ /* For convenience's sake, we compare raw xdr'd attributes in
+ * nfsd4_proc_verify; however we still decode here just to return
+ * correct error in case of bad xdr. */
+#if 0
+ status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl);
+ if (status == nfserr_inval) {
+ status = nfserrno(status);
+ goto out;
+ }
+#endif
+ READ_BUF(4);
+ READ32(verify->ve_attrlen);
+ READ_BUF(verify->ve_attrlen);
+ SAVEMEM(verify->ve_attrval, verify->ve_attrlen);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+{
+ int avail;
+ int v;
+ int len;
+ DECODE_HEAD;
+
+ status = nfsd4_decode_stateid(argp, &write->wr_stateid);
+ if (status)
+ return status;
+ READ_BUF(16);
+ READ64(write->wr_offset);
+ READ32(write->wr_stable_how);
+ if (write->wr_stable_how > 2)
+ goto xdr_error;
+ READ32(write->wr_buflen);
+
+ /* Sorry .. no magic macros for this.. *
+ * READ_BUF(write->wr_buflen);
+ * SAVEMEM(write->wr_buf, write->wr_buflen);
+ */
+ avail = (char*)argp->end - (char*)argp->p;
+ if (avail + argp->pagelen < write->wr_buflen) {
+ dprintk("NFSD: xdr error (%s:%d)\n",
+ __FILE__, __LINE__);
+ goto xdr_error;
+ }
+ argp->rqstp->rq_vec[0].iov_base = p;
+ argp->rqstp->rq_vec[0].iov_len = avail;
+ v = 0;
+ len = write->wr_buflen;
+ while (len > argp->rqstp->rq_vec[v].iov_len) {
+ len -= argp->rqstp->rq_vec[v].iov_len;
+ v++;
+ argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]);
+ argp->pagelist++;
+ if (argp->pagelen >= PAGE_SIZE) {
+ argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE;
+ argp->pagelen -= PAGE_SIZE;
+ } else {
+ argp->rqstp->rq_vec[v].iov_len = argp->pagelen;
+ argp->pagelen -= len;
+ }
+ }
+ argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
+ argp->p = (__be32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
+ argp->rqstp->rq_vec[v].iov_len = len;
+ write->wr_vlen = v+1;
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
+{
+ DECODE_HEAD;
+
+ READ_BUF(12);
+ COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
+ READ32(rlockowner->rl_owner.len);
+ READ_BUF(rlockowner->rl_owner.len);
+ READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
+
+ if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid))
+ return nfserr_inval;
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
+{
+ int dummy, tmp;
+ DECODE_HEAD;
+
+ READ_BUF(NFS4_VERIFIER_SIZE);
+ COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE);
+
+ status = nfsd4_decode_opaque(argp, &exid->clname);
+ if (status)
+ return nfserr_bad_xdr;
+
+ READ_BUF(4);
+ READ32(exid->flags);
+
+ /* Ignore state_protect4_a */
+ READ_BUF(4);
+ READ32(exid->spa_how);
+ switch (exid->spa_how) {
+ case SP4_NONE:
+ break;
+ case SP4_MACH_CRED:
+ /* spo_must_enforce */
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy * 4);
+ p += dummy;
+
+ /* spo_must_allow */
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy * 4);
+ p += dummy;
+ break;
+ case SP4_SSV:
+ /* ssp_ops */
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy * 4);
+ p += dummy;
+
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy * 4);
+ p += dummy;
+
+ /* ssp_hash_algs<> */
+ READ_BUF(4);
+ READ32(tmp);
+ while (tmp--) {
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy);
+ p += XDR_QUADLEN(dummy);
+ }
+
+ /* ssp_encr_algs<> */
+ READ_BUF(4);
+ READ32(tmp);
+ while (tmp--) {
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy);
+ p += XDR_QUADLEN(dummy);
+ }
+
+ /* ssp_window and ssp_num_gss_handles */
+ READ_BUF(8);
+ READ32(dummy);
+ READ32(dummy);
+ break;
+ default:
+ goto xdr_error;
+ }
+
+ /* Ignore Implementation ID */
+ READ_BUF(4); /* nfs_impl_id4 array length */
+ READ32(dummy);
+
+ if (dummy > 1)
+ goto xdr_error;
+
+ if (dummy == 1) {
+ /* nii_domain */
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy);
+ p += XDR_QUADLEN(dummy);
+
+ /* nii_name */
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy);
+ p += XDR_QUADLEN(dummy);
+
+ /* nii_date */
+ READ_BUF(12);
+ p += 3;
+ }
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+ struct nfsd4_create_session *sess)
+{
+ DECODE_HEAD;
+
+ u32 dummy;
+ char *machine_name;
+ int i;
+ int nr_secflavs;
+
+ READ_BUF(16);
+ COPYMEM(&sess->clientid, 8);
+ READ32(sess->seqid);
+ READ32(sess->flags);
+
+ /* Fore channel attrs */
+ READ_BUF(28);
+ READ32(dummy); /* headerpadsz is always 0 */
+ READ32(sess->fore_channel.maxreq_sz);
+ READ32(sess->fore_channel.maxresp_sz);
+ READ32(sess->fore_channel.maxresp_cached);
+ READ32(sess->fore_channel.maxops);
+ READ32(sess->fore_channel.maxreqs);
+ READ32(sess->fore_channel.nr_rdma_attrs);
+ if (sess->fore_channel.nr_rdma_attrs == 1) {
+ READ_BUF(4);
+ READ32(sess->fore_channel.rdma_attrs);
+ } else if (sess->fore_channel.nr_rdma_attrs > 1) {
+ dprintk("Too many fore channel attr bitmaps!\n");
+ goto xdr_error;
+ }
+
+ /* Back channel attrs */
+ READ_BUF(28);
+ READ32(dummy); /* headerpadsz is always 0 */
+ READ32(sess->back_channel.maxreq_sz);
+ READ32(sess->back_channel.maxresp_sz);
+ READ32(sess->back_channel.maxresp_cached);
+ READ32(sess->back_channel.maxops);
+ READ32(sess->back_channel.maxreqs);
+ READ32(sess->back_channel.nr_rdma_attrs);
+ if (sess->back_channel.nr_rdma_attrs == 1) {
+ READ_BUF(4);
+ READ32(sess->back_channel.rdma_attrs);
+ } else if (sess->back_channel.nr_rdma_attrs > 1) {
+ dprintk("Too many back channel attr bitmaps!\n");
+ goto xdr_error;
+ }
+
+ READ_BUF(8);
+ READ32(sess->callback_prog);
+
+ /* callback_sec_params4 */
+ READ32(nr_secflavs);
+ for (i = 0; i < nr_secflavs; ++i) {
+ READ_BUF(4);
+ READ32(dummy);
+ switch (dummy) {
+ case RPC_AUTH_NULL:
+ /* Nothing to read */
+ break;
+ case RPC_AUTH_UNIX:
+ READ_BUF(8);
+ /* stamp */
+ READ32(dummy);
+
+ /* machine name */
+ READ32(dummy);
+ READ_BUF(dummy);
+ SAVEMEM(machine_name, dummy);
+
+ /* uid, gid */
+ READ_BUF(8);
+ READ32(sess->uid);
+ READ32(sess->gid);
+
+ /* more gids */
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy * 4);
+ break;
+ case RPC_AUTH_GSS:
+ dprintk("RPC_AUTH_GSS callback secflavor "
+ "not supported!\n");
+ READ_BUF(8);
+ /* gcbp_service */
+ READ32(dummy);
+ /* gcbp_handle_from_server */
+ READ32(dummy);
+ READ_BUF(dummy);
+ p += XDR_QUADLEN(dummy);
+ /* gcbp_handle_from_client */
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy);
+ break;
+ default:
+ dprintk("Illegal callback secflavor\n");
+ return nfserr_inval;
+ }
+ }
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
+ struct nfsd4_destroy_session *destroy_session)
+{
+ DECODE_HEAD;
+ READ_BUF(NFS4_MAX_SESSIONID_LEN);
+ COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
+ struct nfsd4_free_stateid *free_stateid)
+{
+ DECODE_HEAD;
+
+ READ_BUF(sizeof(stateid_t));
+ READ32(free_stateid->fr_stateid.si_generation);
+ COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t));
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
+ struct nfsd4_sequence *seq)
+{
+ DECODE_HEAD;
+
+ READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
+ COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ READ32(seq->seqid);
+ READ32(seq->slotid);
+ READ32(seq->maxslots);
+ READ32(seq->cachethis);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
+{
+ unsigned int nbytes;
+ stateid_t si;
+ int i;
+ __be32 *p;
+ __be32 status;
+
+ READ_BUF(4);
+ test_stateid->ts_num_ids = ntohl(*p++);
+
+ nbytes = test_stateid->ts_num_ids * sizeof(stateid_t);
+ if (nbytes > (u32)((char *)argp->end - (char *)argp->p))
+ goto xdr_error;
+
+ test_stateid->ts_saved_args = argp;
+ save_buf(argp, &test_stateid->ts_savedp);
+
+ for (i = 0; i < test_stateid->ts_num_ids; i++) {
+ status = nfsd4_decode_stateid(argp, &si);
+ if (status)
+ return status;
+ }
+
+ status = 0;
+out:
+ return status;
+xdr_error:
+ dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__);
+ status = nfserr_bad_xdr;
+ goto out;
+}
+
+static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc)
+{
+ DECODE_HEAD;
+
+ READ_BUF(8);
+ COPYMEM(&dc->clientid, 8);
+
+ DECODE_TAIL;
+}
+
+static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(rc->rca_one_fs);
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+{
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
+{
+ return nfserr_notsupp;
+}
+
+typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
+
+static nfsd4_dec nfsd4_dec_ops[] = {
+ [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
+ [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
+ [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
+ [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
+ [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
+ [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
+ [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
+ [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
+ [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
+ [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
+ [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
+ [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+ [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
+ [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
+ [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
+ [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
+ [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
+ [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
+ [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
+ [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
+ [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
+ [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
+ [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
+ [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
+ [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
+ [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+ [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
+ [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
+};
+
+static nfsd4_dec nfsd41_dec_ops[] = {
+ [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
+ [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
+ [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
+ [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
+ [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
+ [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
+ [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
+ [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
+ [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
+ [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
+ [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
+ [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+ [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
+ [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
+ [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
+ [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
+ [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
+ [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
+ [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
+ [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
+ [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
+ [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+ [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
+ [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp,
+
+ /* new operations for NFSv4.1 */
+ [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
+ [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id,
+ [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session,
+ [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
+ [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid,
+ [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
+ [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
+ [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid,
+ [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
+ [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
+};
+
+struct nfsd4_minorversion_ops {
+ nfsd4_dec *decoders;
+ int nops;
+};
+
+static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
+ [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
+ [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) },
+};
+
+static __be32
+nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+{
+ DECODE_HEAD;
+ struct nfsd4_op *op;
+ struct nfsd4_minorversion_ops *ops;
+ bool cachethis = false;
+ int i;
+
+ /*
+ * XXX: According to spec, we should check the tag
+ * for UTF-8 compliance. I'm postponing this for
+ * now because it seems that some clients do use
+ * binary tags.
+ */
+ READ_BUF(4);
+ READ32(argp->taglen);
+ READ_BUF(argp->taglen + 8);
+ SAVEMEM(argp->tag, argp->taglen);
+ READ32(argp->minorversion);
+ READ32(argp->opcnt);
+
+ if (argp->taglen > NFSD4_MAX_TAGLEN)
+ goto xdr_error;
+ if (argp->opcnt > 100)
+ goto xdr_error;
+
+ if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
+ argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
+ if (!argp->ops) {
+ argp->ops = argp->iops;
+ dprintk("nfsd: couldn't allocate room for COMPOUND\n");
+ goto xdr_error;
+ }
+ }
+
+ if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion))
+ argp->opcnt = 0;
+
+ ops = &nfsd4_minorversion[argp->minorversion];
+ for (i = 0; i < argp->opcnt; i++) {
+ op = &argp->ops[i];
+ op->replay = NULL;
+
+ /*
+ * We can't use READ_BUF() here because we need to handle
+ * a missing opcode as an OP_WRITE + 1. So we need to check
+ * to see if we're truly at the end of our buffer or if there
+ * is another page we need to flip to.
+ */
+
+ if (argp->p == argp->end) {
+ if (argp->pagelen < 4) {
+ /* There isn't an opcode still on the wire */
+ op->opnum = OP_WRITE + 1;
+ op->status = nfserr_bad_xdr;
+ argp->opcnt = i+1;
+ break;
+ }
+
+ /*
+ * False alarm. We just hit a page boundary, but there
+ * is still data available. Move pointer across page
+ * boundary. *snip from READ_BUF*
+ */
+ argp->p = page_address(argp->pagelist[0]);
+ argp->pagelist++;
+ if (argp->pagelen < PAGE_SIZE) {
+ argp->end = argp->p + (argp->pagelen>>2);
+ argp->pagelen = 0;
+ } else {
+ argp->end = argp->p + (PAGE_SIZE>>2);
+ argp->pagelen -= PAGE_SIZE;
+ }
+ }
+ op->opnum = ntohl(*argp->p++);
+
+ if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP)
+ op->status = ops->decoders[op->opnum](argp, &op->u);
+ else {
+ op->opnum = OP_ILLEGAL;
+ op->status = nfserr_op_illegal;
+ }
+
+ if (op->status) {
+ argp->opcnt = i+1;
+ break;
+ }
+ /*
+ * We'll try to cache the result in the DRC if any one
+ * op in the compound wants to be cached:
+ */
+ cachethis |= nfsd4_cache_this_op(op);
+ }
+ /* Sessions make the DRC unnecessary: */
+ if (argp->minorversion)
+ cachethis = false;
+ argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
+
+ DECODE_TAIL;
+}
+
+#define WRITE32(n) *p++ = htonl(n)
+#define WRITE64(n) do { \
+ *p++ = htonl((u32)((n) >> 32)); \
+ *p++ = htonl((u32)(n)); \
+} while (0)
+#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \
+ *(p + XDR_QUADLEN(nbytes) -1) = 0; \
+ memcpy(p, ptr, nbytes); \
+ p += XDR_QUADLEN(nbytes); \
+}} while (0)
+
+static void write32(__be32 **p, u32 n)
+{
+ *(*p)++ = n;
+}
+
+static void write64(__be32 **p, u64 n)
+{
+ write32(p, (u32)(n >> 32));
+ write32(p, (u32)n);
+}
+
+static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
+{
+ if (IS_I_VERSION(inode)) {
+ write64(p, inode->i_version);
+ } else {
+ write32(p, stat->ctime.tv_sec);
+ write32(p, stat->ctime.tv_nsec);
+ }
+}
+
+static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
+{
+ write32(p, c->atomic);
+ if (c->change_supported) {
+ write64(p, c->before_change);
+ write64(p, c->after_change);
+ } else {
+ write32(p, c->before_ctime_sec);
+ write32(p, c->before_ctime_nsec);
+ write32(p, c->after_ctime_sec);
+ write32(p, c->after_ctime_nsec);
+ }
+}
+
+#define RESERVE_SPACE(nbytes) do { \
+ p = resp->p; \
+ BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \
+} while (0)
+#define ADJUST_ARGS() resp->p = p
+
+/*
+ * Header routine to setup seqid operation replay cache
+ */
+#define ENCODE_SEQID_OP_HEAD \
+ __be32 *save; \
+ \
+ save = resp->p;
+
+/*
+ * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This
+ * is where sequence id's are incremented, and the replay cache is filled.
+ * Note that we increment sequence id's here, at the last moment, so we're sure
+ * we know whether the error to be returned is a sequence id mutating error.
+ */
+
+static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr)
+{
+ struct nfs4_stateowner *stateowner = resp->cstate.replay_owner;
+
+ if (seqid_mutating_err(ntohl(nfserr)) && stateowner) {
+ stateowner->so_seqid++;
+ stateowner->so_replay.rp_status = nfserr;
+ stateowner->so_replay.rp_buflen =
+ (char *)resp->p - (char *)save;
+ memcpy(stateowner->so_replay.rp_buf, save,
+ stateowner->so_replay.rp_buflen);
+ nfsd4_purge_closed_stateid(stateowner);
+ }
+}
+
+/* Encode as an array of strings the string given with components
+ * separated @sep.
+ */
+static __be32 nfsd4_encode_components(char sep, char *components,
+ __be32 **pp, int *buflen)
+{
+ __be32 *p = *pp;
+ __be32 *countp = p;
+ int strlen, count=0;
+ char *str, *end;
+
+ dprintk("nfsd4_encode_components(%s)\n", components);
+ if ((*buflen -= 4) < 0)
+ return nfserr_resource;
+ WRITE32(0); /* We will fill this in with @count later */
+ end = str = components;
+ while (*end) {
+ for (; *end && (*end != sep); end++)
+ ; /* Point to end of component */
+ strlen = end - str;
+ if (strlen) {
+ if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0)
+ return nfserr_resource;
+ WRITE32(strlen);
+ WRITEMEM(str, strlen);
+ count++;
+ }
+ else
+ end++;
+ str = end;
+ }
+ *pp = p;
+ p = countp;
+ WRITE32(count);
+ return 0;
+}
+
+/*
+ * encode a location element of a fs_locations structure
+ */
+static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
+ __be32 **pp, int *buflen)
+{
+ __be32 status;
+ __be32 *p = *pp;
+
+ status = nfsd4_encode_components(':', location->hosts, &p, buflen);
+ if (status)
+ return status;
+ status = nfsd4_encode_components('/', location->path, &p, buflen);
+ if (status)
+ return status;
+ *pp = p;
+ return 0;
+}
+
+/*
+ * Encode a path in RFC3530 'pathname4' format
+ */
+static __be32 nfsd4_encode_path(const struct path *root,
+ const struct path *path, __be32 **pp, int *buflen)
+{
+ struct path cur = {
+ .mnt = path->mnt,
+ .dentry = path->dentry,
+ };
+ __be32 *p = *pp;
+ struct dentry **components = NULL;
+ unsigned int ncomponents = 0;
+ __be32 err = nfserr_jukebox;
+
+ dprintk("nfsd4_encode_components(");
+
+ path_get(&cur);
+ /* First walk the path up to the nfsd root, and store the
+ * dentries/path components in an array.
+ */
+ for (;;) {
+ if (cur.dentry == root->dentry && cur.mnt == root->mnt)
+ break;
+ if (cur.dentry == cur.mnt->mnt_root) {
+ if (follow_up(&cur))
+ continue;
+ goto out_free;
+ }
+ if ((ncomponents & 15) == 0) {
+ struct dentry **new;
+ new = krealloc(components,
+ sizeof(*new) * (ncomponents + 16),
+ GFP_KERNEL);
+ if (!new)
+ goto out_free;
+ components = new;
+ }
+ components[ncomponents++] = cur.dentry;
+ cur.dentry = dget_parent(cur.dentry);
+ }
+
+ *buflen -= 4;
+ if (*buflen < 0)
+ goto out_free;
+ WRITE32(ncomponents);
+
+ while (ncomponents) {
+ struct dentry *dentry = components[ncomponents - 1];
+ unsigned int len = dentry->d_name.len;
+
+ *buflen -= 4 + (XDR_QUADLEN(len) << 2);
+ if (*buflen < 0)
+ goto out_free;
+ WRITE32(len);
+ WRITEMEM(dentry->d_name.name, len);
+ dprintk("/%s", dentry->d_name.name);
+ dput(dentry);
+ ncomponents--;
+ }
+
+ *pp = p;
+ err = 0;
+out_free:
+ dprintk(")\n");
+ while (ncomponents)
+ dput(components[--ncomponents]);
+ kfree(components);
+ path_put(&cur);
+ return err;
+}
+
+static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp,
+ const struct path *path, __be32 **pp, int *buflen)
+{
+ struct svc_export *exp_ps;
+ __be32 res;
+
+ exp_ps = rqst_find_fsidzero_export(rqstp);
+ if (IS_ERR(exp_ps))
+ return nfserrno(PTR_ERR(exp_ps));
+ res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen);
+ exp_put(exp_ps);
+ return res;
+}
+
+/*
+ * encode a fs_locations structure
+ */
+static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
+ struct svc_export *exp,
+ __be32 **pp, int *buflen)
+{
+ __be32 status;
+ int i;
+ __be32 *p = *pp;
+ struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
+
+ status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen);
+ if (status)
+ return status;
+ if ((*buflen -= 4) < 0)
+ return nfserr_resource;
+ WRITE32(fslocs->locations_count);
+ for (i=0; i<fslocs->locations_count; i++) {
+ status = nfsd4_encode_fs_location4(&fslocs->locations[i],
+ &p, buflen);
+ if (status)
+ return status;
+ }
+ *pp = p;
+ return 0;
+}
+
+static u32 nfs4_file_type(umode_t mode)
+{
+ switch (mode & S_IFMT) {
+ case S_IFIFO: return NF4FIFO;
+ case S_IFCHR: return NF4CHR;
+ case S_IFDIR: return NF4DIR;
+ case S_IFBLK: return NF4BLK;
+ case S_IFLNK: return NF4LNK;
+ case S_IFREG: return NF4REG;
+ case S_IFSOCK: return NF4SOCK;
+ default: return NF4BAD;
+ };
+}
+
+static __be32
+nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
+ __be32 **p, int *buflen)
+{
+ int status;
+
+ if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4)
+ return nfserr_resource;
+ if (whotype != NFS4_ACL_WHO_NAMED)
+ status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1));
+ else if (group)
+ status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1));
+ else
+ status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1));
+ if (status < 0)
+ return nfserrno(status);
+ *p = xdr_encode_opaque(*p, NULL, status);
+ *buflen -= (XDR_QUADLEN(status) << 2) + 4;
+ BUG_ON(*buflen < 0);
+ return 0;
+}
+
+static inline __be32
+nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen)
+{
+ return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen);
+}
+
+static inline __be32
+nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen)
+{
+ return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen);
+}
+
+static inline __be32
+nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
+ __be32 **p, int *buflen)
+{
+ return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
+}
+
+#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
+ FATTR4_WORD0_RDATTR_ERROR)
+#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
+
+static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
+{
+ /* As per referral draft: */
+ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS ||
+ *bmval1 & ~WORD1_ABSENT_FS_ATTRS) {
+ if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR ||
+ *bmval0 & FATTR4_WORD0_FS_LOCATIONS)
+ *rdattr_err = NFSERR_MOVED;
+ else
+ return nfserr_moved;
+ }
+ *bmval0 &= WORD0_ABSENT_FS_ATTRS;
+ *bmval1 &= WORD1_ABSENT_FS_ATTRS;
+ return 0;
+}
+
+/*
+ * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
+ * ourselves.
+ *
+ * @countp is the buffer size in _words_; upon successful return this becomes
+ * replaced with the number of words written.
+ */
+__be32
+nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
+ struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval,
+ struct svc_rqst *rqstp, int ignore_crossmnt)
+{
+ u32 bmval0 = bmval[0];
+ u32 bmval1 = bmval[1];
+ u32 bmval2 = bmval[2];
+ struct kstat stat;
+ struct svc_fh tempfh;
+ struct kstatfs statfs;
+ int buflen = *countp << 2;
+ __be32 *attrlenp;
+ u32 dummy;
+ u64 dummy64;
+ u32 rdattr_err = 0;
+ __be32 *p = buffer;
+ __be32 status;
+ int err;
+ int aclsupport = 0;
+ struct nfs4_acl *acl = NULL;
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ u32 minorversion = resp->cstate.minorversion;
+ struct path path = {
+ .mnt = exp->ex_path.mnt,
+ .dentry = dentry,
+ };
+
+ BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
+ BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
+ BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion));
+ BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion));
+
+ if (exp->ex_fslocs.migrated) {
+ BUG_ON(bmval[2]);
+ status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
+ if (status)
+ goto out;
+ }
+
+ err = vfs_getattr(exp->ex_path.mnt, dentry, &stat);
+ if (err)
+ goto out_nfserr;
+ if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
+ FATTR4_WORD0_MAXNAME)) ||
+ (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+ FATTR4_WORD1_SPACE_TOTAL))) {
+ err = vfs_statfs(&path, &statfs);
+ if (err)
+ goto out_nfserr;
+ }
+ if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
+ fh_init(&tempfh, NFS4_FHSIZE);
+ status = fh_compose(&tempfh, exp, dentry, NULL);
+ if (status)
+ goto out;
+ fhp = &tempfh;
+ }
+ if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
+ | FATTR4_WORD0_SUPPORTED_ATTRS)) {
+ err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
+ aclsupport = (err == 0);
+ if (bmval0 & FATTR4_WORD0_ACL) {
+ if (err == -EOPNOTSUPP)
+ bmval0 &= ~FATTR4_WORD0_ACL;
+ else if (err == -EINVAL) {
+ status = nfserr_attrnotsupp;
+ goto out;
+ } else if (err != 0)
+ goto out_nfserr;
+ }
+ }
+
+ if (bmval2) {
+ if ((buflen -= 16) < 0)
+ goto out_resource;
+ WRITE32(3);
+ WRITE32(bmval0);
+ WRITE32(bmval1);
+ WRITE32(bmval2);
+ } else if (bmval1) {
+ if ((buflen -= 12) < 0)
+ goto out_resource;
+ WRITE32(2);
+ WRITE32(bmval0);
+ WRITE32(bmval1);
+ } else {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE32(1);
+ WRITE32(bmval0);
+ }
+ attrlenp = p++; /* to be backfilled later */
+
+ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ u32 word0 = nfsd_suppattrs0(minorversion);
+ u32 word1 = nfsd_suppattrs1(minorversion);
+ u32 word2 = nfsd_suppattrs2(minorversion);
+
+ if (!aclsupport)
+ word0 &= ~FATTR4_WORD0_ACL;
+ if (!word2) {
+ if ((buflen -= 12) < 0)
+ goto out_resource;
+ WRITE32(2);
+ WRITE32(word0);
+ WRITE32(word1);
+ } else {
+ if ((buflen -= 16) < 0)
+ goto out_resource;
+ WRITE32(3);
+ WRITE32(word0);
+ WRITE32(word1);
+ WRITE32(word2);
+ }
+ }
+ if (bmval0 & FATTR4_WORD0_TYPE) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ dummy = nfs4_file_type(stat.mode);
+ if (dummy == NF4BAD)
+ goto out_serverfault;
+ WRITE32(dummy);
+ }
+ if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+ WRITE32(NFS4_FH_PERSISTENT);
+ else
+ WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
+ }
+ if (bmval0 & FATTR4_WORD0_CHANGE) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ write_change(&p, &stat, dentry->d_inode);
+ }
+ if (bmval0 & FATTR4_WORD0_SIZE) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE64(stat.size);
+ }
+ if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(1);
+ }
+ if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(1);
+ }
+ if (bmval0 & FATTR4_WORD0_NAMED_ATTR) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(0);
+ }
+ if (bmval0 & FATTR4_WORD0_FSID) {
+ if ((buflen -= 16) < 0)
+ goto out_resource;
+ if (exp->ex_fslocs.migrated) {
+ WRITE64(NFS4_REFERRAL_FSID_MAJOR);
+ WRITE64(NFS4_REFERRAL_FSID_MINOR);
+ } else switch(fsid_source(fhp)) {
+ case FSIDSOURCE_FSID:
+ WRITE64((u64)exp->ex_fsid);
+ WRITE64((u64)0);
+ break;
+ case FSIDSOURCE_DEV:
+ WRITE32(0);
+ WRITE32(MAJOR(stat.dev));
+ WRITE32(0);
+ WRITE32(MINOR(stat.dev));
+ break;
+ case FSIDSOURCE_UUID:
+ WRITEMEM(exp->ex_uuid, 16);
+ break;
+ }
+ }
+ if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(0);
+ }
+ if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(nfsd4_lease);
+ }
+ if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(rdattr_err);
+ }
+ if (bmval0 & FATTR4_WORD0_ACL) {
+ struct nfs4_ace *ace;
+
+ if (acl == NULL) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+
+ WRITE32(0);
+ goto out_acl;
+ }
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(acl->naces);
+
+ for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) {
+ if ((buflen -= 4*3) < 0)
+ goto out_resource;
+ WRITE32(ace->type);
+ WRITE32(ace->flag);
+ WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
+ status = nfsd4_encode_aclname(rqstp, ace->whotype,
+ ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP,
+ &p, &buflen);
+ if (status == nfserr_resource)
+ goto out_resource;
+ if (status)
+ goto out;
+ }
+ }
+out_acl:
+ if (bmval0 & FATTR4_WORD0_ACLSUPPORT) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(aclsupport ?
+ ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
+ }
+ if (bmval0 & FATTR4_WORD0_CANSETTIME) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(1);
+ }
+ if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(1);
+ }
+ if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(1);
+ }
+ if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(1);
+ }
+ if (bmval0 & FATTR4_WORD0_FILEHANDLE) {
+ buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4;
+ if (buflen < 0)
+ goto out_resource;
+ WRITE32(fhp->fh_handle.fh_size);
+ WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size);
+ }
+ if (bmval0 & FATTR4_WORD0_FILEID) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE64(stat.ino);
+ }
+ if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE64((u64) statfs.f_ffree);
+ }
+ if (bmval0 & FATTR4_WORD0_FILES_FREE) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE64((u64) statfs.f_ffree);
+ }
+ if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE64((u64) statfs.f_files);
+ }
+ if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
+ status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
+ if (status == nfserr_resource)
+ goto out_resource;
+ if (status)
+ goto out;
+ }
+ if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(1);
+ }
+ if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE64(~(u64)0);
+ }
+ if (bmval0 & FATTR4_WORD0_MAXLINK) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(255);
+ }
+ if (bmval0 & FATTR4_WORD0_MAXNAME) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(statfs.f_namelen);
+ }
+ if (bmval0 & FATTR4_WORD0_MAXREAD) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE64((u64) svc_max_payload(rqstp));
+ }
+ if (bmval0 & FATTR4_WORD0_MAXWRITE) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE64((u64) svc_max_payload(rqstp));
+ }
+ if (bmval1 & FATTR4_WORD1_MODE) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(stat.mode & S_IALLUGO);
+ }
+ if (bmval1 & FATTR4_WORD1_NO_TRUNC) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(1);
+ }
+ if (bmval1 & FATTR4_WORD1_NUMLINKS) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+ WRITE32(stat.nlink);
+ }
+ if (bmval1 & FATTR4_WORD1_OWNER) {
+ status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
+ if (status == nfserr_resource)
+ goto out_resource;
+ if (status)
+ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
+ status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
+ if (status == nfserr_resource)
+ goto out_resource;
+ if (status)
+ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_RAWDEV) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ WRITE32((u32) MAJOR(stat.rdev));
+ WRITE32((u32) MINOR(stat.rdev));
+ }
+ if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize;
+ WRITE64(dummy64);
+ }
+ if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize;
+ WRITE64(dummy64);
+ }
+ if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize;
+ WRITE64(dummy64);
+ }
+ if (bmval1 & FATTR4_WORD1_SPACE_USED) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ dummy64 = (u64)stat.blocks << 9;
+ WRITE64(dummy64);
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+ if ((buflen -= 12) < 0)
+ goto out_resource;
+ WRITE32(0);
+ WRITE32(stat.atime.tv_sec);
+ WRITE32(stat.atime.tv_nsec);
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+ if ((buflen -= 12) < 0)
+ goto out_resource;
+ WRITE32(0);
+ WRITE32(1);
+ WRITE32(0);
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+ if ((buflen -= 12) < 0)
+ goto out_resource;
+ WRITE32(0);
+ WRITE32(stat.ctime.tv_sec);
+ WRITE32(stat.ctime.tv_nsec);
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+ if ((buflen -= 12) < 0)
+ goto out_resource;
+ WRITE32(0);
+ WRITE32(stat.mtime.tv_sec);
+ WRITE32(stat.mtime.tv_nsec);
+ }
+ if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+ /*
+ * Get parent's attributes if not ignoring crossmount
+ * and this is the root of a cross-mounted filesystem.
+ */
+ if (ignore_crossmnt == 0 &&
+ dentry == exp->ex_path.mnt->mnt_root) {
+ struct path path = exp->ex_path;
+ path_get(&path);
+ while (follow_up(&path)) {
+ if (path.dentry != path.mnt->mnt_root)
+ break;
+ }
+ err = vfs_getattr(path.mnt, path.dentry, &stat);
+ path_put(&path);
+ if (err)
+ goto out_nfserr;
+ }
+ WRITE64(stat.ino);
+ }
+ if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ WRITE32(3);
+ WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+ WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
+ WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
+ }
+
+ *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
+ *countp = p - buffer;
+ status = nfs_ok;
+
+out:
+ kfree(acl);
+ if (fhp == &tempfh)
+ fh_put(&tempfh);
+ return status;
+out_nfserr:
+ status = nfserrno(err);
+ goto out;
+out_resource:
+ *countp = 0;
+ status = nfserr_resource;
+ goto out;
+out_serverfault:
+ status = nfserr_serverfault;
+ goto out;
+}
+
+static inline int attributes_need_mount(u32 *bmval)
+{
+ if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME))
+ return 1;
+ if (bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID)
+ return 1;
+ return 0;
+}
+
+static __be32
+nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
+ const char *name, int namlen, __be32 *p, int *buflen)
+{
+ struct svc_export *exp = cd->rd_fhp->fh_export;
+ struct dentry *dentry;
+ __be32 nfserr;
+ int ignore_crossmnt = 0;
+
+ dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+ if (IS_ERR(dentry))
+ return nfserrno(PTR_ERR(dentry));
+ if (!dentry->d_inode) {
+ /*
+ * nfsd_buffered_readdir drops the i_mutex between
+ * readdir and calling this callback, leaving a window
+ * where this directory entry could have gone away.
+ */
+ dput(dentry);
+ return nfserr_noent;
+ }
+
+ exp_get(exp);
+ /*
+ * In the case of a mountpoint, the client may be asking for
+ * attributes that are only properties of the underlying filesystem
+ * as opposed to the cross-mounted file system. In such a case,
+ * we will not follow the cross mount and will fill the attribtutes
+ * directly from the mountpoint dentry.
+ */
+ if (nfsd_mountpoint(dentry, exp)) {
+ int err;
+
+ if (!(exp->ex_flags & NFSEXP_V4ROOT)
+ && !attributes_need_mount(cd->rd_bmval)) {
+ ignore_crossmnt = 1;
+ goto out_encode;
+ }
+ /*
+ * Why the heck aren't we just using nfsd_lookup??
+ * Different "."/".." handling? Something else?
+ * At least, add a comment here to explain....
+ */
+ err = nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp);
+ if (err) {
+ nfserr = nfserrno(err);
+ goto out_put;
+ }
+ nfserr = check_nfsd_access(exp, cd->rd_rqstp);
+ if (nfserr)
+ goto out_put;
+
+ }
+out_encode:
+ nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
+ cd->rd_rqstp, ignore_crossmnt);
+out_put:
+ dput(dentry);
+ exp_put(exp);
+ return nfserr;
+}
+
+static __be32 *
+nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+{
+ __be32 *attrlenp;
+
+ if (buflen < 6)
+ return NULL;
+ *p++ = htonl(2);
+ *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
+ *p++ = htonl(0); /* bmval1 */
+
+ attrlenp = p++;
+ *p++ = nfserr; /* no htonl */
+ *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
+ return p;
+}
+
+static int
+nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct readdir_cd *ccd = ccdv;
+ struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
+ int buflen;
+ __be32 *p = cd->buffer;
+ __be32 *cookiep;
+ __be32 nfserr = nfserr_toosmall;
+
+ /* In nfsv4, "." and ".." never make it onto the wire.. */
+ if (name && isdotent(name, namlen)) {
+ cd->common.err = nfs_ok;
+ return 0;
+ }
+
+ if (cd->offset)
+ xdr_encode_hyper(cd->offset, (u64) offset);
+
+ buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
+ if (buflen < 0)
+ goto fail;
+
+ *p++ = xdr_one; /* mark entry present */
+ cookiep = p;
+ p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
+ p = xdr_encode_array(p, name, namlen); /* name length & name */
+
+ nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen);
+ switch (nfserr) {
+ case nfs_ok:
+ p += buflen;
+ break;
+ case nfserr_resource:
+ nfserr = nfserr_toosmall;
+ goto fail;
+ case nfserr_noent:
+ goto skip_entry;
+ default:
+ /*
+ * If the client requested the RDATTR_ERROR attribute,
+ * we stuff the error code into this attribute
+ * and continue. If this attribute was not requested,
+ * then in accordance with the spec, we fail the
+ * entire READDIR operation(!)
+ */
+ if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
+ goto fail;
+ p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+ if (p == NULL) {
+ nfserr = nfserr_toosmall;
+ goto fail;
+ }
+ }
+ cd->buflen -= (p - cd->buffer);
+ cd->buffer = p;
+ cd->offset = cookiep;
+skip_entry:
+ cd->common.err = nfs_ok;
+ return 0;
+fail:
+ cd->common.err = nfserr;
+ return -EINVAL;
+}
+
+static void
+nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
+{
+ __be32 *p;
+
+ RESERVE_SPACE(sizeof(stateid_t));
+ WRITE32(sid->si_generation);
+ WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
+ ADJUST_ARGS();
+}
+
+static __be32
+nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(8);
+ WRITE32(access->ac_supported);
+ WRITE32(access->ac_resp_access);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8);
+ WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ WRITE32(bcts->dir);
+ /* XXX: ? */
+ WRITE32(0);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
+{
+ ENCODE_SEQID_OP_HEAD;
+
+ if (!nfserr)
+ nfsd4_encode_stateid(resp, &close->cl_stateid);
+
+ encode_seqid_op_tail(resp, save, nfserr);
+ return nfserr;
+}
+
+
+static __be32
+nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(8);
+ WRITEMEM(commit->co_verf.data, 8);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(32);
+ write_cinfo(&p, &create->cr_cinfo);
+ WRITE32(2);
+ WRITE32(create->cr_bmval[0]);
+ WRITE32(create->cr_bmval[1]);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
+{
+ struct svc_fh *fhp = getattr->ga_fhp;
+ int buflen;
+
+ if (nfserr)
+ return nfserr;
+
+ buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
+ nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
+ resp->p, &buflen, getattr->ga_bmval,
+ resp->rqstp, 0);
+ if (!nfserr)
+ resp->p += buflen;
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
+{
+ struct svc_fh *fhp = *fhpp;
+ unsigned int len;
+ __be32 *p;
+
+ if (!nfserr) {
+ len = fhp->fh_handle.fh_size;
+ RESERVE_SPACE(len + 4);
+ WRITE32(len);
+ WRITEMEM(&fhp->fh_handle.fh_base, len);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+/*
+* Including all fields other than the name, a LOCK4denied structure requires
+* 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes.
+*/
+static void
+nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
+{
+ struct xdr_netobj *conf = &ld->ld_owner;
+ __be32 *p;
+
+ RESERVE_SPACE(32 + XDR_LEN(conf->len));
+ WRITE64(ld->ld_start);
+ WRITE64(ld->ld_length);
+ WRITE32(ld->ld_type);
+ if (conf->len) {
+ WRITEMEM(&ld->ld_clientid, 8);
+ WRITE32(conf->len);
+ WRITEMEM(conf->data, conf->len);
+ kfree(conf->data);
+ } else { /* non - nfsv4 lock in conflict, no clientid nor owner */
+ WRITE64((u64)0); /* clientid */
+ WRITE32(0); /* length of owner name */
+ }
+ ADJUST_ARGS();
+}
+
+static __be32
+nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
+{
+ ENCODE_SEQID_OP_HEAD;
+
+ if (!nfserr)
+ nfsd4_encode_stateid(resp, &lock->lk_resp_stateid);
+ else if (nfserr == nfserr_denied)
+ nfsd4_encode_lock_denied(resp, &lock->lk_denied);
+
+ encode_seqid_op_tail(resp, save, nfserr);
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
+{
+ if (nfserr == nfserr_denied)
+ nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
+{
+ ENCODE_SEQID_OP_HEAD;
+
+ if (!nfserr)
+ nfsd4_encode_stateid(resp, &locku->lu_stateid);
+
+ encode_seqid_op_tail(resp, save, nfserr);
+ return nfserr;
+}
+
+
+static __be32
+nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(20);
+ write_cinfo(&p, &link->li_cinfo);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+
+static __be32
+nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
+{
+ __be32 *p;
+ ENCODE_SEQID_OP_HEAD;
+
+ if (nfserr)
+ goto out;
+
+ nfsd4_encode_stateid(resp, &open->op_stateid);
+ RESERVE_SPACE(40);
+ write_cinfo(&p, &open->op_cinfo);
+ WRITE32(open->op_rflags);
+ WRITE32(2);
+ WRITE32(open->op_bmval[0]);
+ WRITE32(open->op_bmval[1]);
+ WRITE32(open->op_delegate_type);
+ ADJUST_ARGS();
+
+ switch (open->op_delegate_type) {
+ case NFS4_OPEN_DELEGATE_NONE:
+ break;
+ case NFS4_OPEN_DELEGATE_READ:
+ nfsd4_encode_stateid(resp, &open->op_delegate_stateid);
+ RESERVE_SPACE(20);
+ WRITE32(open->op_recall);
+
+ /*
+ * TODO: ACE's in delegations
+ */
+ WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+ WRITE32(0);
+ WRITE32(0);
+ WRITE32(0); /* XXX: is NULL principal ok? */
+ ADJUST_ARGS();
+ break;
+ case NFS4_OPEN_DELEGATE_WRITE:
+ nfsd4_encode_stateid(resp, &open->op_delegate_stateid);
+ RESERVE_SPACE(32);
+ WRITE32(0);
+
+ /*
+ * TODO: space_limit's in delegations
+ */
+ WRITE32(NFS4_LIMIT_SIZE);
+ WRITE32(~(u32)0);
+ WRITE32(~(u32)0);
+
+ /*
+ * TODO: ACE's in delegations
+ */
+ WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+ WRITE32(0);
+ WRITE32(0);
+ WRITE32(0); /* XXX: is NULL principal ok? */
+ ADJUST_ARGS();
+ break;
+ default:
+ BUG();
+ }
+ /* XXX save filehandle here */
+out:
+ encode_seqid_op_tail(resp, save, nfserr);
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
+{
+ ENCODE_SEQID_OP_HEAD;
+
+ if (!nfserr)
+ nfsd4_encode_stateid(resp, &oc->oc_resp_stateid);
+
+ encode_seqid_op_tail(resp, save, nfserr);
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
+{
+ ENCODE_SEQID_OP_HEAD;
+
+ if (!nfserr)
+ nfsd4_encode_stateid(resp, &od->od_stateid);
+
+ encode_seqid_op_tail(resp, save, nfserr);
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_read *read)
+{
+ u32 eof;
+ int v, pn;
+ unsigned long maxcount;
+ long len;
+ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+ if (resp->xbuf->page_len)
+ return nfserr_resource;
+
+ RESERVE_SPACE(8); /* eof flag and byte count */
+
+ maxcount = svc_max_payload(resp->rqstp);
+ if (maxcount > read->rd_length)
+ maxcount = read->rd_length;
+
+ len = maxcount;
+ v = 0;
+ while (len > 0) {
+ pn = resp->rqstp->rq_resused++;
+ resp->rqstp->rq_vec[v].iov_base =
+ page_address(resp->rqstp->rq_respages[pn]);
+ resp->rqstp->rq_vec[v].iov_len =
+ len < PAGE_SIZE ? len : PAGE_SIZE;
+ v++;
+ len -= PAGE_SIZE;
+ }
+ read->rd_vlen = v;
+
+ nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+ read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
+ &maxcount);
+
+ if (nfserr)
+ return nfserr;
+ eof = (read->rd_offset + maxcount >=
+ read->rd_fhp->fh_dentry->d_inode->i_size);
+
+ WRITE32(eof);
+ WRITE32(maxcount);
+ ADJUST_ARGS();
+ resp->xbuf->head[0].iov_len = (char*)p
+ - (char*)resp->xbuf->head[0].iov_base;
+ resp->xbuf->page_len = maxcount;
+
+ /* Use rest of head for padding and remaining ops: */
+ resp->xbuf->tail[0].iov_base = p;
+ resp->xbuf->tail[0].iov_len = 0;
+ if (maxcount&3) {
+ RESERVE_SPACE(4);
+ WRITE32(0);
+ resp->xbuf->tail[0].iov_base += maxcount&3;
+ resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
+ ADJUST_ARGS();
+ }
+ return 0;
+}
+
+static __be32
+nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
+{
+ int maxcount;
+ char *page;
+ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+ if (resp->xbuf->page_len)
+ return nfserr_resource;
+
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+
+ maxcount = PAGE_SIZE;
+ RESERVE_SPACE(4);
+
+ /*
+ * XXX: By default, the ->readlink() VFS op will truncate symlinks
+ * if they would overflow the buffer. Is this kosher in NFSv4? If
+ * not, one easy fix is: if ->readlink() precisely fills the buffer,
+ * assume that truncation occurred, and return NFS4ERR_RESOURCE.
+ */
+ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount);
+ if (nfserr == nfserr_isdir)
+ return nfserr_inval;
+ if (nfserr)
+ return nfserr;
+
+ WRITE32(maxcount);
+ ADJUST_ARGS();
+ resp->xbuf->head[0].iov_len = (char*)p
+ - (char*)resp->xbuf->head[0].iov_base;
+ resp->xbuf->page_len = maxcount;
+
+ /* Use rest of head for padding and remaining ops: */
+ resp->xbuf->tail[0].iov_base = p;
+ resp->xbuf->tail[0].iov_len = 0;
+ if (maxcount&3) {
+ RESERVE_SPACE(4);
+ WRITE32(0);
+ resp->xbuf->tail[0].iov_base += maxcount&3;
+ resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
+ ADJUST_ARGS();
+ }
+ return 0;
+}
+
+static __be32
+nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
+{
+ int maxcount;
+ loff_t offset;
+ __be32 *page, *savep, *tailbase;
+ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+ if (resp->xbuf->page_len)
+ return nfserr_resource;
+
+ RESERVE_SPACE(8); /* verifier */
+ savep = p;
+
+ /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
+ WRITE32(0);
+ WRITE32(0);
+ ADJUST_ARGS();
+ resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+ tailbase = p;
+
+ maxcount = PAGE_SIZE;
+ if (maxcount > readdir->rd_maxcount)
+ maxcount = readdir->rd_maxcount;
+
+ /*
+ * Convert from bytes to words, account for the two words already
+ * written, make sure to leave two words at the end for the next
+ * pointer and eof field.
+ */
+ maxcount = (maxcount >> 2) - 4;
+ if (maxcount < 0) {
+ nfserr = nfserr_toosmall;
+ goto err_no_verf;
+ }
+
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+ readdir->common.err = 0;
+ readdir->buflen = maxcount;
+ readdir->buffer = page;
+ readdir->offset = NULL;
+
+ offset = readdir->rd_cookie;
+ nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
+ &offset,
+ &readdir->common, nfsd4_encode_dirent);
+ if (nfserr == nfs_ok &&
+ readdir->common.err == nfserr_toosmall &&
+ readdir->buffer == page)
+ nfserr = nfserr_toosmall;
+ if (nfserr)
+ goto err_no_verf;
+
+ if (readdir->offset)
+ xdr_encode_hyper(readdir->offset, offset);
+
+ p = readdir->buffer;
+ *p++ = 0; /* no more entries */
+ *p++ = htonl(readdir->common.err == nfserr_eof);
+ resp->xbuf->page_len = ((char*)p) - (char*)page_address(
+ resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+
+ /* Use rest of head for padding and remaining ops: */
+ resp->xbuf->tail[0].iov_base = tailbase;
+ resp->xbuf->tail[0].iov_len = 0;
+ resp->p = resp->xbuf->tail[0].iov_base;
+ resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;
+
+ return 0;
+err_no_verf:
+ p = savep;
+ ADJUST_ARGS();
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(20);
+ write_cinfo(&p, &remove->rm_cinfo);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(40);
+ write_cinfo(&p, &rename->rn_sinfo);
+ write_cinfo(&p, &rename->rn_tinfo);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+static __be32
+nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
+ __be32 nfserr,struct svc_export *exp)
+{
+ int i = 0;
+ u32 nflavs;
+ struct exp_flavor_info *flavs;
+ struct exp_flavor_info def_flavs[2];
+ __be32 *p;
+
+ if (nfserr)
+ goto out;
+ if (exp->ex_nflavors) {
+ flavs = exp->ex_flavors;
+ nflavs = exp->ex_nflavors;
+ } else { /* Handling of some defaults in absence of real secinfo: */
+ flavs = def_flavs;
+ if (exp->ex_client->flavour->flavour == RPC_AUTH_UNIX) {
+ nflavs = 2;
+ flavs[0].pseudoflavor = RPC_AUTH_UNIX;
+ flavs[1].pseudoflavor = RPC_AUTH_NULL;
+ } else if (exp->ex_client->flavour->flavour == RPC_AUTH_GSS) {
+ nflavs = 1;
+ flavs[0].pseudoflavor
+ = svcauth_gss_flavor(exp->ex_client);
+ } else {
+ nflavs = 1;
+ flavs[0].pseudoflavor
+ = exp->ex_client->flavour->flavour;
+ }
+ }
+
+ RESERVE_SPACE(4);
+ WRITE32(nflavs);
+ ADJUST_ARGS();
+ for (i = 0; i < nflavs; i++) {
+ u32 flav = flavs[i].pseudoflavor;
+ struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav);
+
+ if (gm) {
+ RESERVE_SPACE(4);
+ WRITE32(RPC_AUTH_GSS);
+ ADJUST_ARGS();
+ RESERVE_SPACE(4 + gm->gm_oid.len);
+ WRITE32(gm->gm_oid.len);
+ WRITEMEM(gm->gm_oid.data, gm->gm_oid.len);
+ ADJUST_ARGS();
+ RESERVE_SPACE(4);
+ WRITE32(0); /* qop */
+ ADJUST_ARGS();
+ RESERVE_SPACE(4);
+ WRITE32(gss_pseudoflavor_to_service(gm, flav));
+ ADJUST_ARGS();
+ gss_mech_put(gm);
+ } else {
+ RESERVE_SPACE(4);
+ WRITE32(flav);
+ ADJUST_ARGS();
+ }
+ }
+out:
+ if (exp)
+ exp_put(exp);
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_secinfo *secinfo)
+{
+ return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp);
+}
+
+static __be32
+nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_secinfo_no_name *secinfo)
+{
+ return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp);
+}
+
+/*
+ * The SETATTR encode routine is special -- it always encodes a bitmap,
+ * regardless of the error status.
+ */
+static __be32
+nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
+{
+ __be32 *p;
+
+ RESERVE_SPACE(12);
+ if (nfserr) {
+ WRITE32(2);
+ WRITE32(0);
+ WRITE32(0);
+ }
+ else {
+ WRITE32(2);
+ WRITE32(setattr->sa_bmval[0]);
+ WRITE32(setattr->sa_bmval[1]);
+ }
+ ADJUST_ARGS();
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(8 + sizeof(nfs4_verifier));
+ WRITEMEM(&scd->se_clientid, 8);
+ WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier));
+ ADJUST_ARGS();
+ }
+ else if (nfserr == nfserr_clid_inuse) {
+ RESERVE_SPACE(8);
+ WRITE32(0);
+ WRITE32(0);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(16);
+ WRITE32(write->wr_bytes_written);
+ WRITE32(write->wr_how_written);
+ WRITEMEM(write->wr_verifier.data, 8);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_exchange_id *exid)
+{
+ __be32 *p;
+ char *major_id;
+ char *server_scope;
+ int major_id_sz;
+ int server_scope_sz;
+ uint64_t minor_id = 0;
+
+ if (nfserr)
+ return nfserr;
+
+ major_id = utsname()->nodename;
+ major_id_sz = strlen(major_id);
+ server_scope = utsname()->nodename;
+ server_scope_sz = strlen(server_scope);
+
+ RESERVE_SPACE(
+ 8 /* eir_clientid */ +
+ 4 /* eir_sequenceid */ +
+ 4 /* eir_flags */ +
+ 4 /* spr_how (SP4_NONE) */ +
+ 8 /* so_minor_id */ +
+ 4 /* so_major_id.len */ +
+ (XDR_QUADLEN(major_id_sz) * 4) +
+ 4 /* eir_server_scope.len */ +
+ (XDR_QUADLEN(server_scope_sz) * 4) +
+ 4 /* eir_server_impl_id.count (0) */);
+
+ WRITEMEM(&exid->clientid, 8);
+ WRITE32(exid->seqid);
+ WRITE32(exid->flags);
+
+ /* state_protect4_r. Currently only support SP4_NONE */
+ BUG_ON(exid->spa_how != SP4_NONE);
+ WRITE32(exid->spa_how);
+
+ /* The server_owner struct */
+ WRITE64(minor_id); /* Minor id */
+ /* major id */
+ WRITE32(major_id_sz);
+ WRITEMEM(major_id, major_id_sz);
+
+ /* Server scope */
+ WRITE32(server_scope_sz);
+ WRITEMEM(server_scope, server_scope_sz);
+
+ /* Implementation id */
+ WRITE32(0); /* zero length nfs_impl_id4 array */
+ ADJUST_ARGS();
+ return 0;
+}
+
+static __be32
+nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_create_session *sess)
+{
+ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+
+ RESERVE_SPACE(24);
+ WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ WRITE32(sess->seqid);
+ WRITE32(sess->flags);
+ ADJUST_ARGS();
+
+ RESERVE_SPACE(28);
+ WRITE32(0); /* headerpadsz */
+ WRITE32(sess->fore_channel.maxreq_sz);
+ WRITE32(sess->fore_channel.maxresp_sz);
+ WRITE32(sess->fore_channel.maxresp_cached);
+ WRITE32(sess->fore_channel.maxops);
+ WRITE32(sess->fore_channel.maxreqs);
+ WRITE32(sess->fore_channel.nr_rdma_attrs);
+ ADJUST_ARGS();
+
+ if (sess->fore_channel.nr_rdma_attrs) {
+ RESERVE_SPACE(4);
+ WRITE32(sess->fore_channel.rdma_attrs);
+ ADJUST_ARGS();
+ }
+
+ RESERVE_SPACE(28);
+ WRITE32(0); /* headerpadsz */
+ WRITE32(sess->back_channel.maxreq_sz);
+ WRITE32(sess->back_channel.maxresp_sz);
+ WRITE32(sess->back_channel.maxresp_cached);
+ WRITE32(sess->back_channel.maxops);
+ WRITE32(sess->back_channel.maxreqs);
+ WRITE32(sess->back_channel.nr_rdma_attrs);
+ ADJUST_ARGS();
+
+ if (sess->back_channel.nr_rdma_attrs) {
+ RESERVE_SPACE(4);
+ WRITE32(sess->back_channel.rdma_attrs);
+ ADJUST_ARGS();
+ }
+ return 0;
+}
+
+static __be32
+nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_destroy_session *destroy_session)
+{
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_free_stateid *free_stateid)
+{
+ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+
+ RESERVE_SPACE(4);
+ WRITE32(nfserr);
+ ADJUST_ARGS();
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_sequence *seq)
+{
+ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+
+ RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20);
+ WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ WRITE32(seq->seqid);
+ WRITE32(seq->slotid);
+ /* Note slotid's are numbered from zero: */
+ WRITE32(seq->maxslots - 1); /* sr_highest_slotid */
+ WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */
+ WRITE32(seq->status_flags);
+
+ ADJUST_ARGS();
+ resp->cstate.datap = p; /* DRC cache data pointer */
+ return 0;
+}
+
+__be32
+nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_test_stateid *test_stateid)
+{
+ struct nfsd4_compoundargs *argp;
+ struct nfs4_client *cl = resp->cstate.session->se_client;
+ stateid_t si;
+ __be32 *p;
+ int i;
+ int valid;
+
+ restore_buf(test_stateid->ts_saved_args, &test_stateid->ts_savedp);
+ argp = test_stateid->ts_saved_args;
+
+ RESERVE_SPACE(4);
+ *p++ = htonl(test_stateid->ts_num_ids);
+ resp->p = p;
+
+ nfs4_lock_state();
+ for (i = 0; i < test_stateid->ts_num_ids; i++) {
+ nfsd4_decode_stateid(argp, &si);
+ valid = nfs4_validate_stateid(cl, &si);
+ RESERVE_SPACE(4);
+ *p++ = htonl(valid);
+ resp->p = p;
+ }
+ nfs4_unlock_state();
+
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+{
+ return nfserr;
+}
+
+typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
+
+/*
+ * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1
+ * since we don't need to filter out obsolete ops as this is
+ * done in the decoding phase.
+ */
+static nfsd4_enc nfsd4_enc_ops[] = {
+ [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
+ [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
+ [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
+ [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
+ [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
+ [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
+ [OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
+ [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
+ [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
+ [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
+ [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
+ [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
+ [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
+ [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_READ] = (nfsd4_enc)nfsd4_encode_read,
+ [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
+ [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
+ [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
+ [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
+ [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
+ [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
+ [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
+ [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
+ [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
+
+ /* NFSv4.1 operations */
+ [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
+ [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id,
+ [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session,
+ [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session,
+ [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_free_stateid,
+ [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
+ [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
+ [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid,
+ [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
+};
+
+/*
+ * Calculate the total amount of memory that the compound response has taken
+ * after encoding the current operation with pad.
+ *
+ * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop()
+ * which was specified at nfsd4_operation, else pad is zero.
+ *
+ * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
+ *
+ * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
+ * will be at least a page and will therefore hold the xdr_buf head.
+ */
+int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
+{
+ struct xdr_buf *xb = &resp->rqstp->rq_res;
+ struct nfsd4_session *session = NULL;
+ struct nfsd4_slot *slot = resp->cstate.slot;
+ u32 length, tlen = 0;
+
+ if (!nfsd4_has_session(&resp->cstate))
+ return 0;
+
+ session = resp->cstate.session;
+ if (session == NULL)
+ return 0;
+
+ if (xb->page_len == 0) {
+ length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
+ } else {
+ if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0)
+ tlen = (char *)resp->p - (char *)xb->tail[0].iov_base;
+
+ length = xb->head[0].iov_len + xb->page_len + tlen + pad;
+ }
+ dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
+ length, xb->page_len, tlen, pad);
+
+ if (length > session->se_fchannel.maxresp_sz)
+ return nfserr_rep_too_big;
+
+ if (slot->sl_cachethis == 1 &&
+ length > session->se_fchannel.maxresp_cached)
+ return nfserr_rep_too_big_to_cache;
+
+ return 0;
+}
+
+void
+nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+{
+ __be32 *statp;
+ __be32 *p;
+
+ RESERVE_SPACE(8);
+ WRITE32(op->opnum);
+ statp = p++; /* to be backfilled at the end */
+ ADJUST_ARGS();
+
+ if (op->opnum == OP_ILLEGAL)
+ goto status;
+ BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
+ !nfsd4_enc_ops[op->opnum]);
+ op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
+ /* nfsd4_check_drc_limit guarantees enough room for error status */
+ if (!op->status)
+ op->status = nfsd4_check_resp_size(resp, 0);
+status:
+ /*
+ * Note: We write the status directly, instead of using WRITE32(),
+ * since it is already in network byte order.
+ */
+ *statp = op->status;
+}
+
+/*
+ * Encode the reply stored in the stateowner reply cache
+ *
+ * XDR note: do not encode rp->rp_buflen: the buffer contains the
+ * previously sent already encoded operation.
+ *
+ * called with nfs4_lock_state() held
+ */
+void
+nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+{
+ __be32 *p;
+ struct nfs4_replay *rp = op->replay;
+
+ BUG_ON(!rp);
+
+ RESERVE_SPACE(8);
+ WRITE32(op->opnum);
+ *p++ = rp->rp_status; /* already xdr'ed */
+ ADJUST_ARGS();
+
+ RESERVE_SPACE(rp->rp_buflen);
+ WRITEMEM(rp->rp_buf, rp->rp_buflen);
+ ADJUST_ARGS();
+}
+
+int
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+{
+ return xdr_ressize_check(rqstp, p);
+}
+
+int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
+{
+ struct svc_rqst *rqstp = rq;
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
+ if (args->ops != args->iops) {
+ kfree(args->ops);
+ args->ops = args->iops;
+ }
+ kfree(args->tmpp);
+ args->tmpp = NULL;
+ while (args->to_free) {
+ struct tmpbuf *tb = args->to_free;
+ args->to_free = tb->next;
+ tb->release(tb->buf);
+ kfree(tb);
+ }
+ return 1;
+}
+
+int
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
+{
+ args->p = p;
+ args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
+ args->pagelist = rqstp->rq_arg.pages;
+ args->pagelen = rqstp->rq_arg.page_len;
+ args->tmpp = NULL;
+ args->to_free = NULL;
+ args->ops = args->iops;
+ args->rqstp = rqstp;
+
+ return !nfsd4_decode_compound(args);
+}
+
+int
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp)
+{
+ /*
+ * All that remains is to write the tag and operation count...
+ */
+ struct nfsd4_compound_state *cs = &resp->cstate;
+ struct kvec *iov;
+ p = resp->tagp;
+ *p++ = htonl(resp->taglen);
+ memcpy(p, resp->tag, resp->taglen);
+ p += XDR_QUADLEN(resp->taglen);
+ *p++ = htonl(resp->opcnt);
+
+ if (rqstp->rq_res.page_len)
+ iov = &rqstp->rq_res.tail[0];
+ else
+ iov = &rqstp->rq_res.head[0];
+ iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
+ BUG_ON(iov->iov_len > PAGE_SIZE);
+ if (nfsd4_has_session(cs)) {
+ if (cs->status != nfserr_replay_cache) {
+ nfsd4_store_cache_entry(resp);
+ dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
+ cs->slot->sl_inuse = false;
+ }
+ /* Renew the clientid on success and on replay */
+ release_session_client(cs->session);
+ nfsd4_put_session(cs->session);
+ }
+ return 1;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
new file mode 100644
index 00000000000..2cbac34a55d
--- /dev/null
+++ b/fs/nfsd/nfscache.c
@@ -0,0 +1,323 @@
+/*
+ * Request reply cache. This is currently a global cache, but this may
+ * change in the future and be a per-client cache.
+ *
+ * This code is heavily inspired by the 44BSD implementation, although
+ * it does things a bit differently.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/slab.h>
+
+#include "nfsd.h"
+#include "cache.h"
+
+/* Size of reply cache. Common values are:
+ * 4.3BSD: 128
+ * 4.4BSD: 256
+ * Solaris2: 1024
+ * DEC Unix: 512-4096
+ */
+#define CACHESIZE 1024
+#define HASHSIZE 64
+
+static struct hlist_head * cache_hash;
+static struct list_head lru_head;
+static int cache_disabled = 1;
+
+/*
+ * Calculate the hash index from an XID.
+ */
+static inline u32 request_hash(u32 xid)
+{
+ u32 h = xid;
+ h ^= (xid >> 24);
+ return h & (HASHSIZE-1);
+}
+
+static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
+
+/*
+ * locking for the reply cache:
+ * A cache entry is "single use" if c_state == RC_INPROG
+ * Otherwise, it when accessing _prev or _next, the lock must be held.
+ */
+static DEFINE_SPINLOCK(cache_lock);
+
+int nfsd_reply_cache_init(void)
+{
+ struct svc_cacherep *rp;
+ int i;
+
+ INIT_LIST_HEAD(&lru_head);
+ i = CACHESIZE;
+ while (i) {
+ rp = kmalloc(sizeof(*rp), GFP_KERNEL);
+ if (!rp)
+ goto out_nomem;
+ list_add(&rp->c_lru, &lru_head);
+ rp->c_state = RC_UNUSED;
+ rp->c_type = RC_NOCACHE;
+ INIT_HLIST_NODE(&rp->c_hash);
+ i--;
+ }
+
+ cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
+ if (!cache_hash)
+ goto out_nomem;
+
+ cache_disabled = 0;
+ return 0;
+out_nomem:
+ printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
+ nfsd_reply_cache_shutdown();
+ return -ENOMEM;
+}
+
+void nfsd_reply_cache_shutdown(void)
+{
+ struct svc_cacherep *rp;
+
+ while (!list_empty(&lru_head)) {
+ rp = list_entry(lru_head.next, struct svc_cacherep, c_lru);
+ if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
+ kfree(rp->c_replvec.iov_base);
+ list_del(&rp->c_lru);
+ kfree(rp);
+ }
+
+ cache_disabled = 1;
+
+ kfree (cache_hash);
+ cache_hash = NULL;
+}
+
+/*
+ * Move cache entry to end of LRU list
+ */
+static void
+lru_put_end(struct svc_cacherep *rp)
+{
+ list_move_tail(&rp->c_lru, &lru_head);
+}
+
+/*
+ * Move a cache entry from one hash list to another
+ */
+static void
+hash_refile(struct svc_cacherep *rp)
+{
+ hlist_del_init(&rp->c_hash);
+ hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid));
+}
+
+/*
+ * Try to find an entry matching the current call in the cache. When none
+ * is found, we grab the oldest unlocked entry off the LRU list.
+ * Note that no operation within the loop may sleep.
+ */
+int
+nfsd_cache_lookup(struct svc_rqst *rqstp)
+{
+ struct hlist_node *hn;
+ struct hlist_head *rh;
+ struct svc_cacherep *rp;
+ __be32 xid = rqstp->rq_xid;
+ u32 proto = rqstp->rq_prot,
+ vers = rqstp->rq_vers,
+ proc = rqstp->rq_proc;
+ unsigned long age;
+ int type = rqstp->rq_cachetype;
+ int rtn;
+
+ rqstp->rq_cacherep = NULL;
+ if (cache_disabled || type == RC_NOCACHE) {
+ nfsdstats.rcnocache++;
+ return RC_DOIT;
+ }
+
+ spin_lock(&cache_lock);
+ rtn = RC_DOIT;
+
+ rh = &cache_hash[request_hash(xid)];
+ hlist_for_each_entry(rp, hn, rh, c_hash) {
+ if (rp->c_state != RC_UNUSED &&
+ xid == rp->c_xid && proc == rp->c_proc &&
+ proto == rp->c_prot && vers == rp->c_vers &&
+ time_before(jiffies, rp->c_timestamp + 120*HZ) &&
+ memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) {
+ nfsdstats.rchits++;
+ goto found_entry;
+ }
+ }
+ nfsdstats.rcmisses++;
+
+ /* This loop shouldn't take more than a few iterations normally */
+ {
+ int safe = 0;
+ list_for_each_entry(rp, &lru_head, c_lru) {
+ if (rp->c_state != RC_INPROG)
+ break;
+ if (safe++ > CACHESIZE) {
+ printk("nfsd: loop in repcache LRU list\n");
+ cache_disabled = 1;
+ goto out;
+ }
+ }
+ }
+
+ /* All entries on the LRU are in-progress. This should not happen */
+ if (&rp->c_lru == &lru_head) {
+ static int complaints;
+
+ printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
+ if (++complaints > 5) {
+ printk(KERN_WARNING "nfsd: disabling repcache.\n");
+ cache_disabled = 1;
+ }
+ goto out;
+ }
+
+ rqstp->rq_cacherep = rp;
+ rp->c_state = RC_INPROG;
+ rp->c_xid = xid;
+ rp->c_proc = proc;
+ memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr));
+ rp->c_prot = proto;
+ rp->c_vers = vers;
+ rp->c_timestamp = jiffies;
+
+ hash_refile(rp);
+
+ /* release any buffer */
+ if (rp->c_type == RC_REPLBUFF) {
+ kfree(rp->c_replvec.iov_base);
+ rp->c_replvec.iov_base = NULL;
+ }
+ rp->c_type = RC_NOCACHE;
+ out:
+ spin_unlock(&cache_lock);
+ return rtn;
+
+found_entry:
+ /* We found a matching entry which is either in progress or done. */
+ age = jiffies - rp->c_timestamp;
+ rp->c_timestamp = jiffies;
+ lru_put_end(rp);
+
+ rtn = RC_DROPIT;
+ /* Request being processed or excessive rexmits */
+ if (rp->c_state == RC_INPROG || age < RC_DELAY)
+ goto out;
+
+ /* From the hall of fame of impractical attacks:
+ * Is this a user who tries to snoop on the cache? */
+ rtn = RC_DOIT;
+ if (!rqstp->rq_secure && rp->c_secure)
+ goto out;
+
+ /* Compose RPC reply header */
+ switch (rp->c_type) {
+ case RC_NOCACHE:
+ break;
+ case RC_REPLSTAT:
+ svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
+ rtn = RC_REPLY;
+ break;
+ case RC_REPLBUFF:
+ if (!nfsd_cache_append(rqstp, &rp->c_replvec))
+ goto out; /* should not happen */
+ rtn = RC_REPLY;
+ break;
+ default:
+ printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
+ rp->c_state = RC_UNUSED;
+ }
+
+ goto out;
+}
+
+/*
+ * Update a cache entry. This is called from nfsd_dispatch when
+ * the procedure has been executed and the complete reply is in
+ * rqstp->rq_res.
+ *
+ * We're copying around data here rather than swapping buffers because
+ * the toplevel loop requires max-sized buffers, which would be a waste
+ * of memory for a cache with a max reply size of 100 bytes (diropokres).
+ *
+ * If we should start to use different types of cache entries tailored
+ * specifically for attrstat and fh's, we may save even more space.
+ *
+ * Also note that a cachetype of RC_NOCACHE can legally be passed when
+ * nfsd failed to encode a reply that otherwise would have been cached.
+ * In this case, nfsd_cache_update is called with statp == NULL.
+ */
+void
+nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
+{
+ struct svc_cacherep *rp;
+ struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
+ int len;
+
+ if (!(rp = rqstp->rq_cacherep) || cache_disabled)
+ return;
+
+ len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
+ len >>= 2;
+
+ /* Don't cache excessive amounts of data and XDR failures */
+ if (!statp || len > (256 >> 2)) {
+ rp->c_state = RC_UNUSED;
+ return;
+ }
+
+ switch (cachetype) {
+ case RC_REPLSTAT:
+ if (len != 1)
+ printk("nfsd: RC_REPLSTAT/reply len %d!\n",len);
+ rp->c_replstat = *statp;
+ break;
+ case RC_REPLBUFF:
+ cachv = &rp->c_replvec;
+ cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
+ if (!cachv->iov_base) {
+ spin_lock(&cache_lock);
+ rp->c_state = RC_UNUSED;
+ spin_unlock(&cache_lock);
+ return;
+ }
+ cachv->iov_len = len << 2;
+ memcpy(cachv->iov_base, statp, len << 2);
+ break;
+ }
+ spin_lock(&cache_lock);
+ lru_put_end(rp);
+ rp->c_secure = rqstp->rq_secure;
+ rp->c_type = cachetype;
+ rp->c_state = RC_DONE;
+ rp->c_timestamp = jiffies;
+ spin_unlock(&cache_lock);
+ return;
+}
+
+/*
+ * Copy cached reply to current reply buffer. Should always fit.
+ * FIXME as reply is in a page, we should just attach the page, and
+ * keep a refcount....
+ */
+static int
+nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
+{
+ struct kvec *vec = &rqstp->rq_res.head[0];
+
+ if (vec->iov_len + data->iov_len > PAGE_SIZE) {
+ printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n",
+ data->iov_len);
+ return 0;
+ }
+ memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
+ vec->iov_len += data->iov_len;
+ return 1;
+}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
new file mode 100644
index 00000000000..748eda93ce5
--- /dev/null
+++ b/fs/nfsd/nfsctl.c
@@ -0,0 +1,1192 @@
+/*
+ * Syscall interface to knfsd.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/ctype.h>
+
+#include <linux/sunrpc/svcsock.h>
+#include <linux/lockd/lockd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/gss_krb5_enctypes.h>
+#include <linux/module.h>
+
+#include "idmap.h"
+#include "nfsd.h"
+#include "cache.h"
+#include "fault_inject.h"
+
+/*
+ * We have a single directory with several nodes in it.
+ */
+enum {
+ NFSD_Root = 1,
+ NFSD_List,
+ NFSD_Export_features,
+ NFSD_Fh,
+ NFSD_FO_UnlockIP,
+ NFSD_FO_UnlockFS,
+ NFSD_Threads,
+ NFSD_Pool_Threads,
+ NFSD_Pool_Stats,
+ NFSD_Versions,
+ NFSD_Ports,
+ NFSD_MaxBlkSize,
+ NFSD_SupportedEnctypes,
+ /*
+ * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
+ * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
+ */
+#ifdef CONFIG_NFSD_V4
+ NFSD_Leasetime,
+ NFSD_Gracetime,
+ NFSD_RecoveryDir,
+#endif
+};
+
+/*
+ * write() for these nodes.
+ */
+static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
+static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size);
+static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size);
+static ssize_t write_threads(struct file *file, char *buf, size_t size);
+static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
+static ssize_t write_versions(struct file *file, char *buf, size_t size);
+static ssize_t write_ports(struct file *file, char *buf, size_t size);
+static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
+#ifdef CONFIG_NFSD_V4
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+#endif
+
+static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+ [NFSD_Fh] = write_filehandle,
+ [NFSD_FO_UnlockIP] = write_unlock_ip,
+ [NFSD_FO_UnlockFS] = write_unlock_fs,
+ [NFSD_Threads] = write_threads,
+ [NFSD_Pool_Threads] = write_pool_threads,
+ [NFSD_Versions] = write_versions,
+ [NFSD_Ports] = write_ports,
+ [NFSD_MaxBlkSize] = write_maxblksize,
+#ifdef CONFIG_NFSD_V4
+ [NFSD_Leasetime] = write_leasetime,
+ [NFSD_Gracetime] = write_gracetime,
+ [NFSD_RecoveryDir] = write_recoverydir,
+#endif
+};
+
+static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+{
+ ino_t ino = file->f_path.dentry->d_inode->i_ino;
+ char *data;
+ ssize_t rv;
+
+ if (ino >= ARRAY_SIZE(write_op) || !write_op[ino])
+ return -EINVAL;
+
+ data = simple_transaction_get(file, buf, size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ rv = write_op[ino](file, data, size);
+ if (rv >= 0) {
+ simple_transaction_set(file, rv);
+ rv = size;
+ }
+ return rv;
+}
+
+static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
+{
+ if (! file->private_data) {
+ /* An attempt to read a transaction file without writing
+ * causes a 0-byte write so that the file can return
+ * state information
+ */
+ ssize_t rv = nfsctl_transaction_write(file, buf, 0, pos);
+ if (rv < 0)
+ return rv;
+ }
+ return simple_transaction_read(file, buf, size, pos);
+}
+
+static const struct file_operations transaction_ops = {
+ .write = nfsctl_transaction_write,
+ .read = nfsctl_transaction_read,
+ .release = simple_transaction_release,
+ .llseek = default_llseek,
+};
+
+static int exports_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &nfs_exports_op);
+}
+
+static const struct file_operations exports_operations = {
+ .open = exports_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .owner = THIS_MODULE,
+};
+
+static int export_features_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS);
+ return 0;
+}
+
+static int export_features_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, export_features_show, NULL);
+}
+
+static struct file_operations export_features_operations = {
+ .open = export_features_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
+static int supported_enctypes_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, KRB5_SUPPORTED_ENCTYPES);
+ return 0;
+}
+
+static int supported_enctypes_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, supported_enctypes_show, NULL);
+}
+
+static struct file_operations supported_enctypes_ops = {
+ .open = supported_enctypes_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
+
+extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
+extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
+
+static const struct file_operations pool_stats_operations = {
+ .open = nfsd_pool_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = nfsd_pool_stats_release,
+ .owner = THIS_MODULE,
+};
+
+/*----------------------------------------------------------------------------*/
+/*
+ * payload - write methods
+ */
+
+
+/**
+ * write_unlock_ip - Release all locks used by a client
+ *
+ * Experimental.
+ *
+ * Input:
+ * buf: '\n'-terminated C string containing a
+ * presentation format IP address
+ * size: length of C string in @buf
+ * Output:
+ * On success: returns zero if all specified locks were released;
+ * returns one if one or more locks were not released
+ * On error: return code is negative errno value
+ */
+static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
+{
+ struct sockaddr_storage address;
+ struct sockaddr *sap = (struct sockaddr *)&address;
+ size_t salen = sizeof(address);
+ char *fo_path;
+
+ /* sanity check */
+ if (size == 0)
+ return -EINVAL;
+
+ if (buf[size-1] != '\n')
+ return -EINVAL;
+
+ fo_path = buf;
+ if (qword_get(&buf, fo_path, size) < 0)
+ return -EINVAL;
+
+ if (rpc_pton(fo_path, size, sap, salen) == 0)
+ return -EINVAL;
+
+ return nlmsvc_unlock_all_by_ip(sap);
+}
+
+/**
+ * write_unlock_fs - Release all locks on a local file system
+ *
+ * Experimental.
+ *
+ * Input:
+ * buf: '\n'-terminated C string containing the
+ * absolute pathname of a local file system
+ * size: length of C string in @buf
+ * Output:
+ * On success: returns zero if all specified locks were released;
+ * returns one if one or more locks were not released
+ * On error: return code is negative errno value
+ */
+static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
+{
+ struct path path;
+ char *fo_path;
+ int error;
+
+ /* sanity check */
+ if (size == 0)
+ return -EINVAL;
+
+ if (buf[size-1] != '\n')
+ return -EINVAL;
+
+ fo_path = buf;
+ if (qword_get(&buf, fo_path, size) < 0)
+ return -EINVAL;
+
+ error = kern_path(fo_path, 0, &path);
+ if (error)
+ return error;
+
+ /*
+ * XXX: Needs better sanity checking. Otherwise we could end up
+ * releasing locks on the wrong file system.
+ *
+ * For example:
+ * 1. Does the path refer to a directory?
+ * 2. Is that directory a mount point, or
+ * 3. Is that directory the root of an exported file system?
+ */
+ error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
+
+ path_put(&path);
+ return error;
+}
+
+/**
+ * write_filehandle - Get a variable-length NFS file handle by path
+ *
+ * On input, the buffer contains a '\n'-terminated C string comprised of
+ * three alphanumeric words separated by whitespace. The string may
+ * contain escape sequences.
+ *
+ * Input:
+ * buf:
+ * domain: client domain name
+ * path: export pathname
+ * maxsize: numeric maximum size of
+ * @buf
+ * size: length of C string in @buf
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C
+ * string containing a ASCII hex text version
+ * of the NFS file handle;
+ * return code is the size in bytes of the string
+ * On error: return code is negative errno value
+ */
+static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
+{
+ char *dname, *path;
+ int uninitialized_var(maxsize);
+ char *mesg = buf;
+ int len;
+ struct auth_domain *dom;
+ struct knfsd_fh fh;
+
+ if (size == 0)
+ return -EINVAL;
+
+ if (buf[size-1] != '\n')
+ return -EINVAL;
+ buf[size-1] = 0;
+
+ dname = mesg;
+ len = qword_get(&mesg, dname, size);
+ if (len <= 0)
+ return -EINVAL;
+
+ path = dname+len+1;
+ len = qword_get(&mesg, path, size);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = get_int(&mesg, &maxsize);
+ if (len)
+ return len;
+
+ if (maxsize < NFS_FHSIZE)
+ return -EINVAL;
+ if (maxsize > NFS3_FHSIZE)
+ maxsize = NFS3_FHSIZE;
+
+ if (qword_get(&mesg, mesg, size)>0)
+ return -EINVAL;
+
+ /* we have all the words, they are in buf.. */
+ dom = unix_domain_find(dname);
+ if (!dom)
+ return -ENOMEM;
+
+ len = exp_rootfh(dom, path, &fh, maxsize);
+ auth_domain_put(dom);
+ if (len)
+ return len;
+
+ mesg = buf;
+ len = SIMPLE_TRANSACTION_LIMIT;
+ qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size);
+ mesg[-1] = '\n';
+ return mesg - buf;
+}
+
+/**
+ * write_threads - Start NFSD, or report the current number of running threads
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C
+ * string numeric value representing the number of
+ * running NFSD threads;
+ * return code is the size in bytes of the string
+ * On error: return code is zero
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing an unsigned
+ * integer value representing the
+ * number of NFSD threads to start
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: NFS service is started;
+ * passed-in buffer filled with '\n'-terminated C
+ * string numeric value representing the number of
+ * running NFSD threads;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ */
+static ssize_t write_threads(struct file *file, char *buf, size_t size)
+{
+ char *mesg = buf;
+ int rv;
+ if (size > 0) {
+ int newthreads;
+ rv = get_int(&mesg, &newthreads);
+ if (rv)
+ return rv;
+ if (newthreads < 0)
+ return -EINVAL;
+ rv = nfsd_svc(NFS_PORT, newthreads);
+ if (rv < 0)
+ return rv;
+ } else
+ rv = nfsd_nrthreads();
+
+ return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv);
+}
+
+/**
+ * write_pool_threads - Set or report the current number of threads per pool
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing whitespace-
+ * separated unsigned integer values
+ * representing the number of NFSD
+ * threads to start in each pool
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C
+ * string containing integer values representing the
+ * number of NFSD threads in each pool;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ */
+static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
+{
+ /* if size > 0, look for an array of number of threads per node
+ * and apply them then write out number of threads per node as reply
+ */
+ char *mesg = buf;
+ int i;
+ int rv;
+ int len;
+ int npools;
+ int *nthreads;
+
+ mutex_lock(&nfsd_mutex);
+ npools = nfsd_nrpools();
+ if (npools == 0) {
+ /*
+ * NFS is shut down. The admin can start it by
+ * writing to the threads file but NOT the pool_threads
+ * file, sorry. Report zero threads.
+ */
+ mutex_unlock(&nfsd_mutex);
+ strcpy(buf, "0\n");
+ return strlen(buf);
+ }
+
+ nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+ rv = -ENOMEM;
+ if (nthreads == NULL)
+ goto out_free;
+
+ if (size > 0) {
+ for (i = 0; i < npools; i++) {
+ rv = get_int(&mesg, &nthreads[i]);
+ if (rv == -ENOENT)
+ break; /* fewer numbers than pools */
+ if (rv)
+ goto out_free; /* syntax error */
+ rv = -EINVAL;
+ if (nthreads[i] < 0)
+ goto out_free;
+ }
+ rv = nfsd_set_nrthreads(i, nthreads);
+ if (rv)
+ goto out_free;
+ }
+
+ rv = nfsd_get_nrthreads(npools, nthreads);
+ if (rv)
+ goto out_free;
+
+ mesg = buf;
+ size = SIMPLE_TRANSACTION_LIMIT;
+ for (i = 0; i < npools && size > 0; i++) {
+ snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? '\n' : ' '));
+ len = strlen(mesg);
+ size -= len;
+ mesg += len;
+ }
+ rv = mesg - buf;
+out_free:
+ kfree(nthreads);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+static ssize_t __write_versions(struct file *file, char *buf, size_t size)
+{
+ char *mesg = buf;
+ char *vers, *minorp, sign;
+ int len, num, remaining;
+ unsigned minor;
+ ssize_t tlen = 0;
+ char *sep;
+
+ if (size>0) {
+ if (nfsd_serv)
+ /* Cannot change versions without updating
+ * nfsd_serv->sv_xdrsize, and reallocing
+ * rq_argp and rq_resp
+ */
+ return -EBUSY;
+ if (buf[size-1] != '\n')
+ return -EINVAL;
+ buf[size-1] = 0;
+
+ vers = mesg;
+ len = qword_get(&mesg, vers, size);
+ if (len <= 0) return -EINVAL;
+ do {
+ sign = *vers;
+ if (sign == '+' || sign == '-')
+ num = simple_strtol((vers+1), &minorp, 0);
+ else
+ num = simple_strtol(vers, &minorp, 0);
+ if (*minorp == '.') {
+ if (num < 4)
+ return -EINVAL;
+ minor = simple_strtoul(minorp+1, NULL, 0);
+ if (minor == 0)
+ return -EINVAL;
+ if (nfsd_minorversion(minor, sign == '-' ?
+ NFSD_CLEAR : NFSD_SET) < 0)
+ return -EINVAL;
+ goto next;
+ }
+ switch(num) {
+ case 2:
+ case 3:
+ case 4:
+ nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
+ break;
+ default:
+ return -EINVAL;
+ }
+ next:
+ vers += len + 1;
+ } while ((len = qword_get(&mesg, vers, size)) > 0);
+ /* If all get turned off, turn them back on, as
+ * having no versions is BAD
+ */
+ nfsd_reset_versions();
+ }
+
+ /* Now write current state into reply buffer */
+ len = 0;
+ sep = "";
+ remaining = SIMPLE_TRANSACTION_LIMIT;
+ for (num=2 ; num <= 4 ; num++)
+ if (nfsd_vers(num, NFSD_AVAIL)) {
+ len = snprintf(buf, remaining, "%s%c%d", sep,
+ nfsd_vers(num, NFSD_TEST)?'+':'-',
+ num);
+ sep = " ";
+
+ if (len > remaining)
+ break;
+ remaining -= len;
+ buf += len;
+ tlen += len;
+ }
+ if (nfsd_vers(4, NFSD_AVAIL))
+ for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
+ minor++) {
+ len = snprintf(buf, remaining, " %c4.%u",
+ (nfsd_vers(4, NFSD_TEST) &&
+ nfsd_minorversion(minor, NFSD_TEST)) ?
+ '+' : '-',
+ minor);
+
+ if (len > remaining)
+ break;
+ remaining -= len;
+ buf += len;
+ tlen += len;
+ }
+
+ len = snprintf(buf, remaining, "\n");
+ if (len > remaining)
+ return -EINVAL;
+ return tlen + len;
+}
+
+/**
+ * write_versions - Set or report the available NFS protocol versions
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C
+ * string containing positive or negative integer
+ * values representing the current status of each
+ * protocol version;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing whitespace-
+ * separated positive or negative
+ * integer values representing NFS
+ * protocol versions to enable ("+n")
+ * or disable ("-n")
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: status of zero or more protocol versions has
+ * been updated; passed-in buffer filled with
+ * '\n'-terminated C string containing positive
+ * or negative integer values representing the
+ * current status of each protocol version;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ */
+static ssize_t write_versions(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_versions(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+/*
+ * Zero-length write. Return a list of NFSD's current listener
+ * transports.
+ */
+static ssize_t __write_ports_names(char *buf)
+{
+ if (nfsd_serv == NULL)
+ return 0;
+ return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT);
+}
+
+/*
+ * A single 'fd' number was written, in which case it must be for
+ * a socket of a supported family/protocol, and we use it as an
+ * nfsd listener.
+ */
+static ssize_t __write_ports_addfd(char *buf)
+{
+ char *mesg = buf;
+ int fd, err;
+
+ err = get_int(&mesg, &fd);
+ if (err != 0 || fd < 0)
+ return -EINVAL;
+
+ err = nfsd_create_serv();
+ if (err != 0)
+ return err;
+
+ err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
+ if (err < 0) {
+ svc_destroy(nfsd_serv);
+ return err;
+ }
+
+ /* Decrease the count, but don't shut down the service */
+ nfsd_serv->sv_nrthreads--;
+ return err;
+}
+
+/*
+ * A '-' followed by the 'name' of a socket means we close the socket.
+ */
+static ssize_t __write_ports_delfd(char *buf)
+{
+ char *toclose;
+ int len = 0;
+
+ toclose = kstrdup(buf + 1, GFP_KERNEL);
+ if (toclose == NULL)
+ return -ENOMEM;
+
+ if (nfsd_serv != NULL)
+ len = svc_sock_names(nfsd_serv, buf,
+ SIMPLE_TRANSACTION_LIMIT, toclose);
+ kfree(toclose);
+ return len;
+}
+
+/*
+ * A transport listener is added by writing it's transport name and
+ * a port number.
+ */
+static ssize_t __write_ports_addxprt(char *buf)
+{
+ char transport[16];
+ struct svc_xprt *xprt;
+ int port, err;
+
+ if (sscanf(buf, "%15s %4u", transport, &port) != 2)
+ return -EINVAL;
+
+ if (port < 1 || port > USHRT_MAX)
+ return -EINVAL;
+
+ err = nfsd_create_serv();
+ if (err != 0)
+ return err;
+
+ err = svc_create_xprt(nfsd_serv, transport, &init_net,
+ PF_INET, port, SVC_SOCK_ANONYMOUS);
+ if (err < 0)
+ goto out_err;
+
+ err = svc_create_xprt(nfsd_serv, transport, &init_net,
+ PF_INET6, port, SVC_SOCK_ANONYMOUS);
+ if (err < 0 && err != -EAFNOSUPPORT)
+ goto out_close;
+
+ /* Decrease the count, but don't shut down the service */
+ nfsd_serv->sv_nrthreads--;
+ return 0;
+out_close:
+ xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port);
+ if (xprt != NULL) {
+ svc_close_xprt(xprt);
+ svc_xprt_put(xprt);
+ }
+out_err:
+ svc_destroy(nfsd_serv);
+ return err;
+}
+
+/*
+ * A transport listener is removed by writing a "-", it's transport
+ * name, and it's port number.
+ */
+static ssize_t __write_ports_delxprt(char *buf)
+{
+ struct svc_xprt *xprt;
+ char transport[16];
+ int port;
+
+ if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2)
+ return -EINVAL;
+
+ if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL)
+ return -EINVAL;
+
+ xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port);
+ if (xprt == NULL)
+ return -ENOTCONN;
+
+ svc_close_xprt(xprt);
+ svc_xprt_put(xprt);
+ return 0;
+}
+
+static ssize_t __write_ports(struct file *file, char *buf, size_t size)
+{
+ if (size == 0)
+ return __write_ports_names(buf);
+
+ if (isdigit(buf[0]))
+ return __write_ports_addfd(buf);
+
+ if (buf[0] == '-' && isdigit(buf[1]))
+ return __write_ports_delfd(buf);
+
+ if (isalpha(buf[0]))
+ return __write_ports_addxprt(buf);
+
+ if (buf[0] == '-' && isalpha(buf[1]))
+ return __write_ports_delxprt(buf);
+
+ return -EINVAL;
+}
+
+/**
+ * write_ports - Pass a socket file descriptor or transport name to listen on
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ * Output:
+ * On success: passed-in buffer filled with a '\n'-terminated C
+ * string containing a whitespace-separated list of
+ * named NFSD listeners;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing an unsigned
+ * integer value representing a bound
+ * but unconnected socket that is to be
+ * used as an NFSD listener; listen(3)
+ * must be called for a SOCK_STREAM
+ * socket, otherwise it is ignored
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: NFS service is started;
+ * passed-in buffer filled with a '\n'-terminated C
+ * string containing a unique alphanumeric name of
+ * the listener;
+ * return code is the size in bytes of the string
+ * On error: return code is a negative errno value
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing a "-" followed
+ * by an integer value representing a
+ * previously passed in socket file
+ * descriptor
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: NFS service no longer listens on that socket;
+ * passed-in buffer filled with a '\n'-terminated C
+ * string containing a unique name of the listener;
+ * return code is the size in bytes of the string
+ * On error: return code is a negative errno value
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing a transport
+ * name and an unsigned integer value
+ * representing the port to listen on,
+ * separated by whitespace
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: returns zero; NFS service is started
+ * On error: return code is a negative errno value
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing a "-" followed
+ * by a transport name and an unsigned
+ * integer value representing the port
+ * to listen on, separated by whitespace
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: returns zero; NFS service no longer listens
+ * on that transport
+ * On error: return code is a negative errno value
+ */
+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_ports(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+
+int nfsd_max_blksize;
+
+/**
+ * write_maxblksize - Set or report the current NFS blksize
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing an unsigned
+ * integer value representing the new
+ * NFS blksize
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C string
+ * containing numeric value of the current NFS blksize
+ * setting;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ */
+static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
+{
+ char *mesg = buf;
+ if (size > 0) {
+ int bsize;
+ int rv = get_int(&mesg, &bsize);
+ if (rv)
+ return rv;
+ /* force bsize into allowed range and
+ * required alignment.
+ */
+ if (bsize < 1024)
+ bsize = 1024;
+ if (bsize > NFSSVC_MAXBLKSIZE)
+ bsize = NFSSVC_MAXBLKSIZE;
+ bsize &= ~(1024-1);
+ mutex_lock(&nfsd_mutex);
+ if (nfsd_serv) {
+ mutex_unlock(&nfsd_mutex);
+ return -EBUSY;
+ }
+ nfsd_max_blksize = bsize;
+ mutex_unlock(&nfsd_mutex);
+ }
+
+ return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n",
+ nfsd_max_blksize);
+}
+
+#ifdef CONFIG_NFSD_V4
+static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+{
+ char *mesg = buf;
+ int rv, i;
+
+ if (size > 0) {
+ if (nfsd_serv)
+ return -EBUSY;
+ rv = get_int(&mesg, &i);
+ if (rv)
+ return rv;
+ /*
+ * Some sanity checking. We don't have a reason for
+ * these particular numbers, but problems with the
+ * extremes are:
+ * - Too short: the briefest network outage may
+ * cause clients to lose all their locks. Also,
+ * the frequent polling may be wasteful.
+ * - Too long: do you really want reboot recovery
+ * to take more than an hour? Or to make other
+ * clients wait an hour before being able to
+ * revoke a dead client's locks?
+ */
+ if (i < 10 || i > 3600)
+ return -EINVAL;
+ *time = i;
+ }
+
+ return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
+}
+
+static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __nfsd4_write_time(file, buf, size, time);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+/**
+ * write_leasetime - Set or report the current NFSv4 lease time
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing an unsigned
+ * integer value representing the new
+ * NFSv4 lease expiry time
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C
+ * string containing unsigned integer value of the
+ * current lease expiry time;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ */
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+{
+ return nfsd4_write_time(file, buf, size, &nfsd4_lease);
+}
+
+/**
+ * write_gracetime - Set or report current NFSv4 grace period time
+ *
+ * As above, but sets the time of the NFSv4 grace period.
+ *
+ * Note this should never be set to less than the *previous*
+ * lease-period time, but we don't try to enforce this. (In the common
+ * case (a new boot), we don't know what the previous lease time was
+ * anyway.)
+ */
+static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
+{
+ return nfsd4_write_time(file, buf, size, &nfsd4_grace);
+}
+
+extern char *nfs4_recoverydir(void);
+
+static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
+{
+ char *mesg = buf;
+ char *recdir;
+ int len, status;
+
+ if (size > 0) {
+ if (nfsd_serv)
+ return -EBUSY;
+ if (size > PATH_MAX || buf[size-1] != '\n')
+ return -EINVAL;
+ buf[size-1] = 0;
+
+ recdir = mesg;
+ len = qword_get(&mesg, recdir, size);
+ if (len <= 0)
+ return -EINVAL;
+
+ status = nfs4_reset_recoverydir(recdir);
+ if (status)
+ return status;
+ }
+
+ return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n",
+ nfs4_recoverydir());
+}
+
+/**
+ * write_recoverydir - Set or report the pathname of the recovery directory
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing the pathname
+ * of the directory on a local file
+ * system containing permanent NFSv4
+ * recovery data
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C string
+ * containing the current recovery pathname setting;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ */
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_recoverydir(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+#endif
+
+/*----------------------------------------------------------------------------*/
+/*
+ * populating the filesystem.
+ */
+
+static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
+{
+ static struct tree_descr nfsd_files[] = {
+ [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+ [NFSD_Export_features] = {"export_features",
+ &export_features_operations, S_IRUGO},
+ [NFSD_FO_UnlockIP] = {"unlock_ip",
+ &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_FO_UnlockFS] = {"unlock_filesystem",
+ &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
+ [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
+#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
+ [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
+#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
+#ifdef CONFIG_NFSD_V4
+ [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+#endif
+ /* last one */ {""}
+ };
+ return simple_fill_super(sb, 0x6e667364, nfsd_files);
+}
+
+static struct dentry *nfsd_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ return mount_single(fs_type, flags, data, nfsd_fill_super);
+}
+
+static struct file_system_type nfsd_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfsd",
+ .mount = nfsd_mount,
+ .kill_sb = kill_litter_super,
+};
+
+#ifdef CONFIG_PROC_FS
+static int create_proc_exports_entry(void)
+{
+ struct proc_dir_entry *entry;
+
+ entry = proc_mkdir("fs/nfs", NULL);
+ if (!entry)
+ return -ENOMEM;
+ entry = proc_create("exports", 0, entry, &exports_operations);
+ if (!entry)
+ return -ENOMEM;
+ return 0;
+}
+#else /* CONFIG_PROC_FS */
+static int create_proc_exports_entry(void)
+{
+ return 0;
+}
+#endif
+
+static int __init init_nfsd(void)
+{
+ int retval;
+ printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
+
+ retval = nfsd4_init_slabs();
+ if (retval)
+ return retval;
+ nfs4_state_init();
+ retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+ if (retval)
+ goto out_free_slabs;
+ nfsd_stat_init(); /* Statistics */
+ retval = nfsd_reply_cache_init();
+ if (retval)
+ goto out_free_stat;
+ retval = nfsd_export_init();
+ if (retval)
+ goto out_free_cache;
+ nfsd_lockd_init(); /* lockd->nfsd callbacks */
+ retval = nfsd_idmap_init();
+ if (retval)
+ goto out_free_lockd;
+ retval = create_proc_exports_entry();
+ if (retval)
+ goto out_free_idmap;
+ retval = register_filesystem(&nfsd_fs_type);
+ if (retval)
+ goto out_free_all;
+ return 0;
+out_free_all:
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
+out_free_idmap:
+ nfsd_idmap_shutdown();
+out_free_lockd:
+ nfsd_lockd_shutdown();
+ nfsd_export_shutdown();
+out_free_cache:
+ nfsd_reply_cache_shutdown();
+out_free_stat:
+ nfsd_stat_shutdown();
+ nfsd_fault_inject_cleanup();
+out_free_slabs:
+ nfsd4_free_slabs();
+ return retval;
+}
+
+static void __exit exit_nfsd(void)
+{
+ nfsd_export_shutdown();
+ nfsd_reply_cache_shutdown();
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
+ nfsd_stat_shutdown();
+ nfsd_lockd_shutdown();
+ nfsd_idmap_shutdown();
+ nfsd4_free_slabs();
+ nfsd_fault_inject_cleanup();
+ unregister_filesystem(&nfsd_fs_type);
+}
+
+MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_LICENSE("GPL");
+module_init(init_nfsd)
+module_exit(exit_nfsd)
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
new file mode 100644
index 00000000000..1d1e8589b4c
--- /dev/null
+++ b/fs/nfsd/nfsd.h
@@ -0,0 +1,375 @@
+/*
+ * Hodge-podge collection of knfsd-related stuff.
+ * I will sort this out later.
+ *
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_NFSD_H
+#define LINUX_NFSD_NFSD_H
+
+#include <linux/types.h>
+#include <linux/mount.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/sunrpc/msg_prot.h>
+
+#include <linux/nfsd/debug.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/stats.h>
+
+/*
+ * nfsd version
+ */
+#define NFSD_SUPPORTED_MINOR_VERSION 1
+/*
+ * Maximum blocksizes supported by daemon under various circumstances.
+ */
+#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD
+/* NFSv2 is limited by the protocol specification, see RFC 1094 */
+#define NFSSVC_MAXBLKSIZE_V2 (8*1024)
+
+
+/*
+ * Largest number of bytes we need to allocate for an NFS
+ * call or reply. Used to control buffer sizes. We use
+ * the length of v3 WRITE, READDIR and READDIR replies
+ * which are an RPC header, up to 26 XDR units of reply
+ * data, and some page data.
+ *
+ * Note that accuracy here doesn't matter too much as the
+ * size is rounded up to a page size when allocating space.
+ */
+#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE)
+
+struct readdir_cd {
+ __be32 err; /* 0, nfserr, or nfserr_eof */
+};
+
+
+extern struct svc_program nfsd_program;
+extern struct svc_version nfsd_version2, nfsd_version3,
+ nfsd_version4;
+extern u32 nfsd_supported_minorversion;
+extern struct mutex nfsd_mutex;
+extern struct svc_serv *nfsd_serv;
+extern spinlock_t nfsd_drc_lock;
+extern unsigned int nfsd_drc_max_mem;
+extern unsigned int nfsd_drc_mem_used;
+
+extern const struct seq_operations nfs_exports_op;
+
+/*
+ * Function prototypes.
+ */
+int nfsd_svc(unsigned short port, int nrservs);
+int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
+
+int nfsd_nrthreads(void);
+int nfsd_nrpools(void);
+int nfsd_get_nrthreads(int n, int *);
+int nfsd_set_nrthreads(int n, int *);
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+#ifdef CONFIG_NFSD_V2_ACL
+extern struct svc_version nfsd_acl_version2;
+#else
+#define nfsd_acl_version2 NULL
+#endif
+#ifdef CONFIG_NFSD_V3_ACL
+extern struct svc_version nfsd_acl_version3;
+#else
+#define nfsd_acl_version3 NULL
+#endif
+#endif
+
+enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
+int nfsd_vers(int vers, enum vers_op change);
+int nfsd_minorversion(u32 minorversion, enum vers_op change);
+void nfsd_reset_versions(void);
+int nfsd_create_serv(void);
+
+extern int nfsd_max_blksize;
+
+static inline int nfsd_v4client(struct svc_rqst *rq)
+{
+ return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
+}
+
+/*
+ * NFSv4 State
+ */
+#ifdef CONFIG_NFSD_V4
+extern unsigned int max_delegations;
+void nfs4_state_init(void);
+int nfsd4_init_slabs(void);
+void nfsd4_free_slabs(void);
+int nfs4_state_start(void);
+void nfs4_state_shutdown(void);
+void nfs4_reset_lease(time_t leasetime);
+int nfs4_reset_recoverydir(char *recdir);
+#else
+static inline void nfs4_state_init(void) { }
+static inline int nfsd4_init_slabs(void) { return 0; }
+static inline void nfsd4_free_slabs(void) { }
+static inline int nfs4_state_start(void) { return 0; }
+static inline void nfs4_state_shutdown(void) { }
+static inline void nfs4_reset_lease(time_t leasetime) { }
+static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
+#endif
+
+/*
+ * lockd binding
+ */
+void nfsd_lockd_init(void);
+void nfsd_lockd_shutdown(void);
+
+
+/*
+ * These macros provide pre-xdr'ed values for faster operation.
+ */
+#define nfs_ok cpu_to_be32(NFS_OK)
+#define nfserr_perm cpu_to_be32(NFSERR_PERM)
+#define nfserr_noent cpu_to_be32(NFSERR_NOENT)
+#define nfserr_io cpu_to_be32(NFSERR_IO)
+#define nfserr_nxio cpu_to_be32(NFSERR_NXIO)
+#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN)
+#define nfserr_acces cpu_to_be32(NFSERR_ACCES)
+#define nfserr_exist cpu_to_be32(NFSERR_EXIST)
+#define nfserr_xdev cpu_to_be32(NFSERR_XDEV)
+#define nfserr_nodev cpu_to_be32(NFSERR_NODEV)
+#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR)
+#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR)
+#define nfserr_inval cpu_to_be32(NFSERR_INVAL)
+#define nfserr_fbig cpu_to_be32(NFSERR_FBIG)
+#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
+#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
+#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
+#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
+#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
+#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
+#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
+#define nfserr_stale cpu_to_be32(NFSERR_STALE)
+#define nfserr_remote cpu_to_be32(NFSERR_REMOTE)
+#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH)
+#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE)
+#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC)
+#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE)
+#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP)
+#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL)
+#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT)
+#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE)
+#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX)
+#define nfserr_denied cpu_to_be32(NFSERR_DENIED)
+#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK)
+#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED)
+#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE)
+#define nfserr_same cpu_to_be32(NFSERR_SAME)
+#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE)
+#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID)
+#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE)
+#define nfserr_moved cpu_to_be32(NFSERR_MOVED)
+#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE)
+#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
+#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
+#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID)
+#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID)
+#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID)
+#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID)
+#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK)
+#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME)
+#define nfserr_lock_range cpu_to_be32(NFSERR_LOCK_RANGE)
+#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH)
+#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
+#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
+#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE)
+#define nfserr_badowner cpu_to_be32(NFSERR_BADOWNER)
+#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD)
+#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL)
+#define nfserr_grace cpu_to_be32(NFSERR_GRACE)
+#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
+#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
+#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
+#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
+#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
+#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
+#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
+#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
+#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
+#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION)
+#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT)
+#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
+#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
+#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
+#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
+#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
+#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
+#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
+#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT)
+#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
+#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
+#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS)
+#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
+#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG)
+#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
+#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
+#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
+#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
+#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
+#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
+#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
+#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
+#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
+#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
+#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION)
+#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
+#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
+#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
+#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED)
+#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE)
+#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
+#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG)
+#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT)
+#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
+
+/* error codes for internal use */
+/* if a request fails due to kmalloc failure, it gets dropped.
+ * Client should resend eventually
+ */
+#define nfserr_dropit cpu_to_be32(30000)
+/* end-of-file indicator in readdir */
+#define nfserr_eof cpu_to_be32(30001)
+/* replay detected */
+#define nfserr_replay_me cpu_to_be32(11001)
+/* nfs41 replay detected */
+#define nfserr_replay_cache cpu_to_be32(11002)
+
+/* Check for dir entries '.' and '..' */
+#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
+
+/*
+ * Time of server startup
+ */
+extern struct timeval nfssvc_boot;
+
+#ifdef CONFIG_NFSD_V4
+
+extern time_t nfsd4_lease;
+extern time_t nfsd4_grace;
+
+/* before processing a COMPOUND operation, we have to check that there
+ * is enough space in the buffer for XDR encode to succeed. otherwise,
+ * we might process an operation with side effects, and be unable to
+ * tell the client that the operation succeeded.
+ *
+ * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
+ * needed to encode an "ordinary" _successful_ operation. (GETATTR,
+ * READ, READDIR, and READLINK have their own buffer checks.) if we
+ * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
+ *
+ * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
+ * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
+ * care is taken to ensure that we never fall below this level for any
+ * reason.
+ */
+#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
+#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
+
+#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
+
+/*
+ * The following attributes are currently not supported by the NFSv4 server:
+ * ARCHIVE (deprecated anyway)
+ * HIDDEN (unlikely to be supported any time soon)
+ * MIMETYPE (unlikely to be supported any time soon)
+ * QUOTA_* (will be supported in a forthcoming patch)
+ * SYSTEM (unlikely to be supported any time soon)
+ * TIME_BACKUP (unlikely to be supported any time soon)
+ * TIME_CREATE (unlikely to be supported any time soon)
+ */
+#define NFSD4_SUPPORTED_ATTRS_WORD0 \
+(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \
+ | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \
+ | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \
+ | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \
+ | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \
+ | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \
+ | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \
+ | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \
+ | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \
+ | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL)
+
+#define NFSD4_SUPPORTED_ATTRS_WORD1 \
+(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \
+ | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
+ | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
+ | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
+ | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
+ | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
+
+#define NFSD4_SUPPORTED_ATTRS_WORD2 0
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+ NFSD4_SUPPORTED_ATTRS_WORD0
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ NFSD4_SUPPORTED_ATTRS_WORD1
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+ (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
+
+static inline u32 nfsd_suppattrs0(u32 minorversion)
+{
+ return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
+ : NFSD4_SUPPORTED_ATTRS_WORD0;
+}
+
+static inline u32 nfsd_suppattrs1(u32 minorversion)
+{
+ return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
+ : NFSD4_SUPPORTED_ATTRS_WORD1;
+}
+
+static inline u32 nfsd_suppattrs2(u32 minorversion)
+{
+ return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
+ : NFSD4_SUPPORTED_ATTRS_WORD2;
+}
+
+/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
+#define NFSD_WRITEONLY_ATTRS_WORD1 \
+ (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+
+/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
+#define NFSD_WRITEABLE_ATTRS_WORD0 \
+ (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
+#define NFSD_WRITEABLE_ATTRS_WORD1 \
+ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEABLE_ATTRS_WORD2 0
+
+#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
+ NFSD_WRITEABLE_ATTRS_WORD0
+/*
+ * we currently store the exclusive create verifier in the v_{a,m}time
+ * attributes so the client can't set these at create time using EXCLUSIVE4_1
+ */
+#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
+ (NFSD_WRITEABLE_ATTRS_WORD1 & \
+ ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
+#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
+ NFSD_WRITEABLE_ATTRS_WORD2
+
+extern int nfsd4_is_junction(struct dentry *dentry);
+#else
+static inline int nfsd4_is_junction(struct dentry *dentry)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NFSD_V4 */
+
+#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
new file mode 100644
index 00000000000..68454e75fce
--- /dev/null
+++ b/fs/nfsd/nfsfh.c
@@ -0,0 +1,690 @@
+/*
+ * NFS server file handle treatment.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ * Portions Copyright (C) 1999 G. Allen Morris III <gam3@acm.org>
+ * Extensive rewrite by Neil Brown <neilb@cse.unsw.edu.au> Southern-Spring 1999
+ * ... and again Southern-Winter 2001 to support export_operations
+ */
+
+#include <linux/exportfs.h>
+
+#include <linux/sunrpc/svcauth_gss.h>
+#include "nfsd.h"
+#include "vfs.h"
+#include "auth.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_FH
+
+
+/*
+ * our acceptability function.
+ * if NOSUBTREECHECK, accept anything
+ * if not, require that we can walk up to exp->ex_dentry
+ * doing some checks on the 'x' bits
+ */
+static int nfsd_acceptable(void *expv, struct dentry *dentry)
+{
+ struct svc_export *exp = expv;
+ int rv;
+ struct dentry *tdentry;
+ struct dentry *parent;
+
+ if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+ return 1;
+
+ tdentry = dget(dentry);
+ while (tdentry != exp->ex_path.dentry && !IS_ROOT(tdentry)) {
+ /* make sure parents give x permission to user */
+ int err;
+ parent = dget_parent(tdentry);
+ err = inode_permission(parent->d_inode, MAY_EXEC);
+ if (err < 0) {
+ dput(parent);
+ break;
+ }
+ dput(tdentry);
+ tdentry = parent;
+ }
+ if (tdentry != exp->ex_path.dentry)
+ dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name);
+ rv = (tdentry == exp->ex_path.dentry);
+ dput(tdentry);
+ return rv;
+}
+
+/* Type check. The correct error return for type mismatches does not seem to be
+ * generally agreed upon. SunOS seems to use EISDIR if file isn't S_IFREG; a
+ * comment in the NFSv3 spec says this is incorrect (implementation notes for
+ * the write call).
+ */
+static inline __be32
+nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, umode_t requested)
+{
+ mode &= S_IFMT;
+
+ if (requested == 0) /* the caller doesn't care */
+ return nfs_ok;
+ if (mode == requested)
+ return nfs_ok;
+ /*
+ * v4 has an error more specific than err_notdir which we should
+ * return in preference to err_notdir:
+ */
+ if (rqstp->rq_vers == 4 && mode == S_IFLNK)
+ return nfserr_symlink;
+ if (requested == S_IFDIR)
+ return nfserr_notdir;
+ if (mode == S_IFDIR)
+ return nfserr_isdir;
+ return nfserr_inval;
+}
+
+static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
+ struct svc_export *exp)
+{
+ int flags = nfsexp_flags(rqstp, exp);
+
+ /* Check if the request originated from a secure port. */
+ if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
+ RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
+ dprintk(KERN_WARNING
+ "nfsd: request from insecure port %s!\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
+ return nfserr_perm;
+ }
+
+ /* Set user creds for this exportpoint */
+ return nfserrno(nfsd_setuser(rqstp, exp));
+}
+
+static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
+ struct dentry *dentry, struct svc_export *exp)
+{
+ if (!(exp->ex_flags & NFSEXP_V4ROOT))
+ return nfs_ok;
+ /*
+ * v2/v3 clients have no need for the V4ROOT export--they use
+ * the mount protocl instead; also, further V4ROOT checks may be
+ * in v4-specific code, in which case v2/v3 clients could bypass
+ * them.
+ */
+ if (!nfsd_v4client(rqstp))
+ return nfserr_stale;
+ /*
+ * We're exposing only the directories and symlinks that have to be
+ * traversed on the way to real exports:
+ */
+ if (unlikely(!S_ISDIR(dentry->d_inode->i_mode) &&
+ !S_ISLNK(dentry->d_inode->i_mode)))
+ return nfserr_stale;
+ /*
+ * A pseudoroot export gives permission to access only one
+ * single directory; the kernel has to make another upcall
+ * before granting access to anything else under it:
+ */
+ if (unlikely(dentry != exp->ex_path.dentry))
+ return nfserr_stale;
+ return nfs_ok;
+}
+
+/*
+ * Use the given filehandle to look up the corresponding export and
+ * dentry. On success, the results are used to set fh_export and
+ * fh_dentry.
+ */
+static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+{
+ struct knfsd_fh *fh = &fhp->fh_handle;
+ struct fid *fid = NULL, sfid;
+ struct svc_export *exp;
+ struct dentry *dentry;
+ int fileid_type;
+ int data_left = fh->fh_size/4;
+ __be32 error;
+
+ error = nfserr_stale;
+ if (rqstp->rq_vers > 2)
+ error = nfserr_badhandle;
+ if (rqstp->rq_vers == 4 && fh->fh_size == 0)
+ return nfserr_nofilehandle;
+
+ if (fh->fh_version == 1) {
+ int len;
+
+ if (--data_left < 0)
+ return error;
+ if (fh->fh_auth_type != 0)
+ return error;
+ len = key_len(fh->fh_fsid_type) / 4;
+ if (len == 0)
+ return error;
+ if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+ /* deprecated, convert to type 3 */
+ len = key_len(FSID_ENCODE_DEV)/4;
+ fh->fh_fsid_type = FSID_ENCODE_DEV;
+ fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1])));
+ fh->fh_fsid[1] = fh->fh_fsid[2];
+ }
+ data_left -= len;
+ if (data_left < 0)
+ return error;
+ exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
+ fid = (struct fid *)(fh->fh_auth + len);
+ } else {
+ __u32 tfh[2];
+ dev_t xdev;
+ ino_t xino;
+
+ if (fh->fh_size != NFS_FHSIZE)
+ return error;
+ /* assume old filehandle format */
+ xdev = old_decode_dev(fh->ofh_xdev);
+ xino = u32_to_ino_t(fh->ofh_xino);
+ mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
+ exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
+ }
+
+ error = nfserr_stale;
+ if (PTR_ERR(exp) == -ENOENT)
+ return error;
+
+ if (IS_ERR(exp))
+ return nfserrno(PTR_ERR(exp));
+
+ if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
+ /* Elevate privileges so that the lack of 'r' or 'x'
+ * permission on some parent directory will
+ * not stop exportfs_decode_fh from being able
+ * to reconnect a directory into the dentry cache.
+ * The same problem can affect "SUBTREECHECK" exports,
+ * but as nfsd_acceptable depends on correct
+ * access control settings being in effect, we cannot
+ * fix that case easily.
+ */
+ struct cred *new = prepare_creds();
+ if (!new)
+ return nfserrno(-ENOMEM);
+ new->cap_effective =
+ cap_raise_nfsd_set(new->cap_effective,
+ new->cap_permitted);
+ put_cred(override_creds(new));
+ put_cred(new);
+ } else {
+ error = nfsd_setuser_and_check_port(rqstp, exp);
+ if (error)
+ goto out;
+ }
+
+ /*
+ * Look up the dentry using the NFS file handle.
+ */
+ error = nfserr_stale;
+ if (rqstp->rq_vers > 2)
+ error = nfserr_badhandle;
+
+ if (fh->fh_version != 1) {
+ sfid.i32.ino = fh->ofh_ino;
+ sfid.i32.gen = fh->ofh_generation;
+ sfid.i32.parent_ino = fh->ofh_dirino;
+ fid = &sfid;
+ data_left = 3;
+ if (fh->ofh_dirino == 0)
+ fileid_type = FILEID_INO32_GEN;
+ else
+ fileid_type = FILEID_INO32_GEN_PARENT;
+ } else
+ fileid_type = fh->fh_fileid_type;
+
+ if (fileid_type == FILEID_ROOT)
+ dentry = dget(exp->ex_path.dentry);
+ else {
+ dentry = exportfs_decode_fh(exp->ex_path.mnt, fid,
+ data_left, fileid_type,
+ nfsd_acceptable, exp);
+ }
+ if (dentry == NULL)
+ goto out;
+ if (IS_ERR(dentry)) {
+ if (PTR_ERR(dentry) != -EINVAL)
+ error = nfserrno(PTR_ERR(dentry));
+ goto out;
+ }
+
+ if (S_ISDIR(dentry->d_inode->i_mode) &&
+ (dentry->d_flags & DCACHE_DISCONNECTED)) {
+ printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ }
+
+ fhp->fh_dentry = dentry;
+ fhp->fh_export = exp;
+ return 0;
+out:
+ exp_put(exp);
+ return error;
+}
+
+/**
+ * fh_verify - filehandle lookup and access checking
+ * @rqstp: pointer to current rpc request
+ * @fhp: filehandle to be verified
+ * @type: expected type of object pointed to by filehandle
+ * @access: type of access needed to object
+ *
+ * Look up a dentry from the on-the-wire filehandle, check the client's
+ * access to the export, and set the current task's credentials.
+ *
+ * Regardless of success or failure of fh_verify(), fh_put() should be
+ * called on @fhp when the caller is finished with the filehandle.
+ *
+ * fh_verify() may be called multiple times on a given filehandle, for
+ * example, when processing an NFSv4 compound. The first call will look
+ * up a dentry using the on-the-wire filehandle. Subsequent calls will
+ * skip the lookup and just perform the other checks and possibly change
+ * the current task's credentials.
+ *
+ * @type specifies the type of object expected using one of the S_IF*
+ * constants defined in include/linux/stat.h. The caller may use zero
+ * to indicate that it doesn't care, or a negative integer to indicate
+ * that it expects something not of the given type.
+ *
+ * @access is formed from the NFSD_MAY_* constants defined in
+ * include/linux/nfsd/nfsd.h.
+ */
+__be32
+fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+{
+ struct svc_export *exp;
+ struct dentry *dentry;
+ __be32 error;
+
+ dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
+
+ if (!fhp->fh_dentry) {
+ error = nfsd_set_fh_dentry(rqstp, fhp);
+ if (error)
+ goto out;
+ }
+ dentry = fhp->fh_dentry;
+ exp = fhp->fh_export;
+ /*
+ * We still have to do all these permission checks, even when
+ * fh_dentry is already set:
+ * - fh_verify may be called multiple times with different
+ * "access" arguments (e.g. nfsd_proc_create calls
+ * fh_verify(...,NFSD_MAY_EXEC) first, then later (in
+ * nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE).
+ * - in the NFSv4 case, the filehandle may have been filled
+ * in by fh_compose, and given a dentry, but further
+ * compound operations performed with that filehandle
+ * still need permissions checks. In the worst case, a
+ * mountpoint crossing may have changed the export
+ * options, and we may now need to use a different uid
+ * (for example, if different id-squashing options are in
+ * effect on the new filesystem).
+ */
+ error = check_pseudo_root(rqstp, dentry, exp);
+ if (error)
+ goto out;
+
+ error = nfsd_setuser_and_check_port(rqstp, exp);
+ if (error)
+ goto out;
+
+ error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
+ if (error)
+ goto out;
+
+ /*
+ * pseudoflavor restrictions are not enforced on NLM,
+ * which clients virtually always use auth_sys for,
+ * even while using RPCSEC_GSS for NFS.
+ */
+ if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
+ goto skip_pseudoflavor_check;
+ /*
+ * Clients may expect to be able to use auth_sys during mount,
+ * even if they use gss for everything else; see section 2.3.2
+ * of rfc 2623.
+ */
+ if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT
+ && exp->ex_path.dentry == dentry)
+ goto skip_pseudoflavor_check;
+
+ error = check_nfsd_access(exp, rqstp);
+ if (error)
+ goto out;
+
+skip_pseudoflavor_check:
+ /* Finally, check access permissions. */
+ error = nfsd_permission(rqstp, exp, dentry, access);
+
+ if (error) {
+ dprintk("fh_verify: %s/%s permission failure, "
+ "acc=%x, error=%d\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name,
+ access, ntohl(error));
+ }
+out:
+ if (error == nfserr_stale)
+ nfsdstats.fh_stale++;
+ return error;
+}
+
+
+/*
+ * Compose a file handle for an NFS reply.
+ *
+ * Note that when first composed, the dentry may not yet have
+ * an inode. In this case a call to fh_update should be made
+ * before the fh goes out on the wire ...
+ */
+static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
+ struct dentry *dentry)
+{
+ if (dentry != exp->ex_path.dentry) {
+ struct fid *fid = (struct fid *)
+ (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1);
+ int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
+ int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
+
+ fhp->fh_handle.fh_fileid_type =
+ exportfs_encode_fh(dentry, fid, &maxsize, subtreecheck);
+ fhp->fh_handle.fh_size += maxsize * 4;
+ } else {
+ fhp->fh_handle.fh_fileid_type = FILEID_ROOT;
+ }
+}
+
+/*
+ * for composing old style file handles
+ */
+static inline void _fh_update_old(struct dentry *dentry,
+ struct svc_export *exp,
+ struct knfsd_fh *fh)
+{
+ fh->ofh_ino = ino_t_to_u32(dentry->d_inode->i_ino);
+ fh->ofh_generation = dentry->d_inode->i_generation;
+ if (S_ISDIR(dentry->d_inode->i_mode) ||
+ (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
+ fh->ofh_dirino = 0;
+}
+
+static bool is_root_export(struct svc_export *exp)
+{
+ return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
+}
+
+static struct super_block *exp_sb(struct svc_export *exp)
+{
+ return exp->ex_path.dentry->d_inode->i_sb;
+}
+
+static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
+{
+ switch (fsid_type) {
+ case FSID_DEV:
+ if (!old_valid_dev(exp_sb(exp)->s_dev))
+ return 0;
+ /* FALL THROUGH */
+ case FSID_MAJOR_MINOR:
+ case FSID_ENCODE_DEV:
+ return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
+ case FSID_NUM:
+ return exp->ex_flags & NFSEXP_FSID;
+ case FSID_UUID8:
+ case FSID_UUID16:
+ if (!is_root_export(exp))
+ return 0;
+ /* fall through */
+ case FSID_UUID4_INUM:
+ case FSID_UUID16_INUM:
+ return exp->ex_uuid != NULL;
+ }
+ return 1;
+}
+
+
+static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh)
+{
+ u8 version;
+ u8 fsid_type;
+retry:
+ version = 1;
+ if (ref_fh && ref_fh->fh_export == exp) {
+ version = ref_fh->fh_handle.fh_version;
+ fsid_type = ref_fh->fh_handle.fh_fsid_type;
+
+ ref_fh = NULL;
+
+ switch (version) {
+ case 0xca:
+ fsid_type = FSID_DEV;
+ break;
+ case 1:
+ break;
+ default:
+ goto retry;
+ }
+
+ /*
+ * As the fsid -> filesystem mapping was guided by
+ * user-space, there is no guarantee that the filesystem
+ * actually supports that fsid type. If it doesn't we
+ * loop around again without ref_fh set.
+ */
+ if (!fsid_type_ok_for_exp(fsid_type, exp))
+ goto retry;
+ } else if (exp->ex_flags & NFSEXP_FSID) {
+ fsid_type = FSID_NUM;
+ } else if (exp->ex_uuid) {
+ if (fhp->fh_maxsize >= 64) {
+ if (is_root_export(exp))
+ fsid_type = FSID_UUID16;
+ else
+ fsid_type = FSID_UUID16_INUM;
+ } else {
+ if (is_root_export(exp))
+ fsid_type = FSID_UUID8;
+ else
+ fsid_type = FSID_UUID4_INUM;
+ }
+ } else if (!old_valid_dev(exp_sb(exp)->s_dev))
+ /* for newer device numbers, we must use a newer fsid format */
+ fsid_type = FSID_ENCODE_DEV;
+ else
+ fsid_type = FSID_DEV;
+ fhp->fh_handle.fh_version = version;
+ if (version)
+ fhp->fh_handle.fh_fsid_type = fsid_type;
+}
+
+__be32
+fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
+ struct svc_fh *ref_fh)
+{
+ /* ref_fh is a reference file handle.
+ * if it is non-null and for the same filesystem, then we should compose
+ * a filehandle which is of the same version, where possible.
+ * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
+ * Then create a 32byte filehandle using nfs_fhbase_old
+ *
+ */
+
+ struct inode * inode = dentry->d_inode;
+ struct dentry *parent = dentry->d_parent;
+ __u32 *datap;
+ dev_t ex_dev = exp_sb(exp)->s_dev;
+
+ dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
+ MAJOR(ex_dev), MINOR(ex_dev),
+ (long) exp->ex_path.dentry->d_inode->i_ino,
+ parent->d_name.name, dentry->d_name.name,
+ (inode ? inode->i_ino : 0));
+
+ /* Choose filehandle version and fsid type based on
+ * the reference filehandle (if it is in the same export)
+ * or the export options.
+ */
+ set_version_and_fsid_type(fhp, exp, ref_fh);
+
+ if (ref_fh == fhp)
+ fh_put(ref_fh);
+
+ if (fhp->fh_locked || fhp->fh_dentry) {
+ printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n",
+ parent->d_name.name, dentry->d_name.name);
+ }
+ if (fhp->fh_maxsize < NFS_FHSIZE)
+ printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n",
+ fhp->fh_maxsize,
+ parent->d_name.name, dentry->d_name.name);
+
+ fhp->fh_dentry = dget(dentry); /* our internal copy */
+ fhp->fh_export = exp;
+ cache_get(&exp->h);
+
+ if (fhp->fh_handle.fh_version == 0xca) {
+ /* old style filehandle please */
+ memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
+ fhp->fh_handle.fh_size = NFS_FHSIZE;
+ fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
+ fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev);
+ fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
+ fhp->fh_handle.ofh_xino =
+ ino_t_to_u32(exp->ex_path.dentry->d_inode->i_ino);
+ fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
+ if (inode)
+ _fh_update_old(dentry, exp, &fhp->fh_handle);
+ } else {
+ int len;
+ fhp->fh_handle.fh_auth_type = 0;
+ datap = fhp->fh_handle.fh_auth+0;
+ mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
+ exp->ex_path.dentry->d_inode->i_ino,
+ exp->ex_fsid, exp->ex_uuid);
+
+ len = key_len(fhp->fh_handle.fh_fsid_type);
+ datap += len/4;
+ fhp->fh_handle.fh_size = 4 + len;
+
+ if (inode)
+ _fh_update(fhp, exp, dentry);
+ if (fhp->fh_handle.fh_fileid_type == 255) {
+ fh_put(fhp);
+ return nfserr_opnotsupp;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Update file handle information after changing a dentry.
+ * This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create
+ */
+__be32
+fh_update(struct svc_fh *fhp)
+{
+ struct dentry *dentry;
+
+ if (!fhp->fh_dentry)
+ goto out_bad;
+
+ dentry = fhp->fh_dentry;
+ if (!dentry->d_inode)
+ goto out_negative;
+ if (fhp->fh_handle.fh_version != 1) {
+ _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
+ } else {
+ if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
+ goto out;
+
+ _fh_update(fhp, fhp->fh_export, dentry);
+ if (fhp->fh_handle.fh_fileid_type == 255)
+ return nfserr_opnotsupp;
+ }
+out:
+ return 0;
+
+out_bad:
+ printk(KERN_ERR "fh_update: fh not verified!\n");
+ goto out;
+out_negative:
+ printk(KERN_ERR "fh_update: %s/%s still negative!\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ goto out;
+}
+
+/*
+ * Release a file handle.
+ */
+void
+fh_put(struct svc_fh *fhp)
+{
+ struct dentry * dentry = fhp->fh_dentry;
+ struct svc_export * exp = fhp->fh_export;
+ if (dentry) {
+ fh_unlock(fhp);
+ fhp->fh_dentry = NULL;
+ dput(dentry);
+#ifdef CONFIG_NFSD_V3
+ fhp->fh_pre_saved = 0;
+ fhp->fh_post_saved = 0;
+#endif
+ }
+ if (exp) {
+ cache_put(&exp->h, &svc_export_cache);
+ fhp->fh_export = NULL;
+ }
+ return;
+}
+
+/*
+ * Shorthand for dprintk()'s
+ */
+char * SVCFH_fmt(struct svc_fh *fhp)
+{
+ struct knfsd_fh *fh = &fhp->fh_handle;
+
+ static char buf[80];
+ sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
+ fh->fh_size,
+ fh->fh_base.fh_pad[0],
+ fh->fh_base.fh_pad[1],
+ fh->fh_base.fh_pad[2],
+ fh->fh_base.fh_pad[3],
+ fh->fh_base.fh_pad[4],
+ fh->fh_base.fh_pad[5]);
+ return buf;
+}
+
+enum fsid_source fsid_source(struct svc_fh *fhp)
+{
+ if (fhp->fh_handle.fh_version != 1)
+ return FSIDSOURCE_DEV;
+ switch(fhp->fh_handle.fh_fsid_type) {
+ case FSID_DEV:
+ case FSID_ENCODE_DEV:
+ case FSID_MAJOR_MINOR:
+ if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV)
+ return FSIDSOURCE_DEV;
+ break;
+ case FSID_NUM:
+ if (fhp->fh_export->ex_flags & NFSEXP_FSID)
+ return FSIDSOURCE_FSID;
+ break;
+ default:
+ break;
+ }
+ /* either a UUID type filehandle, or the filehandle doesn't
+ * match the export.
+ */
+ if (fhp->fh_export->ex_flags & NFSEXP_FSID)
+ return FSIDSOURCE_FSID;
+ if (fhp->fh_export->ex_uuid)
+ return FSIDSOURCE_UUID;
+ return FSIDSOURCE_DEV;
+}
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
new file mode 100644
index 00000000000..e5e6707ba68
--- /dev/null
+++ b/fs/nfsd/nfsfh.h
@@ -0,0 +1,206 @@
+/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
+
+#ifndef _LINUX_NFSD_FH_INT_H
+#define _LINUX_NFSD_FH_INT_H
+
+#include <linux/nfsd/nfsfh.h>
+
+enum nfsd_fsid {
+ FSID_DEV = 0,
+ FSID_NUM,
+ FSID_MAJOR_MINOR,
+ FSID_ENCODE_DEV,
+ FSID_UUID4_INUM,
+ FSID_UUID8,
+ FSID_UUID16,
+ FSID_UUID16_INUM,
+};
+
+enum fsid_source {
+ FSIDSOURCE_DEV,
+ FSIDSOURCE_FSID,
+ FSIDSOURCE_UUID,
+};
+extern enum fsid_source fsid_source(struct svc_fh *fhp);
+
+
+/* This might look a little large to "inline" but in all calls except
+ * one, 'vers' is constant so moste of the function disappears.
+ */
+static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
+ u32 fsid, unsigned char *uuid)
+{
+ u32 *up;
+ switch(vers) {
+ case FSID_DEV:
+ fsidv[0] = htonl((MAJOR(dev)<<16) |
+ MINOR(dev));
+ fsidv[1] = ino_t_to_u32(ino);
+ break;
+ case FSID_NUM:
+ fsidv[0] = fsid;
+ break;
+ case FSID_MAJOR_MINOR:
+ fsidv[0] = htonl(MAJOR(dev));
+ fsidv[1] = htonl(MINOR(dev));
+ fsidv[2] = ino_t_to_u32(ino);
+ break;
+
+ case FSID_ENCODE_DEV:
+ fsidv[0] = new_encode_dev(dev);
+ fsidv[1] = ino_t_to_u32(ino);
+ break;
+
+ case FSID_UUID4_INUM:
+ /* 4 byte fsid and inode number */
+ up = (u32*)uuid;
+ fsidv[0] = ino_t_to_u32(ino);
+ fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
+ break;
+
+ case FSID_UUID8:
+ /* 8 byte fsid */
+ up = (u32*)uuid;
+ fsidv[0] = up[0] ^ up[2];
+ fsidv[1] = up[1] ^ up[3];
+ break;
+
+ case FSID_UUID16:
+ /* 16 byte fsid - NFSv3+ only */
+ memcpy(fsidv, uuid, 16);
+ break;
+
+ case FSID_UUID16_INUM:
+ /* 8 byte inode and 16 byte fsid */
+ *(u64*)fsidv = (u64)ino;
+ memcpy(fsidv+2, uuid, 16);
+ break;
+ default: BUG();
+ }
+}
+
+static inline int key_len(int type)
+{
+ switch(type) {
+ case FSID_DEV: return 8;
+ case FSID_NUM: return 4;
+ case FSID_MAJOR_MINOR: return 12;
+ case FSID_ENCODE_DEV: return 8;
+ case FSID_UUID4_INUM: return 8;
+ case FSID_UUID8: return 8;
+ case FSID_UUID16: return 16;
+ case FSID_UUID16_INUM: return 24;
+ default: return 0;
+ }
+}
+
+/*
+ * Shorthand for dprintk()'s
+ */
+extern char * SVCFH_fmt(struct svc_fh *fhp);
+
+/*
+ * Function prototypes
+ */
+__be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int);
+__be32 fh_compose(struct svc_fh *, struct svc_export *, struct d