diff options
author | Anton Arapov <anton@redhat.com> | 2012-04-16 10:05:28 +0200 |
---|---|---|
committer | Anton Arapov <anton@redhat.com> | 2012-04-16 10:05:28 +0200 |
commit | b4b6116a13633898cf868f2f103c96a90c4c20f8 (patch) | |
tree | 93d1b7e2cfcdf473d8d4ff3ad141fa864f8491f6 /fs/nfsd | |
parent | edd4be777c953e5faafc80d091d3084b4343f5d3 (diff) | |
download | kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.gz kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.xz kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.zip |
fedora kernel: d9aad82f3319f3cfd1aebc01234254ef0c37ad84v3.3.2-1
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'fs/nfsd')
37 files changed, 26385 insertions, 0 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig new file mode 100644 index 00000000000..8df1ea4a6ff --- /dev/null +++ b/fs/nfsd/Kconfig @@ -0,0 +1,92 @@ +config NFSD + tristate "NFS server support" + depends on INET + depends on FILE_LOCKING + select LOCKD + select SUNRPC + select EXPORTFS + select NFS_ACL_SUPPORT if NFSD_V2_ACL + help + Choose Y here if you want to allow other computers to access + files residing on this system using Sun's Network File System + protocol. To compile the NFS server support as a module, + choose M here: the module will be called nfsd. + + You may choose to use a user-space NFS server instead, in which + case you can choose N here. + + To export local file systems using NFS, you also need to install + user space programs which can be found in the Linux nfs-utils + package, available from http://linux-nfs.org/. More detail about + the Linux NFS server implementation is available via the + exports(5) man page. + + Below you can choose which versions of the NFS protocol are + available to clients mounting the NFS server on this system. + Support for NFS version 2 (RFC 1094) is always available when + CONFIG_NFSD is selected. + + If unsure, say N. + +config NFSD_V2_ACL + bool + depends on NFSD + +config NFSD_V3 + bool "NFS server support for NFS version 3" + depends on NFSD + help + This option enables support in your system's NFS server for + version 3 of the NFS protocol (RFC 1813). + + If unsure, say Y. + +config NFSD_V3_ACL + bool "NFS server support for the NFSv3 ACL protocol extension" + depends on NFSD_V3 + select NFSD_V2_ACL + help + Solaris NFS servers support an auxiliary NFSv3 ACL protocol that + never became an official part of the NFS version 3 protocol. + This protocol extension allows applications on NFS clients to + manipulate POSIX Access Control Lists on files residing on NFS + servers. NFS servers enforce POSIX ACLs on local files whether + this protocol is available or not. 
+ + This option enables support in your system's NFS server for the + NFSv3 ACL protocol extension allowing NFS clients to manipulate + POSIX ACLs on files exported by your system's NFS server. NFS + clients which support the Solaris NFSv3 ACL protocol can then + access and modify ACLs on your NFS server. + + To store ACLs on your NFS server, you also need to enable ACL- + related CONFIG options for your local file systems of choice. + + If unsure, say N. + +config NFSD_V4 + bool "NFS server support for NFS version 4 (EXPERIMENTAL)" + depends on NFSD && PROC_FS && EXPERIMENTAL + select NFSD_V3 + select FS_POSIX_ACL + select SUNRPC_GSS + select CRYPTO + help + This option enables support in your system's NFS server for + version 4 of the NFS protocol (RFC 3530). + + To export files using NFSv4, you need to install additional user + space programs which can be found in the Linux nfs-utils package, + available from http://linux-nfs.org/. + + If unsure, say N. + +config NFSD_FAULT_INJECTION + bool "NFS server manual fault injection" + depends on NFSD_V4 && DEBUG_KERNEL + help + This option enables support for manually injecting faults + into the NFS server. This is intended to be used for + testing error recovery on the NFS client. + + If unsure, say N. 
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile new file mode 100644 index 00000000000..af32ef06b4f --- /dev/null +++ b/fs/nfsd/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for the Linux nfs server +# + +obj-$(CONFIG_NFSD) += nfsd.o + +nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o +nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o +nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o +nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ + nfs4acl.o nfs4callback.o nfs4recover.o diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h new file mode 100644 index 00000000000..34e5c40af5e --- /dev/null +++ b/fs/nfsd/acl.h @@ -0,0 +1,59 @@ +/* + * Common NFSv4 ACL handling definitions. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen <marius@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LINUX_NFS4_ACL_H +#define LINUX_NFS4_ACL_H + +#include <linux/posix_acl.h> + +/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to + * fit in a page: */ +#define NFS4_ACL_MAX 170 + +struct nfs4_acl *nfs4_acl_new(int); +int nfs4_acl_get_whotype(char *, u32); +int nfs4_acl_write_who(int who, char *p); +int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, + uid_t who, u32 mask); + +#define NFS4_ACL_TYPE_DEFAULT 0x01 +#define NFS4_ACL_DIR 0x02 +#define NFS4_ACL_OWNER 0x04 + +struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *, + struct posix_acl *, unsigned int flags); +int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **, + struct posix_acl **, unsigned int flags); + +#endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c new file mode 100644 index 00000000000..79717a40dab --- /dev/null +++ b/fs/nfsd/auth.c @@ -0,0 +1,95 @@ +/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ + +#include <linux/sched.h> +#include "nfsd.h" +#include "auth.h" + +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) +{ + struct exp_flavor_info *f; + struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; + + for (f = exp->ex_flavors; f < end; f++) { + if (f->pseudoflavor == rqstp->rq_flavor) + return f->flags; + } + return exp->ex_flags; + +} + +int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) +{ + struct group_info 
*rqgi; + struct group_info *gi; + struct cred *new; + int i; + int flags = nfsexp_flags(rqstp, exp); + int ret; + + validate_process_creds(); + + /* discard any old override before preparing the new set */ + revert_creds(get_cred(current->real_cred)); + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = rqstp->rq_cred.cr_uid; + new->fsgid = rqstp->rq_cred.cr_gid; + + rqgi = rqstp->rq_cred.cr_group_info; + + if (flags & NFSEXP_ALLSQUASH) { + new->fsuid = exp->ex_anon_uid; + new->fsgid = exp->ex_anon_gid; + gi = groups_alloc(0); + if (!gi) + goto oom; + } else if (flags & NFSEXP_ROOTSQUASH) { + if (!new->fsuid) + new->fsuid = exp->ex_anon_uid; + if (!new->fsgid) + new->fsgid = exp->ex_anon_gid; + + gi = groups_alloc(rqgi->ngroups); + if (!gi) + goto oom; + + for (i = 0; i < rqgi->ngroups; i++) { + if (!GROUP_AT(rqgi, i)) + GROUP_AT(gi, i) = exp->ex_anon_gid; + else + GROUP_AT(gi, i) = GROUP_AT(rqgi, i); + } + } else { + gi = get_group_info(rqgi); + } + + if (new->fsuid == (uid_t) -1) + new->fsuid = exp->ex_anon_uid; + if (new->fsgid == (gid_t) -1) + new->fsgid = exp->ex_anon_gid; + + ret = set_groups(new, gi); + put_group_info(gi); + if (ret < 0) + goto error; + + if (new->fsuid) + new->cap_effective = cap_drop_nfsd_set(new->cap_effective); + else + new->cap_effective = cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + validate_process_creds(); + put_cred(override_creds(new)); + put_cred(new); + validate_process_creds(); + return 0; + +oom: + ret = -ENOMEM; +error: + abort_creds(new); + return ret; +} + diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h new file mode 100644 index 00000000000..78b3c0e9382 --- /dev/null +++ b/fs/nfsd/auth.h @@ -0,0 +1,22 @@ +/* + * nfsd-specific authentication stuff. + * uid/gid mapping not yet implemented. 
+ * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef LINUX_NFSD_AUTH_H +#define LINUX_NFSD_AUTH_H + +#define nfsd_luid(rq, uid) ((u32)(uid)) +#define nfsd_lgid(rq, gid) ((u32)(gid)) +#define nfsd_ruid(rq, uid) ((u32)(uid)) +#define nfsd_rgid(rq, gid) ((u32)(gid)) + +/* + * Set the current process's fsuid/fsgid etc to those of the NFS + * client user + */ +int nfsd_setuser(struct svc_rqst *, struct svc_export *); + +#endif /* LINUX_NFSD_AUTH_H */ diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h new file mode 100644 index 00000000000..93cc9d34c45 --- /dev/null +++ b/fs/nfsd/cache.h @@ -0,0 +1,83 @@ +/* + * Request reply cache. This was heavily inspired by the + * implementation in 4.3BSD/4.4BSD. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef NFSCACHE_H +#define NFSCACHE_H + +#include <linux/sunrpc/svc.h> + +/* + * Representation of a reply cache entry. + */ +struct svc_cacherep { + struct hlist_node c_hash; + struct list_head c_lru; + + unsigned char c_state, /* unused, inprog, done */ + c_type, /* status, buffer */ + c_secure : 1; /* req came from port < 1024 */ + struct sockaddr_in c_addr; + __be32 c_xid; + u32 c_prot; + u32 c_proc; + u32 c_vers; + unsigned long c_timestamp; + union { + struct kvec u_vec; + __be32 u_status; + } c_u; +}; + +#define c_replvec c_u.u_vec +#define c_replstat c_u.u_status + +/* cache entry states */ +enum { + RC_UNUSED, + RC_INPROG, + RC_DONE +}; + +/* return values */ +enum { + RC_DROPIT, + RC_REPLY, + RC_DOIT, + RC_INTR +}; + +/* + * Cache types. + * We may want to add more types one day, e.g. for diropres and + * attrstat replies. Using cache entries with fixed length instead + * of buffer pointers may be more efficient. + */ +enum { + RC_NOCACHE, + RC_REPLSTAT, + RC_REPLBUFF, +}; + +/* + * If requests are retransmitted within this interval, they're dropped. 
+ */ +#define RC_DELAY (HZ/5) + +int nfsd_reply_cache_init(void); +void nfsd_reply_cache_shutdown(void); +int nfsd_cache_lookup(struct svc_rqst *); +void nfsd_cache_update(struct svc_rqst *, int, __be32 *); + +#ifdef CONFIG_NFSD_V4 +void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); +#else /* CONFIG_NFSD_V4 */ +static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) +{ +} +#endif /* CONFIG_NFSD_V4 */ + +#endif /* NFSCACHE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c new file mode 100644 index 00000000000..cf8a6bd062f --- /dev/null +++ b/fs/nfsd/export.c @@ -0,0 +1,1263 @@ +/* + * NFS exporting and validation. + * + * We maintain a list of clients, each of which has a list of + * exports. To export an fs to a given client, you first have + * to create the client entry with NFSCTL_ADDCLIENT, which + * creates a client control block and adds it to the hash + * table. Then, you call NFSCTL_EXPORT for each fs. + * + * + * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> + */ + +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/module.h> +#include <linux/exportfs.h> + +#include <net/ipv6.h> + +#include "nfsd.h" +#include "nfsfh.h" + +#define NFSDDBG_FACILITY NFSDDBG_EXPORT + +typedef struct auth_domain svc_client; +typedef struct svc_export svc_export; + +/* + * We have two caches. + * One maps client+vfsmnt+dentry to export options - the export map + * The other maps client+filehandle-fragment to export options. - the expkey map + * + * The export options are actually stored in the first map, and the + * second map contains a reference to the entry in the first map. 
+ */ + +#define EXPKEY_HASHBITS 8 +#define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) +#define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) +static struct cache_head *expkey_table[EXPKEY_HASHMAX]; + +static void expkey_put(struct kref *ref) +{ + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + if (test_bit(CACHE_VALID, &key->h.flags) && + !test_bit(CACHE_NEGATIVE, &key->h.flags)) + path_put(&key->ek_path); + auth_domain_put(key->ek_client); + kfree(key); +} + +static void expkey_request(struct cache_detail *cd, + struct cache_head *h, + char **bpp, int *blen) +{ + /* client fsidtype \xfsid */ + struct svc_expkey *ek = container_of(h, struct svc_expkey, h); + char type[5]; + + qword_add(bpp, blen, ek->ek_client->name); + snprintf(type, 5, "%d", ek->ek_fsidtype); + qword_add(bpp, blen, type); + qword_addhex(bpp, blen, (char*)ek->ek_fsid, key_len(ek->ek_fsidtype)); + (*bpp)[-1] = '\n'; +} + +static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, expkey_request); +} + +static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); +static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); +static struct cache_detail svc_expkey_cache; + +static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) +{ + /* client fsidtype fsid [path] */ + char *buf; + int len; + struct auth_domain *dom = NULL; + int err; + int fsidtype; + char *ep; + struct svc_expkey key; + struct svc_expkey *ek = NULL; + + if (mlen < 1 || mesg[mlen-1] != '\n') + return -EINVAL; + mesg[mlen-1] = 0; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + err = -ENOMEM; + if (!buf) + goto out; + + err = -EINVAL; + if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out; + + err = -ENOENT; + dom = auth_domain_find(buf); + if (!dom) + goto out; + dprintk("found domain %s\n", buf); + + err = -EINVAL; + if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out; + fsidtype = simple_strtoul(buf, 
&ep, 10); + if (*ep) + goto out; + dprintk("found fsidtype %d\n", fsidtype); + if (key_len(fsidtype)==0) /* invalid type */ + goto out; + if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out; + dprintk("found fsid length %d\n", len); + if (len != key_len(fsidtype)) + goto out; + + /* OK, we seem to have a valid key */ + key.h.flags = 0; + key.h.expiry_time = get_expiry(&mesg); + if (key.h.expiry_time == 0) + goto out; + + key.ek_client = dom; + key.ek_fsidtype = fsidtype; + memcpy(key.ek_fsid, buf, len); + + ek = svc_expkey_lookup(&key); + err = -ENOMEM; + if (!ek) + goto out; + + /* now we want a pathname, or empty meaning NEGATIVE */ + err = -EINVAL; + len = qword_get(&mesg, buf, PAGE_SIZE); + if (len < 0) + goto out; + dprintk("Path seems to be <%s>\n", buf); + err = 0; + if (len == 0) { + set_bit(CACHE_NEGATIVE, &key.h.flags); + ek = svc_expkey_update(&key, ek); + if (!ek) + err = -ENOMEM; + } else { + err = kern_path(buf, 0, &key.ek_path); + if (err) + goto out; + + dprintk("Found the path %s\n", buf); + + ek = svc_expkey_update(&key, ek); + if (!ek) + err = -ENOMEM; + path_put(&key.ek_path); + } + cache_flush(); + out: + if (ek) + cache_put(&ek->h, &svc_expkey_cache); + if (dom) + auth_domain_put(dom); + kfree(buf); + return err; +} + +static int expkey_show(struct seq_file *m, + struct cache_detail *cd, + struct cache_head *h) +{ + struct svc_expkey *ek ; + int i; + + if (h ==NULL) { + seq_puts(m, "#domain fsidtype fsid [path]\n"); + return 0; + } + ek = container_of(h, struct svc_expkey, h); + seq_printf(m, "%s %d 0x", ek->ek_client->name, + ek->ek_fsidtype); + for (i=0; i < key_len(ek->ek_fsidtype)/4; i++) + seq_printf(m, "%08x", ek->ek_fsid[i]); + if (test_bit(CACHE_VALID, &h->flags) && + !test_bit(CACHE_NEGATIVE, &h->flags)) { + seq_printf(m, " "); + seq_path(m, &ek->ek_path, "\\ \t\n"); + } + seq_printf(m, "\n"); + return 0; +} + +static inline int expkey_match (struct cache_head *a, struct cache_head *b) +{ + struct svc_expkey *orig = 
container_of(a, struct svc_expkey, h); + struct svc_expkey *new = container_of(b, struct svc_expkey, h); + + if (orig->ek_fsidtype != new->ek_fsidtype || + orig->ek_client != new->ek_client || + memcmp(orig->ek_fsid, new->ek_fsid, key_len(orig->ek_fsidtype)) != 0) + return 0; + return 1; +} + +static inline void expkey_init(struct cache_head *cnew, + struct cache_head *citem) +{ + struct svc_expkey *new = container_of(cnew, struct svc_expkey, h); + struct svc_expkey *item = container_of(citem, struct svc_expkey, h); + + kref_get(&item->ek_client->ref); + new->ek_client = item->ek_client; + new->ek_fsidtype = item->ek_fsidtype; + + memcpy(new->ek_fsid, item->ek_fsid, sizeof(new->ek_fsid)); +} + +static inline void expkey_update(struct cache_head *cnew, + struct cache_head *citem) +{ + struct svc_expkey *new = container_of(cnew, struct svc_expkey, h); + struct svc_expkey *item = container_of(citem, struct svc_expkey, h); + + new->ek_path = item->ek_path; + path_get(&item->ek_path); +} + +static struct cache_head *expkey_alloc(void) +{ + struct svc_expkey *i = kmalloc(sizeof(*i), GFP_KERNEL); + if (i) + return &i->h; + else + return NULL; +} + +static struct cache_detail svc_expkey_cache = { + .owner = THIS_MODULE, + .hash_size = EXPKEY_HASHMAX, + .hash_table = expkey_table, + .name = "nfsd.fh", + .cache_put = expkey_put, + .cache_upcall = expkey_upcall, + .cache_parse = expkey_parse, + .cache_show = expkey_show, + .match = expkey_match, + .init = expkey_init, + .update = expkey_update, + .alloc = expkey_alloc, +}; + +static int +svc_expkey_hash(struct svc_expkey *item) +{ + int hash = item->ek_fsidtype; + char * cp = (char*)item->ek_fsid; + int len = key_len(item->ek_fsidtype); + + hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); + hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); + hash &= EXPKEY_HASHMASK; + return hash; +} + +static struct svc_expkey * +svc_expkey_lookup(struct svc_expkey *item) +{ + struct cache_head *ch; + int hash = svc_expkey_hash(item); + + ch = 
sunrpc_cache_lookup(&svc_expkey_cache, &item->h, + hash); + if (ch) + return container_of(ch, struct svc_expkey, h); + else + return NULL; +} + +static struct svc_expkey * +svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) +{ + struct cache_head *ch; + int hash = svc_expkey_hash(new); + + ch = sunrpc_cache_update(&svc_expkey_cache, &new->h, + &old->h, hash); + if (ch) + return container_of(ch, struct svc_expkey, h); + else + return NULL; +} + + +#define EXPORT_HASHBITS 8 +#define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) + +static struct cache_head *export_table[EXPORT_HASHMAX]; + +static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) +{ + int i; + + for (i = 0; i < fsloc->locations_count; i++) { + kfree(fsloc->locations[i].path); + kfree(fsloc->locations[i].hosts); + } + kfree(fsloc->locations); +} + +static void svc_export_put(struct kref *ref) +{ + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + path_put(&exp->ex_path); + auth_domain_put(exp->ex_client); + nfsd4_fslocs_free(&exp->ex_fslocs); + kfree(exp); +} + +static void svc_export_request(struct cache_detail *cd, + struct cache_head *h, + char **bpp, int *blen) +{ + /* client path */ + struct svc_export *exp = container_of(h, struct svc_export, h); + char *pth; + + qword_add(bpp, blen, exp->ex_client->name); + pth = d_path(&exp->ex_path, *bpp, *blen); + if (IS_ERR(pth)) { + /* is this correct? */ + (*bpp)[0] = '\n'; + return; + } + qword_add(bpp, blen, pth); + (*bpp)[-1] = '\n'; +} + +static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); +} + +static struct svc_export *svc_export_update(struct svc_export *new, + struct svc_export *old); +static struct svc_export *svc_export_lookup(struct svc_export *); + +static int check_export(struct inode *inode, int *flags, unsigned char *uuid) +{ + + /* + * We currently export only dirs, regular files, and (for v4 + * pseudoroot) symlinks. 
+ */ + if (!S_ISDIR(inode->i_mode) && + !S_ISLNK(inode->i_mode) && + !S_ISREG(inode->i_mode)) + return -ENOTDIR; + + /* + * Mountd should never pass down a writeable V4ROOT export, but, + * just to make sure: + */ + if (*flags & NFSEXP_V4ROOT) + *flags |= NFSEXP_READONLY; + + /* There are two requirements on a filesystem to be exportable. + * 1: We must be able to identify the filesystem from a number. + * either a device number (so FS_REQUIRES_DEV needed) + * or an FSID number (so NFSEXP_FSID or ->uuid is needed). + * 2: We must be able to find an inode from a filehandle. + * This means that s_export_op must be set. + */ + if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && + !(*flags & NFSEXP_FSID) && + uuid == NULL) { + dprintk("exp_export: export of non-dev fs without fsid\n"); + return -EINVAL; + } + + if (!inode->i_sb->s_export_op || + !inode->i_sb->s_export_op->fh_to_dentry) { + dprintk("exp_export: export of invalid fs type.\n"); + return -EINVAL; + } + + return 0; + +} + +#ifdef CONFIG_NFSD_V4 + +static int +fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) +{ + int len; + int migrated, i, err; + + /* listsize */ + err = get_int(mesg, &fsloc->locations_count); + if (err) + return err; + if (fsloc->locations_count > MAX_FS_LOCATIONS) + return -EINVAL; + if (fsloc->locations_count == 0) + return 0; + + fsloc->locations = kzalloc(fsloc->locations_count + * sizeof(struct nfsd4_fs_location), GFP_KERNEL); + if (!fsloc->locations) + return -ENOMEM; + for (i=0; i < fsloc->locations_count; i++) { + /* colon separated host list */ + err = -EINVAL; + len = qword_get(mesg, buf, PAGE_SIZE); + if (len <= 0) + goto out_free_all; + err = -ENOMEM; + fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL); + if (!fsloc->locations[i].hosts) + goto out_free_all; + err = -EINVAL; + /* slash separated path component list */ + len = qword_get(mesg, buf, PAGE_SIZE); + if (len <= 0) + goto out_free_all; + err = -ENOMEM; + fsloc->locations[i].path = 
kstrdup(buf, GFP_KERNEL); + if (!fsloc->locations[i].path) + goto out_free_all; + } + /* migrated */ + err = get_int(mesg, &migrated); + if (err) + goto out_free_all; + err = -EINVAL; + if (migrated < 0 || migrated > 1) + goto out_free_all; + fsloc->migrated = migrated; + return 0; +out_free_all: + nfsd4_fslocs_free(fsloc); + return err; +} + +static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) +{ + int listsize, err; + struct exp_flavor_info *f; + + err = get_int(mesg, &listsize); + if (err) + return err; + if (listsize < 0 || listsize > MAX_SECINFO_LIST) + return -EINVAL; + + for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { + err = get_int(mesg, &f->pseudoflavor); + if (err) + return err; + /* + * XXX: It would be nice to also check whether this + * pseudoflavor is supported, so we can discover the + * problem at export time instead of when a client fails + * to authenticate. + */ + err = get_int(mesg, &f->flags); + if (err) + return err; + /* Only some flags are allowed to differ between flavors: */ + if (~NFSEXP_SECINFO_FLAGS & (f->flags ^ exp->ex_flags)) + return -EINVAL; + } + exp->ex_nflavors = listsize; + return 0; +} + +#else /* CONFIG_NFSD_V4 */ +static inline int +fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc){return 0;} +static inline int +secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } +#endif + +static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) +{ + /* client path expiry [flags anonuid anongid fsid] */ + char *buf; + int len; + int err; + struct auth_domain *dom = NULL; + struct svc_export exp = {}, *expp; + int an_int; + + if (mesg[mlen-1] != '\n') + return -EINVAL; + mesg[mlen-1] = 0; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* client */ + err = -EINVAL; + len = qword_get(&mesg, buf, PAGE_SIZE); + if (len <= 0) + goto out; + + err = -ENOENT; + dom = auth_domain_find(buf); + if (!dom) + goto out; + + /* 
path */ + err = -EINVAL; + if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out1; + + err = kern_path(buf, 0, &exp.ex_path); + if (err) + goto out1; + + exp.ex_client = dom; + + /* expiry */ + err = -EINVAL; + exp.h.expiry_time = get_expiry(&mesg); + if (exp.h.expiry_time == 0) + goto out3; + + /* flags */ + err = get_int(&mesg, &an_int); + if (err == -ENOENT) { + err = 0; + set_bit(CACHE_NEGATIVE, &exp.h.flags); + } else { + if (err || an_int < 0) + goto out3; + exp.ex_flags= an_int; + + /* anon uid */ + err = get_int(&mesg, &an_int); + if (err) + goto out3; + exp.ex_anon_uid= an_int; + + /* anon gid */ + err = get_int(&mesg, &an_int); + if (err) + goto out3; + exp.ex_anon_gid= an_int; + + /* fsid */ + err = get_int(&mesg, &an_int); + if (err) + goto out3; + exp.ex_fsid = an_int; + + while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { + if (strcmp(buf, "fsloc") == 0) + err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); + else if (strcmp(buf, "uuid") == 0) { + /* expect a 16 byte uuid encoded as \xXXXX... */ + len = qword_get(&mesg, buf, PAGE_SIZE); + if (len != 16) + err = -EINVAL; + else { + exp.ex_uuid = + kmemdup(buf, 16, GFP_KERNEL); + if (exp.ex_uuid == NULL) + err = -ENOMEM; + } + } else if (strcmp(buf, "secinfo") == 0) + err = secinfo_parse(&mesg, buf, &exp); + else + /* quietly ignore unknown words and anything + * following. Newer user-space can try to set + * new values, then see what the result was. 
+ */ + break; + if (err) + goto out4; + } + + err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags, + exp.ex_uuid); + if (err) + goto out4; + } + + expp = svc_export_lookup(&exp); + if (expp) + expp = svc_export_update(&exp, expp); + else + err = -ENOMEM; + cache_flush(); + if (expp == NULL) + err = -ENOMEM; + else + exp_put(expp); +out4: + nfsd4_fslocs_free(&exp.ex_fslocs); + kfree(exp.ex_uuid); +out3: + path_put(&exp.ex_path); +out1: + auth_domain_put(dom); +out: + kfree(buf); + return err; +} + +static void exp_flags(struct seq_file *m, int flag, int fsid, + uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); +static void show_secinfo(struct seq_file *m, struct svc_export *exp); + +static int svc_export_show(struct seq_file *m, + struct cache_detail *cd, + struct cache_head *h) +{ + struct svc_export *exp ; + + if (h ==NULL) { + seq_puts(m, "#path domain(flags)\n"); + return 0; + } + exp = container_of(h, struct svc_export, h); + seq_path(m, &exp->ex_path, " \t\n\\"); + seq_putc(m, '\t'); + seq_escape(m, exp->ex_client->name, " \t\n\\"); + seq_putc(m, '('); + if (test_bit(CACHE_VALID, &h->flags) && + !test_bit(CACHE_NEGATIVE, &h->flags)) { + exp_flags(m, exp->ex_flags, exp->ex_fsid, + exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs); + if (exp->ex_uuid) { + int i; + seq_puts(m, ",uuid="); + for (i=0; i<16; i++) { + if ((i&3) == 0 && i) + seq_putc(m, ':'); + seq_printf(m, "%02x", exp->ex_uuid[i]); + } + } + show_secinfo(m, exp); + } + seq_puts(m, ")\n"); + return 0; +} +static int svc_export_match(struct cache_head *a, struct cache_head *b) +{ + struct svc_export *orig = container_of(a, struct svc_export, h); + struct svc_export *new = container_of(b, struct svc_export, h); + return orig->ex_client == new->ex_client && + orig->ex_path.dentry == new->ex_path.dentry && + orig->ex_path.mnt == new->ex_path.mnt; +} + +static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) +{ + struct svc_export *new = container_of(cnew, 
struct svc_export, h); + struct svc_export *item = container_of(citem, struct svc_export, h); + + kref_get(&item->ex_client->ref); + new->ex_client = item->ex_client; + new->ex_path.dentry = dget(item->ex_path.dentry); + new->ex_path.mnt = mntget(item->ex_path.mnt); + new->ex_fslocs.locations = NULL; + new->ex_fslocs.locations_count = 0; + new->ex_fslocs.migrated = 0; +} + +static void export_update(struct cache_head *cnew, struct cache_head *citem) +{ + struct svc_export *new = container_of(cnew, struct svc_export, h); + struct svc_export *item = container_of(citem, struct svc_export, h); + int i; + + new->ex_flags = item->ex_flags; + new->ex_anon_uid = item->ex_anon_uid; + new->ex_anon_gid = item->ex_anon_gid; + new->ex_fsid = item->ex_fsid; + new->ex_uuid = item->ex_uuid; + item->ex_uuid = NULL; + new->ex_fslocs.locations = item->ex_fslocs.locations; + item->ex_fslocs.locations = NULL; + new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; + item->ex_fslocs.locations_count = 0; + new->ex_fslocs.migrated = item->ex_fslocs.migrated; + item->ex_fslocs.migrated = 0; + new->ex_nflavors = item->ex_nflavors; + for (i = 0; i < MAX_SECINFO_LIST; i++) { + new->ex_flavors[i] = item->ex_flavors[i]; + } +} + +static struct cache_head *svc_export_alloc(void) +{ + struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL); + if (i) + return &i->h; + else + return NULL; +} + +struct cache_detail svc_export_cache = { + .owner = THIS_MODULE, + .hash_size = EXPORT_HASHMAX, + .hash_table = export_table, + .name = "nfsd.export", + .cache_put = svc_export_put, + .cache_upcall = svc_export_upcall, + .cache_parse = svc_export_parse, + .cache_show = svc_export_show, + .match = svc_export_match, + .init = svc_export_init, + .update = export_update, + .alloc = svc_export_alloc, +}; + +static int +svc_export_hash(struct svc_export *exp) +{ + int hash; + + hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS); + hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS); + hash ^= 
hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS); + return hash; +} + +static struct svc_export * +svc_export_lookup(struct svc_export *exp) +{ + struct cache_head *ch; + int hash = svc_export_hash(exp); + + ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, + hash); + if (ch) + return container_of(ch, struct svc_export, h); + else + return NULL; +} + +static struct svc_export * +svc_export_update(struct svc_export *new, struct svc_export *old) +{ + struct cache_head *ch; + int hash = svc_export_hash(old); + + ch = sunrpc_cache_update(&svc_export_cache, &new->h, + &old->h, + hash); + if (ch) + return container_of(ch, struct svc_export, h); + else + return NULL; +} + + +static struct svc_expkey * +exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) +{ + struct svc_expkey key, *ek; + int err; + + if (!clp) + return ERR_PTR(-ENOENT); + + key.ek_client = clp; + key.ek_fsidtype = fsid_type; + memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); + + ek = svc_expkey_lookup(&key); + if (ek == NULL) + return ERR_PTR(-ENOMEM); + err = cache_check(&svc_expkey_cache, &ek->h, reqp); + if (err) + return ERR_PTR(err); + return ek; +} + + +static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, + struct cache_req *reqp) +{ + struct svc_export *exp, key; + int err; + + if (!clp) + return ERR_PTR(-ENOENT); + + key.ex_client = clp; + key.ex_path = *path; + + exp = svc_export_lookup(&key); + if (exp == NULL) + return ERR_PTR(-ENOMEM); + err = cache_check(&svc_export_cache, &exp->h, reqp); + if (err) + return ERR_PTR(err); + return exp; +} + +/* + * Find the export entry for a given dentry. 
+ */ +static struct svc_export *exp_parent(svc_client *clp, struct path *path) +{ + struct dentry *saved = dget(path->dentry); + svc_export *exp = exp_get_by_name(clp, path, NULL); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = exp_get_by_name(clp, path, NULL); + } + dput(path->dentry); + path->dentry = saved; + return exp; +} + + + +/* + * Obtain the root fh on behalf of a client. + * This could be done in user space, but I feel that it adds some safety + * since its harder to fool a kernel module than a user space program. + */ +int +exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize) +{ + struct svc_export *exp; + struct path path; + struct inode *inode; + struct svc_fh fh; + int err; + + err = -EPERM; + /* NB: we probably ought to check that it's NUL-terminated */ + if (kern_path(name, 0, &path)) { + printk("nfsd: exp_rootfh path not found %s", name); + return err; + } + inode = path.dentry->d_inode; + + dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", + name, path.dentry, clp->name, + inode->i_sb->s_id, inode->i_ino); + exp = exp_parent(clp, &path); + if (IS_ERR(exp)) { + err = PTR_ERR(exp); + goto out; + } + + /* + * fh must be initialized before calling fh_compose + */ + fh_init(&fh, maxsize); + if (fh_compose(&fh, exp, path.dentry, NULL)) + err = -EINVAL; + else + err = 0; + memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh)); + fh_put(&fh); + exp_put(exp); +out: + path_put(&path); + return err; +} + +static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, + u32 *fsidv, struct cache_req *reqp) +{ + struct svc_export *exp; + struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); + if (IS_ERR(ek)) + return ERR_CAST(ek); + + exp = exp_get_by_name(clp, &ek->ek_path, reqp); + cache_put(&ek->h, &svc_expkey_cache); + + if (IS_ERR(exp)) + return ERR_CAST(exp); + return exp; +} + +__be32 
check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) +{ + struct exp_flavor_info *f; + struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; + + /* legacy gss-only clients are always OK: */ + if (exp->ex_client == rqstp->rq_gssclient) + return 0; + /* ip-address based client; check sec= export option: */ + for (f = exp->ex_flavors; f < end; f++) { + if (f->pseudoflavor == rqstp->rq_flavor) + return 0; + } + /* defaults in absence of sec= options: */ + if (exp->ex_nflavors == 0) { + if (rqstp->rq_flavor == RPC_AUTH_NULL || + rqstp->rq_flavor == RPC_AUTH_UNIX) + return 0; + } + return nfserr_wrongsec; +} + +/* + * Uses rq_client and rq_gssclient to find an export; uses rq_client (an + * auth_unix client) if it's available and has secinfo information; + * otherwise, will try to use rq_gssclient. + * + * Called from functions that handle requests; functions that do work on + * behalf of mountd are passed a single client name to use, and should + * use exp_get_by_name() or exp_find(). + */ +struct svc_export * +rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) +{ + struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); + + if (rqstp->rq_client == NULL) + goto gss; + + /* First try the auth_unix client: */ + exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle); + if (PTR_ERR(exp) == -ENOENT) + goto gss; + if (IS_ERR(exp)) + return exp; + /* If it has secinfo, assume there are no gss/... 
clients */ + if (exp->ex_nflavors > 0) + return exp; +gss: + /* Otherwise, try falling back on gss client */ + if (rqstp->rq_gssclient == NULL) + return exp; + gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle); + if (PTR_ERR(gssexp) == -ENOENT) + return exp; + if (!IS_ERR(exp)) + exp_put(exp); + return gssexp; +} + +struct svc_export * +rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) +{ + struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); + + if (rqstp->rq_client == NULL) + goto gss; + + /* First try the auth_unix client: */ + exp = exp_find(rqstp->rq_client, fsid_type, fsidv, &rqstp->rq_chandle); + if (PTR_ERR(exp) == -ENOENT) + goto gss; + if (IS_ERR(exp)) + return exp; + /* If it has secinfo, assume there are no gss/... clients */ + if (exp->ex_nflavors > 0) + return exp; +gss: + /* Otherwise, try falling back on gss client */ + if (rqstp->rq_gssclient == NULL) + return exp; + gssexp = exp_find(rqstp->rq_gssclient, fsid_type, fsidv, + &rqstp->rq_chandle); + if (PTR_ERR(gssexp) == -ENOENT) + return exp; + if (!IS_ERR(exp)) + exp_put(exp); + return gssexp; +} + +struct svc_export * +rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) +{ + struct dentry *saved = dget(path->dentry); + struct svc_export *exp = rqst_exp_get_by_name(rqstp, path); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = rqst_exp_get_by_name(rqstp, path); + } + dput(path->dentry); + path->dentry = saved; + return exp; +} + +struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp) +{ + u32 fsidv[2]; + + mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); + + return rqst_exp_find(rqstp, FSID_NUM, fsidv); +} + +/* + * Called when we need the filehandle for the root of the pseudofs, + * for a given NFSv4 client. 
The root is defined to be the + * export point with fsid==0 + */ +__be32 +exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) +{ + struct svc_export *exp; + __be32 rv; + + exp = rqst_find_fsidzero_export(rqstp); + if (IS_ERR(exp)) + return nfserrno(PTR_ERR(exp)); + rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); + exp_put(exp); + return rv; +} + +/* Iterator */ + +static void *e_start(struct seq_file *m, loff_t *pos) + __acquires(svc_export_cache.hash_lock) +{ + loff_t n = *pos; + unsigned hash, export; + struct cache_head *ch; + + read_lock(&svc_export_cache.hash_lock); + if (!n--) + return SEQ_START_TOKEN; + hash = n >> 32; + export = n & ((1LL<<32) - 1); + + + for (ch=export_table[hash]; ch; ch=ch->next) + if (!export--) + return ch; + n &= ~((1LL<<32) - 1); + do { + hash++; + n += 1LL<<32; + } while(hash < EXPORT_HASHMAX && export_table[hash]==NULL); + if (hash >= EXPORT_HASHMAX) + return NULL; + *pos = n+1; + return export_table[hash]; +} + +static void *e_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct cache_head *ch = p; + int hash = (*pos >> 32); + + if (p == SEQ_START_TOKEN) + hash = 0; + else if (ch->next == NULL) { + hash++; + *pos += 1LL<<32; + } else { + ++*pos; + return ch->next; + } + *pos &= ~((1LL<<32) - 1); + while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) { + hash++; + *pos += 1LL<<32; + } + if (hash >= EXPORT_HASHMAX) + return NULL; + ++*pos; + return export_table[hash]; +} + +static void e_stop(struct seq_file *m, void *p) + __releases(svc_export_cache.hash_lock) +{ + read_unlock(&svc_export_cache.hash_lock); +} + +static struct flags { + int flag; + char *name[2]; +} expflags[] = { + { NFSEXP_READONLY, {"ro", "rw"}}, + { NFSEXP_INSECURE_PORT, {"insecure", ""}}, + { NFSEXP_ROOTSQUASH, {"root_squash", "no_root_squash"}}, + { NFSEXP_ALLSQUASH, {"all_squash", ""}}, + { NFSEXP_ASYNC, {"async", "sync"}}, + { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, + { NFSEXP_NOHIDE, {"nohide", ""}}, + { 
NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, + { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, + { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, + { NFSEXP_V4ROOT, {"v4root", ""}}, + { 0, {"", ""}} +}; + +static void show_expflags(struct seq_file *m, int flags, int mask) +{ + struct flags *flg; + int state, first = 0; + + for (flg = expflags; flg->flag; flg++) { + if (flg->flag & ~mask) + continue; + state = (flg->flag & flags) ? 0 : 1; + if (*flg->name[state]) + seq_printf(m, "%s%s", first++?",":"", flg->name[state]); + } +} + +static void show_secinfo_flags(struct seq_file *m, int flags) +{ + seq_printf(m, ","); + show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); +} + +static bool secinfo_flags_equal(int f, int g) +{ + f &= NFSEXP_SECINFO_FLAGS; + g &= NFSEXP_SECINFO_FLAGS; + return f == g; +} + +static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end) +{ + int flags; + + flags = (*fp)->flags; + seq_printf(m, ",sec=%d", (*fp)->pseudoflavor); + (*fp)++; + while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) { + seq_printf(m, ":%d", (*fp)->pseudoflavor); + (*fp)++; + } + return flags; +} + +static void show_secinfo(struct seq_file *m, struct svc_export *exp) +{ + struct exp_flavor_info *f; + struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; + int flags; + + if (exp->ex_nflavors == 0) + return; + f = exp->ex_flavors; + flags = show_secinfo_run(m, &f, end); + if (!secinfo_flags_equal(flags, exp->ex_flags)) + show_secinfo_flags(m, flags); + while (f != end) { + flags = show_secinfo_run(m, &f, end); + show_secinfo_flags(m, flags); + } +} + +static void exp_flags(struct seq_file *m, int flag, int fsid, + uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) +{ + show_expflags(m, flag, NFSEXP_ALLFLAGS); + if (flag & NFSEXP_FSID) + seq_printf(m, ",fsid=%d", fsid); + if (anonu != (uid_t)-2 && anonu != (0x10000-2)) + seq_printf(m, ",anonuid=%u", anonu); + if (anong != (gid_t)-2 && anong != (0x10000-2)) 
+ seq_printf(m, ",anongid=%u", anong); + if (fsloc && fsloc->locations_count > 0) { + char *loctype = (fsloc->migrated) ? "refer" : "replicas"; + int i; + + seq_printf(m, ",%s=", loctype); + seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\"); + seq_putc(m, '@'); + seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\"); + for (i = 1; i < fsloc->locations_count; i++) { + seq_putc(m, ';'); + seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\"); + seq_putc(m, '@'); + seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\"); + } + } +} + +static int e_show(struct seq_file *m, void *p) +{ + struct cache_head *cp = p; + struct svc_export *exp = container_of(cp, struct svc_export, h); + + if (p == SEQ_START_TOKEN) { + seq_puts(m, "# Version 1.1\n"); + seq_puts(m, "# Path Client(Flags) # IPs\n"); + return 0; + } + + cache_get(&exp->h); + if (cache_check(&svc_export_cache, &exp->h, NULL)) + return 0; + cache_put(&exp->h, &svc_export_cache); + return svc_export_show(m, &svc_export_cache, cp); +} + +const struct seq_operations nfs_exports_op = { + .start = e_start, + .next = e_next, + .stop = e_stop, + .show = e_show, +}; + + +/* + * Initialize the exports module. + */ +int +nfsd_export_init(void) +{ + int rv; + dprintk("nfsd: initializing export module.\n"); + + rv = cache_register_net(&svc_export_cache, &init_net); + if (rv) + return rv; + rv = cache_register_net(&svc_expkey_cache, &init_net); + if (rv) + cache_unregister_net(&svc_export_cache, &init_net); + return rv; + +} + +/* + * Flush exports table - called when last nfsd thread is killed + */ +void +nfsd_export_flush(void) +{ + cache_purge(&svc_expkey_cache); + cache_purge(&svc_export_cache); +} + +/* + * Shutdown the exports module. 
+ */ +void +nfsd_export_shutdown(void) +{ + + dprintk("nfsd: shutting down export module.\n"); + + cache_unregister_net(&svc_expkey_cache, &init_net); + cache_unregister_net(&svc_export_cache, &init_net); + svcauth_unix_purge(); + + dprintk("nfsd: export shutdown complete.\n"); +} diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c new file mode 100644 index 00000000000..ce7f0758d84 --- /dev/null +++ b/fs/nfsd/fault_inject.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> + * + * Uses debugfs to create fault injection points for client testing + */ + +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <linux/module.h> + +#include "state.h" +#include "fault_inject.h" + +struct nfsd_fault_inject_op { + char *file; + void (*func)(u64); +}; + +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .func = nfsd_forget_clients, + }, + { + .file = "forget_locks", + .func = nfsd_forget_locks, + }, + { + .file = "forget_openowners", + .func = nfsd_forget_openowners, + }, + { + .file = "forget_delegations", + .func = nfsd_forget_delegations, + }, + { + .file = "recall_delegations", + .func = nfsd_recall_delegations, + }, +}; + +static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); +static struct dentry *debug_dir; + +static int nfsd_inject_set(void *op_ptr, u64 val) +{ + struct nfsd_fault_inject_op *op = op_ptr; + + if (val == 0) + printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); + else + printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); + + op->func(val); + return 0; +} + +static int nfsd_inject_get(void *data, u64 *val) +{ + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); + +void nfsd_fault_inject_cleanup(void) +{ + debugfs_remove_recursive(debug_dir); +} + +int nfsd_fault_inject_init(void) +{ + unsigned int i; + struct nfsd_fault_inject_op *op; + 
mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + + debug_dir = debugfs_create_dir("nfsd", NULL); + if (!debug_dir) + goto fail; + + for (i = 0; i < NUM_INJECT_OPS; i++) { + op = &inject_ops[i]; + if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) + goto fail; + } + return 0; + +fail: + nfsd_fault_inject_cleanup(); + return -ENOMEM; +} diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h new file mode 100644 index 00000000000..90bd0570956 --- /dev/null +++ b/fs/nfsd/fault_inject.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> + * + * Function definitions for fault injection + */ + +#ifndef LINUX_NFSD_FAULT_INJECT_H +#define LINUX_NFSD_FAULT_INJECT_H + +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +void nfsd_forget_clients(u64); +void nfsd_forget_locks(u64); +void nfsd_forget_openowners(u64); +void nfsd_forget_delegations(u64); +void nfsd_recall_delegations(u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +static inline void nfsd_forget_clients(u64 num) {} +static inline void nfsd_forget_locks(u64 num) {} +static inline void nfsd_forget_openowners(u64 num) {} +static inline void nfsd_forget_delegations(u64 num) {} +static inline void nfsd_recall_delegations(u64 num) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + +#endif /* LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h new file mode 100644 index 00000000000..2f3be132153 --- /dev/null +++ b/fs/nfsd/idmap.h @@ -0,0 +1,62 @@ +/* + * Mapping of UID to name and vice versa. + * + * Copyright (c) 2002, 2003 The Regents of the University of + * Michigan. All rights reserved. 
+> * + * Marius Aamodt Eriksen <marius@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef LINUX_NFSD_IDMAP_H +#define LINUX_NFSD_IDMAP_H + +#include <linux/in.h> +#include <linux/sunrpc/svc.h> + +/* XXX from linux/nfs_idmap.h */ +#define IDMAP_NAMESZ 128 + +#ifdef CONFIG_NFSD_V4 +int nfsd_idmap_init(void); +void nfsd_idmap_shutdown(void); +#else +static inline int nfsd_idmap_init(void) +{ + return 0; +} +static inline void nfsd_idmap_shutdown(void) +{ +} +#endif + +__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); +__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); +int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); +int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); + +#endif /* LINUX_NFSD_IDMAP_H */ diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c new file mode 100644 index 00000000000..77e7a5cca88 --- /dev/null +++ b/fs/nfsd/lockd.c @@ -0,0 +1,77 @@ +/* + * This file contains all the stubs needed when communicating with lockd. + * This level of indirection is necessary so we can run nfsd+lockd without + * requiring the nfs client to be compiled in/loaded, and vice versa. + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/file.h> +#include <linux/lockd/bind.h> +#include "nfsd.h" +#include "vfs.h" + +#define NFSDDBG_FACILITY NFSDDBG_LOCKD + +#ifdef CONFIG_LOCKD_V4 +#define nlm_stale_fh nlm4_stale_fh +#define nlm_failed nlm4_failed +#else +#define nlm_stale_fh nlm_lck_denied_nolocks +#define nlm_failed nlm_lck_denied_nolocks +#endif +/* + * Note: we hold the dentry use count while the file is open. + */ +static __be32 +nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) +{ + __be32 nfserr; + struct svc_fh fh; + + /* must initialize before using! 
but maxsize doesn't matter */ + fh_init(&fh,0); + fh.fh_handle.fh_size = f->size; + memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); + fh.fh_export = NULL; + + nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); + fh_put(&fh); + /* We return nlm error codes as nlm doesn't know + * about nfsd, but nfsd does know about nlm.. + */ + switch (nfserr) { + case nfs_ok: + return 0; + case nfserr_dropit: + return nlm_drop_reply; + case nfserr_stale: + return nlm_stale_fh; + default: + return nlm_failed; + } +} + +static void +nlm_fclose(struct file *filp) +{ + fput(filp); +} + +static struct nlmsvc_binding nfsd_nlm_ops = { + .fopen = nlm_fopen, /* open file for locking */ + .fclose = nlm_fclose, /* close file */ +}; + +void +nfsd_lockd_init(void) +{ + dprintk("nfsd: initializing lockd\n"); + nlmsvc_ops = &nfsd_nlm_ops; +} + +void +nfsd_lockd_shutdown(void) +{ + nlmsvc_ops = NULL; +} diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c new file mode 100644 index 00000000000..6aa5590c367 --- /dev/null +++ b/fs/nfsd/nfs2acl.c @@ -0,0 +1,356 @@ +/* + * Process version 2 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> + */ + +#include "nfsd.h" +/* FIXME: nfsacl.h is a broken header */ +#include <linux/nfsacl.h> +#include <linux/gfp.h> +#include "cache.h" +#include "xdr3.h" +#include "vfs.h" + +#define NFSDDBG_FACILITY NFSDDBG_PROC +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static __be32 +nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. 
+ */ +static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, + struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) +{ + svc_fh *fh; + struct posix_acl *acl; + __be32 nfserr = 0; + + dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) + RETURN_STATUS(nfserr); + + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + RETURN_STATUS(nfserr_inval); + resp->mask = argp->mask; + + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { + acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + if (acl == NULL) { + /* Solaris returns the inode's minimum ACL. */ + + struct inode *inode = fh->fh_dentry->d_inode; + acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + } + resp->acl_access = acl; + } + if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { + /* Check how Solaris handles requests for the Default ACL + of a non-directory! */ + + acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + resp->acl_default = acl; + } + + /* resp->acl_{access,default} are released in nfssvc_release_getacl. */ + RETURN_STATUS(0); + +fail: + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * Set the Access and/or Default ACL of a file. 
+ */ +static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd_attrstat *resp) +{ + svc_fh *fh; + __be32 nfserr = 0; + + dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfssvc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + return nfserr; +} + +/* + * Check file attributes + */ +static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp, + struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) +{ + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); +} + +/* + * Check file access + */ +static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, + struct nfsd3_accessres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: ACCESS(2acl) %s 0x%x\n", + SVCFH_fmt(&argp->fh), + argp->access); + + fh_copy(&resp->fh, &argp->fh); + resp->access = argp->access; + nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + return nfserr; +} + +/* + * XDR decode functions + */ +static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_getaclargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_setaclargs *argp) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = 
ntohl(*p++); + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (argp->mask & NFS_ACL) ? + &argp->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (argp->mask & NFS_DFACL) ? + &argp->acl_default : NULL); + return (n > 0); +} + +static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd_fhandle *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_accessargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ + +/* + * There must be an encoding function for void results so svc_process + * will work properly. + */ +int +nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +/* GETACL */ +static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + struct inode *inode; + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + int w; + + /* + * Since this is version 2, the check for nfserr in + * nfsd_dispatch actually ensures the following cannot happen. + * However, it seems fragile to depend on that. + */ + if (dentry == NULL || dentry->d_inode == NULL) + return 0; + inode = dentry->d_inode; + + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? 
resp->acl_default : NULL); + while (w > 0) { + if (!rqstp->rq_respages[rqstp->rq_resused++]) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + return 1; +} + +static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd_attrstat *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +/* ACCESS */ +static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_accessres *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +static int nfsaclsvc_release_attrstat(struct svc_rqst *rqstp, __be32 *p, + struct nfsd_attrstat *resp) +{ + fh_put(&resp->fh); + return 1; +} + +static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_accessres *resp) +{ + fh_put(&resp->fh); + return 1; +} + +#define nfsaclsvc_decode_voidargs NULL +#define nfsaclsvc_release_void NULL +#define nfsd3_fhandleargs nfsd_fhandle +#define nfsd3_attrstatres nfsd_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsacld_proc_##name, \ + (kxdrproc_t) nfsaclsvc_decode_##argt##args, \ + (kxdrproc_t) nfsaclsvc_encode_##rest##res, \ + (kxdrproc_t) nfsaclsvc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 
/* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures2[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, attrstat, attrstat, RC_NOCACHE, ST+AT), + PROC(getattr, fhandle, attrstat, attrstat, RC_NOCACHE, ST+AT), + PROC(access, access, access, access, RC_NOCACHE, ST+AT+1), +}; + +struct svc_version nfsd_acl_version2 = { + .vs_vers = 2, + .vs_nproc = 5, + .vs_proc = nfsd_acl_procedures2, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, + .vs_hidden = 0, +}; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c new file mode 100644 index 00000000000..a596e9d987e --- /dev/null +++ b/fs/nfsd/nfs3acl.c @@ -0,0 +1,267 @@ +/* + * Process version 3 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> + */ + +#include "nfsd.h" +/* FIXME: nfsacl.h is a broken header */ +#include <linux/nfsacl.h> +#include <linux/gfp.h> +#include "cache.h" +#include "xdr3.h" +#include "vfs.h" + +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static __be32 +nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. 
+ */ +static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, + struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) +{ + svc_fh *fh; + struct posix_acl *acl; + __be32 nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) + RETURN_STATUS(nfserr); + + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + RETURN_STATUS(nfserr_inval); + resp->mask = argp->mask; + + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { + acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + if (acl == NULL) { + /* Solaris returns the inode's minimum ACL. */ + + struct inode *inode = fh->fh_dentry->d_inode; + acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + } + resp->acl_access = acl; + } + if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { + /* Check how Solaris handles requests for the Default ACL + of a non-directory! */ + + acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + resp->acl_default = acl; + } + + /* resp->acl_{access,default} are released in nfs3svc_release_getacl. */ + RETURN_STATUS(0); + +fail: + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * Set the Access and/or Default ACL of a file. 
+ */ +static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd3_attrstat *resp) +{ + svc_fh *fh; + __be32 nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfs3svc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * XDR decode functions + */ +static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_getaclargs *args) +{ + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_setaclargs *args) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p++); + if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (args->mask & NFS_ACL) ? + &args->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (args->mask & NFS_DFACL) ? 
+ &args->acl_default : NULL); + return (n > 0); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0 && dentry && dentry->d_inode) { + struct inode *inode = dentry->d_inode; + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + int w; + + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); + while (w > 0) { + if (!rqstp->rq_respages[rqstp->rq_resused++]) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + } else + if (!xdr_ressize_check(rqstp, p)) + return 0; + + return 1; +} + +/* SETACL */ +static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_attrstat *resp) +{ + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +#define nfs3svc_decode_voidargs NULL +#define nfs3svc_release_void NULL +#define nfsd3_setaclres nfsd3_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd3_proc_##name, \ + (kxdrproc_t) 
nfs3svc_decode_##argt##args, \ + (kxdrproc_t) nfs3svc_encode_##rest##res, \ + (kxdrproc_t) nfs3svc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures3[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, setacl, fhandle, RC_NOCACHE, ST+pAT), +}; + +struct svc_version nfsd_acl_version3 = { + .vs_vers = 3, + .vs_nproc = 3, + .vs_proc = nfsd_acl_procedures3, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, + .vs_hidden = 0, +}; + diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c new file mode 100644 index 00000000000..9095f3c21df --- /dev/null +++ b/fs/nfsd/nfs3proc.c @@ -0,0 +1,896 @@ +/* + * Process version 3 NFS requests. + * + * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/magic.h> + +#include "cache.h" +#include "xdr3.h" +#include "vfs.h" + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +static int nfs3_ftypes[] = { + 0, /* NF3NON */ + S_IFREG, /* NF3REG */ + S_IFDIR, /* NF3DIR */ + S_IFBLK, /* NF3BLK */ + S_IFCHR, /* NF3CHR */ + S_IFLNK, /* NF3LNK */ + S_IFSOCK, /* NF3SOCK */ + S_IFIFO, /* NF3FIFO */ +}; + +/* + * NULL call. 
+ */ +static __be32 +nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get a file's attributes + */ +static __be32 +nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, + struct nfsd3_attrstat *resp) +{ + int err; + __be32 nfserr; + + dprintk("nfsd: GETATTR(3) %s\n", + SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); + if (nfserr) + RETURN_STATUS(nfserr); + + err = vfs_getattr(resp->fh.fh_export->ex_path.mnt, + resp->fh.fh_dentry, &resp->stat); + nfserr = nfserrno(err); + + RETURN_STATUS(nfserr); +} + +/* + * Set a file's attributes + */ +static __be32 +nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp, + struct nfsd3_attrstat *resp) +{ + __be32 nfserr; + + dprintk("nfsd: SETATTR(3) %s\n", + SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, + argp->check_guard, argp->guardtime); + RETURN_STATUS(nfserr); +} + +/* + * Look up a path name component + */ +static __be32 +nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + struct nfsd3_diropres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: LOOKUP(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + fh_copy(&resp->dirfh, &argp->fh); + fh_init(&resp->fh, NFS3_FHSIZE); + + nfserr = nfsd_lookup(rqstp, &resp->dirfh, + argp->name, + argp->len, + &resp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Check file access + */ +static __be32 +nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, + struct nfsd3_accessres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: ACCESS(3) %s 0x%x\n", + SVCFH_fmt(&argp->fh), + argp->access); + + fh_copy(&resp->fh, &argp->fh); + resp->access = argp->access; + nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + RETURN_STATUS(nfserr); +} + +/* + * Read a symlink. 
+ */ +static __be32 +nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp, + struct nfsd3_readlinkres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); + + /* Read the symlink. */ + fh_copy(&resp->fh, &argp->fh); + resp->len = NFS3_MAXPATHLEN; + nfserr = nfsd_readlink(rqstp, &resp->fh, argp->buffer, &resp->len); + RETURN_STATUS(nfserr); +} + +/* + * Read a portion of a file. + */ +static __be32 +nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, + struct nfsd3_readres *resp) +{ + __be32 nfserr; + u32 max_blocksize = svc_max_payload(rqstp); + + dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", + SVCFH_fmt(&argp->fh), + (unsigned long) argp->count, + (unsigned long long) argp->offset); + + /* Obtain buffer pointer for payload. + * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) + * + 1 (xdr opaque byte count) = 26 + */ + + resp->count = argp->count; + if (max_blocksize < resp->count) + resp->count = max_blocksize; + + svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); + + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_read(rqstp, &resp->fh, + argp->offset, + rqstp->rq_vec, argp->vlen, + &resp->count); + if (nfserr == 0) { + struct inode *inode = resp->fh.fh_dentry->d_inode; + + resp->eof = (argp->offset + resp->count) >= inode->i_size; + } + + RETURN_STATUS(nfserr); +} + +/* + * Write data to a file + */ +static __be32 +nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, + struct nfsd3_writeres *resp) +{ + __be32 nfserr; + unsigned long cnt = argp->len; + + dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", + SVCFH_fmt(&argp->fh), + argp->len, + (unsigned long long) argp->offset, + argp->stable? 
" stable" : ""); + + fh_copy(&resp->fh, &argp->fh); + resp->committed = argp->stable; + nfserr = nfsd_write(rqstp, &resp->fh, NULL, + argp->offset, + rqstp->rq_vec, argp->vlen, + &cnt, + &resp->committed); + resp->count = cnt; + RETURN_STATUS(nfserr); +} + +/* + * With NFSv3, CREATE processing is a lot easier than with NFSv2. + * At least in theory; we'll see how it fares in practice when the + * first reports about SunOS compatibility problems start to pour in... + */ +static __be32 +nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, + struct nfsd3_diropres *resp) +{ + svc_fh *dirfhp, *newfhp = NULL; + struct iattr *attr; + __be32 nfserr; + + dprintk("nfsd: CREATE(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + dirfhp = fh_copy(&resp->dirfh, &argp->fh); + newfhp = fh_init(&resp->fh, NFS3_FHSIZE); + attr = &argp->attrs; + + /* Get the directory inode */ + nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE); + if (nfserr) + RETURN_STATUS(nfserr); + + /* Unfudge the mode bits */ + attr->ia_mode &= ~S_IFMT; + if (!(attr->ia_valid & ATTR_MODE)) { + attr->ia_valid |= ATTR_MODE; + attr->ia_mode = S_IFREG; + } else { + attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG; + } + + /* Now create the file and set attributes */ + nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, + attr, newfhp, + argp->createmode, argp->verf, NULL, NULL); + + RETURN_STATUS(nfserr); +} + +/* + * Make directory. This operation is not idempotent. 
+ */ +static __be32 +nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, + struct nfsd3_diropres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: MKDIR(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + argp->attrs.ia_valid &= ~ATTR_SIZE; + fh_copy(&resp->dirfh, &argp->fh); + fh_init(&resp->fh, NFS3_FHSIZE); + nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, + &argp->attrs, S_IFDIR, 0, &resp->fh); + fh_unlock(&resp->dirfh); + RETURN_STATUS(nfserr); +} + +static __be32 +nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, + struct nfsd3_diropres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", + SVCFH_fmt(&argp->ffh), + argp->flen, argp->fname, + argp->tlen, argp->tname); + + fh_copy(&resp->dirfh, &argp->ffh); + fh_init(&resp->fh, NFS3_FHSIZE); + nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, + argp->tname, argp->tlen, + &resp->fh, &argp->attrs); + RETURN_STATUS(nfserr); +} + +/* + * Make socket/fifo/device. + */ +static __be32 +nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp, + struct nfsd3_diropres *resp) +{ + __be32 nfserr; + int type; + dev_t rdev = 0; + + dprintk("nfsd: MKNOD(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + fh_copy(&resp->dirfh, &argp->fh); + fh_init(&resp->fh, NFS3_FHSIZE); + + if (argp->ftype == 0 || argp->ftype >= NF3BAD) + RETURN_STATUS(nfserr_inval); + if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) { + rdev = MKDEV(argp->major, argp->minor); + if (MAJOR(rdev) != argp->major || + MINOR(rdev) != argp->minor) + RETURN_STATUS(nfserr_inval); + } else + if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO) + RETURN_STATUS(nfserr_inval); + + type = nfs3_ftypes[argp->ftype]; + nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, + &argp->attrs, type, rdev, &resp->fh); + fh_unlock(&resp->dirfh); + RETURN_STATUS(nfserr); +} + +/* + * Remove file/fifo/socket etc. 
+ */ +static __be32 +nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + struct nfsd3_attrstat *resp) +{ + __be32 nfserr; + + dprintk("nfsd: REMOVE(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + /* Unlink. -S_IFDIR means file must not be a directory */ + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); + fh_unlock(&resp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Remove a directory + */ +static __be32 +nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + struct nfsd3_attrstat *resp) +{ + __be32 nfserr; + + dprintk("nfsd: RMDIR(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); + fh_unlock(&resp->fh); + RETURN_STATUS(nfserr); +} + +static __be32 +nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp, + struct nfsd3_renameres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: RENAME(3) %s %.*s ->\n", + SVCFH_fmt(&argp->ffh), + argp->flen, + argp->fname); + dprintk("nfsd: -> %s %.*s\n", + SVCFH_fmt(&argp->tfh), + argp->tlen, + argp->tname); + + fh_copy(&resp->ffh, &argp->ffh); + fh_copy(&resp->tfh, &argp->tfh); + nfserr = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen, + &resp->tfh, argp->tname, argp->tlen); + RETURN_STATUS(nfserr); +} + +static __be32 +nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp, + struct nfsd3_linkres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: LINK(3) %s ->\n", + SVCFH_fmt(&argp->ffh)); + dprintk("nfsd: -> %s %.*s\n", + SVCFH_fmt(&argp->tfh), + argp->tlen, + argp->tname); + + fh_copy(&resp->fh, &argp->ffh); + fh_copy(&resp->tfh, &argp->tfh); + nfserr = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen, + &resp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Read a portion of a directory. 
+ */ +static __be32 +nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, + struct nfsd3_readdirres *resp) +{ + __be32 nfserr; + int count; + + dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->count, (u32) argp->cookie); + + /* Make sure we've room for the NULL ptr & eof flag, and shrink to + * client read size */ + count = (argp->count >> 2) - 2; + + /* Read directory and encode entries on the fly */ + fh_copy(&resp->fh, &argp->fh); + + resp->buflen = count; + resp->common.err = nfs_ok; + resp->buffer = argp->buffer; + resp->rqstp = rqstp; + nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie, + &resp->common, nfs3svc_encode_entry); + memcpy(resp->verf, argp->verf, 8); + resp->count = resp->buffer - argp->buffer; + if (resp->offset) + xdr_encode_hyper(resp->offset, argp->cookie); + + RETURN_STATUS(nfserr); +} + +/* + * Read a portion of a directory, including file handles and attrs. + * For now, we choose to ignore the dircount parameter. + */ +static __be32 +nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, + struct nfsd3_readdirres *resp) +{ + __be32 nfserr; + int count = 0; + loff_t offset; + int i; + caddr_t page_addr = NULL; + + dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->count, (u32) argp->cookie); + + /* Convert byte count to number of words (i.e. 
>> 2), + * and reserve room for the NULL ptr & eof flag (-2 words) */ + resp->count = (argp->count >> 2) - 2; + + /* Read directory and encode entries on the fly */ + fh_copy(&resp->fh, &argp->fh); + + resp->common.err = nfs_ok; + resp->buffer = argp->buffer; + resp->buflen = resp->count; + resp->rqstp = rqstp; + offset = argp->cookie; + nfserr = nfsd_readdir(rqstp, &resp->fh, + &offset, + &resp->common, + nfs3svc_encode_entry_plus); + memcpy(resp->verf, argp->verf, 8); + for (i=1; i<rqstp->rq_resused ; i++) { + page_addr = page_address(rqstp->rq_respages[i]); + + if (((caddr_t)resp->buffer >= page_addr) && + ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { + count += (caddr_t)resp->buffer - page_addr; + break; + } + count += PAGE_SIZE; + } + resp->count = count >> 2; + if (resp->offset) { + if (unlikely(resp->offset1)) { + /* we ended up with offset on a page boundary */ + *resp->offset = htonl(offset >> 32); + *resp->offset1 = htonl(offset & 0xffffffff); + resp->offset1 = NULL; + } else { + xdr_encode_hyper(resp->offset, offset); + } + } + + RETURN_STATUS(nfserr); +} + +/* + * Get file system stats + */ +static __be32 +nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd3_fsstatres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: FSSTAT(3) %s\n", + SVCFH_fmt(&argp->fh)); + + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); + fh_put(&argp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Get file system info + */ +static __be32 +nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd3_fsinfores *resp) +{ + __be32 nfserr; + u32 max_blocksize = svc_max_payload(rqstp); + + dprintk("nfsd: FSINFO(3) %s\n", + SVCFH_fmt(&argp->fh)); + + resp->f_rtmax = max_blocksize; + resp->f_rtpref = max_blocksize; + resp->f_rtmult = PAGE_SIZE; + resp->f_wtmax = max_blocksize; + resp->f_wtpref = max_blocksize; + resp->f_wtmult = PAGE_SIZE; + resp->f_dtpref = PAGE_SIZE; + resp->f_maxfilesize = ~(u32) 0; + resp->f_properties = 
NFS3_FSF_DEFAULT; + + nfserr = fh_verify(rqstp, &argp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); + + /* Check special features of the file system. May request + * different read/write sizes for file systems known to have + * problems with large blocks */ + if (nfserr == 0) { + struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; + + /* Note that we don't care for remote fs's here */ + if (sb->s_magic == MSDOS_SUPER_MAGIC) { + resp->f_properties = NFS3_FSF_BILLYBOY; + } + resp->f_maxfilesize = sb->s_maxbytes; + } + + fh_put(&argp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Get pathconf info for the specified file + */ +static __be32 +nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd3_pathconfres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: PATHCONF(3) %s\n", + SVCFH_fmt(&argp->fh)); + + /* Set default pathconf */ + resp->p_link_max = 255; /* at least */ + resp->p_name_max = 255; /* at least */ + resp->p_no_trunc = 0; + resp->p_chown_restricted = 1; + resp->p_case_insensitive = 0; + resp->p_case_preserving = 1; + + nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); + + if (nfserr == 0) { + struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; + + /* Note that we don't care for remote fs's here */ + switch (sb->s_magic) { + case EXT2_SUPER_MAGIC: + resp->p_link_max = EXT2_LINK_MAX; + resp->p_name_max = EXT2_NAME_LEN; + break; + case MSDOS_SUPER_MAGIC: + resp->p_case_insensitive = 1; + resp->p_case_preserving = 0; + break; + } + } + + fh_put(&argp->fh); + RETURN_STATUS(nfserr); +} + + +/* + * Commit a file (range) to stable storage. 
+ */ +static __be32 +nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp, + struct nfsd3_commitres *resp) +{ + __be32 nfserr; + + dprintk("nfsd: COMMIT(3) %s %u@%Lu\n", + SVCFH_fmt(&argp->fh), + argp->count, + (unsigned long long) argp->offset); + + if (argp->offset > NFS_OFFSET_MAX) + RETURN_STATUS(nfserr_inval); + + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count); + + RETURN_STATUS(nfserr); +} + + +/* + * NFSv3 Server procedures. + * Only the results of non-idempotent operations are cached. + */ +#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle +#define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat +#define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat +#define nfsd3_mkdirargs nfsd3_createargs +#define nfsd3_readdirplusargs nfsd3_readdirargs +#define nfsd3_fhandleargs nfsd_fhandle +#define nfsd3_fhandleres nfsd3_attrstat +#define nfsd3_attrstatres nfsd3_attrstat +#define nfsd3_wccstatres nfsd3_attrstat +#define nfsd3_createres nfsd3_diropres +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd3_proc_##name, \ + (kxdrproc_t) nfs3svc_decode_##argt##args, \ + (kxdrproc_t) nfs3svc_encode_##rest##res, \ + (kxdrproc_t) nfs3svc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define FH 17 /* filehandle with length */ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define WC (7+pAT) /* WCC attributes */ + +static struct svc_procedure nfsd_procedures3[22] = { + [NFS3PROC_NULL] = { + .pc_func = (svc_procfunc) nfsd3_proc_null, + .pc_encode = (kxdrproc_t) nfs3svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd3_voidargs), + .pc_ressize = sizeof(struct nfsd3_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + 
[NFS3PROC_GETATTR] = { + .pc_func = (svc_procfunc) nfsd3_proc_getattr, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_attrstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + }, + [NFS3PROC_SETATTR] = { + .pc_func = (svc_procfunc) nfsd3_proc_setattr, + .pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_sattrargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_LOOKUP] = { + .pc_func = (svc_procfunc) nfsd3_proc_lookup, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_diropres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_diropres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+FH+pAT+pAT, + }, + [NFS3PROC_ACCESS] = { + .pc_func = (svc_procfunc) nfsd3_proc_access, + .pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_accessres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_ressize = sizeof(struct nfsd3_accessres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+1, + }, + [NFS3PROC_READLINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_readlink, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readlinkargs), + .pc_ressize = sizeof(struct nfsd3_readlinkres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 
ST+pAT+1+NFS3_MAXPATHLEN/4, + }, + [NFS3PROC_READ] = { + .pc_func = (svc_procfunc) nfsd3_proc_read, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readargs), + .pc_ressize = sizeof(struct nfsd3_readres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4, + }, + [NFS3PROC_WRITE] = { + .pc_func = (svc_procfunc) nfsd3_proc_write, + .pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_writeres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_writeargs), + .pc_ressize = sizeof(struct nfsd3_writeres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC+4, + }, + [NFS3PROC_CREATE] = { + .pc_func = (svc_procfunc) nfsd3_proc_create, + .pc_decode = (kxdrproc_t) nfs3svc_decode_createargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_createargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_MKDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_mkdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_mkdirargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_SYMLINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_symlink, + .pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_symlinkargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + 
.pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_MKNOD] = { + .pc_func = (svc_procfunc) nfsd3_proc_mknod, + .pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_mknodargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_REMOVE] = { + .pc_func = (svc_procfunc) nfsd3_proc_remove, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_RMDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_rmdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_RENAME] = { + .pc_func = (svc_procfunc) nfsd3_proc_rename, + .pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_renameres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_renameargs), + .pc_ressize = sizeof(struct nfsd3_renameres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC+WC, + }, + [NFS3PROC_LINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_link, + .pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_linkres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_linkargs), + .pc_ressize = sizeof(struct nfsd3_linkres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = 
ST+pAT+WC, + }, + [NFS3PROC_READDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_readdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readdirargs), + .pc_ressize = sizeof(struct nfsd3_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFS3PROC_READDIRPLUS] = { + .pc_func = (svc_procfunc) nfsd3_proc_readdirplus, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readdirplusargs), + .pc_ressize = sizeof(struct nfsd3_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFS3PROC_FSSTAT] = { + .pc_func = (svc_procfunc) nfsd3_proc_fsstat, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_fsstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+2*6+1, + }, + [NFS3PROC_FSINFO] = { + .pc_func = (svc_procfunc) nfsd3_proc_fsinfo, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_fsinfores), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+12, + }, + [NFS3PROC_PATHCONF] = { + .pc_func = (svc_procfunc) nfsd3_proc_pathconf, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_pathconfres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+6, + }, + [NFS3PROC_COMMIT] = { + .pc_func = (svc_procfunc) nfsd3_proc_commit, + .pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_commitres, + 
.pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_commitargs), + .pc_ressize = sizeof(struct nfsd3_commitres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+WC+2, + }, +}; + +struct svc_version nfsd_version3 = { + .vs_vers = 3, + .vs_nproc = 22, + .vs_proc = nfsd_procedures3, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c new file mode 100644 index 00000000000..08c6e36ab2e --- /dev/null +++ b/fs/nfsd/nfs3xdr.c @@ -0,0 +1,1118 @@ +/* + * XDR support for nfsd/protocol version 3. + * + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + * + * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()! + */ + +#include <linux/namei.h> +#include "xdr3.h" +#include "auth.h" + +#define NFSDDBG_FACILITY NFSDDBG_XDR + + +/* + * Mapping of S_IF* types to NFS file types + */ +static u32 nfs3_ftypes[] = { + NF3NON, NF3FIFO, NF3CHR, NF3BAD, + NF3DIR, NF3BAD, NF3BLK, NF3BAD, + NF3REG, NF3BAD, NF3LNK, NF3BAD, + NF3SOCK, NF3BAD, NF3LNK, NF3BAD, +}; + +/* + * XDR functions for basic NFS types + */ +static __be32 * +encode_time3(__be32 *p, struct timespec *time) +{ + *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); + return p; +} + +static __be32 * +decode_time3(__be32 *p, struct timespec *time) +{ + time->tv_sec = ntohl(*p++); + time->tv_nsec = ntohl(*p++); + return p; +} + +static __be32 * +decode_fh(__be32 *p, struct svc_fh *fhp) +{ + unsigned int size; + fh_init(fhp, NFS3_FHSIZE); + size = ntohl(*p++); + if (size > NFS3_FHSIZE) + return NULL; + + memcpy(&fhp->fh_handle.fh_base, p, size); + fhp->fh_handle.fh_size = size; + return p + XDR_QUADLEN(size); +} + +/* Helper function for NFSv3 ACL code */ +__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + +static __be32 * +encode_fh(__be32 *p, struct svc_fh *fhp) +{ + unsigned int size = fhp->fh_handle.fh_size; + *p++ = htonl(size); + if (size) 
p[XDR_QUADLEN(size)-1]=0; + memcpy(p, &fhp->fh_handle.fh_base, size); + return p + XDR_QUADLEN(size); +} + +/* + * Decode a file name and make sure that the path contains + * no slashes or null bytes. + */ +static __be32 * +decode_filename(__be32 *p, char **namp, unsigned int *lenp) +{ + char *name; + unsigned int i; + + if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0' || *name == '/') + return NULL; + } + } + + return p; +} + +static __be32 * +decode_sattr3(__be32 *p, struct iattr *iap) +{ + u32 tmp; + + iap->ia_valid = 0; + + if (*p++) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_UID; + iap->ia_uid = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_GID; + iap->ia_gid = ntohl(*p++); + } + if (*p++) { + u64 newsize; + + iap->ia_valid |= ATTR_SIZE; + p = xdr_decode_hyper(p, &newsize); + if (newsize <= NFS_OFFSET_MAX) + iap->ia_size = newsize; + else + iap->ia_size = NFS_OFFSET_MAX; + } + if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ + iap->ia_valid |= ATTR_ATIME; + } else if (tmp == 2) { /* set to client time */ + iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; + iap->ia_atime.tv_sec = ntohl(*p++); + iap->ia_atime.tv_nsec = ntohl(*p++); + } + if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ + iap->ia_valid |= ATTR_MTIME; + } else if (tmp == 2) { /* set to client time */ + iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; + iap->ia_mtime.tv_sec = ntohl(*p++); + iap->ia_mtime.tv_nsec = ntohl(*p++); + } + return p; +} + +static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp) +{ + u64 f; + switch(fsid_source(fhp)) { + default: + case FSIDSOURCE_DEV: + p = xdr_encode_hyper(p, (u64)huge_encode_dev + (fhp->fh_dentry->d_inode->i_sb->s_dev)); + break; + case FSIDSOURCE_FSID: + p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); + break; + case FSIDSOURCE_UUID: + f = 
((u64*)fhp->fh_export->ex_uuid)[0]; + f ^= ((u64*)fhp->fh_export->ex_uuid)[1]; + p = xdr_encode_hyper(p, f); + break; + } + return p; +} + +static __be32 * +encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, + struct kstat *stat) +{ + *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { + p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); + } else { + p = xdr_encode_hyper(p, (u64) stat->size); + } + p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); + *p++ = htonl((u32) MAJOR(stat->rdev)); + *p++ = htonl((u32) MINOR(stat->rdev)); + p = encode_fsid(p, fhp); + p = xdr_encode_hyper(p, stat->ino); + p = encode_time3(p, &stat->atime); + p = encode_time3(p, &stat->mtime); + p = encode_time3(p, &stat->ctime); + + return p; +} + +static __be32 * +encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +{ + /* Attributes to follow */ + *p++ = xdr_one; + return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr); +} + +/* + * Encode post-operation attributes. + * The inode may be NULL if the call failed because of a stale file + * handle. In this case, no attributes are returned. 
+ */ +static __be32 * +encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +{ + struct dentry *dentry = fhp->fh_dentry; + if (dentry && dentry->d_inode) { + int err; + struct kstat stat; + + err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat); + if (!err) { + *p++ = xdr_one; /* attributes follow */ + lease_get_mtime(dentry->d_inode, &stat.mtime); + return encode_fattr3(rqstp, p, fhp, &stat); + } + } + *p++ = xdr_zero; + return p; +} + +/* Helper for NFSv3 ACLs */ +__be32 * +nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +{ + return encode_post_op_attr(rqstp, p, fhp); +} + +/* + * Enocde weak cache consistency data + */ +static __be32 * +encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +{ + struct dentry *dentry = fhp->fh_dentry; + + if (dentry && dentry->d_inode && fhp->fh_post_saved) { + if (fhp->fh_pre_saved) { + *p++ = xdr_one; + p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size); + p = encode_time3(p, &fhp->fh_pre_mtime); + p = encode_time3(p, &fhp->fh_pre_ctime); + } else { + *p++ = xdr_zero; + } + return encode_saved_post_attr(rqstp, p, fhp); + } + /* no pre- or post-attrs */ + *p++ = xdr_zero; + return encode_post_op_attr(rqstp, p, fhp); +} + +/* + * Fill in the post_op attr for the wcc data + */ +void fill_post_wcc(struct svc_fh *fhp) +{ + int err; + + if (fhp->fh_post_saved) + printk("nfsd: inode locked twice during operation.\n"); + + err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, + &fhp->fh_post_attr); + fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; + if (err) { + fhp->fh_post_saved = 0; + /* Grab the ctime anyway - set_change_info might use it */ + fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime; + } else + fhp->fh_post_saved = 1; +} + +/* + * XDR decode functions + */ +int +nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + return 
xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_sattrargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = decode_sattr3(p, &args->attrs); + + if ((args->check_guard = ntohl(*p++)) != 0) { + struct timespec time; + p = decode_time3(p, &time); + args->guardtime = time.tv_sec; + } + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_diropargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_accessargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + args->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_readargs *args) +{ + unsigned int len; + int v,pn; + u32 max_blocksize = svc_max_payload(rqstp); + + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = xdr_decode_hyper(p, &args->offset); + + len = args->count = ntohl(*p++); + + if (len > max_blocksize) + len = max_blocksize; + + /* set up the kvec */ + v=0; + while (len > 0) { + pn = rqstp->rq_resused++; + rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; + len -= rqstp->rq_vec[v].iov_len; + v++; + } + args->vlen = v; + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_writeargs *args) +{ + unsigned int len, v, hdr, dlen; + u32 max_blocksize = svc_max_payload(rqstp); + + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = xdr_decode_hyper(p, &args->offset); + + args->count = ntohl(*p++); + args->stable = ntohl(*p++); + len = args->len = ntohl(*p++); + /* + * The count must equal the amount of data passed. 
+ */ + if (args->count != args->len) + return 0; + + /* + * Check to make sure that we got the right number of + * bytes. + */ + hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; + dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + - hdr; + /* + * Round the length of the data which was specified up to + * the next multiple of XDR units and then compare that + * against the length which was actually received. + * Note that when RPCSEC/GSS (for example) is used, the + * data buffer can be padded so dlen might be larger + * than required. It must never be smaller. + */ + if (dlen < XDR_QUADLEN(len)*4) + return 0; + + if (args->count > max_blocksize) { + args->count = max_blocksize; + len = args->len = max_blocksize; + } + rqstp->rq_vec[0].iov_base = (void*)p; + rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + v = 0; + while (len > rqstp->rq_vec[v].iov_len) { + len -= rqstp->rq_vec[v].iov_len; + v++; + rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); + rqstp->rq_vec[v].iov_len = PAGE_SIZE; + } + rqstp->rq_vec[v].iov_len = len; + args->vlen = v + 1; + return 1; +} + +int +nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_createargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + switch (args->createmode = ntohl(*p++)) { + case NFS3_CREATE_UNCHECKED: + case NFS3_CREATE_GUARDED: + p = decode_sattr3(p, &args->attrs); + break; + case NFS3_CREATE_EXCLUSIVE: + args->verf = p; + p += 2; + break; + default: + return 0; + } + + return xdr_argsize_check(rqstp, p); +} +int +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_createargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) || + !(p = decode_filename(p, &args->name, &args->len))) + return 0; + p = decode_sattr3(p, &args->attrs); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, + struct 
nfsd3_symlinkargs *args) +{ + unsigned int len, avail; + char *old, *new; + struct kvec *vec; + + if (!(p = decode_fh(p, &args->ffh)) || + !(p = decode_filename(p, &args->fname, &args->flen)) + ) + return 0; + p = decode_sattr3(p, &args->attrs); + + /* now decode the pathname, which might be larger than the first page. + * As we have to check for nul's anyway, we copy it into a new page + * This page appears in the rq_res.pages list, but as pages_len is always + * 0, it won't get in the way + */ + len = ntohl(*p++); + if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) + return 0; + args->tname = new = + page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->tlen = len; + /* first copy and check from the first page */ + old = (char*)p; + vec = &rqstp->rq_arg.head[0]; + avail = vec->iov_len - (old - (char*)vec->iov_base); + while (len && avail && *old) { + *new++ = *old++; + len--; + avail--; + } + /* now copy next page if there is one */ + if (len && !avail && rqstp->rq_arg.page_len) { + avail = rqstp->rq_arg.page_len; + if (avail > PAGE_SIZE) + avail = PAGE_SIZE; + old = page_address(rqstp->rq_arg.pages[0]); + } + while (len && avail && *old) { + *new++ = *old++; + len--; + avail--; + } + *new = '\0'; + if (len) + return 0; + + return 1; +} + +int +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_mknodargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + args->ftype = ntohl(*p++); + + if (args->ftype == NF3BLK || args->ftype == NF3CHR + || args->ftype == NF3SOCK || args->ftype == NF3FIFO) + p = decode_sattr3(p, &args->attrs); + + if (args->ftype == NF3BLK || args->ftype == NF3CHR) { + args->major = ntohl(*p++); + args->minor = ntohl(*p++); + } + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_renameargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = 
decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_readlinkargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + args->buffer = + page_address(rqstp->rq_respages[rqstp->rq_resused++]); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_linkargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_readdirargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = xdr_decode_hyper(p, &args->cookie); + args->verf = p; p += 2; + args->dircount = ~0; + args->count = ntohl(*p++); + + if (args->count > PAGE_SIZE) + args->count = PAGE_SIZE; + + args->buffer = + page_address(rqstp->rq_respages[rqstp->rq_resused++]); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_readdirargs *args) +{ + int len, pn; + u32 max_blocksize = svc_max_payload(rqstp); + + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = xdr_decode_hyper(p, &args->cookie); + args->verf = p; p += 2; + args->dircount = ntohl(*p++); + args->count = ntohl(*p++); + + len = (args->count > max_blocksize) ? 
max_blocksize : + args->count; + args->count = len; + + while (len > 0) { + pn = rqstp->rq_resused++; + if (!args->buffer) + args->buffer = page_address(rqstp->rq_respages[pn]); + len -= PAGE_SIZE; + } + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_commitargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = xdr_decode_hyper(p, &args->offset); + args->count = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ +/* + * There must be an encoding function for void results so svc_process + * will work properly. + */ +int +nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +/* GETATTR */ +int +nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_attrstat *resp) +{ + if (resp->status == 0) { + lease_get_mtime(resp->fh.fh_dentry->d_inode, + &resp->stat.mtime); + p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat); + } + return xdr_ressize_check(rqstp, p); +} + +/* SETATTR, REMOVE, RMDIR */ +int +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_attrstat *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +/* LOOKUP */ +int +nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_diropres *resp) +{ + if (resp->status == 0) { + p = encode_fh(p, &resp->fh); + p = encode_post_op_attr(rqstp, p, &resp->fh); + } + p = encode_post_op_attr(rqstp, p, &resp->dirfh); + return xdr_ressize_check(rqstp, p); +} + +/* ACCESS */ +int +nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_accessres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0) + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); +} + +/* READLINK */ +int +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_readlinkres *resp) 
+{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->len); + xdr_ressize_check(rqstp, p); + rqstp->rq_res.page_len = resp->len; + if (resp->len & 3) { + /* need to pad the tail */ + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); + } + return 1; + } else + return xdr_ressize_check(rqstp, p); +} + +/* READ */ +int +nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_readres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->count); + *p++ = htonl(resp->eof); + *p++ = htonl(resp->count); /* xdr opaque count */ + xdr_ressize_check(rqstp, p); + /* now update rqstp->rq_res to reflect data as well */ + rqstp->rq_res.page_len = resp->count; + if (resp->count & 3) { + /* need to pad the tail */ + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); + } + return 1; + } else + return xdr_ressize_check(rqstp, p); +} + +/* WRITE */ +int +nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_writeres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->count); + *p++ = htonl(resp->committed); + *p++ = htonl(nfssvc_boot.tv_sec); + *p++ = htonl(nfssvc_boot.tv_usec); + } + return xdr_ressize_check(rqstp, p); +} + +/* CREATE, MKDIR, SYMLINK, MKNOD */ +int +nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_diropres *resp) +{ + if (resp->status == 0) { + *p++ = xdr_one; + p = encode_fh(p, &resp->fh); + p = encode_post_op_attr(rqstp, p, &resp->fh); + } + p = encode_wcc_data(rqstp, p, &resp->dirfh); + return xdr_ressize_check(rqstp, p); +} + +/* RENAME */ +int +nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_renameres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->ffh); + p = encode_wcc_data(rqstp, p, &resp->tfh); + return 
xdr_ressize_check(rqstp, p); +} + +/* LINK */ +int +nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_linkres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + p = encode_wcc_data(rqstp, p, &resp->tfh); + return xdr_ressize_check(rqstp, p); +} + +/* READDIR */ +int +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_readdirres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + + if (resp->status == 0) { + /* stupid readdir cookie */ + memcpy(p, resp->verf, 8); p += 2; + xdr_ressize_check(rqstp, p); + if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE) + return 1; /*No room for trailer */ + rqstp->rq_res.page_len = (resp->count) << 2; + + /* add the 'tail' to the end of the 'head' page - page 0. */ + rqstp->rq_res.tail[0].iov_base = p; + *p++ = 0; /* no more entries */ + *p++ = htonl(resp->common.err == nfserr_eof); + rqstp->rq_res.tail[0].iov_len = 2<<2; + return 1; + } else + return xdr_ressize_check(rqstp, p); +} + +static __be32 * +encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, + int namlen, u64 ino) +{ + *p++ = xdr_one; /* mark entry present */ + p = xdr_encode_hyper(p, ino); /* file id */ + p = xdr_encode_array(p, name, namlen);/* name length & name */ + + cd->offset = p; /* remember pointer */ + p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */ + + return p; +} + +static int +compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, + const char *name, int namlen) +{ + struct svc_export *exp; + struct dentry *dparent, *dchild; + int rv = 0; + + dparent = cd->fh.fh_dentry; + exp = cd->fh.fh_export; + + if (isdotent(name, namlen)) { + if (namlen == 2) { + dchild = dget_parent(dparent); + if (dchild == dparent) { + /* filesystem root - cannot return filehandle for ".." 
*/ + dput(dchild); + return -ENOENT; + } + } else + dchild = dget(dparent); + } else + dchild = lookup_one_len(name, dparent, namlen); + if (IS_ERR(dchild)) + return -ENOENT; + rv = -ENOENT; + if (d_mountpoint(dchild)) + goto out; + rv = fh_compose(fhp, exp, dchild, &cd->fh); + if (rv) + goto out; + if (!dchild->d_inode) + goto out; + rv = 0; +out: + dput(dchild); + return rv; +} + +static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) +{ + struct svc_fh fh; + int err; + + fh_init(&fh, NFS3_FHSIZE); + err = compose_entry_fh(cd, &fh, name, namlen); + if (err) { + *p++ = 0; + *p++ = 0; + goto out; + } + p = encode_post_op_attr(cd->rqstp, p, &fh); + *p++ = xdr_one; /* yes, a file handle follows */ + p = encode_fh(p, &fh); +out: + fh_put(&fh); + return p; +} + +/* + * Encode a directory entry. This one works for both normal readdir + * and readdirplus. + * The normal readdir reply requires 2 (fileid) + 1 (stringlen) + * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen. + * + * The readdirplus baggage is 1+21 words for post_op_attr, plus the + * file handle. 
+ */ + +#define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1) +#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) +static int +encode_entry(struct readdir_cd *ccd, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type, int plus) +{ + struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, + common); + __be32 *p = cd->buffer; + caddr_t curr_page_addr = NULL; + int pn; /* current page number */ + int slen; /* string (name) length */ + int elen; /* estimated entry length in words */ + int num_entry_words = 0; /* actual number of words */ + + if (cd->offset) { + u64 offset64 = offset; + + if (unlikely(cd->offset1)) { + /* we ended up with offset on a page boundary */ + *cd->offset = htonl(offset64 >> 32); + *cd->offset1 = htonl(offset64 & 0xffffffff); + cd->offset1 = NULL; + } else { + xdr_encode_hyper(cd->offset, offset64); + } + } + + /* + dprintk("encode_entry(%.*s @%ld%s)\n", + namlen, name, (long) offset, plus? " plus" : ""); + */ + + /* truncate filename if too long */ + if (namlen > NFS3_MAXNAMLEN) + namlen = NFS3_MAXNAMLEN; + + slen = XDR_QUADLEN(namlen); + elen = slen + NFS3_ENTRY_BAGGAGE + + (plus? 
NFS3_ENTRYPLUS_BAGGAGE : 0); + + if (cd->buflen < elen) { + cd->common.err = nfserr_toosmall; + return -EINVAL; + } + + /* determine which page in rq_respages[] we are currently filling */ + for (pn=1; pn < cd->rqstp->rq_resused; pn++) { + curr_page_addr = page_address(cd->rqstp->rq_respages[pn]); + + if (((caddr_t)cd->buffer >= curr_page_addr) && + ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) + break; + } + + if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) { + /* encode entry in current page */ + + p = encode_entry_baggage(cd, p, name, namlen, ino); + + if (plus) + p = encode_entryplus_baggage(cd, p, name, namlen); + num_entry_words = p - cd->buffer; + } else if (cd->rqstp->rq_respages[pn+1] != NULL) { + /* temporarily encode entry into next page, then move back to + * current and next page in rq_respages[] */ + __be32 *p1, *tmp; + int len1, len2; + + /* grab next page for temporary storage of entry */ + p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]); + + p1 = encode_entry_baggage(cd, p1, name, namlen, ino); + + if (plus) + p1 = encode_entryplus_baggage(cd, p1, name, namlen); + + /* determine entry word length and lengths to go in pages */ + num_entry_words = p1 - tmp; + len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer; + if ((num_entry_words << 2) < len1) { + /* the actual number of words in the entry is less + * than elen and can still fit in the current page + */ + memmove(p, tmp, num_entry_words << 2); + p += num_entry_words; + + /* update offset */ + cd->offset = cd->buffer + (cd->offset - tmp); + } else { + unsigned int offset_r = (cd->offset - tmp) << 2; + + /* update pointer to offset location. 
+ * This is a 64bit quantity, so we need to + * deal with 3 cases: + * - entirely in first page + * - entirely in second page + * - 4 bytes in each page + */ + if (offset_r + 8 <= len1) { + cd->offset = p + (cd->offset - tmp); + } else if (offset_r >= len1) { + cd->offset -= len1 >> 2; + } else { + /* sitting on the fence */ + BUG_ON(offset_r != len1 - 4); + cd->offset = p + (cd->offset - tmp); + cd->offset1 = tmp; + } + + len2 = (num_entry_words << 2) - len1; + + /* move from temp page to current and next pages */ + memmove(p, tmp, len1); + memmove(tmp, (caddr_t)tmp+len1, len2); + + p = tmp + (len2 >> 2); + } + } + else { + cd->common.err = nfserr_toosmall; + return -EINVAL; + } + + cd->buflen -= num_entry_words; + cd->buffer = p; + cd->common.err = nfs_ok; + return 0; + +} + +int +nfs3svc_encode_entry(void *cd, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int d_type) +{ + return encode_entry(cd, name, namlen, offset, ino, d_type, 0); +} + +int +nfs3svc_encode_entry_plus(void *cd, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) +{ + return encode_entry(cd, name, namlen, offset, ino, d_type, 1); +} + +/* FSSTAT */ +int +nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_fsstatres *resp) +{ + struct kstatfs *s = &resp->stats; + u64 bs = s->f_bsize; + + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ + p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ + p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ + p = xdr_encode_hyper(p, s->f_files); /* total inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ + *p++ = htonl(resp->invarsec); /* mean unchanged time */ + } + return xdr_ressize_check(rqstp, p); +} + +/* FSINFO */ +int +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p, + struct 
nfsd3_fsinfores *resp) +{ + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + *p++ = htonl(resp->f_rtmax); + *p++ = htonl(resp->f_rtpref); + *p++ = htonl(resp->f_rtmult); + *p++ = htonl(resp->f_wtmax); + *p++ = htonl(resp->f_wtpref); + *p++ = htonl(resp->f_wtmult); + *p++ = htonl(resp->f_dtpref); + p = xdr_encode_hyper(p, resp->f_maxfilesize); + *p++ = xdr_one; + *p++ = xdr_zero; + *p++ = htonl(resp->f_properties); + } + + return xdr_ressize_check(rqstp, p); +} + +/* PATHCONF */ +int +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_pathconfres *resp) +{ + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + *p++ = htonl(resp->p_link_max); + *p++ = htonl(resp->p_name_max); + *p++ = htonl(resp->p_no_trunc); + *p++ = htonl(resp->p_chown_restricted); + *p++ = htonl(resp->p_case_insensitive); + *p++ = htonl(resp->p_case_preserving); + } + + return xdr_ressize_check(rqstp, p); +} + +/* COMMIT */ +int +nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_commitres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->fh); + /* Write verifier */ + if (resp->status == 0) { + *p++ = htonl(nfssvc_boot.tv_sec); + *p++ = htonl(nfssvc_boot.tv_usec); + } + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +int +nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_attrstat *resp) +{ + fh_put(&resp->fh); + return 1; +} + +int +nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p, + struct nfsd3_fhandle_pair *resp) +{ + fh_put(&resp->fh1); + fh_put(&resp->fh2); + return 1; +} diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c new file mode 100644 index 00000000000..9c51aff02ae --- /dev/null +++ b/fs/nfsd/nfs4acl.c @@ -0,0 +1,839 @@ +/* + * Common NFSv4 ACL handling code. + * + * Copyright (c) 2002, 2003 The Regents of the University of Michigan. + * All rights reserved. 
+ * + * Marius Aamodt Eriksen <marius@umich.edu> + * Jeff Sedlak <jsedlak@umich.edu> + * J. Bruce Fields <bfields@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/slab.h> +#include <linux/nfs_fs.h> +#include <linux/export.h> +#include "acl.h" + + +/* mode bit translations: */ +#define NFS4_READ_MODE (NFS4_ACE_READ_DATA) +#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_APPEND_DATA) +#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE +#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE) +#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL) + +/* We don't support these bits; insist they be neither allowed nor denied */ +#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \ + | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS) + +/* flags used to simulate posix default ACLs */ +#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ + | NFS4_ACE_DIRECTORY_INHERIT_ACE) + +#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS \ + | NFS4_ACE_INHERIT_ONLY_ACE \ + | NFS4_ACE_IDENTIFIER_GROUP) + +#define MASK_EQUAL(mask1, mask2) \ + ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) + +static u32 +mask_from_posix(unsigned short perm, unsigned int flags) +{ + int mask = NFS4_ANYONE_MODE; + + if (flags & NFS4_ACL_OWNER) + mask |= NFS4_OWNER_MODE; + if (perm & ACL_READ) + mask |= NFS4_READ_MODE; + if (perm & ACL_WRITE) + mask |= NFS4_WRITE_MODE; + if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR)) + mask |= NFS4_ACE_DELETE_CHILD; + if (perm & ACL_EXECUTE) + mask |= NFS4_EXECUTE_MODE; + return mask; +} + +static u32 +deny_mask_from_posix(unsigned short perm, u32 flags) +{ + u32 mask = 0; + + if (perm & ACL_READ) + mask |= NFS4_READ_MODE; + if (perm & ACL_WRITE) + mask |= NFS4_WRITE_MODE; + if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR)) + mask |= NFS4_ACE_DELETE_CHILD; + if (perm & ACL_EXECUTE) + mask |= NFS4_EXECUTE_MODE; + return mask; +} + +/* XXX: modify functions to return NFS errors; they're only ever + * used by nfs code, after all.... 
*/ + +/* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the + * side of being more restrictive, so the mode bit mapping below is + * pessimistic. An optimistic version would be needed to handle DENY's, + * but we espect to coalesce all ALLOWs and DENYs before mapping to mode + * bits. */ + +static void +low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) +{ + u32 write_mode = NFS4_WRITE_MODE; + + if (flags & NFS4_ACL_DIR) + write_mode |= NFS4_ACE_DELETE_CHILD; + *mode = 0; + if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) + *mode |= ACL_READ; + if ((perm & write_mode) == write_mode) + *mode |= ACL_WRITE; + if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) + *mode |= ACL_EXECUTE; +} + +struct ace_container { + struct nfs4_ace *ace; + struct list_head ace_l; +}; + +static short ace2type(struct nfs4_ace *); +static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, + unsigned int); + +struct nfs4_acl * +nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, + unsigned int flags) +{ + struct nfs4_acl *acl; + int size = 0; + + if (pacl) { + if (posix_acl_valid(pacl) < 0) + return ERR_PTR(-EINVAL); + size += 2*pacl->a_count; + } + if (dpacl) { + if (posix_acl_valid(dpacl) < 0) + return ERR_PTR(-EINVAL); + size += 2*dpacl->a_count; + } + + /* Allocate for worst case: one (deny, allow) pair each: */ + acl = nfs4_acl_new(size); + if (acl == NULL) + return ERR_PTR(-ENOMEM); + + if (pacl) + _posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT); + + if (dpacl) + _posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT); + + return acl; +} + +struct posix_acl_summary { + unsigned short owner; + unsigned short users; + unsigned short group; + unsigned short groups; + unsigned short other; + unsigned short mask; +}; + +static void +summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) +{ + struct posix_acl_entry *pa, *pe; + + /* + * Only pas.users and pas.groups need 
initialization; previous + * posix_acl_valid() calls ensure that the other fields will be + * initialized in the following loop. But, just to placate gcc: + */ + memset(pas, 0, sizeof(*pas)); + pas->mask = 07; + + pe = acl->a_entries + acl->a_count; + + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch (pa->e_tag) { + case ACL_USER_OBJ: + pas->owner = pa->e_perm; + break; + case ACL_GROUP_OBJ: + pas->group = pa->e_perm; + break; + case ACL_USER: + pas->users |= pa->e_perm; + break; + case ACL_GROUP: + pas->groups |= pa->e_perm; + break; + case ACL_OTHER: + pas->other = pa->e_perm; + break; + case ACL_MASK: + pas->mask = pa->e_perm; + break; + } + } + /* We'll only care about effective permissions: */ + pas->users &= pas->mask; + pas->group &= pas->mask; + pas->groups &= pas->mask; +} + +/* We assume the acl has been verified with posix_acl_valid. */ +static void +_posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, + unsigned int flags) +{ + struct posix_acl_entry *pa, *group_owner_entry; + struct nfs4_ace *ace; + struct posix_acl_summary pas; + unsigned short deny; + int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ? 
+ NFS4_INHERITANCE_FLAGS | NFS4_ACE_INHERIT_ONLY_ACE : 0); + + BUG_ON(pacl->a_count < 3); + summarize_posix_acl(pacl, &pas); + + pa = pacl->a_entries; + ace = acl->aces + acl->naces; + + /* We could deny everything not granted by the owner: */ + deny = ~pas.owner; + /* + * but it is equivalent (and simpler) to deny only what is not + * granted by later entries: + */ + deny &= pas.users | pas.group | pas.groups | pas.other; + if (deny) { + ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = deny_mask_from_posix(deny, flags); + ace->whotype = NFS4_ACL_WHO_OWNER; + ace++; + acl->naces++; + } + + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER); + ace->whotype = NFS4_ACL_WHO_OWNER; + ace++; + acl->naces++; + pa++; + + while (pa->e_tag == ACL_USER) { + deny = ~(pa->e_perm & pas.mask); + deny &= pas.groups | pas.group | pas.other; + if (deny) { + ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = deny_mask_from_posix(deny, flags); + ace->whotype = NFS4_ACL_WHO_NAMED; + ace->who = pa->e_id; + ace++; + acl->naces++; + } + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, + flags); + ace->whotype = NFS4_ACL_WHO_NAMED; + ace->who = pa->e_id; + ace++; + acl->naces++; + pa++; + } + + /* In the case of groups, we apply allow ACEs first, then deny ACEs, + * since a user can be in more than one group. 
*/ + + /* allow ACEs */ + + group_owner_entry = pa; + + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = mask_from_posix(pas.group, flags); + ace->whotype = NFS4_ACL_WHO_GROUP; + ace++; + acl->naces++; + pa++; + + while (pa->e_tag == ACL_GROUP) { + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; + ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, + flags); + ace->whotype = NFS4_ACL_WHO_NAMED; + ace->who = pa->e_id; + ace++; + acl->naces++; + pa++; + } + + /* deny ACEs */ + + pa = group_owner_entry; + + deny = ~pas.group & pas.other; + if (deny) { + ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = deny_mask_from_posix(deny, flags); + ace->whotype = NFS4_ACL_WHO_GROUP; + ace++; + acl->naces++; + } + pa++; + + while (pa->e_tag == ACL_GROUP) { + deny = ~(pa->e_perm & pas.mask); + deny &= pas.other; + if (deny) { + ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; + ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; + ace->access_mask = deny_mask_from_posix(deny, flags); + ace->whotype = NFS4_ACL_WHO_NAMED; + ace->who = pa->e_id; + ace++; + acl->naces++; + } + pa++; + } + + if (pa->e_tag == ACL_MASK) + pa++; + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = mask_from_posix(pa->e_perm, flags); + ace->whotype = NFS4_ACL_WHO_EVERYONE; + acl->naces++; +} + +static void +sort_pacl_range(struct posix_acl *pacl, int start, int end) { + int sorted = 0, i; + struct posix_acl_entry tmp; + + /* We just do a bubble sort; easy to do in place, and we're not + * expecting acl's to be long enough to justify anything more. 
*/ + while (!sorted) { + sorted = 1; + for (i = start; i < end; i++) { + if (pacl->a_entries[i].e_id + > pacl->a_entries[i+1].e_id) { + sorted = 0; + tmp = pacl->a_entries[i]; + pacl->a_entries[i] = pacl->a_entries[i+1]; + pacl->a_entries[i+1] = tmp; + } + } + } +} + +static void +sort_pacl(struct posix_acl *pacl) +{ + /* posix_acl_valid requires that users and groups be in order + * by uid/gid. */ + int i, j; + + if (pacl->a_count <= 4) + return; /* no users or groups */ + i = 1; + while (pacl->a_entries[i].e_tag == ACL_USER) + i++; + sort_pacl_range(pacl, 1, i-1); + + BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); + j = ++i; + while (pacl->a_entries[j].e_tag == ACL_GROUP) + j++; + sort_pacl_range(pacl, i, j-1); + return; +} + +/* + * While processing the NFSv4 ACE, this maintains bitmasks representing + * which permission bits have been allowed and which denied to a given + * entity: */ +struct posix_ace_state { + u32 allow; + u32 deny; +}; + +struct posix_user_ace_state { + uid_t uid; + struct posix_ace_state perms; +}; + +struct posix_ace_state_array { + int n; + struct posix_user_ace_state aces[]; +}; + +/* + * While processing the NFSv4 ACE, this maintains the partial permissions + * calculated so far: */ + +struct posix_acl_state { + int empty; + struct posix_ace_state owner; + struct posix_ace_state group; + struct posix_ace_state other; + struct posix_ace_state everyone; + struct posix_ace_state mask; /* Deny unused in this case */ + struct posix_ace_state_array *users; + struct posix_ace_state_array *groups; +}; + +static int +init_state(struct posix_acl_state *state, int cnt) +{ + int alloc; + + memset(state, 0, sizeof(struct posix_acl_state)); + state->empty = 1; + /* + * In the worst case, each individual acl could be for a distinct + * named user or group, but we don't know which, so we allocate + * enough space for either: + */ + alloc = sizeof(struct posix_ace_state_array) + + cnt*sizeof(struct posix_user_ace_state); + state->users = 
kzalloc(alloc, GFP_KERNEL); + if (!state->users) + return -ENOMEM; + state->groups = kzalloc(alloc, GFP_KERNEL); + if (!state->groups) { + kfree(state->users); + return -ENOMEM; + } + return 0; +} + +static void +free_state(struct posix_acl_state *state) { + kfree(state->users); + kfree(state->groups); +} + +static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate) +{ + state->mask.allow |= astate->allow; +} + +/* + * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS, + * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate + * to traditional read/write/execute permissions. + * + * It's problematic to reject acls that use certain mode bits, because it + * places the burden on users to learn the rules about which bits one + * particular server sets, without giving the user a lot of help--we return an + * error that could mean any number of different things. To make matters + * worse, the problematic bits might be introduced by some application that's + * automatically mapping from some other acl model. + * + * So wherever possible we accept anything, possibly erring on the side of + * denying more permissions than necessary. 
+ * + * However we do reject *explicit* DENY's of a few bits representing + * permissions we could never deny: + */ + +static inline int check_deny(u32 mask, int isowner) +{ + if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL)) + return -EINVAL; + if (!isowner) + return 0; + if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)) + return -EINVAL; + return 0; +} + +static struct posix_acl * +posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) +{ + struct posix_acl_entry *pace; + struct posix_acl *pacl; + int nace; + int i, error = 0; + + /* + * ACLs with no ACEs are treated differently in the inheritable + * and effective cases: when there are no inheritable ACEs, we + * set a zero-length default posix acl: + */ + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { + pacl = posix_acl_alloc(0, GFP_KERNEL); + return pacl ? pacl : ERR_PTR(-ENOMEM); + } + /* + * When there are no effective ACEs, the following will end + * up setting a 3-element effective posix ACL with all + * permissions zero. 
+ */ + nace = 4 + state->users->n + state->groups->n; + pacl = posix_acl_alloc(nace, GFP_KERNEL); + if (!pacl) + return ERR_PTR(-ENOMEM); + + pace = pacl->a_entries; + pace->e_tag = ACL_USER_OBJ; + error = check_deny(state->owner.deny, 1); + if (error) + goto out_err; + low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); + pace->e_id = ACL_UNDEFINED_ID; + + for (i=0; i < state->users->n; i++) { + pace++; + pace->e_tag = ACL_USER; + error = check_deny(state->users->aces[i].perms.deny, 0); + if (error) + goto out_err; + low_mode_from_nfs4(state->users->aces[i].perms.allow, + &pace->e_perm, flags); + pace->e_id = state->users->aces[i].uid; + add_to_mask(state, &state->users->aces[i].perms); + } + + pace++; + pace->e_tag = ACL_GROUP_OBJ; + error = check_deny(state->group.deny, 0); + if (error) + goto out_err; + low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); + pace->e_id = ACL_UNDEFINED_ID; + add_to_mask(state, &state->group); + + for (i=0; i < state->groups->n; i++) { + pace++; + pace->e_tag = ACL_GROUP; + error = check_deny(state->groups->aces[i].perms.deny, 0); + if (error) + goto out_err; + low_mode_from_nfs4(state->groups->aces[i].perms.allow, + &pace->e_perm, flags); + pace->e_id = state->groups->aces[i].uid; + add_to_mask(state, &state->groups->aces[i].perms); + } + + pace++; + pace->e_tag = ACL_MASK; + low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); + pace->e_id = ACL_UNDEFINED_ID; + + pace++; + pace->e_tag = ACL_OTHER; + error = check_deny(state->other.deny, 0); + if (error) + goto out_err; + low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); + pace->e_id = ACL_UNDEFINED_ID; + + return pacl; +out_err: + posix_acl_release(pacl); + return ERR_PTR(error); +} + +static inline void allow_bits(struct posix_ace_state *astate, u32 mask) +{ + /* Allow all bits in the mask not already denied: */ + astate->allow |= mask & ~astate->deny; +} + +static inline void deny_bits(struct posix_ace_state *astate, u32 mask) +{ + /* 
Deny all bits in the mask not already allowed: */ + astate->deny |= mask & ~astate->allow; +} + +static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid) +{ + int i; + + for (i = 0; i < a->n; i++) + if (a->aces[i].uid == uid) + return i; + /* Not found: */ + a->n++; + a->aces[i].uid = uid; + a->aces[i].perms.allow = state->everyone.allow; + a->aces[i].perms.deny = state->everyone.deny; + + return i; +} + +static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) +{ + int i; + + for (i=0; i < a->n; i++) + deny_bits(&a->aces[i].perms, mask); +} + +static void allow_bits_array(struct posix_ace_state_array *a, u32 mask) +{ + int i; + + for (i=0; i < a->n; i++) + allow_bits(&a->aces[i].perms, mask); +} + +static void process_one_v4_ace(struct posix_acl_state *state, + struct nfs4_ace *ace) +{ + u32 mask = ace->access_mask; + int i; + + state->empty = 0; + + switch (ace2type(ace)) { + case ACL_USER_OBJ: + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->owner, mask); + } else { + deny_bits(&state->owner, mask); + } + break; + case ACL_USER: + i = find_uid(state, state->users, ace->who); + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->users->aces[i].perms, mask); + } else { + deny_bits(&state->users->aces[i].perms, mask); + mask = state->users->aces[i].perms.deny; + deny_bits(&state->owner, mask); + } + break; + case ACL_GROUP_OBJ: + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->group, mask); + } else { + deny_bits(&state->group, mask); + mask = state->group.deny; + deny_bits(&state->owner, mask); + deny_bits(&state->everyone, mask); + deny_bits_array(state->users, mask); + deny_bits_array(state->groups, mask); + } + break; + case ACL_GROUP: + i = find_uid(state, state->groups, ace->who); + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->groups->aces[i].perms, mask); + } else { + 
deny_bits(&state->groups->aces[i].perms, mask); + mask = state->groups->aces[i].perms.deny; + deny_bits(&state->owner, mask); + deny_bits(&state->group, mask); + deny_bits(&state->everyone, mask); + deny_bits_array(state->users, mask); + deny_bits_array(state->groups, mask); + } + break; + case ACL_OTHER: + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->owner, mask); + allow_bits(&state->group, mask); + allow_bits(&state->other, mask); + allow_bits(&state->everyone, mask); + allow_bits_array(state->users, mask); + allow_bits_array(state->groups, mask); + } else { + deny_bits(&state->owner, mask); + deny_bits(&state->group, mask); + deny_bits(&state->other, mask); + deny_bits(&state->everyone, mask); + deny_bits_array(state->users, mask); + deny_bits_array(state->groups, mask); + } + } +} + +int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, + struct posix_acl **dpacl, unsigned int flags) +{ + struct posix_acl_state effective_acl_state, default_acl_state; + struct nfs4_ace *ace; + int ret; + + ret = init_state(&effective_acl_state, acl->naces); + if (ret) + return ret; + ret = init_state(&default_acl_state, acl->naces); + if (ret) + goto out_estate; + ret = -EINVAL; + for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && + ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) + goto out_dstate; + if (ace->flag & ~NFS4_SUPPORTED_FLAGS) + goto out_dstate; + if ((ace->flag & NFS4_INHERITANCE_FLAGS) == 0) { + process_one_v4_ace(&effective_acl_state, ace); + continue; + } + if (!(flags & NFS4_ACL_DIR)) + goto out_dstate; + /* + * Note that when only one of FILE_INHERIT or DIRECTORY_INHERIT + * is set, we're effectively turning on the other. That's OK, + * according to rfc 3530. 
+ */ + process_one_v4_ace(&default_acl_state, ace); + + if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE)) + process_one_v4_ace(&effective_acl_state, ace); + } + *pacl = posix_state_to_acl(&effective_acl_state, flags); + if (IS_ERR(*pacl)) { + ret = PTR_ERR(*pacl); + *pacl = NULL; + goto out_dstate; + } + *dpacl = posix_state_to_acl(&default_acl_state, + flags | NFS4_ACL_TYPE_DEFAULT); + if (IS_ERR(*dpacl)) { + ret = PTR_ERR(*dpacl); + *dpacl = NULL; + posix_acl_release(*pacl); + *pacl = NULL; + goto out_dstate; + } + sort_pacl(*pacl); + sort_pacl(*dpacl); + ret = 0; +out_dstate: + free_state(&default_acl_state); +out_estate: + free_state(&effective_acl_state); + return ret; +} + +static short +ace2type(struct nfs4_ace *ace) +{ + switch (ace->whotype) { + case NFS4_ACL_WHO_NAMED: + return (ace->flag & NFS4_ACE_IDENTIFIER_GROUP ? + ACL_GROUP : ACL_USER); + case NFS4_ACL_WHO_OWNER: + return ACL_USER_OBJ; + case NFS4_ACL_WHO_GROUP: + return ACL_GROUP_OBJ; + case NFS4_ACL_WHO_EVERYONE: + return ACL_OTHER; + } + BUG(); + return -1; +} + +EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); +EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); + +struct nfs4_acl * +nfs4_acl_new(int n) +{ + struct nfs4_acl *acl; + + acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL); + if (acl == NULL) + return NULL; + acl->naces = 0; + return acl; +} + +static struct { + char *string; + int stringlen; + int type; +} s2t_map[] = { + { + .string = "OWNER@", + .stringlen = sizeof("OWNER@") - 1, + .type = NFS4_ACL_WHO_OWNER, + }, + { + .string = "GROUP@", + .stringlen = sizeof("GROUP@") - 1, + .type = NFS4_ACL_WHO_GROUP, + }, + { + .string = "EVERYONE@", + .stringlen = sizeof("EVERYONE@") - 1, + .type = NFS4_ACL_WHO_EVERYONE, + }, +}; + +int +nfs4_acl_get_whotype(char *p, u32 len) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { + if (s2t_map[i].stringlen == len && + 0 == memcmp(s2t_map[i].string, p, len)) + return s2t_map[i].type; + } + return NFS4_ACL_WHO_NAMED; +} + +int 
+nfs4_acl_write_who(int who, char *p) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { + if (s2t_map[i].type == who) { + memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); + return s2t_map[i].stringlen; + } + } + BUG(); + return -1; +} + +EXPORT_SYMBOL(nfs4_acl_new); +EXPORT_SYMBOL(nfs4_acl_get_whotype); +EXPORT_SYMBOL(nfs4_acl_write_who); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c new file mode 100644 index 00000000000..6f3ebb48b12 --- /dev/null +++ b/fs/nfsd/nfs4callback.c @@ -0,0 +1,1036 @@ +/* + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc_xprt.h> +#include <linux/slab.h> +#include "nfsd.h" +#include "state.h" + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); + +#define NFSPROC4_CB_NULL 0 +#define NFSPROC4_CB_COMPOUND 1 + +/* Index of predefined Linux callback client operations */ + +enum { + NFSPROC4_CLNT_CB_NULL = 0, + NFSPROC4_CLNT_CB_RECALL, + NFSPROC4_CLNT_CB_SEQUENCE, +}; + +#define NFS4_MAXTAGLEN 20 + +#define NFS4_enc_cb_null_sz 0 +#define NFS4_dec_cb_null_sz 0 +#define cb_compound_enc_hdr_sz 4 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define cb_sequence_enc_sz (sessionid_sz + 4 + \ + 1 /* no referring calls list yet */) +#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) + +#define op_enc_sz 1 +#define op_dec_sz 2 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) +#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + enc_stateid_sz + \ + enc_nfs4_fh_sz) + +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) + +struct nfs4_cb_compound_hdr { + /* args */ + u32 ident; /* minorversion 0 only */ + u32 nops; + __be32 *nops_p; + u32 minorversion; + /* res */ + int status; +}; + +/* + * Handle decode buffer overflows out-of-line. 
+ */ +static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) +{ + dprintk("NFS: %s prematurely hit the end of our receive buffer. " + "Remaining buffer length is %tu words.\n", + func, xdr->end - xdr->p); +} + +static __be32 *xdr_encode_empty_array(__be32 *p) +{ + *p++ = xdr_zero; + return p; +} + +/* + * Encode/decode NFSv4 CB basic data types + * + * Basic NFSv4 callback data types are defined in section 15 of RFC + * 3530: "Network File System (NFS) version 4 Protocol" and section + * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version + * 1 Protocol" + */ + +/* + * nfs_cb_opnum4 + * + * enum nfs_cb_opnum4 { + * OP_CB_GETATTR = 3, + * ... + * }; + */ +enum nfs_cb_opnum4 { + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, + OP_CB_LAYOUTRECALL = 5, + OP_CB_NOTIFY = 6, + OP_CB_PUSH_DELEG = 7, + OP_CB_RECALL_ANY = 8, + OP_CB_RECALLABLE_OBJ_AVAIL = 9, + OP_CB_RECALL_SLOT = 10, + OP_CB_SEQUENCE = 11, + OP_CB_WANTS_CANCELLED = 12, + OP_CB_NOTIFY_LOCK = 13, + OP_CB_NOTIFY_DEVICEID = 14, + OP_CB_ILLEGAL = 10044 +}; + +static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(op); +} + +/* + * nfs_fh4 + * + * typedef opaque nfs_fh4<NFS4_FHSIZE>; + */ +static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh) +{ + u32 length = fh->fh_size; + __be32 *p; + + BUG_ON(length > NFS4_FHSIZE); + p = xdr_reserve_space(xdr, 4 + length); + xdr_encode_opaque(p, &fh->fh_base, length); +} + +/* + * stateid4 + * + * struct stateid4 { + * uint32_t seqid; + * opaque other[12]; + * }; + */ +static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(sid->si_generation); + xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE); +} + +/* + * sessionid4 + * + * typedef opaque sessionid4[NFS4_SESSIONID_SIZE]; + */ +static void 
encode_sessionid4(struct xdr_stream *xdr, + const struct nfsd4_session *session) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN); + xdr_encode_opaque_fixed(p, session->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN); +} + +/* + * nfsstat4 + */ +static const struct { + int stat; + int errno; +} nfs_cb_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, -EPERM }, + { NFS4ERR_NOENT, -ENOENT }, + { NFS4ERR_IO, -EIO }, + { NFS4ERR_NXIO, -ENXIO }, + { NFS4ERR_ACCESS, -EACCES }, + { NFS4ERR_EXIST, -EEXIST }, + { NFS4ERR_XDEV, -EXDEV }, + { NFS4ERR_NOTDIR, -ENOTDIR }, + { NFS4ERR_ISDIR, -EISDIR }, + { NFS4ERR_INVAL, -EINVAL }, + { NFS4ERR_FBIG, -EFBIG }, + { NFS4ERR_NOSPC, -ENOSPC }, + { NFS4ERR_ROFS, -EROFS }, + { NFS4ERR_MLINK, -EMLINK }, + { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, -ENOTEMPTY }, + { NFS4ERR_DQUOT, -EDQUOT }, + { NFS4ERR_STALE, -ESTALE }, + { NFS4ERR_BADHANDLE, -EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, + { NFS4ERR_NOTSUPP, -ENOTSUPP }, + { NFS4ERR_TOOSMALL, -ETOOSMALL }, + { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_BADTYPE, -EBADTYPE }, + { NFS4ERR_LOCKED, -EAGAIN }, + { NFS4ERR_RESOURCE, -EREMOTEIO }, + { NFS4ERR_SYMLINK, -ELOOP }, + { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, -EDEADLK }, + { -1, -EIO } +}; + +/* + * If we cannot translate the error, the recovery routines should + * handle it. + * + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. 
+ */ +static int nfs_cb_stat_to_errno(int status) +{ + int i; + + for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { + if (nfs_cb_errtbl[i].stat == status) + return nfs_cb_errtbl[i].errno; + } + + dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status); + return -status; +} + +static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, + enum nfsstat4 *status) +{ + __be32 *p; + u32 op; + + p = xdr_inline_decode(xdr, 4 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + op = be32_to_cpup(p++); + if (unlikely(op != expected)) + goto out_unexpected; + *status = be32_to_cpup(p); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +out_unexpected: + dprintk("NFSD: Callback server returned operation %d but " + "we issued a request for %d\n", op, expected); + return -EIO; +} + +/* + * CB_COMPOUND4args + * + * struct CB_COMPOUND4args { + * utf8str_cs tag; + * uint32_t minorversion; + * uint32_t callback_ident; + * nfs_cb_argop4 argarray<>; + * }; +*/ +static void encode_cb_compound4args(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr) +{ + __be32 * p; + + p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4); + p = xdr_encode_empty_array(p); /* empty tag */ + *p++ = cpu_to_be32(hdr->minorversion); + *p++ = cpu_to_be32(hdr->ident); + + hdr->nops_p = p; + *p = cpu_to_be32(hdr->nops); /* argarray element count */ +} + +/* + * Update argarray element count + */ +static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr) +{ + BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS); + *hdr->nops_p = cpu_to_be32(hdr->nops); +} + +/* + * CB_COMPOUND4res + * + * struct CB_COMPOUND4res { + * nfsstat4 status; + * utf8str_cs tag; + * nfs_cb_resop4 resarray<>; + * }; + */ +static int decode_cb_compound4res(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr) +{ + u32 length; + __be32 *p; + + p = xdr_inline_decode(xdr, 4 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + hdr->status = be32_to_cpup(p++); + /* Ignore the tag */ + 
length = be32_to_cpup(p++); + p = xdr_inline_decode(xdr, length + 4); + if (unlikely(p == NULL)) + goto out_overflow; + hdr->nops = be32_to_cpup(p); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +/* + * CB_RECALL4args + * + * struct CB_RECALL4args { + * stateid4 stateid; + * bool truncate; + * nfs_fh4 fh; + * }; + */ +static void encode_cb_recall4args(struct xdr_stream *xdr, + const struct nfs4_delegation *dp, + struct nfs4_cb_compound_hdr *hdr) +{ + __be32 *p; + + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL); + encode_stateid4(xdr, &dp->dl_stid.sc_stateid); + + p = xdr_reserve_space(xdr, 4); + *p++ = xdr_zero; /* truncate */ + + encode_nfs_fh4(xdr, &dp->dl_fh); + + hdr->nops++; +} + +/* + * CB_SEQUENCE4args + * + * struct CB_SEQUENCE4args { + * sessionid4 csa_sessionid; + * sequenceid4 csa_sequenceid; + * slotid4 csa_slotid; + * slotid4 csa_highest_slotid; + * bool csa_cachethis; + * referring_call_list4 csa_referring_call_lists<>; + * }; + */ +static void encode_cb_sequence4args(struct xdr_stream *xdr, + const struct nfsd4_callback *cb, + struct nfs4_cb_compound_hdr *hdr) +{ + struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + __be32 *p; + + if (hdr->minorversion == 0) + return; + + encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE); + encode_sessionid4(xdr, session); + + p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4); + *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */ + *p++ = xdr_zero; /* csa_slotid */ + *p++ = xdr_zero; /* csa_highest_slotid */ + *p++ = xdr_zero; /* csa_cachethis */ + xdr_encode_empty_array(p); /* csa_referring_call_lists */ + + hdr->nops++; +} + +/* + * CB_SEQUENCE4resok + * + * struct CB_SEQUENCE4resok { + * sessionid4 csr_sessionid; + * sequenceid4 csr_sequenceid; + * slotid4 csr_slotid; + * slotid4 csr_highest_slotid; + * slotid4 csr_target_highest_slotid; + * }; + * + * union CB_SEQUENCE4res switch (nfsstat4 csr_status) { + * case NFS4_OK: + * CB_SEQUENCE4resok csr_resok4; + * default: + * 
void; + * }; + * + * Our current back channel implementation supports a single backchannel + * with a single slot. + */ +static int decode_cb_sequence4resok(struct xdr_stream *xdr, + struct nfsd4_callback *cb) +{ + struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + struct nfs4_sessionid id; + int status; + __be32 *p; + u32 dummy; + + status = -ESERVERFAULT; + + /* + * If the server returns different values for sessionID, slotID or + * sequence number, the server is looney tunes. + */ + p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); + if (memcmp(id.data, session->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN) != 0) { + dprintk("NFS: %s Invalid session id\n", __func__); + goto out; + } + p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); + + dummy = be32_to_cpup(p++); + if (dummy != session->se_cb_seq_nr) { + dprintk("NFS: %s Invalid sequence number\n", __func__); + goto out; + } + + dummy = be32_to_cpup(p++); + if (dummy != 0) { + dprintk("NFS: %s Invalid slotid\n", __func__); + goto out; + } + + /* + * FIXME: process highest slotid and target highest slotid + */ + status = 0; +out: + if (status) + nfsd4_mark_cb_fault(cb->cb_clp, status); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_cb_sequence4res(struct xdr_stream *xdr, + struct nfsd4_callback *cb) +{ + enum nfsstat4 nfserr; + int status; + + if (cb->cb_minorversion == 0) + return 0; + + status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr); + if (unlikely(status)) + goto out; + if (unlikely(nfserr != NFS4_OK)) + goto out_default; + status = decode_cb_sequence4resok(xdr, cb); +out: + return status; +out_default: + return nfs_cb_stat_to_errno(nfserr); +} + +/* + * NFSv4.0 and NFSv4.1 XDR encode functions + * + * NFSv4.0 callback argument types are defined in section 15 of RFC + * 3530: "Network File System (NFS) version 4 Protocol" and 
section 20 + * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1 + * Protocol". + */ + +/* + * NB: Without this zero space reservation, callbacks over krb5p fail + */ +static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, + void *__unused) +{ + xdr_reserve_space(xdr, 0); +} + +/* + * 20.2. Operation 4: CB_RECALL - Recall a Delegation + */ +static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfsd4_callback *cb) +{ + const struct nfs4_delegation *args = cb->cb_op; + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_minorversion, + }; + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_recall4args(xdr, args, &hdr); + encode_cb_nops(&hdr); +} + + +/* + * NFSv4.0 and NFSv4.1 XDR decode functions + * + * NFSv4.0 callback result types are defined in section 15 of RFC + * 3530: "Network File System (NFS) version 4 Protocol" and section 20 + * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1 + * Protocol". + */ + +static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, + void *__unused) +{ + return 0; +} + +/* + * 20.2. 
Operation 4: CB_RECALL - Recall a Delegation + */ +static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfsd4_callback *cb) +{ + struct nfs4_cb_compound_hdr hdr; + enum nfsstat4 nfserr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + goto out; + + if (cb != NULL) { + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status)) + goto out; + } + + status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr); + if (unlikely(status)) + goto out; + if (unlikely(nfserr != NFS4_OK)) + status = nfs_cb_stat_to_errno(nfserr); +out: + return status; +} + +/* + * RPC procedure tables + */ +#define PROC(proc, call, argtype, restype) \ +[NFSPROC4_CLNT_##proc] = { \ + .p_proc = NFSPROC4_CB_##call, \ + .p_encode = (kxdreproc_t)nfs4_xdr_enc_##argtype, \ + .p_decode = (kxdrdproc_t)nfs4_xdr_dec_##restype, \ + .p_arglen = NFS4_enc_##argtype##_sz, \ + .p_replen = NFS4_dec_##restype##_sz, \ + .p_statidx = NFSPROC4_CB_##call, \ + .p_name = #proc, \ +} + +static struct rpc_procinfo nfs4_cb_procedures[] = { + PROC(CB_NULL, NULL, cb_null, cb_null), + PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), +}; + +static struct rpc_version nfs_cb_version4 = { +/* + * Note on the callback rpc program version number: despite language in rfc + * 5661 section 18.36.3 requiring servers to use 4 in this field, the + * official xdr descriptions for both 4.0 and 4.1 specify version 1, and + * in practice that appears to be what implementations use. The section + * 18.36.3 language is expected to be fixed in an erratum. 
+ */ + .number = 1, + .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), + .procs = nfs4_cb_procedures +}; + +static struct rpc_version *nfs_cb_version[] = { + &nfs_cb_version4, +}; + +static struct rpc_program cb_program; + +static struct rpc_stat cb_stats = { + .program = &cb_program +}; + +#define NFS4_CALLBACK 0x40000000 +static struct rpc_program cb_program = { + .name = "nfs4_cb", + .number = NFS4_CALLBACK, + .nrvers = ARRAY_SIZE(nfs_cb_version), + .version = nfs_cb_version, + .stats = &cb_stats, + .pipe_dir_name = "/nfsd4_cb", +}; + +static int max_cb_time(void) +{ + return max(nfsd4_lease/10, (time_t)1) * HZ; +} + + +static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) +{ + struct rpc_timeout timeparms = { + .to_initval = max_cb_time(), + .to_retries = 0, + }; + struct rpc_create_args args = { + .net = &init_net, + .address = (struct sockaddr *) &conn->cb_addr, + .addrsize = conn->cb_addrlen, + .saddress = (struct sockaddr *) &conn->cb_saddr, + .timeout = &timeparms, + .program = &cb_program, + .version = 0, + .authflavor = clp->cl_flavor, + .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), + }; + struct rpc_clnt *client; + + if (clp->cl_minorversion == 0) { + if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) + return -EINVAL; + args.client_name = clp->cl_principal; + args.prognumber = conn->cb_prog, + args.protocol = XPRT_TRANSPORT_TCP; + clp->cl_cb_ident = conn->cb_ident; + } else { + if (!conn->cb_xprt) + return -EINVAL; + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; + clp->cl_cb_session = ses; + args.bc_xprt = conn->cb_xprt; + args.prognumber = clp->cl_cb_session->se_cb_prog; + args.protocol = XPRT_TRANSPORT_BC_TCP; + } + /* Create RPC client */ + client = rpc_create(&args); + if (IS_ERR(client)) { + dprintk("NFSD: couldn't create callback client: %ld\n", + PTR_ERR(client)); + return PTR_ERR(client); + } + clp->cl_cb_client = client; + return 0; + +} + +static void 
warn_no_callback_path(struct nfs4_client *clp, int reason) +{ + dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", + (int)clp->cl_name.len, clp->cl_name.data, reason); +} + +static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) +{ + clp->cl_cb_state = NFSD4_CB_DOWN; + warn_no_callback_path(clp, reason); +} + +static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) +{ + clp->cl_cb_state = NFSD4_CB_FAULT; + warn_no_callback_path(clp, reason); +} + +static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); + + if (task->tk_status) + nfsd4_mark_cb_down(clp, task->tk_status); + else + clp->cl_cb_state = NFSD4_CB_UP; +} + +static const struct rpc_call_ops nfsd4_cb_probe_ops = { + /* XXX: release method to ensure we set the cb channel down if + * necessary on early failure? */ + .rpc_call_done = nfsd4_cb_probe_done, +}; + +static struct rpc_cred *callback_cred; + +int set_callback_cred(void) +{ + if (callback_cred) + return 0; + callback_cred = rpc_lookup_machine_cred("nfs"); + if (!callback_cred) + return -ENOMEM; + return 0; +} + +static struct workqueue_struct *callback_wq; + +static void run_nfsd4_cb(struct nfsd4_callback *cb) +{ + queue_work(callback_wq, &cb->cb_work); +} + +static void do_probe_callback(struct nfs4_client *clp) +{ + struct nfsd4_callback *cb = &clp->cl_cb_null; + + cb->cb_op = NULL; + cb->cb_clp = clp; + + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; + cb->cb_msg.rpc_argp = NULL; + cb->cb_msg.rpc_resp = NULL; + cb->cb_msg.rpc_cred = callback_cred; + + cb->cb_ops = &nfsd4_cb_probe_ops; + + run_nfsd4_cb(cb); +} + +/* + * Poke the callback thread to process any updates to the callback + * parameters, and send a null probe. + */ +void nfsd4_probe_callback(struct nfs4_client *clp) +{ + /* XXX: atomicity? Also, should we be using cl_cb_flags? 
*/ + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + do_probe_callback(clp); +} + +void nfsd4_probe_callback_sync(struct nfs4_client *clp) +{ + nfsd4_probe_callback(clp); + flush_workqueue(callback_wq); +} + +void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) +{ + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + spin_lock(&clp->cl_lock); + memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); + spin_unlock(&clp->cl_lock); +} + +/* + * There's currently a single callback channel slot. + * If the slot is available, then mark it busy. Otherwise, set the + * thread for sleeping on the callback RPC wait queue. + */ +static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) +{ + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); + dprintk("%s slot is busy\n", __func__); + return false; + } + return true; +} + +/* + * TODO: cb_sequence should support referring call lists, cachethis, multiple + * slots, and mark callback channel down on communication errors. 
+ */ +static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); + struct nfs4_client *clp = dp->dl_stid.sc_client; + u32 minorversion = clp->cl_minorversion; + + cb->cb_minorversion = minorversion; + if (minorversion) { + if (!nfsd41_cb_get_slot(clp, task)) + return; + } + spin_lock(&clp->cl_lock); + if (list_empty(&cb->cb_per_client)) { + /* This is the first call, not a restart */ + cb->cb_done = false; + list_add(&cb->cb_per_client, &clp->cl_callbacks); + } + spin_unlock(&clp->cl_lock); + rpc_call_start(task); +} + +static void nfsd4_cb_done(struct rpc_task *task, void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); + struct nfs4_client *clp = dp->dl_stid.sc_client; + + dprintk("%s: minorversion=%d\n", __func__, + clp->cl_minorversion); + + if (clp->cl_minorversion) { + /* No need for lock, access serialized in nfsd4_cb_prepare */ + ++clp->cl_cb_session->se_cb_seq_nr; + clear_bit(0, &clp->cl_cb_slot_busy); + rpc_wake_up_next(&clp->cl_cb_waitq); + dprintk("%s: freed slot, new seqid=%d\n", __func__, + clp->cl_cb_session->se_cb_seq_nr); + + /* We're done looking into the sequence information */ + task->tk_msg.rpc_resp = NULL; + } +} + + +static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); + struct nfs4_client *clp = dp->dl_stid.sc_client; + struct rpc_clnt *current_rpc_client = clp->cl_cb_client; + + nfsd4_cb_done(task, calldata); + + if (current_rpc_client != task->tk_client) { + /* We're shutting down or changing cl_cb_client; leave + * it to nfsd4_process_cb_update to restart the call if + * necessary. 
*/ + return; + } + + if (cb->cb_done) + return; + switch (task->tk_status) { + case 0: + cb->cb_done = true; + return; + case -EBADHANDLE: + case -NFS4ERR_BAD_STATEID: + /* Race: client probably got cb_recall + * before open reply granting delegation */ + break; + default: + /* Network partition? */ + nfsd4_mark_cb_down(clp, task->tk_status); + } + if (dp->dl_retries--) { + rpc_delay(task, 2*HZ); + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; + } + nfsd4_mark_cb_down(clp, task->tk_status); + cb->cb_done = true; +} + +static void nfsd4_cb_recall_release(void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_client *clp = cb->cb_clp; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); + + if (cb->cb_done) { + spin_lock(&clp->cl_lock); + list_del(&cb->cb_per_client); + spin_unlock(&clp->cl_lock); + nfs4_put_delegation(dp); + } +} + +static const struct rpc_call_ops nfsd4_cb_recall_ops = { + .rpc_call_prepare = nfsd4_cb_prepare, + .rpc_call_done = nfsd4_cb_recall_done, + .rpc_release = nfsd4_cb_recall_release, +}; + +int nfsd4_create_callback_queue(void) +{ + callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); + if (!callback_wq) + return -ENOMEM; + return 0; +} + +void nfsd4_destroy_callback_queue(void) +{ + destroy_workqueue(callback_wq); +} + +/* must be called under the state lock */ +void nfsd4_shutdown_callback(struct nfs4_client *clp) +{ + set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags); + /* + * Note this won't actually result in a null callback; + * instead, nfsd4_do_callback_rpc() will detect the killed + * client, destroy the rpc client, and stop: + */ + do_probe_callback(clp); + flush_workqueue(callback_wq); +} + +static void nfsd4_release_cb(struct nfsd4_callback *cb) +{ + if (cb->cb_ops->rpc_release) + cb->cb_ops->rpc_release(cb); +} + +/* requires cl_lock: */ +static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) +{ + struct nfsd4_session *s; + struct 
nfsd4_conn *c; + + list_for_each_entry(s, &clp->cl_sessions, se_perclnt) { + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_flags & NFS4_CDFC4_BACK) + return c; + } + } + return NULL; +} + +static void nfsd4_process_cb_update(struct nfsd4_callback *cb) +{ + struct nfs4_cb_conn conn; + struct nfs4_client *clp = cb->cb_clp; + struct nfsd4_session *ses = NULL; + struct nfsd4_conn *c; + int err; + + /* + * This is either an update, or the client dying; in either case, + * kill the old client: + */ + if (clp->cl_cb_client) { + rpc_shutdown_client(clp->cl_cb_client); + clp->cl_cb_client = NULL; + } + if (clp->cl_cb_conn.cb_xprt) { + svc_xprt_put(clp->cl_cb_conn.cb_xprt); + clp->cl_cb_conn.cb_xprt = NULL; + } + if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) + return; + spin_lock(&clp->cl_lock); + /* + * Only serialized callback code is allowed to clear these + * flags; main nfsd code can only set them: + */ + BUG_ON(!clp->cl_cb_flags); + clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); + c = __nfsd4_find_backchannel(clp); + if (c) { + svc_xprt_get(c->cn_xprt); + conn.cb_xprt = c->cn_xprt; + ses = c->cn_session; + } + spin_unlock(&clp->cl_lock); + + err = setup_callback_client(clp, &conn, ses); + if (err) { + warn_no_callback_path(clp, err); + return; + } + /* Yay, the callback channel's back! 
Restart any callbacks: */ + list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) + run_nfsd4_cb(cb); +} + +void nfsd4_do_callback_rpc(struct work_struct *w) +{ + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); + struct nfs4_client *clp = cb->cb_clp; + struct rpc_clnt *clnt; + + if (clp->cl_cb_flags) + nfsd4_process_cb_update(cb); + + clnt = clp->cl_cb_client; + if (!clnt) { + /* Callback channel broken, or client killed; give up: */ + nfsd4_release_cb(cb); + return; + } + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + cb->cb_ops, cb); +} + +void nfsd4_cb_recall(struct nfs4_delegation *dp) +{ + struct nfsd4_callback *cb = &dp->dl_recall; + struct nfs4_client *clp = dp->dl_stid.sc_client; + + dp->dl_retries = 1; + cb->cb_op = dp; + cb->cb_clp = clp; + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; + cb->cb_msg.rpc_argp = cb; + cb->cb_msg.rpc_resp = cb; + cb->cb_msg.rpc_cred = callback_cred; + + cb->cb_ops = &nfsd4_cb_recall_ops; + dp->dl_retries = 1; + + INIT_LIST_HEAD(&cb->cb_per_client); + cb->cb_done = true; + + run_nfsd4_cb(&dp->dl_recall); +} diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c new file mode 100644 index 00000000000..94096273cd6 --- /dev/null +++ b/fs/nfsd/nfs4idmap.c @@ -0,0 +1,588 @@ +/* + * Mapping of UID/GIDs to name and vice versa. + * + * Copyright (c) 2002, 2003 The Regents of the University of + * Michigan. All rights reserved. + * + * Marius Aamodt Eriksen <marius@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <net/net_namespace.h> +#include "idmap.h" +#include "nfsd.h" + +/* + * Cache entry + */ + +/* + * XXX we know that IDMAP_NAMESZ < PAGE_SIZE, but it's ugly to rely on + * that. 
+ */ + +#define IDMAP_TYPE_USER 0 +#define IDMAP_TYPE_GROUP 1 + +struct ent { + struct cache_head h; + int type; /* User / Group */ + uid_t id; + char name[IDMAP_NAMESZ]; + char authname[IDMAP_NAMESZ]; +}; + +/* Common entry handling */ + +#define ENT_HASHBITS 8 +#define ENT_HASHMAX (1 << ENT_HASHBITS) + +static void +ent_init(struct cache_head *cnew, struct cache_head *citm) +{ + struct ent *new = container_of(cnew, struct ent, h); + struct ent *itm = container_of(citm, struct ent, h); + + new->id = itm->id; + new->type = itm->type; + + strlcpy(new->name, itm->name, sizeof(new->name)); + strlcpy(new->authname, itm->authname, sizeof(new->name)); +} + +static void +ent_put(struct kref *ref) +{ + struct ent *map = container_of(ref, struct ent, h.ref); + kfree(map); +} + +static struct cache_head * +ent_alloc(void) +{ + struct ent *e = kmalloc(sizeof(*e), GFP_KERNEL); + if (e) + return &e->h; + else + return NULL; +} + +/* + * ID -> Name cache + */ + +static struct cache_head *idtoname_table[ENT_HASHMAX]; + +static uint32_t +idtoname_hash(struct ent *ent) +{ + uint32_t hash; + + hash = hash_str(ent->authname, ENT_HASHBITS); + hash = hash_long(hash ^ ent->id, ENT_HASHBITS); + + /* Flip LSB for user/group */ + if (ent->type == IDMAP_TYPE_GROUP) + hash ^= 1; + + return hash; +} + +static void +idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, + int *blen) +{ + struct ent *ent = container_of(ch, struct ent, h); + char idstr[11]; + + qword_add(bpp, blen, ent->authname); + snprintf(idstr, sizeof(idstr), "%u", ent->id); + qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? 
"group" : "user"); + qword_add(bpp, blen, idstr); + + (*bpp)[-1] = '\n'; +} + +static int +idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) +{ + return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); +} + +static int +idtoname_match(struct cache_head *ca, struct cache_head *cb) +{ + struct ent *a = container_of(ca, struct ent, h); + struct ent *b = container_of(cb, struct ent, h); + + return (a->id == b->id && a->type == b->type && + strcmp(a->authname, b->authname) == 0); +} + +static int +idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) +{ + struct ent *ent; + + if (h == NULL) { + seq_puts(m, "#domain type id [name]\n"); + return 0; + } + ent = container_of(h, struct ent, h); + seq_printf(m, "%s %s %u", ent->authname, + ent->type == IDMAP_TYPE_GROUP ? "group" : "user", + ent->id); + if (test_bit(CACHE_VALID, &h->flags)) + seq_printf(m, " %s", ent->name); + seq_printf(m, "\n"); + return 0; +} + +static void +warn_no_idmapd(struct cache_detail *detail, int has_died) +{ + printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", + has_died ? 
"died" : "not been started"); +} + + +static int idtoname_parse(struct cache_detail *, char *, int); +static struct ent *idtoname_lookup(struct ent *); +static struct ent *idtoname_update(struct ent *, struct ent *); + +static struct cache_detail idtoname_cache = { + .owner = THIS_MODULE, + .hash_size = ENT_HASHMAX, + .hash_table = idtoname_table, + .name = "nfs4.idtoname", + .cache_put = ent_put, + .cache_upcall = idtoname_upcall, + .cache_parse = idtoname_parse, + .cache_show = idtoname_show, + .warn_no_listener = warn_no_idmapd, + .match = idtoname_match, + .init = ent_init, + .update = ent_init, + .alloc = ent_alloc, +}; + +static int +idtoname_parse(struct cache_detail *cd, char *buf, int buflen) +{ + struct ent ent, *res; + char *buf1, *bp; + int len; + int error = -EINVAL; + + if (buf[buflen - 1] != '\n') + return (-EINVAL); + buf[buflen - 1]= '\0'; + + buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf1 == NULL) + return (-ENOMEM); + + memset(&ent, 0, sizeof(ent)); + + /* Authentication name */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + memcpy(ent.authname, buf1, sizeof(ent.authname)); + + /* Type */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.type = strcmp(buf1, "user") == 0 ? 
+ IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; + + /* ID */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.id = simple_strtoul(buf1, &bp, 10); + if (bp == buf1) + goto out; + + /* expiry */ + ent.h.expiry_time = get_expiry(&buf); + if (ent.h.expiry_time == 0) + goto out; + + error = -ENOMEM; + res = idtoname_lookup(&ent); + if (!res) + goto out; + + /* Name */ + error = -EINVAL; + len = qword_get(&buf, buf1, PAGE_SIZE); + if (len < 0) + goto out; + if (len == 0) + set_bit(CACHE_NEGATIVE, &ent.h.flags); + else if (len >= IDMAP_NAMESZ) + goto out; + else + memcpy(ent.name, buf1, sizeof(ent.name)); + error = -ENOMEM; + res = idtoname_update(&ent, res); + if (res == NULL) + goto out; + + cache_put(&res->h, &idtoname_cache); + + error = 0; +out: + kfree(buf1); + + return error; +} + + +static struct ent * +idtoname_lookup(struct ent *item) +{ + struct cache_head *ch = sunrpc_cache_lookup(&idtoname_cache, + &item->h, + idtoname_hash(item)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + +static struct ent * +idtoname_update(struct ent *new, struct ent *old) +{ + struct cache_head *ch = sunrpc_cache_update(&idtoname_cache, + &new->h, &old->h, + idtoname_hash(new)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + + +/* + * Name -> ID cache + */ + +static struct cache_head *nametoid_table[ENT_HASHMAX]; + +static inline int +nametoid_hash(struct ent *ent) +{ + return hash_str(ent->name, ENT_HASHBITS); +} + +static void +nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, + int *blen) +{ + struct ent *ent = container_of(ch, struct ent, h); + + qword_add(bpp, blen, ent->authname); + qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? 
"group" : "user"); + qword_add(bpp, blen, ent->name); + + (*bpp)[-1] = '\n'; +} + +static int +nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) +{ + return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); +} + +static int +nametoid_match(struct cache_head *ca, struct cache_head *cb) +{ + struct ent *a = container_of(ca, struct ent, h); + struct ent *b = container_of(cb, struct ent, h); + + return (a->type == b->type && strcmp(a->name, b->name) == 0 && + strcmp(a->authname, b->authname) == 0); +} + +static int +nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) +{ + struct ent *ent; + + if (h == NULL) { + seq_puts(m, "#domain type name [id]\n"); + return 0; + } + ent = container_of(h, struct ent, h); + seq_printf(m, "%s %s %s", ent->authname, + ent->type == IDMAP_TYPE_GROUP ? "group" : "user", + ent->name); + if (test_bit(CACHE_VALID, &h->flags)) + seq_printf(m, " %u", ent->id); + seq_printf(m, "\n"); + return 0; +} + +static struct ent *nametoid_lookup(struct ent *); +static struct ent *nametoid_update(struct ent *, struct ent *); +static int nametoid_parse(struct cache_detail *, char *, int); + +static struct cache_detail nametoid_cache = { + .owner = THIS_MODULE, + .hash_size = ENT_HASHMAX, + .hash_table = nametoid_table, + .name = "nfs4.nametoid", + .cache_put = ent_put, + .cache_upcall = nametoid_upcall, + .cache_parse = nametoid_parse, + .cache_show = nametoid_show, + .warn_no_listener = warn_no_idmapd, + .match = nametoid_match, + .init = ent_init, + .update = ent_init, + .alloc = ent_alloc, +}; + +static int +nametoid_parse(struct cache_detail *cd, char *buf, int buflen) +{ + struct ent ent, *res; + char *buf1; + int error = -EINVAL; + + if (buf[buflen - 1] != '\n') + return (-EINVAL); + buf[buflen - 1]= '\0'; + + buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf1 == NULL) + return (-ENOMEM); + + memset(&ent, 0, sizeof(ent)); + + /* Authentication name */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; 
+ memcpy(ent.authname, buf1, sizeof(ent.authname)); + + /* Type */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.type = strcmp(buf1, "user") == 0 ? + IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; + + /* Name */ + error = qword_get(&buf, buf1, PAGE_SIZE); + if (error <= 0 || error >= IDMAP_NAMESZ) + goto out; + memcpy(ent.name, buf1, sizeof(ent.name)); + + /* expiry */ + ent.h.expiry_time = get_expiry(&buf); + if (ent.h.expiry_time == 0) + goto out; + + /* ID */ + error = get_int(&buf, &ent.id); + if (error == -EINVAL) + goto out; + if (error == -ENOENT) + set_bit(CACHE_NEGATIVE, &ent.h.flags); + + error = -ENOMEM; + res = nametoid_lookup(&ent); + if (res == NULL) + goto out; + res = nametoid_update(&ent, res); + if (res == NULL) + goto out; + + cache_put(&res->h, &nametoid_cache); + error = 0; +out: + kfree(buf1); + + return (error); +} + + +static struct ent * +nametoid_lookup(struct ent *item) +{ + struct cache_head *ch = sunrpc_cache_lookup(&nametoid_cache, + &item->h, + nametoid_hash(item)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + +static struct ent * +nametoid_update(struct ent *new, struct ent *old) +{ + struct cache_head *ch = sunrpc_cache_update(&nametoid_cache, + &new->h, &old->h, + nametoid_hash(new)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + +/* + * Exported API + */ + +int +nfsd_idmap_init(void) +{ + int rv; + + rv = cache_register_net(&idtoname_cache, &init_net); + if (rv) + return rv; + rv = cache_register_net(&nametoid_cache, &init_net); + if (rv) + cache_unregister_net(&idtoname_cache, &init_net); + return rv; +} + +void +nfsd_idmap_shutdown(void) +{ + cache_unregister_net(&idtoname_cache, &init_net); + cache_unregister_net(&nametoid_cache, &init_net); +} + +static int +idmap_lookup(struct svc_rqst *rqstp, + struct ent *(*lookup_fn)(struct ent *), struct ent *key, + struct cache_detail *detail, struct ent **item) +{ + int ret; + + *item = lookup_fn(key); + if 
(!*item) + return -ENOMEM; + retry: + ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle); + + if (ret == -ETIMEDOUT) { + struct ent *prev_item = *item; + *item = lookup_fn(key); + if (*item != prev_item) + goto retry; + cache_put(&(*item)->h, detail); + } + return ret; +} + +static char * +rqst_authname(struct svc_rqst *rqstp) +{ + struct auth_domain *clp; + + clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client; + return clp->name; +} + +static __be32 +idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, + uid_t *id) +{ + struct ent *item, key = { + .type = type, + }; + int ret; + + if (namelen + 1 > sizeof(key.name)) + return nfserr_badowner; + memcpy(key.name, name, namelen); + key.name[namelen] = '\0'; + strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); + if (ret == -ENOENT) + return nfserr_badowner; + if (ret) + return nfserrno(ret); + *id = item->id; + cache_put(&item->h, &nametoid_cache); + return 0; +} + +static int +idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) +{ + struct ent *item, key = { + .id = id, + .type = type, + }; + int ret; + + strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); + if (ret == -ENOENT) + return sprintf(name, "%u", id); + if (ret) + return ret; + ret = strlen(item->name); + BUG_ON(ret > IDMAP_NAMESZ); + memcpy(name, item->name, ret); + cache_put(&item->h, &idtoname_cache); + return ret; +} + +__be32 +nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, + __u32 *id) +{ + return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); +} + +__be32 +nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, + __u32 *id) +{ + return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); +} + +int 
+nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +{ + return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); +} + +int +nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +{ + return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); +} diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c new file mode 100644 index 00000000000..896da74ec56 --- /dev/null +++ b/fs/nfsd/nfs4proc.c @@ -0,0 +1,1720 @@ +/* + * Server-side procedures for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <linux/file.h> +#include <linux/slab.h> + +#include "idmap.h" +#include "cache.h" +#include "xdr4.h" +#include "vfs.h" + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +static u32 nfsd_attrmask[] = { + NFSD_WRITEABLE_ATTRS_WORD0, + NFSD_WRITEABLE_ATTRS_WORD1, + NFSD_WRITEABLE_ATTRS_WORD2 +}; + +static u32 nfsd41_ex_attrmask[] = { + NFSD_SUPPATTR_EXCLCREAT_WORD0, + NFSD_SUPPATTR_EXCLCREAT_WORD1, + NFSD_SUPPATTR_EXCLCREAT_WORD2 +}; + +static __be32 +check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + u32 *bmval, u32 *writable) +{ + struct dentry *dentry = cstate->current_fh.fh_dentry; + + /* + * Check about attributes are supported by the NFSv4 server or not. + * According to spec, unsupported attributes return ERR_ATTRNOTSUPP. + */ + if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) || + (bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) || + (bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) + return nfserr_attrnotsupp; + + /* + * Check FATTR4_WORD0_ACL can be supported + * in current environment or not. + */ + if (bmval[0] & FATTR4_WORD0_ACL) { + if (!IS_POSIXACL(dentry->d_inode)) + return nfserr_attrnotsupp; + } + + /* + * According to spec, read-only attributes return ERR_INVAL. 
+ */ + if (writable) { + if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || + (bmval[2] & ~writable[2])) + return nfserr_inval; + } + + return nfs_ok; +} + +static __be32 +nfsd4_check_open_attributes(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +{ + __be32 status = nfs_ok; + + if (open->op_create == NFS4_OPEN_CREATE) { + if (open->op_createmode == NFS4_CREATE_UNCHECKED + || open->op_createmode == NFS4_CREATE_GUARDED) + status = check_attr_support(rqstp, cstate, + open->op_bmval, nfsd_attrmask); + else if (open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1) + status = check_attr_support(rqstp, cstate, + open->op_bmval, nfsd41_ex_attrmask); + } + + return status; +} + +static int +is_create_with_attrs(struct nfsd4_open *open) +{ + return open->op_create == NFS4_OPEN_CREATE + && (open->op_createmode == NFS4_CREATE_UNCHECKED + || open->op_createmode == NFS4_CREATE_GUARDED + || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); +} + +/* + * if error occurs when setting the acl, just clear the acl bit + * in the returned attr bitmap. 
+ */ +static void +do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl, u32 *bmval) +{ + __be32 status; + + status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); + if (status) + /* + * We should probably fail the whole open at this point, + * but we've already created the file, so it's too late; + * So this seems the least of evils: + */ + bmval[0] &= ~FATTR4_WORD0_ACL; +} + +static inline void +fh_dup2(struct svc_fh *dst, struct svc_fh *src) +{ + fh_put(dst); + dget(src->fh_dentry); + if (src->fh_export) + cache_get(&src->fh_export->h); + *dst = *src; +} + +static __be32 +do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode) +{ + __be32 status; + + if (open->op_truncate && + !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) + return nfserr_inval; + + accmode |= NFSD_MAY_READ_IF_EXEC; + + if (open->op_share_access & NFS4_SHARE_ACCESS_READ) + accmode |= NFSD_MAY_READ; + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC); + if (open->op_share_deny & NFS4_SHARE_DENY_READ) + accmode |= NFSD_MAY_WRITE; + + status = fh_verify(rqstp, current_fh, S_IFREG, accmode); + + return status; +} + +static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) +{ + umode_t mode = fh->fh_dentry->d_inode->i_mode; + + if (S_ISREG(mode)) + return nfs_ok; + if (S_ISDIR(mode)) + return nfserr_isdir; + /* + * Using err_symlink as our catch-all case may look odd; but + * there's no other obvious error for this case in 4.0, and we + * happen to know that it will cause the linux v4 client to do + * the right thing on attempts to open something other than a + * regular file. 
+ */ + return nfserr_symlink; +} + +static __be32 +do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + struct svc_fh resfh; + __be32 status; + + fh_init(&resfh, NFS4_FHSIZE); + open->op_truncate = 0; + + if (open->op_create) { + /* FIXME: check session persistence and pnfs flags. + * The nfsv4.1 spec requires the following semantics: + * + * Persistent | pNFS | Server REQUIRED | Client Allowed + * Reply Cache | server | | + * -------------+--------+-----------------+-------------------- + * no | no | EXCLUSIVE4_1 | EXCLUSIVE4_1 + * | | | (SHOULD) + * | | and EXCLUSIVE4 | or EXCLUSIVE4 + * | | | (SHOULD NOT) + * no | yes | EXCLUSIVE4_1 | EXCLUSIVE4_1 + * yes | no | GUARDED4 | GUARDED4 + * yes | yes | GUARDED4 | GUARDED4 + */ + + /* + * Note: create modes (UNCHECKED,GUARDED...) are the same + * in NFSv4 as in v3 except EXCLUSIVE4_1. + */ + status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, + open->op_fname.len, &open->op_iattr, + &resfh, open->op_createmode, + (u32 *)open->op_verf.data, + &open->op_truncate, &open->op_created); + + /* + * Following rfc 3530 14.2.16, use the returned bitmask + * to indicate which attributes we used to store the + * verifier: + */ + if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) + open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_MODIFY); + } else { + status = nfsd_lookup(rqstp, current_fh, + open->op_fname.data, open->op_fname.len, &resfh); + fh_unlock(current_fh); + if (status) + goto out; + status = nfsd_check_obj_isreg(&resfh); + } + if (status) + goto out; + + if (is_create_with_attrs(open) && open->op_acl != NULL) + do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval); + + set_change_info(&open->op_cinfo, current_fh); + fh_dup2(current_fh, &resfh); + + /* set reply cache */ + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, + &resfh.fh_handle); + if (!open->op_created) + status = do_open_permission(rqstp, 
current_fh, open, + NFSD_MAY_NOP); + +out: + fh_put(&resfh); + return status; +} + +static __be32 +do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + __be32 status; + + /* We don't know the target directory, and therefore can not + * set the change info + */ + + memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); + + /* set replay cache */ + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, + &current_fh->fh_handle); + + open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && + (open->op_iattr.ia_size == 0); + + status = do_open_permission(rqstp, current_fh, open, + NFSD_MAY_OWNER_OVERRIDE); + + return status; +} + +static void +copy_clientid(clientid_t *clid, struct nfsd4_session *session) +{ + struct nfsd4_sessionid *sid = + (struct nfsd4_sessionid *)session->se_sessionid.data; + + clid->cl_boot = sid->clientid.cl_boot; + clid->cl_id = sid->clientid.cl_id; +} + +static __be32 +nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_open *open) +{ + __be32 status; + struct nfsd4_compoundres *resp; + + dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", + (int)open->op_fname.len, open->op_fname.data, + open->op_openowner); + + /* This check required by spec. */ + if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) + return nfserr_inval; + + /* We don't yet support WANT bits: */ + open->op_share_access &= NFS4_SHARE_ACCESS_MASK; + + open->op_created = 0; + /* + * RFC5661 18.51.3 + * Before RECLAIM_COMPLETE done, server should deny new lock + */ + if (nfsd4_has_session(cstate) && + !cstate->session->se_client->cl_firststate && + open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_grace; + + if (nfsd4_has_session(cstate)) + copy_clientid(&open->op_clientid, cstate->session); + + nfs4_lock_state(); + + /* check seqid for replay. 
set nfs4_owner */ + resp = rqstp->rq_resp; + status = nfsd4_process_open1(&resp->cstate, open); + if (status == nfserr_replay_me) { + struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; + fh_put(&cstate->current_fh); + fh_copy_shallow(&cstate->current_fh.fh_handle, + &rp->rp_openfh); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); + if (status) + dprintk("nfsd4_open: replay failed" + " restoring previous filehandle\n"); + else + status = nfserr_replay_me; + } + if (status) + goto out; + + status = nfsd4_check_open_attributes(rqstp, cstate, open); + if (status) + goto out; + + /* Openowner is now set, so sequence id will get bumped. Now we need + * these checks before we do any creates: */ + status = nfserr_grace; + if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + goto out; + status = nfserr_no_grace; + if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + goto out; + + switch (open->op_claim_type) { + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_NULL: + status = do_open_lookup(rqstp, &cstate->current_fh, + open); + if (status) + goto out; + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + status = nfs4_check_open_reclaim(&open->op_clientid); + if (status) + goto out; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + status = do_open_fhandle(rqstp, &cstate->current_fh, + open); + if (status) + goto out; + break; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + dprintk("NFSD: unsupported OPEN claim type %d\n", + open->op_claim_type); + status = nfserr_notsupp; + goto out; + default: + dprintk("NFSD: Invalid OPEN claim type %d\n", + open->op_claim_type); + status = nfserr_inval; + goto out; + } + /* + * nfsd4_process_open2() does the actual opening of the file. 
If + * successful, it (1) truncates the file if open->op_truncate was + * set, (2) sets open->op_stateid, (3) sets open->op_delegation. + */ + status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); + WARN_ON(status && open->op_created); +out: + nfsd4_cleanup_open_state(open, status); + if (open->op_openowner) + cstate->replay_owner = &open->op_openowner->oo_owner; + else + nfs4_unlock_state(); + return status; +} + +/* + * filehandle-manipulating ops. + */ +static __be32 +nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct svc_fh **getfh) +{ + if (!cstate->current_fh.fh_dentry) + return nfserr_nofilehandle; + + *getfh = &cstate->current_fh; + return nfs_ok; +} + +static __be32 +nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_putfh *putfh) +{ + fh_put(&cstate->current_fh); + cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; + memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, + putfh->pf_fhlen); + return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); +} + +static __be32 +nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + void *arg) +{ + __be32 status; + + fh_put(&cstate->current_fh); + status = exp_pseudoroot(rqstp, &cstate->current_fh); + return status; +} + +static __be32 +nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + void *arg) +{ + if (!cstate->save_fh.fh_dentry) + return nfserr_restorefh; + + fh_dup2(&cstate->current_fh, &cstate->save_fh); + return nfs_ok; +} + +static __be32 +nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + void *arg) +{ + if (!cstate->current_fh.fh_dentry) + return nfserr_nofilehandle; + + fh_dup2(&cstate->save_fh, &cstate->current_fh); + return nfs_ok; +} + +/* + * misc nfsv4 ops + */ +static __be32 +nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_access *access) +{ + if (access->ac_req_access & 
~NFS3_ACCESS_FULL) + return nfserr_inval; + + access->ac_resp_access = access->ac_req_access; + return nfsd_access(rqstp, &cstate->current_fh, &access->ac_resp_access, + &access->ac_supported); +} + +static __be32 +nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_commit *commit) +{ + u32 *p = (u32 *)commit->co_verf.data; + *p++ = nfssvc_boot.tv_sec; + *p++ = nfssvc_boot.tv_usec; + + return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, + commit->co_count); +} + +static __be32 +nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_create *create) +{ + struct svc_fh resfh; + __be32 status; + dev_t rdev; + + fh_init(&resfh, NFS4_FHSIZE); + + status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, + NFSD_MAY_CREATE); + if (status) + return status; + + status = check_attr_support(rqstp, cstate, create->cr_bmval, + nfsd_attrmask); + if (status) + return status; + + switch (create->cr_type) { + case NF4LNK: + /* ugh! we have to null-terminate the linktext, or + * vfs_symlink() will choke. it is always safe to + * null-terminate by brute force, since at worst we + * will overwrite the first byte of the create namelen + * in the XDR buffer, which has already been extracted + * during XDR decode. 
+ */ + create->cr_linkname[create->cr_linklen] = 0; + + status = nfsd_symlink(rqstp, &cstate->current_fh, + create->cr_name, create->cr_namelen, + create->cr_linkname, create->cr_linklen, + &resfh, &create->cr_iattr); + break; + + case NF4BLK: + rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); + if (MAJOR(rdev) != create->cr_specdata1 || + MINOR(rdev) != create->cr_specdata2) + return nfserr_inval; + status = nfsd_create(rqstp, &cstate->current_fh, + create->cr_name, create->cr_namelen, + &create->cr_iattr, S_IFBLK, rdev, &resfh); + break; + + case NF4CHR: + rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); + if (MAJOR(rdev) != create->cr_specdata1 || + MINOR(rdev) != create->cr_specdata2) + return nfserr_inval; + status = nfsd_create(rqstp, &cstate->current_fh, + create->cr_name, create->cr_namelen, + &create->cr_iattr,S_IFCHR, rdev, &resfh); + break; + + case NF4SOCK: + status = nfsd_create(rqstp, &cstate->current_fh, + create->cr_name, create->cr_namelen, + &create->cr_iattr, S_IFSOCK, 0, &resfh); + break; + + case NF4FIFO: + status = nfsd_create(rqstp, &cstate->current_fh, + create->cr_name, create->cr_namelen, + &create->cr_iattr, S_IFIFO, 0, &resfh); + break; + + case NF4DIR: + create->cr_iattr.ia_valid &= ~ATTR_SIZE; + status = nfsd_create(rqstp, &cstate->current_fh, + create->cr_name, create->cr_namelen, + &create->cr_iattr, S_IFDIR, 0, &resfh); + break; + + default: + status = nfserr_badtype; + } + + if (status) + goto out; + + if (create->cr_acl != NULL) + do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, + create->cr_bmval); + + fh_unlock(&cstate->current_fh); + set_change_info(&create->cr_cinfo, &cstate->current_fh); + fh_dup2(&cstate->current_fh, &resfh); +out: + fh_put(&resfh); + return status; +} + +static __be32 +nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_getattr *getattr) +{ + __be32 status; + + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); + if (status) + return 
status; + + if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) + return nfserr_inval; + + getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); + getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); + getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); + + getattr->ga_fhp = &cstate->current_fh; + return nfs_ok; +} + +static __be32 +nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_link *link) +{ + __be32 status = nfserr_nofilehandle; + + if (!cstate->save_fh.fh_dentry) + return status; + status = nfsd_link(rqstp, &cstate->current_fh, + link->li_name, link->li_namelen, &cstate->save_fh); + if (!status) + set_change_info(&link->li_cinfo, &cstate->current_fh); + return status; +} + +static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh) +{ + struct svc_fh tmp_fh; + __be32 ret; + + fh_init(&tmp_fh, NFS4_FHSIZE); + ret = exp_pseudoroot(rqstp, &tmp_fh); + if (ret) + return ret; + if (tmp_fh.fh_dentry == fh->fh_dentry) { + fh_put(&tmp_fh); + return nfserr_noent; + } + fh_put(&tmp_fh); + return nfsd_lookup(rqstp, fh, "..", 2, fh); +} + +static __be32 +nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + void *arg) +{ + return nfsd4_do_lookupp(rqstp, &cstate->current_fh); +} + +static __be32 +nfsd4_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_lookup *lookup) +{ + return nfsd_lookup(rqstp, &cstate->current_fh, + lookup->lo_name, lookup->lo_len, + &cstate->current_fh); +} + +static __be32 +nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_read *read) +{ + __be32 status; + + /* no need to check permission - this will be done in nfsd_read() */ + + read->rd_filp = NULL; + if (read->rd_offset >= OFFSET_MAX) + return nfserr_inval; + + nfs4_lock_state(); + /* check stateid */ + if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid, + RD_STATE, &read->rd_filp))) { + dprintk("NFSD: 
nfsd4_read: couldn't process stateid!\n"); + goto out; + } + if (read->rd_filp) + get_file(read->rd_filp); + status = nfs_ok; +out: + nfs4_unlock_state(); + read->rd_rqstp = rqstp; + read->rd_fhp = &cstate->current_fh; + return status; +} + +static __be32 +nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_readdir *readdir) +{ + u64 cookie = readdir->rd_cookie; + static const nfs4_verifier zeroverf; + + /* no need to check permission - this will be done in nfsd_readdir() */ + + if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) + return nfserr_inval; + + readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); + readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); + readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); + + if ((cookie == 1) || (cookie == 2) || + (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) + return nfserr_bad_cookie; + + readdir->rd_rqstp = rqstp; + readdir->rd_fhp = &cstate->current_fh; + return nfs_ok; +} + +static __be32 +nfsd4_readlink(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_readlink *readlink) +{ + readlink->rl_rqstp = rqstp; + readlink->rl_fhp = &cstate->current_fh; + return nfs_ok; +} + +static __be32 +nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_remove *remove) +{ + __be32 status; + + if (locks_in_grace()) + return nfserr_grace; + status = nfsd_unlink(rqstp, &cstate->current_fh, 0, + remove->rm_name, remove->rm_namelen); + if (!status) { + fh_unlock(&cstate->current_fh); + set_change_info(&remove->rm_cinfo, &cstate->current_fh); + } + return status; +} + +static __be32 +nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_rename *rename) +{ + __be32 status = nfserr_nofilehandle; + + if (!cstate->save_fh.fh_dentry) + return status; + if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags + & NFSEXP_NOSUBTREECHECK)) + 
return nfserr_grace; + status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, + rename->rn_snamelen, &cstate->current_fh, + rename->rn_tname, rename->rn_tnamelen); + + /* the underlying filesystem returns different error's than required + * by NFSv4. both save_fh and current_fh have been verified.. */ + if (status == nfserr_isdir) + status = nfserr_exist; + else if ((status == nfserr_notdir) && + (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && + S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) + status = nfserr_exist; + + if (!status) { + set_change_info(&rename->rn_sinfo, &cstate->current_fh); + set_change_info(&rename->rn_tinfo, &cstate->save_fh); + } + return status; +} + +static __be32 +nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_secinfo *secinfo) +{ + struct svc_fh resfh; + struct svc_export *exp; + struct dentry *dentry; + __be32 err; + + fh_init(&resfh, NFS4_FHSIZE); + err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; + err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, + secinfo->si_name, secinfo->si_namelen, + &exp, &dentry); + if (err) + return err; + if (dentry->d_inode == NULL) { + exp_put(exp); + err = nfserr_noent; + } else + secinfo->si_exp = exp; + dput(dentry); + if (cstate->minorversion) + /* See rfc 5661 section 2.6.3.1.1.8 */ + fh_put(&cstate->current_fh); + return err; +} + +static __be32 +nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_secinfo_no_name *sin) +{ + __be32 err; + + switch (sin->sin_style) { + case NFS4_SECINFO_STYLE4_CURRENT_FH: + break; + case NFS4_SECINFO_STYLE4_PARENT: + err = nfsd4_do_lookupp(rqstp, &cstate->current_fh); + if (err) + return err; + break; + default: + return nfserr_inval; + } + exp_get(cstate->current_fh.fh_export); + sin->sin_exp = cstate->current_fh.fh_export; + fh_put(&cstate->current_fh); + return nfs_ok; +} + +static __be32 
+nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_setattr *setattr) +{ + __be32 status = nfs_ok; + + if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { + nfs4_lock_state(); + status = nfs4_preprocess_stateid_op(cstate, + &setattr->sa_stateid, WR_STATE, NULL); + nfs4_unlock_state(); + if (status) { + dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); + return status; + } + } + status = fh_want_write(&cstate->current_fh); + if (status) + return status; + status = nfs_ok; + + status = check_attr_support(rqstp, cstate, setattr->sa_bmval, + nfsd_attrmask); + if (status) + goto out; + + if (setattr->sa_acl != NULL) + status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, + setattr->sa_acl); + if (status) + goto out; + status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, + 0, (time_t)0); +out: + fh_drop_write(&cstate->current_fh); + return status; +} + +static __be32 +nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_write *write) +{ + stateid_t *stateid = &write->wr_stateid; + struct file *filp = NULL; + u32 *p; + __be32 status = nfs_ok; + unsigned long cnt; + + /* no need to check permission - this will be done in nfsd_write() */ + + if (write->wr_offset >= OFFSET_MAX) + return nfserr_inval; + + nfs4_lock_state(); + status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp); + if (filp) + get_file(filp); + nfs4_unlock_state(); + + if (status) { + dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); + return status; + } + + cnt = write->wr_buflen; + write->wr_how_written = write->wr_stable_how; + p = (u32 *)write->wr_verifier.data; + *p++ = nfssvc_boot.tv_sec; + *p++ = nfssvc_boot.tv_usec; + + status = nfsd_write(rqstp, &cstate->current_fh, filp, + write->wr_offset, rqstp->rq_vec, write->wr_vlen, + &cnt, &write->wr_how_written); + if (filp) + fput(filp); + + write->wr_bytes_written = cnt; + + return status; +} + +/* This routine never returns NFS_OK! 
If there are no other errors, it + * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the + * attributes matched. VERIFY is implemented by mapping NFSERR_SAME + * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK. + */ +static __be32 +_nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_verify *verify) +{ + __be32 *buf, *p; + int count; + __be32 status; + + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); + if (status) + return status; + + status = check_attr_support(rqstp, cstate, verify->ve_bmval, NULL); + if (status) + return status; + + if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) + || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) + return nfserr_inval; + if (verify->ve_attrlen & 3) + return nfserr_inval; + + /* count in words: + * bitmap_len(1) + bitmap(2) + attr_len(1) = 4 + */ + count = 4 + (verify->ve_attrlen >> 2); + buf = kmalloc(count << 2, GFP_KERNEL); + if (!buf) + return nfserr_jukebox; + + status = nfsd4_encode_fattr(&cstate->current_fh, + cstate->current_fh.fh_export, + cstate->current_fh.fh_dentry, buf, + &count, verify->ve_bmval, + rqstp, 0); + + /* this means that nfsd4_encode_fattr() ran out of space */ + if (status == nfserr_resource && count == 0) + status = nfserr_not_same; + if (status) + goto out_kfree; + + /* skip bitmap */ + p = buf + 1 + ntohl(buf[0]); + status = nfserr_not_same; + if (ntohl(*p++) != verify->ve_attrlen) + goto out_kfree; + if (!memcmp(p, verify->ve_attrval, verify->ve_attrlen)) + status = nfserr_same; + +out_kfree: + kfree(buf); + return status; +} + +static __be32 +nfsd4_nverify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_verify *verify) +{ + __be32 status; + + status = _nfsd4_verify(rqstp, cstate, verify); + return status == nfserr_not_same ? 
nfs_ok : status; +} + +static __be32 +nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_verify *verify) +{ + __be32 status; + + status = _nfsd4_verify(rqstp, cstate, verify); + return status == nfserr_same ? nfs_ok : status; +} + +/* + * NULL call. + */ +static __be32 +nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +static inline void nfsd4_increment_op_stats(u32 opnum) +{ + if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) + nfsdstats.nfs4_opcount[opnum]++; +} + +typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, + void *); +typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op); + +enum nfsd4_op_flags { + ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ + ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ + ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ + /* For rfc 5661 section 2.6.3.1.1: */ + OP_HANDLES_WRONGSEC = 1 << 3, + OP_IS_PUTFH_LIKE = 1 << 4, + /* + * These are the ops whose result size we estimate before + * encoding, to avoid performing an op then not being able to + * respond or cache a response. This includes writes and setattrs + * as well as the operations usually called "nonidempotent": + */ + OP_MODIFIES_SOMETHING = 1 << 5, + /* + * Cache compounds containing these ops in the xid-based drc: + * We use the DRC for compounds containing non-idempotent + * operations, *except* those that are 4.1-specific (since + * sessions provide their own EOS), and except for stateful + * operations other than setclientid and setclientid_confirm + * (since sequence numbers provide EOS for open, lock, etc in + * the v4.0 case). 
+ */ + OP_CACHEME = 1 << 6, +}; + +struct nfsd4_operation { + nfsd4op_func op_func; + u32 op_flags; + char *op_name; + /* Try to get response size before operation */ + nfsd4op_rsize op_rsize_bop; +}; + +static struct nfsd4_operation nfsd4_ops[]; + +static const char *nfsd4_op_name(unsigned opnum); + +/* + * Enforce NFSv4.1 COMPOUND ordering rules: + * + * Also note, enforced elsewhere: + * - SEQUENCE other than as first op results in + * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().) + * - BIND_CONN_TO_SESSION must be the only op in its compound. + * (Enforced in nfsd4_bind_conn_to_session().) + * - DESTROY_SESSION must be the final operation in a compound, if + * sessionid's in SEQUENCE and DESTROY_SESSION are the same. + * (Enforced in nfsd4_destroy_session().) + */ +static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) +{ + struct nfsd4_op *op = &args->ops[0]; + + /* These ordering requirements don't apply to NFSv4.0: */ + if (args->minorversion == 0) + return nfs_ok; + /* This is weird, but OK, not our problem: */ + if (args->opcnt == 0) + return nfs_ok; + if (op->status == nfserr_op_illegal) + return nfs_ok; + if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP)) + return nfserr_op_not_in_session; + if (op->opnum == OP_SEQUENCE) + return nfs_ok; + if (args->opcnt != 1) + return nfserr_not_only_op; + return nfs_ok; +} + +static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +{ + return &nfsd4_ops[op->opnum]; +} + +bool nfsd4_cache_this_op(struct nfsd4_op *op) +{ + return OPDESC(op)->op_flags & OP_CACHEME; +} + +static bool need_wrongsec_check(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; + struct nfsd4_op *next = &argp->ops[resp->opcnt]; + struct nfsd4_operation *thisd; + struct nfsd4_operation *nextd; + + thisd = OPDESC(this); + /* + * Most ops check wronsec on our own; only the 
putfh-like ops + * have special rules. + */ + if (!(thisd->op_flags & OP_IS_PUTFH_LIKE)) + return false; + /* + * rfc 5661 2.6.3.1.1.6: don't bother erroring out a + * put-filehandle operation if we're not going to use the + * result: + */ + if (argp->opcnt == resp->opcnt) + return false; + + nextd = OPDESC(next); + /* + * Rest of 2.6.3.1.1: certain operations will return WRONGSEC + * errors themselves as necessary; others should check for them + * now: + */ + return !(nextd->op_flags & OP_HANDLES_WRONGSEC); +} + +/* + * COMPOUND call. + */ +static __be32 +nfsd4_proc_compound(struct svc_rqst *rqstp, + struct nfsd4_compoundargs *args, + struct nfsd4_compoundres *resp) +{ + struct nfsd4_op *op; + struct nfsd4_operation *opdesc; + struct nfsd4_compound_state *cstate = &resp->cstate; + int slack_bytes; + u32 plen = 0; + __be32 status; + + resp->xbuf = &rqstp->rq_res; + resp->p = rqstp->rq_res.head[0].iov_base + + rqstp->rq_res.head[0].iov_len; + resp->tagp = resp->p; + /* reserve space for: taglen, tag, and opcnt */ + resp->p += 2 + XDR_QUADLEN(args->taglen); + resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; + resp->taglen = args->taglen; + resp->tag = args->tag; + resp->opcnt = 0; + resp->rqstp = rqstp; + resp->cstate.minorversion = args->minorversion; + resp->cstate.replay_owner = NULL; + resp->cstate.session = NULL; + fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); + fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); + /* + * Don't use the deferral mechanism for NFSv4; compounds make it + * too hard to avoid non-idempotency problems. + */ + rqstp->rq_usedeferral = 0; + + /* + * According to RFC3010, this takes precedence over all other errors. 
+ */ + status = nfserr_minor_vers_mismatch; + if (args->minorversion > nfsd_supported_minorversion) + goto out; + + status = nfs41_check_op_ordering(args); + if (status) { + op = &args->ops[0]; + op->status = status; + goto encode_op; + } + + while (!status && resp->opcnt < args->opcnt) { + op = &args->ops[resp->opcnt++]; + + dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", + resp->opcnt, args->opcnt, op->opnum, + nfsd4_op_name(op->opnum)); + /* + * The XDR decode routines may have pre-set op->status; + * for example, if there is a miscellaneous XDR error + * it will be set to nfserr_bad_xdr. + */ + if (op->status) + goto encode_op; + + /* We must be able to encode a successful response to + * this operation, with enough room left over to encode a + * failed response to the next operation. If we don't + * have enough room, fail with ERR_RESOURCE. + */ + slack_bytes = (char *)resp->end - (char *)resp->p; + if (slack_bytes < COMPOUND_SLACK_SPACE + + COMPOUND_ERR_SLACK_SPACE) { + BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); + op->status = nfserr_resource; + goto encode_op; + } + + opdesc = OPDESC(op); + + if (!cstate->current_fh.fh_dentry) { + if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { + op->status = nfserr_nofilehandle; + goto encode_op; + } + } else if (cstate->current_fh.fh_export->ex_fslocs.migrated && + !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { + op->status = nfserr_moved; + goto encode_op; + } + + /* If op is non-idempotent */ + if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { + plen = opdesc->op_rsize_bop(rqstp, op); + op->status = nfsd4_check_resp_size(resp, plen); + } + + if (op->status) + goto encode_op; + + if (opdesc->op_func) + op->status = opdesc->op_func(rqstp, cstate, &op->u); + else + BUG_ON(op->status == nfs_ok); + + if (!op->status && need_wrongsec_check(rqstp)) + op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + +encode_op: + /* Only from SEQUENCE */ + if (resp->cstate.status == nfserr_replay_cache) { + dprintk("%s 
NFS4.1 replay from cache\n", __func__); + status = op->status; + goto out; + } + if (op->status == nfserr_replay_me) { + op->replay = &cstate->replay_owner->so_replay; + nfsd4_encode_replay(resp, op); + status = op->status = op->replay->rp_status; + } else { + nfsd4_encode_operation(resp, op); + status = op->status; + } + + dprintk("nfsv4 compound op %p opcnt %d #%d: %d: status %d\n", + args->ops, args->opcnt, resp->opcnt, op->opnum, + be32_to_cpu(status)); + + if (cstate->replay_owner) { + nfs4_unlock_state(); + cstate->replay_owner = NULL; + } + /* XXX Ugh, we need to get rid of this kind of special case: */ + if (op->opnum == OP_READ && op->u.read.rd_filp) + fput(op->u.read.rd_filp); + + nfsd4_increment_op_stats(op->opnum); + } + + resp->cstate.status = status; + fh_put(&resp->cstate.current_fh); + fh_put(&resp->cstate.save_fh); + BUG_ON(resp->cstate.replay_owner); +out: + /* Reset deferral mechanism for RPC deferrals */ + rqstp->rq_usedeferral = 1; + dprintk("nfsv4 compound returned %d\n", ntohl(status)); + return status; +} + +#define op_encode_hdr_size (2) +#define op_encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define op_encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE)) +#define op_encode_change_info_maxsz (5) +#define nfs4_fattr_bitmap_maxsz (4) + +#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) + +#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) + +#define op_encode_ace_maxsz (3 + nfs4_owner_maxsz) +#define op_encode_delegation_maxsz (1 + op_encode_stateid_maxsz + 1 + \ + op_encode_ace_maxsz) + +#define op_encode_channel_attrs_maxsz (6 + 1 + 1) + +static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size) * sizeof(__be32); +} + +static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_stateid_maxsz)* 
sizeof(__be32); +} + +static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz + + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_lock_denied_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_stateid_maxsz + + op_encode_change_info_maxsz + 1 + + nfs4_fattr_bitmap_maxsz + + op_encode_delegation_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 maxcount = 0, rlen = 0; + + maxcount = svc_max_payload(rqstp); + rlen = op->u.read.rd_length; + + if (rlen > maxcount) + rlen = maxcount; + + return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; +} + +static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 rlen = op->u.readdir.rd_maxcount; + + if (rlen > PAGE_SIZE) + rlen = PAGE_SIZE; + + return (op_encode_hdr_size + op_encode_verifier_maxsz) + * sizeof(__be32) + rlen; +} + +static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz + + op_encode_change_info_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + 
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); +} + +static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ + 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ + 2 + /*eir_server_owner.so_minor_id */\ + /* eir_server_owner.so_major_id<> */\ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ + /* eir_server_scope<> */\ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ + 1 + /* eir_server_impl_id array length */\ + 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32); +} + +static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\ + 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32); +} + +static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\ + 2 + /* csr_sequence, csr_flags */\ + op_encode_channel_attrs_maxsz + \ + op_encode_channel_attrs_maxsz) * sizeof(__be32); +} + +static struct nfsd4_operation nfsd4_ops[] = { + [OP_ACCESS] = { + .op_func = (nfsd4op_func)nfsd4_access, + .op_name = "OP_ACCESS", + }, + [OP_CLOSE] = { + .op_func = (nfsd4op_func)nfsd4_close, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_CLOSE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + }, + [OP_COMMIT] = { + .op_func = (nfsd4op_func)nfsd4_commit, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_COMMIT", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize, + 
}, + [OP_CREATE] = { + .op_func = (nfsd4op_func)nfsd4_create, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_CREATE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize, + }, + [OP_DELEGRETURN] = { + .op_func = (nfsd4op_func)nfsd4_delegreturn, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_DELEGRETURN", + .op_rsize_bop = nfsd4_only_status_rsize, + }, + [OP_GETATTR] = { + .op_func = (nfsd4op_func)nfsd4_getattr, + .op_flags = ALLOWED_ON_ABSENT_FS, + .op_name = "OP_GETATTR", + }, + [OP_GETFH] = { + .op_func = (nfsd4op_func)nfsd4_getfh, + .op_name = "OP_GETFH", + }, + [OP_LINK] = { + .op_func = (nfsd4op_func)nfsd4_link, + .op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING + | OP_CACHEME, + .op_name = "OP_LINK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize, + }, + [OP_LOCK] = { + .op_func = (nfsd4op_func)nfsd4_lock, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_LOCK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, + }, + [OP_LOCKT] = { + .op_func = (nfsd4op_func)nfsd4_lockt, + .op_name = "OP_LOCKT", + }, + [OP_LOCKU] = { + .op_func = (nfsd4op_func)nfsd4_locku, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_LOCKU", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + }, + [OP_LOOKUP] = { + .op_func = (nfsd4op_func)nfsd4_lookup, + .op_flags = OP_HANDLES_WRONGSEC, + .op_name = "OP_LOOKUP", + }, + [OP_LOOKUPP] = { + .op_func = (nfsd4op_func)nfsd4_lookupp, + .op_flags = OP_HANDLES_WRONGSEC, + .op_name = "OP_LOOKUPP", + }, + [OP_NVERIFY] = { + .op_func = (nfsd4op_func)nfsd4_nverify, + .op_name = "OP_NVERIFY", + }, + [OP_OPEN] = { + .op_func = (nfsd4op_func)nfsd4_open, + .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, + .op_name = "OP_OPEN", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize, + }, + [OP_OPEN_CONFIRM] = { + .op_func = (nfsd4op_func)nfsd4_open_confirm, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_OPEN_CONFIRM", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + }, + 
[OP_OPEN_DOWNGRADE] = { + .op_func = (nfsd4op_func)nfsd4_open_downgrade, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_OPEN_DOWNGRADE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + }, + [OP_PUTFH] = { + .op_func = (nfsd4op_func)nfsd4_putfh, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + .op_name = "OP_PUTFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_PUTPUBFH] = { + .op_func = (nfsd4op_func)nfsd4_putrootfh, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + .op_name = "OP_PUTPUBFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_PUTROOTFH] = { + .op_func = (nfsd4op_func)nfsd4_putrootfh, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + .op_name = "OP_PUTROOTFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_READ] = { + .op_func = (nfsd4op_func)nfsd4_read, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_READ", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, + }, + [OP_READDIR] = { + .op_func = (nfsd4op_func)nfsd4_readdir, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_READDIR", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, + }, + [OP_READLINK] = { + .op_func = (nfsd4op_func)nfsd4_readlink, + .op_name = "OP_READLINK", + }, + [OP_REMOVE] = { + .op_func = (nfsd4op_func)nfsd4_remove, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_REMOVE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize, + }, + [OP_RENAME] = { + .op_func = (nfsd4op_func)nfsd4_rename, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_RENAME", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize, + }, + [OP_RENEW] = { + .op_func = (nfsd4op_func)nfsd4_renew, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING, + .op_name = "OP_RENEW", + .op_rsize_bop = 
(nfsd4op_rsize)nfsd4_only_status_rsize, + + }, + [OP_RESTOREFH] = { + .op_func = (nfsd4op_func)nfsd4_restorefh, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + .op_name = "OP_RESTOREFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_SAVEFH] = { + .op_func = (nfsd4op_func)nfsd4_savefh, + .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, + .op_name = "OP_SAVEFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_SECINFO] = { + .op_func = (nfsd4op_func)nfsd4_secinfo, + .op_flags = OP_HANDLES_WRONGSEC, + .op_name = "OP_SECINFO", + }, + [OP_SETATTR] = { + .op_func = (nfsd4op_func)nfsd4_setattr, + .op_name = "OP_SETATTR", + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize, + }, + [OP_SETCLIENTID] = { + .op_func = (nfsd4op_func)nfsd4_setclientid, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_SETCLIENTID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize, + }, + [OP_SETCLIENTID_CONFIRM] = { + .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_SETCLIENTID_CONFIRM", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_VERIFY] = { + .op_func = (nfsd4op_func)nfsd4_verify, + .op_name = "OP_VERIFY", + }, + [OP_WRITE] = { + .op_func = (nfsd4op_func)nfsd4_write, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_WRITE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, + }, + [OP_RELEASE_LOCKOWNER] = { + .op_func = (nfsd4op_func)nfsd4_release_lockowner, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING, + .op_name = "OP_RELEASE_LOCKOWNER", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + + /* NFSv4.1 operations */ + [OP_EXCHANGE_ID] = { + .op_func = 
(nfsd4op_func)nfsd4_exchange_id, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_EXCHANGE_ID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize, + }, + [OP_BIND_CONN_TO_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_BIND_CONN_TO_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize, + }, + [OP_CREATE_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_create_session, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_CREATE_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize, + }, + [OP_DESTROY_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_destroy_session, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_DESTROY_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_SEQUENCE] = { + .op_func = (nfsd4op_func)nfsd4_sequence, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, + .op_name = "OP_SEQUENCE", + }, + [OP_DESTROY_CLIENTID] = { + .op_func = (nfsd4op_func)nfsd4_destroy_clientid, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_DESTROY_CLIENTID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_RECLAIM_COMPLETE] = { + .op_func = (nfsd4op_func)nfsd4_reclaim_complete, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_RECLAIM_COMPLETE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_SECINFO_NO_NAME] = { + .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, + .op_flags = OP_HANDLES_WRONGSEC, + .op_name = "OP_SECINFO_NO_NAME", + }, + [OP_TEST_STATEID] = { + .op_func = (nfsd4op_func)nfsd4_test_stateid, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_TEST_STATEID", + }, + [OP_FREE_STATEID] = { + .op_func = 
(nfsd4op_func)nfsd4_free_stateid, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_FREE_STATEID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, +}; + +static const char *nfsd4_op_name(unsigned opnum) +{ + if (opnum < ARRAY_SIZE(nfsd4_ops)) + return nfsd4_ops[opnum].op_name; + return "unknown_operation"; +} + +#define nfsd4_voidres nfsd4_voidargs +struct nfsd4_voidargs { int dummy; }; + +static struct svc_procedure nfsd_procedures4[2] = { + [NFSPROC4_NULL] = { + .pc_func = (svc_procfunc) nfsd4_proc_null, + .pc_encode = (kxdrproc_t) nfs4svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd4_voidargs), + .pc_ressize = sizeof(struct nfsd4_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 1, + }, + [NFSPROC4_COMPOUND] = { + .pc_func = (svc_procfunc) nfsd4_proc_compound, + .pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs, + .pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres, + .pc_argsize = sizeof(struct nfsd4_compoundargs), + .pc_ressize = sizeof(struct nfsd4_compoundres), + .pc_release = nfsd4_release_compoundargs, + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = NFSD_BUFSIZE/4, + }, +}; + +struct svc_version nfsd_version4 = { + .vs_vers = 4, + .vs_nproc = 2, + .vs_proc = nfsd_procedures4, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS4_SVC_XDRSIZE, +}; + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c new file mode 100644 index 00000000000..0b3e875d1ab --- /dev/null +++ b/fs/nfsd/nfs4recover.c @@ -0,0 +1,427 @@ +/* +* Copyright (c) 2004 The Regents of the University of Michigan. +* All rights reserved. +* +* Andy Adamson <andros@citi.umich.edu> +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. 
+* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* 3. Neither the name of the University nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +*/ + +#include <linux/file.h> +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/crypto.h> +#include <linux/sched.h> + +#include "nfsd.h" +#include "state.h" +#include "vfs.h" + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +/* Globals */ +static struct file *rec_file; +static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; + +static int +nfs4_save_creds(const struct cred **original_creds) +{ + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = 0; + new->fsgid = 0; + *original_creds = override_creds(new); + put_cred(new); + return 0; +} + +static void +nfs4_reset_creds(const struct cred *original) +{ + revert_creds(original); +} + +static void +md5_to_hex(char *out, char *md5) +{ + int i; + + for (i=0; i<16; i++) { + unsigned char c = md5[i]; + + *out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1); + *out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1); + } + *out = '\0'; +} + +__be32 +nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) +{ + struct xdr_netobj cksum; + struct hash_desc desc; + struct scatterlist sg; + __be32 status = nfserr_jukebox; + + dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", + clname->len, clname->data); + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) + goto out_no_tfm; + cksum.len = crypto_hash_digestsize(desc.tfm); + cksum.data = kmalloc(cksum.len, GFP_KERNEL); + if (cksum.data == NULL) + goto out; + + sg_init_one(&sg, clname->data, clname->len); + + if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data)) + goto out; + + md5_to_hex(dname, cksum.data); + + status = nfs_ok; +out: + kfree(cksum.data); + crypto_free_hash(desc.tfm); +out_no_tfm: + return status; +} + +void nfsd4_create_clid_dir(struct nfs4_client *clp) +{ + const struct cred *original_cred; + char *dname = clp->cl_recdir; + struct dentry *dir, *dentry; + int status; + + dprintk("NFSD: nfsd4_create_clid_dir 
for \"%s\"\n", dname); + + if (clp->cl_firststate) + return; + clp->cl_firststate = 1; + if (!rec_file) + return; + status = nfs4_save_creds(&original_cred); + if (status < 0) + return; + + dir = rec_file->f_path.dentry; + /* lock the parent */ + mutex_lock(&dir->d_inode->i_mutex); + + dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1); + if (IS_ERR(dentry)) { + status = PTR_ERR(dentry); + goto out_unlock; + } + if (dentry->d_inode) + /* + * In the 4.1 case, where we're called from + * reclaim_complete(), records from the previous reboot + * may still be left, so this is OK. + * + * In the 4.0 case, we should never get here; but we may + * as well be forgiving and just succeed silently. + */ + goto out_put; + status = mnt_want_write_file(rec_file); + if (status) + goto out_put; + status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); + mnt_drop_write_file(rec_file); +out_put: + dput(dentry); +out_unlock: + mutex_unlock(&dir->d_inode->i_mutex); + if (status == 0) + vfs_fsync(rec_file, 0); + else + printk(KERN_ERR "NFSD: failed to write recovery record" + " (err %d); please check that %s exists" + " and is writeable", status, + user_recovery_dirname); + nfs4_reset_creds(original_cred); +} + +typedef int (recdir_func)(struct dentry *, struct dentry *); + +struct name_list { + char name[HEXDIR_LEN]; + struct list_head list; +}; + +static int +nfsd4_build_namelist(void *arg, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct list_head *names = arg; + struct name_list *entry; + + if (namlen != HEXDIR_LEN - 1) + return 0; + entry = kmalloc(sizeof(struct name_list), GFP_KERNEL); + if (entry == NULL) + return -ENOMEM; + memcpy(entry->name, name, HEXDIR_LEN - 1); + entry->name[HEXDIR_LEN - 1] = '\0'; + list_add(&entry->list, names); + return 0; +} + +static int +nfsd4_list_rec_dir(recdir_func *f) +{ + const struct cred *original_cred; + struct dentry *dir = rec_file->f_path.dentry; + LIST_HEAD(names); + int status; + + status = 
nfs4_save_creds(&original_cred); + if (status < 0) + return status; + + status = vfs_llseek(rec_file, 0, SEEK_SET); + if (status < 0) { + nfs4_reset_creds(original_cred); + return status; + } + + status = vfs_readdir(rec_file, nfsd4_build_namelist, &names); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + while (!list_empty(&names)) { + struct name_list *entry; + entry = list_entry(names.next, struct name_list, list); + if (!status) { + struct dentry *dentry; + dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); + if (IS_ERR(dentry)) { + status = PTR_ERR(dentry); + break; + } + status = f(dir, dentry); + dput(dentry); + } + list_del(&entry->list); + kfree(entry); + } + mutex_unlock(&dir->d_inode->i_mutex); + nfs4_reset_creds(original_cred); + return status; +} + +static int +nfsd4_unlink_clid_dir(char *name, int namlen) +{ + struct dentry *dir, *dentry; + int status; + + dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); + + dir = rec_file->f_path.dentry; + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + dentry = lookup_one_len(name, dir, namlen); + if (IS_ERR(dentry)) { + status = PTR_ERR(dentry); + goto out_unlock; + } + status = -ENOENT; + if (!dentry->d_inode) + goto out; + status = vfs_rmdir(dir->d_inode, dentry); +out: + dput(dentry); +out_unlock: + mutex_unlock(&dir->d_inode->i_mutex); + return status; +} + +void +nfsd4_remove_clid_dir(struct nfs4_client *clp) +{ + const struct cred *original_cred; + int status; + + if (!rec_file || !clp->cl_firststate) + return; + + status = mnt_want_write_file(rec_file); + if (status) + goto out; + clp->cl_firststate = 0; + + status = nfs4_save_creds(&original_cred); + if (status < 0) + goto out; + + status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); + nfs4_reset_creds(original_cred); + if (status == 0) + vfs_fsync(rec_file, 0); + mnt_drop_write_file(rec_file); +out: + if (status) + printk("NFSD: Failed to remove expired client state directory" + " %.*s\n", 
HEXDIR_LEN, clp->cl_recdir); + return; +} + +static int +purge_old(struct dentry *parent, struct dentry *child) +{ + int status; + + if (nfs4_has_reclaimed_state(child->d_name.name, false)) + return 0; + + status = vfs_rmdir(parent->d_inode, child); + if (status) + printk("failed to remove client recovery directory %s\n", + child->d_name.name); + /* Keep trying, success or failure: */ + return 0; +} + +void +nfsd4_recdir_purge_old(void) { + int status; + + if (!rec_file) + return; + status = mnt_want_write_file(rec_file); + if (status) + goto out; + status = nfsd4_list_rec_dir(purge_old); + if (status == 0) + vfs_fsync(rec_file, 0); + mnt_drop_write_file(rec_file); +out: + if (status) + printk("nfsd4: failed to purge old clients from recovery" + " directory %s\n", rec_file->f_path.dentry->d_name.name); +} + +static int +load_recdir(struct dentry *parent, struct dentry *child) +{ + if (child->d_name.len != HEXDIR_LEN - 1) { + printk("nfsd4: illegal name %s in recovery directory\n", + child->d_name.name); + /* Keep trying; maybe the others are OK: */ + return 0; + } + nfs4_client_to_reclaim(child->d_name.name); + return 0; +} + +int +nfsd4_recdir_load(void) { + int status; + + if (!rec_file) + return 0; + + status = nfsd4_list_rec_dir(load_recdir); + if (status) + printk("nfsd4: failed loading clients from recovery" + " directory %s\n", rec_file->f_path.dentry->d_name.name); + return status; +} + +/* + * Hold reference to the recovery directory. 
+ */ + +void +nfsd4_init_recdir() +{ + const struct cred *original_cred; + int status; + + printk("NFSD: Using %s as the NFSv4 state recovery directory\n", + user_recovery_dirname); + + BUG_ON(rec_file); + + status = nfs4_save_creds(&original_cred); + if (status < 0) { + printk("NFSD: Unable to change credentials to find recovery" + " directory: error %d\n", + status); + return; + } + + rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); + if (IS_ERR(rec_file)) { + printk("NFSD: unable to find recovery directory %s\n", + user_recovery_dirname); + rec_file = NULL; + } + + nfs4_reset_creds(original_cred); +} + +void +nfsd4_shutdown_recdir(void) +{ + if (!rec_file) + return; + fput(rec_file); + rec_file = NULL; +} + +/* + * Change the NFSv4 recovery directory to recdir. + */ +int +nfs4_reset_recoverydir(char *recdir) +{ + int status; + struct path path; + + status = kern_path(recdir, LOOKUP_FOLLOW, &path); + if (status) + return status; + status = -ENOTDIR; + if (S_ISDIR(path.dentry->d_inode->i_mode)) { + strcpy(user_recovery_dirname, recdir); + status = 0; + } + path_put(&path); + return status; +} + +char * +nfs4_recoverydir(void) +{ + return user_recovery_dirname; +} diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c new file mode 100644 index 00000000000..e8c98f00967 --- /dev/null +++ b/fs/nfsd/nfs4state.c @@ -0,0 +1,4693 @@ +/* +* Copyright (c) 2001 The Regents of the University of Michigan. +* All rights reserved. +* +* Kendrick Smith <kmsmith@umich.edu> +* Andy Adamson <kandros@umich.edu> +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. 
Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* 3. Neither the name of the University nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +*/ + +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/svcauth_gss.h> +#include <linux/sunrpc/clnt.h> +#include "xdr4.h" +#include "vfs.h" + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +/* Globals */ +time_t nfsd4_lease = 90; /* default lease time */ +time_t nfsd4_grace = 90; +static time_t boot_time; + +#define all_ones {{~0,~0},~0} +static const stateid_t one_stateid = { + .si_generation = ~0, + .si_opaque = all_ones, +}; +static const stateid_t zero_stateid = { + /* all fields zero */ +}; + +static u64 current_sessionid = 1; + +#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) +#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) + +/* forward declarations */ +static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); + +/* Locking: */ + +/* Currently used for almost all code touching nfsv4 state: */ +static DEFINE_MUTEX(client_mutex); + +/* + * Currently used for the del_recall_lru and file hash table. 
In an + * effort to decrease the scope of the client_mutex, this spinlock may + * eventually cover more: + */ +static DEFINE_SPINLOCK(recall_lock); + +static struct kmem_cache *openowner_slab = NULL; +static struct kmem_cache *lockowner_slab = NULL; +static struct kmem_cache *file_slab = NULL; +static struct kmem_cache *stateid_slab = NULL; +static struct kmem_cache *deleg_slab = NULL; + +void +nfs4_lock_state(void) +{ + mutex_lock(&client_mutex); +} + +void +nfs4_unlock_state(void) +{ + mutex_unlock(&client_mutex); +} + +static inline u32 +opaque_hashval(const void *ptr, int nbytes) +{ + unsigned char *cptr = (unsigned char *) ptr; + + u32 x = 0; + while (nbytes--) { + x *= 37; + x += *cptr++; + } + return x; +} + +static struct list_head del_recall_lru; + +static void nfsd4_free_file(struct nfs4_file *f) +{ + kmem_cache_free(file_slab, f); +} + +static inline void +put_nfs4_file(struct nfs4_file *fi) +{ + if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { + list_del(&fi->fi_hash); + spin_unlock(&recall_lock); + iput(fi->fi_inode); + nfsd4_free_file(fi); + } +} + +static inline void +get_nfs4_file(struct nfs4_file *fi) +{ + atomic_inc(&fi->fi_ref); +} + +static int num_delegations; +unsigned int max_delegations; + +/* + * Open owner state (share locks) + */ + +/* hash tables for lock and open owners */ +#define OWNER_HASH_BITS 8 +#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) +#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) + +static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +{ + unsigned int ret; + + ret = opaque_hashval(ownername->data, ownername->len); + ret += clientid; + return ret & OWNER_HASH_MASK; +} + +static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; + +/* hash table for nfs4_file */ +#define FILE_HASH_BITS 8 +#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) + +static unsigned int file_hashval(struct inode *ino) +{ + /* XXX: why are we hashing on inode pointer, anyway? 
*/ + return hash_ptr(ino, FILE_HASH_BITS); +} + +static struct list_head file_hashtbl[FILE_HASH_SIZE]; + +static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) +{ + BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); + atomic_inc(&fp->fi_access[oflag]); +} + +static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) +{ + if (oflag == O_RDWR) { + __nfs4_file_get_access(fp, O_RDONLY); + __nfs4_file_get_access(fp, O_WRONLY); + } else + __nfs4_file_get_access(fp, oflag); +} + +static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) +{ + if (fp->fi_fds[oflag]) { + fput(fp->fi_fds[oflag]); + fp->fi_fds[oflag] = NULL; + } +} + +static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) +{ + if (atomic_dec_and_test(&fp->fi_access[oflag])) { + nfs4_file_put_fd(fp, oflag); + /* + * It's also safe to get rid of the RDWR open *if* + * we no longer have need of the other kind of access + * or if we already have the other kind of open: + */ + if (fp->fi_fds[1-oflag] + || atomic_read(&fp->fi_access[1 - oflag]) == 0) + nfs4_file_put_fd(fp, O_RDWR); + } +} + +static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) +{ + if (oflag == O_RDWR) { + __nfs4_file_put_access(fp, O_RDONLY); + __nfs4_file_put_access(fp, O_WRONLY); + } else + __nfs4_file_put_access(fp, oflag); +} + +static inline int get_new_stid(struct nfs4_stid *stid) +{ + static int min_stateid = 0; + struct idr *stateids = &stid->sc_client->cl_stateids; + int new_stid; + int error; + + error = idr_get_new_above(stateids, stid, min_stateid, &new_stid); + /* + * Note: the necessary preallocation was done in + * nfs4_alloc_stateid(). The idr code caps the number of + * preallocations that can exist at a time, but the state lock + * prevents anyone from using ours before we get here: + */ + BUG_ON(error); + /* + * It shouldn't be a problem to reuse an opaque stateid value. + * I don't think it is for 4.1. 
But with 4.0 I worry that, for + * example, a stray write retransmission could be accepted by + * the server when it should have been rejected. Therefore, + * adopt a trick from the sctp code to attempt to maximize the + * amount of time until an id is reused, by ensuring they always + * "increase" (mod INT_MAX): + */ + + min_stateid = new_stid+1; + if (min_stateid == INT_MAX) + min_stateid = 0; + return new_stid; +} + +static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) +{ + stateid_t *s = &stid->sc_stateid; + int new_id; + + stid->sc_type = type; + stid->sc_client = cl; + s->si_opaque.so_clid = cl->cl_clientid; + new_id = get_new_stid(stid); + s->si_opaque.so_id = (u32)new_id; + /* Will be incremented before return to client: */ + s->si_generation = 0; +} + +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) +{ + struct idr *stateids = &cl->cl_stateids; + + if (!idr_pre_get(stateids, GFP_KERNEL)) + return NULL; + /* + * Note: if we fail here (or any time between now and the time + * we actually get the new idr), we won't need to undo the idr + * preallocation, since the idr code caps the number of + * preallocated entries. + */ + return kmem_cache_alloc(slab, GFP_KERNEL); +} + +static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) +{ + return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); +} + +static struct nfs4_delegation * +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) +{ + struct nfs4_delegation *dp; + struct nfs4_file *fp = stp->st_file; + + dprintk("NFSD alloc_init_deleg\n"); + /* + * Major work on the lease subsystem (for example, to support + * calbacks on stat) will be required before we can support + * write delegations properly. 
+ */ + if (type != NFS4_OPEN_DELEGATE_READ) + return NULL; + if (fp->fi_had_conflict) + return NULL; + if (num_delegations > max_delegations) + return NULL; + dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); + if (dp == NULL) + return dp; + init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); + /* + * delegation seqid's are never incremented. The 4.1 special + * meaning of seqid 0 isn't meaningful, really, but let's avoid + * 0 anyway just for consistency and use 1: + */ + dp->dl_stid.sc_stateid.si_generation = 1; + num_delegations++; + INIT_LIST_HEAD(&dp->dl_perfile); + INIT_LIST_HEAD(&dp->dl_perclnt); + INIT_LIST_HEAD(&dp->dl_recall_lru); + get_nfs4_file(fp); + dp->dl_file = fp; + dp->dl_type = type; + fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); + dp->dl_time = 0; + atomic_set(&dp->dl_count, 1); + INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); + return dp; +} + +void +nfs4_put_delegation(struct nfs4_delegation *dp) +{ + if (atomic_dec_and_test(&dp->dl_count)) { + dprintk("NFSD: freeing dp %p\n",dp); + put_nfs4_file(dp->dl_file); + kmem_cache_free(deleg_slab, dp); + num_delegations--; + } +} + +static void nfs4_put_deleg_lease(struct nfs4_file *fp) +{ + if (atomic_dec_and_test(&fp->fi_delegees)) { + vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); + fp->fi_lease = NULL; + fput(fp->fi_deleg_file); + fp->fi_deleg_file = NULL; + } +} + +static void unhash_stid(struct nfs4_stid *s) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); +} + +/* Called under the state lock. 
*/ +static void +unhash_delegation(struct nfs4_delegation *dp) +{ + unhash_stid(&dp->dl_stid); + list_del_init(&dp->dl_perclnt); + spin_lock(&recall_lock); + list_del_init(&dp->dl_perfile); + list_del_init(&dp->dl_recall_lru); + spin_unlock(&recall_lock); + nfs4_put_deleg_lease(dp->dl_file); + nfs4_put_delegation(dp); +} + +/* + * SETCLIENTID state + */ + +/* client_lock protects the client lru list and session hash table */ +static DEFINE_SPINLOCK(client_lock); + +/* Hash tables for nfs4_clientid state */ +#define CLIENT_HASH_BITS 4 +#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) +#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) + +static unsigned int clientid_hashval(u32 id) +{ + return id & CLIENT_HASH_MASK; +} + +static unsigned int clientstr_hashval(const char *name) +{ + return opaque_hashval(name, 8) & CLIENT_HASH_MASK; +} + +/* + * reclaim_str_hashtbl[] holds known client info from previous reset/reboot + * used in reboot/reset lease grace period processing + * + * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed + * setclientid_confirmed info. + * + * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed + * setclientid info. + * + * client_lru holds client queue ordered by nfs4_client.cl_time + * for lease renewal. + * + * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time + * for last close replay. 
+ */ +static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE]; +static int reclaim_str_hashtbl_size = 0; +static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head client_lru; +static struct list_head close_lru; + +/* + * We store the NONE, READ, WRITE, and BOTH bits separately in the + * st_{access,deny}_bmap field of the stateid, in order to track not + * only what share bits are currently in force, but also what + * combinations of share bits previous opens have used. This allows us + * to enforce the recommendation of rfc 3530 14.2.19 that the server + * return an error if the client attempt to downgrade to a combination + * of share bits not explicable by closing some of its previous opens. + * + * XXX: This enforcement is actually incomplete, since we don't keep + * track of access/deny bit combinations; so, e.g., we allow: + * + * OPEN allow read, deny write + * OPEN allow both, deny none + * DOWNGRADE allow read, deny none + * + * which we should reject. 
+ *
+ * set_access() and set_deny() below collapse such a bitmap back into a
+ * single mask by OR-ing together the index of every bit that is set.
+ */
+static void
+set_access(unsigned int *access, unsigned long bmap) {
+	int i;
+
+	*access = 0;
+	for (i = 1; i < 4; i++) {	/* bit 0 is not examined here, unlike set_deny() */
+		if (test_bit(i, &bmap))
+			*access |= i;
+	}
+}
+
+static void
+set_deny(unsigned int *deny, unsigned long bmap) {
+	int i;
+
+	*deny = 0;
+	for (i = 0; i < 4; i++) {	/* i == 0 adds nothing to the mask */
+		if (test_bit(i, &bmap))
+			*deny |= i ;
+	}
+}
+/*
+ * Check a new OPEN against the share modes recorded in @stp.
+ * Returns 0 if the recorded access overlaps the requested deny mode or
+ * the recorded deny overlaps the requested access mode, 1 otherwise.
+ */
+static int
+test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
+	unsigned int access, deny;
+
+	set_access(&access, stp->st_access_bmap);
+	set_deny(&deny, stp->st_deny_bmap);
+	if ((access & open->op_share_deny) || (deny & open->op_share_access))
+		return 0;
+	return 1;
+}
+/*
+ * Translate NFS4_SHARE_ACCESS_{READ,WRITE,BOTH} into the matching
+ * O_RDONLY/O_WRONLY/O_RDWR open mode.  Any other value hits BUG().
+ */
+static int nfs4_access_to_omode(u32 access)
+{
+	switch (access & NFS4_SHARE_ACCESS_BOTH) {
+	case NFS4_SHARE_ACCESS_READ:
+		return O_RDONLY;
+	case NFS4_SHARE_ACCESS_WRITE:
+		return O_WRONLY;
+	case NFS4_SHARE_ACCESS_BOTH:
+		return O_RDWR;
+	}
+	BUG();
+}
+/* Unlink @stp from its per-file and per-stateowner lists. */
+static void unhash_generic_stateid(struct nfs4_ol_stateid *stp)
+{
+	list_del(&stp->st_perfile);
+	list_del(&stp->st_perstateowner);
+}
+/*
+ * For every access mode recorded in st_access_bmap, give back the file
+ * access taken for that mode and clear the bit; then release st_file.
+ */
+static void close_generic_stateid(struct nfs4_ol_stateid *stp)
+{
+	int i;
+
+	if (stp->st_access_bmap) {
+		for (i = 1; i < 4; i++) {
+			if (test_bit(i, &stp->st_access_bmap))
+				nfs4_file_put_access(stp->st_file,
+						nfs4_access_to_omode(i));
+			__clear_bit(i, &stp->st_access_bmap);	/* cleared whether or not it was set */
+		}
+	}
+	put_nfs4_file(stp->st_file);
+	stp->st_file = NULL;
+}
+/* Return @stp to stateid_slab. */
+static void free_generic_stateid(struct nfs4_ol_stateid *stp)
+{
+	kmem_cache_free(stateid_slab, stp);
+}
+/*
+ * Tear down a lock stateid: unhash it, remove the posix locks owned by
+ * its lockowner if find_any_file() still yields a file, then close and
+ * free the stateid.
+ */
+static void release_lock_stateid(struct nfs4_ol_stateid *stp)
+{
+	struct file *file;
+
+	unhash_generic_stateid(stp);
+	unhash_stid(&stp->st_stid);
+	file = find_any_file(stp->st_file);
+	if (file)
+		locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner));
+	close_generic_stateid(stp);
+	free_generic_stateid(stp);
+}
+/*
+ * Unlink @lo from its three lists and release every lock stateid still
+ * queued on lo_owner.so_stateids.
+ */
+static void unhash_lockowner(struct nfs4_lockowner *lo)
+{
+	struct nfs4_ol_stateid *stp;
+
+	list_del(&lo->lo_owner.so_strhash);
+	list_del(&lo->lo_perstateid);
+	list_del(&lo->lo_owner_ino_hash);
+	while (!list_empty(&lo->lo_owner.so_stateids)) {
+		stp = list_first_entry(&lo->lo_owner.so_stateids,
+				struct nfs4_ol_stateid, st_perstateowner);
+		release_lock_stateid(stp);	/* unlinks stp from so_stateids, so the loop advances */
+	}
+}
+/* Unhash @lo (releasing its stateids) and free it. */
+static void release_lockowner(struct nfs4_lockowner *lo)
+{
+	unhash_lockowner(lo);
+	nfs4_free_lockowner(lo);
+}
+/* Release every lockowner hanging off the open stateid @open_stp. */
+static void
+release_stateid_lockowners(struct nfs4_ol_stateid *open_stp)
+{
+	struct nfs4_lockowner *lo;
+
+	while (!list_empty(&open_stp->st_lockowners)) {
+		lo = list_entry(open_stp->st_lockowners.next,
+				struct nfs4_lockowner, lo_perstateid);
+		release_lockowner(lo);	/* unlinks lo_perstateid, so the loop advances */
+	}
+}
+/*
+ * Unlink an open stateid, release the lockowners that depend on it and
+ * close it.  The stateid itself is not freed here.
+ */
+static void unhash_open_stateid(struct nfs4_ol_stateid *stp)
+{
+	unhash_generic_stateid(stp);
+	release_stateid_lockowners(stp);
+	close_generic_stateid(stp);
+}
+/* unhash_open_stateid() plus removal of the stid and freeing of @stp. */
+static void release_open_stateid(struct nfs4_ol_stateid *stp)
+{
+	unhash_open_stateid(stp);
+	unhash_stid(&stp->st_stid);
+	free_generic_stateid(stp);
+}
+/*
+ * Unlink @oo from the owner-string hash and its client's list, then
+ * release every open stateid it still owns.
+ */
+static void unhash_openowner(struct nfs4_openowner *oo)
+{
+	struct nfs4_ol_stateid *stp;
+
+	list_del(&oo->oo_owner.so_strhash);
+	list_del(&oo->oo_perclient);
+	while (!list_empty(&oo->oo_owner.so_stateids)) {
+		stp = list_first_entry(&oo->oo_owner.so_stateids,
+				struct nfs4_ol_stateid, st_perstateowner);
+		release_open_stateid(stp);
+	}
+}
+/* Free the stateid remembered in oo_last_closed_stid, if there is one. */
+static void release_last_closed_stateid(struct nfs4_openowner *oo)
+{
+	struct nfs4_ol_stateid *s = oo->oo_last_closed_stid;
+
+	if (s) {
+		unhash_stid(&s->st_stid);
+		free_generic_stateid(s);
+		oo->oo_last_closed_stid = NULL;
+	}
+}
+/*
+ * Fully destroy an openowner: its stateids, its close_lru linkage, the
+ * last-closed stateid, and finally the owner structure.
+ */
+static void release_openowner(struct nfs4_openowner *oo)
+{
+	unhash_openowner(oo);
+	list_del(&oo->oo_close_lru);
+	release_last_closed_stateid(oo);
+	nfs4_free_openowner(oo);
+}
+
+#define SESSION_HASH_SIZE	512
+static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
+/*
+ * Bucket index into sessionid_hashtbl[]: the sequence field of the
+ * session id modulo SESSION_HASH_SIZE.
+ */
+static inline int
+hash_sessionid(struct nfs4_sessionid *sessionid)
+{
+	struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;
+
+	return sid->sequence % SESSION_HASH_SIZE;
+}
+
+static inline void
+dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
+{
+	u32 *ptr = (u32 *)(&sessionid->data[0]);	/* debug output only: view the id as four 32-bit words */
+	dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
+}
+/*
+ * Build the session id for @ses from the owning client's clientid and
+ * the next value of the current_sessionid counter.
+ */
+static void
+gen_sessionid(struct nfsd4_session *ses)
+{
+	struct nfs4_client *clp = ses->se_client;
+	struct nfsd4_sessionid *sid;
+
+	sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
+	sid->clientid = clp->cl_clientid;
+	sid->sequence = current_sessionid++;
+	sid->reserved = 0;
+}
+
+/*
+ * The protocol defines ca_maxresponsesize_cached to include the size of
+ * the rpc header, but all we need to cache is the data starting after
+ * the end of the initial SEQUENCE operation--the rest we regenerate
+ * each time.  Therefore we can advertise a ca_maxresponsesize_cached
+ * value that is the number of bytes in our cache plus a few additional
+ * bytes.  In order to stay on the safe side, and not promise more than
+ * we can cache, those additional bytes must be the minimum possible: 24
+ * bytes of rpc header (xid through accept state, with AUTH_NULL
+ * verifier), 12 for the compound header (with zero-length tag), and 44
+ * for the SEQUENCE op response:
+ */
+#define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
+/* Free the per-slot allocations of @ses; the se_slots[] array itself is not freed here. */
+static void
+free_session_slots(struct nfsd4_session *ses)
+{
+	int i;
+
+	for (i = 0; i < ses->se_fchannel.maxreqs; i++)
+		kfree(ses->se_slots[i]);
+}
+
+/*
+ * We don't actually need to cache the rpc and session headers, so we
+ * can allocate a little less for each slot:
+ */
+static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
+{
+	return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
+}
+/*
+ * Turn the client's requested maxresp_cached into the number of reply
+ * bytes cached per slot, capped at NFSD_SLOT_CACHE_SIZE.
+ *
+ * @size is unsigned, so a request smaller than NFSD_MIN_HDR_SEQ_SZ wraps
+ * to a huge value and ends up at the cap.
+ * NOTE(review): a request of exactly NFSD_MIN_HDR_SEQ_SZ yields 0;
+ * callers that divide by the result need to cope with that.
+ */
+static int nfsd4_sanitize_slot_size(u32 size)
+{
+	size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */
+	size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE);
+
+	return size;
+}
+
+/*
+ * XXX: If we run out of reserved DRC memory we could (up to a point)
+ * re-negotiate active sessions and reduce their slot usage to make
+ * room for new connections.
For now we just fail the create session. + */ +static int nfsd4_get_drc_mem(int slotsize, u32 num) +{ + int avail; + + num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); + + spin_lock(&nfsd_drc_lock); + avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, + nfsd_drc_max_mem - nfsd_drc_mem_used); + num = min_t(int, num, avail / slotsize); + nfsd_drc_mem_used += num * slotsize; + spin_unlock(&nfsd_drc_lock); + + return num; +} + +static void nfsd4_put_drc_mem(int slotsize, int num) +{ + spin_lock(&nfsd_drc_lock); + nfsd_drc_mem_used -= slotsize * num; + spin_unlock(&nfsd_drc_lock); +} + +static struct nfsd4_session *alloc_session(int slotsize, int numslots) +{ + struct nfsd4_session *new; + int mem, i; + + BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) + + sizeof(struct nfsd4_session) > PAGE_SIZE); + mem = numslots * sizeof(struct nfsd4_slot *); + + new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); + if (!new) + return NULL; + /* allocate each struct nfsd4_slot and data cache in one piece */ + for (i = 0; i < numslots; i++) { + mem = sizeof(struct nfsd4_slot) + slotsize; + new->se_slots[i] = kzalloc(mem, GFP_KERNEL); + if (!new->se_slots[i]) + goto out_free; + } + return new; +out_free: + while (i--) + kfree(new->se_slots[i]); + kfree(new); + return NULL; +} + +static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize) +{ + u32 maxrpc = nfsd_serv->sv_max_mesg; + + new->maxreqs = numslots; + new->maxresp_cached = min_t(u32, req->maxresp_cached, + slotsize + NFSD_MIN_HDR_SEQ_SZ); + new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); + new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); + new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); +} + +static void free_conn(struct nfsd4_conn *c) +{ + svc_xprt_put(c->cn_xprt); + kfree(c); +} + +static void nfsd4_conn_lost(struct svc_xpt_user *u) +{ + struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); + struct 
nfs4_client *clp = c->cn_session->se_client; + + spin_lock(&clp->cl_lock); + if (!list_empty(&c->cn_persession)) { + list_del(&c->cn_persession); + free_conn(c); + } + spin_unlock(&clp->cl_lock); + nfsd4_probe_callback(clp); +} + +static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) +{ + struct nfsd4_conn *conn; + + conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL); + if (!conn) + return NULL; + svc_xprt_get(rqstp->rq_xprt); + conn->cn_xprt = rqstp->rq_xprt; + conn->cn_flags = flags; + INIT_LIST_HEAD(&conn->cn_xpt_user.list); + return conn; +} + +static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) +{ + conn->cn_session = ses; + list_add(&conn->cn_persession, &ses->se_conns); +} + +static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + + spin_lock(&clp->cl_lock); + __nfsd4_hash_conn(conn, ses); + spin_unlock(&clp->cl_lock); +} + +static int nfsd4_register_conn(struct nfsd4_conn *conn) +{ + conn->cn_xpt_user.callback = nfsd4_conn_lost; + return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); +} + +static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses, u32 dir) +{ + struct nfsd4_conn *conn; + int ret; + + conn = alloc_conn(rqstp, dir); + if (!conn) + return nfserr_jukebox; + nfsd4_hash_conn(conn, ses); + ret = nfsd4_register_conn(conn); + if (ret) + /* oops; xprt is already down: */ + nfsd4_conn_lost(&conn->cn_xpt_user); + return nfs_ok; +} + +static __be32 nfsd4_new_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_session *ses) +{ + u32 dir = NFS4_CDFC4_FORE; + + if (ses->se_flags & SESSION4_BACK_CHAN) + dir |= NFS4_CDFC4_BACK; + + return nfsd4_new_conn(rqstp, ses, dir); +} + +/* must be called under client_lock */ +static void nfsd4_del_conns(struct nfsd4_session *s) +{ + struct nfs4_client *clp = s->se_client; + struct nfsd4_conn *c; + + spin_lock(&clp->cl_lock); + while (!list_empty(&s->se_conns)) { + c = 
list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession); + list_del_init(&c->cn_persession); + spin_unlock(&clp->cl_lock); + + unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user); + free_conn(c); + + spin_lock(&clp->cl_lock); + } + spin_unlock(&clp->cl_lock); +} + +void free_session(struct kref *kref) +{ + struct nfsd4_session *ses; + int mem; + + ses = container_of(kref, struct nfsd4_session, se_ref); + nfsd4_del_conns(ses); + spin_lock(&nfsd_drc_lock); + mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); + nfsd_drc_mem_used -= mem; + spin_unlock(&nfsd_drc_lock); + free_session_slots(ses); + kfree(ses); +} + +static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) +{ + struct nfsd4_session *new; + struct nfsd4_channel_attrs *fchan = &cses->fore_channel; + int numslots, slotsize; + int status; + int idx; + + /* + * Note decreasing slot size below client's request may + * make it difficult for client to function correctly, whereas + * decreasing the number of slots will (just?) affect + * performance. When short on memory we therefore prefer to + * decrease number of slots instead of their size. 
+ */ + slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); + numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); + if (numslots < 1) + return NULL; + + new = alloc_session(slotsize, numslots); + if (!new) { + nfsd4_put_drc_mem(slotsize, fchan->maxreqs); + return NULL; + } + init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); + + new->se_client = clp; + gen_sessionid(new); + + INIT_LIST_HEAD(&new->se_conns); + + new->se_cb_seq_nr = 1; + new->se_flags = cses->flags; + new->se_cb_prog = cses->callback_prog; + kref_init(&new->se_ref); + idx = hash_sessionid(&new->se_sessionid); + spin_lock(&client_lock); + list_add(&new->se_hash, &sessionid_hashtbl[idx]); + spin_lock(&clp->cl_lock); + list_add(&new->se_perclnt, &clp->cl_sessions); + spin_unlock(&clp->cl_lock); + spin_unlock(&client_lock); + + status = nfsd4_new_conn_from_crses(rqstp, new); + /* whoops: benny points out, status is ignored! (err, or bogus) */ + if (status) { + free_session(&new->se_ref); + return NULL; + } + if (cses->flags & SESSION4_BACK_CHAN) { + struct sockaddr *sa = svc_addr(rqstp); + /* + * This is a little silly; with sessions there's no real + * use for the callback address. 
Use the peer address + * as a reasonable default for now, but consider fixing + * the rpc client not to require an address in the + * future: + */ + rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); + clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); + } + nfsd4_probe_callback(clp); + return new; +} + +/* caller must hold client_lock */ +static struct nfsd4_session * +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) +{ + struct nfsd4_session *elem; + int idx; + + dump_sessionid(__func__, sessionid); + idx = hash_sessionid(sessionid); + /* Search in the appropriate list */ + list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { + if (!memcmp(elem->se_sessionid.data, sessionid->data, + NFS4_MAX_SESSIONID_LEN)) { + return elem; + } + } + + dprintk("%s: session not found\n", __func__); + return NULL; +} + +/* caller must hold client_lock */ +static void +unhash_session(struct nfsd4_session *ses) +{ + list_del(&ses->se_hash); + spin_lock(&ses->se_client->cl_lock); + list_del(&ses->se_perclnt); + spin_unlock(&ses->se_client->cl_lock); +} + +/* must be called under the client_lock */ +static inline void +renew_client_locked(struct nfs4_client *clp) +{ + if (is_client_expired(clp)) { + dprintk("%s: client (clientid %08x/%08x) already expired\n", + __func__, + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + return; + } + + dprintk("renewing client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + list_move_tail(&clp->cl_lru, &client_lru); + clp->cl_time = get_seconds(); +} + +static inline void +renew_client(struct nfs4_client *clp) +{ + spin_lock(&client_lock); + renew_client_locked(clp); + spin_unlock(&client_lock); +} + +/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ +static int +STALE_CLIENTID(clientid_t *clid) +{ + if (clid->cl_boot == boot_time) + return 0; + dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", + clid->cl_boot, clid->cl_id, boot_time); + return 1; +} + +/* + * XXX 
Should we use a slab cache ?
+ * This type of memory management is somewhat inefficient, but we use it
+ * anyway since SETCLIENTID is not a common operation.
+ *
+ * Returns a zeroed client holding a private copy of @name, or NULL if
+ * either allocation fails.
+ */
+static struct nfs4_client *alloc_client(struct xdr_netobj name)
+{
+	struct nfs4_client *clp;
+
+	clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
+	if (clp == NULL)
+		return NULL;
+	clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+	if (clp->cl_name.data == NULL) {
+		kfree(clp);
+		return NULL;
+	}
+	clp->cl_name.len = name.len;
+	return clp;
+}
+/*
+ * Free @clp: call nfsd4_put_session() on every session still on
+ * cl_sessions, put the credential's group info if any, and free the
+ * principal, the name copy and the client itself.  cl_sessions must
+ * have been initialised.
+ */
+static inline void
+free_client(struct nfs4_client *clp)
+{
+	while (!list_empty(&clp->cl_sessions)) {
+		struct nfsd4_session *ses;
+		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
+				se_perclnt);
+		list_del(&ses->se_perclnt);
+		nfsd4_put_session(ses);
+	}
+	if (clp->cl_cred.cr_group_info)
+		put_group_info(clp->cl_cred.cr_group_info);
+	kfree(clp->cl_principal);
+	kfree(clp->cl_name.data);
+	kfree(clp);
+}
+/*
+ * Drop one cl_refcount reference on the client of @session.  Nothing
+ * more happens unless the count reaches zero; in that case client_lock
+ * is taken and the client is freed if it has been marked expired, or
+ * has its lease renewed otherwise.
+ */
+void
+release_session_client(struct nfsd4_session *session)
+{
+	struct nfs4_client *clp = session->se_client;
+
+	if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
+		return;
+	if (is_client_expired(clp)) {
+		free_client(clp);
+		session->se_client = NULL;
+	} else
+		renew_client_locked(clp);
+	spin_unlock(&client_lock);
+}
+
+/*
+ * Mark @clp expired, take it off the client LRU and remove each of its
+ * sessions from the session hash table.
+ *
+ * must be called under the client_lock
+ */
+static inline void
+unhash_client_locked(struct nfs4_client *clp)
+{
+	struct nfsd4_session *ses;
+
+	mark_client_expired(clp);
+	list_del(&clp->cl_lru);
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		list_del_init(&ses->se_hash);
+	spin_unlock(&clp->cl_lock);
+}
+/*
+ * Destroy all state held on behalf of @clp: its delegations, its
+ * openowners, its callback channel and its hash-table entries.  The
+ * client structure is freed here only if cl_refcount is zero; otherwise
+ * it is left marked expired, and release_session_client() frees it when
+ * the count drops to zero.
+ */
+static void
+expire_client(struct nfs4_client *clp)
+{
+	struct nfs4_openowner *oo;
+	struct nfs4_delegation *dp;
+	struct list_head reaplist;
+
+	INIT_LIST_HEAD(&reaplist);
+	spin_lock(&recall_lock);	/* collect the delegations under the lock ... */
+	while (!list_empty(&clp->cl_delegations)) {
+		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
+		list_del_init(&dp->dl_perclnt);
+		list_move(&dp->dl_recall_lru, &reaplist);
+	}
+	spin_unlock(&recall_lock);
+	while (!list_empty(&reaplist)) {	/* ... and destroy them after dropping it */
+		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
+		unhash_delegation(dp);
+	}
+	while (!list_empty(&clp->cl_openowners)) {
+		oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
+		release_openowner(oo);
+	}
+	nfsd4_shutdown_callback(clp);
+	if (clp->cl_cb_conn.cb_xprt)
+		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+	list_del(&clp->cl_idhash);
+	list_del(&clp->cl_strhash);
+	spin_lock(&client_lock);
+	unhash_client_locked(clp);
+	if (atomic_read(&clp->cl_refcount) == 0)
+		free_client(clp);
+	spin_unlock(&client_lock);
+}
+/* Copy the verifier @source into @target->cl_verifier. */
+static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+{
+	memcpy(target->cl_verifier.data, source->data,
+			sizeof(target->cl_verifier.data));
+}
+/* Copy both halves (boot time and id) of the clientid from @source. */
+static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
+{
+	target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; 
+	target->cl_clientid.cl_id = source->cl_clientid.cl_id; 
+}
+/*
+ * Copy uid, gid and the group_info pointer, calling get_group_info()
+ * on the shared group_info.
+ */
+static void copy_cred(struct svc_cred *target, struct svc_cred *source)
+{
+	target->cr_uid = source->cr_uid;
+	target->cr_gid = source->cr_gid;
+	target->cr_group_info = source->cr_group_info;
+	get_group_info(target->cr_group_info);
+}
+/* Non-zero if the first HEXDIR_LEN bytes of the two names are equal. */
+static int same_name(const char *n1, const char *n2)
+{
+	return 0 == memcmp(n1, n2, HEXDIR_LEN);
+}
+/* Non-zero if the two verifiers are byte-for-byte equal. */
+static int
+same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
+{
+	return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
+}
+/* Non-zero if both the boot time and the id of the clientids match. */
+static int
+same_clid(clientid_t *cl1, clientid_t *cl2)
+{
+	return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
+}
+
+/*
+ * Only the uid is compared.
+ * XXX what about NGROUP
+ */
+static int
+same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
+{
+	return cr1->cr_uid == cr2->cr_uid;
+}
+/*
+ * Give @clp a fresh clientid: the current boot_time plus the next value
+ * of a function-static counter that starts at 1.
+ */
+static void gen_clid(struct nfs4_client *clp)
+{
+	static u32 current_clientid = 1;
+
+	clp->cl_clientid.cl_boot = boot_time;
+	clp->cl_clientid.cl_id = current_clientid++; 
+}
+/*
+ * Fill the first two 32-bit words of cl_confirm with the current time
+ * in seconds and the next value of a function-static counter.
+ */
+static void gen_confirm(struct nfs4_client *clp)
+{
+	static u32 i;
+	u32 *p;
+
+	p = (u32 *)clp->cl_confirm.data;
+	*p++ = get_seconds();
+	*p++ = i++;
+}
+/* Look up a stateid in the client's idr by the so_id of @t. */
+static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
+{
+	return idr_find(&cl->cl_stateids, t->si_opaque.so_id);
+}
+/*
+ * Like find_stateid(), but returns NULL unless the stateid's sc_type
+ * has at least one bit in common with @typemask.
+ */
+static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+{
+	struct nfs4_stid *s;
+
+	s = find_stateid(cl, t);
+	if (!s)
+		return NULL;
+	if (typemask & s->sc_type)
+		return s;
+	return NULL;
+}
+/*
+ * Allocate and initialise a client for the name @name, recovery
+ * directory name @recdir and verifier @verf, recording the address,
+ * security flavor, credentials and (if present) GSS principal of the
+ * request @rqstp.  The clientid is not assigned here and the client is
+ * not hashed.  Returns NULL if any allocation fails.
+ */
+static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
+		struct svc_rqst *rqstp, nfs4_verifier *verf)
+{
+	struct nfs4_client *clp;
+	struct sockaddr *sa = svc_addr(rqstp);
+	char *princ;
+
+	clp = alloc_client(name);
+	if (clp == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&clp->cl_sessions);	/* free_client() below walks this list */
+
+	princ = svc_gss_principal(rqstp);
+	if (princ) {
+		clp->cl_principal = kstrdup(princ, GFP_KERNEL);
+		if (clp->cl_principal == NULL) {
+			free_client(clp);
+			return NULL;
+		}
+	}
+
+	idr_init(&clp->cl_stateids);
+	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
+	atomic_set(&clp->cl_refcount, 0);
+	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+	INIT_LIST_HEAD(&clp->cl_idhash);
+	INIT_LIST_HEAD(&clp->cl_strhash);
+	INIT_LIST_HEAD(&clp->cl_openowners);
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_lru);
+	INIT_LIST_HEAD(&clp->cl_callbacks);
+	spin_lock_init(&clp->cl_lock);
+	INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
+	clp->cl_time = get_seconds();
+	clear_bit(0, &clp->cl_cb_slot_busy);
+	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
+	copy_verf(clp, verf);
+	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
+	clp->cl_flavor = rqstp->rq_flavor;
+	copy_cred(&clp->cl_cred, &rqstp->rq_cred);
+	gen_confirm(clp);
+	clp->cl_cb_session = NULL;
+	return clp;
+}
+/*
+ * Hash @clp into both unconfirmed tables (by name, using the caller's
+ * @strhashval, and by clientid) and start its lease.
+ */
+static void
+add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
+{
+	unsigned int idhashval;
+
+	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+	renew_client(clp);
+}
+/*
+ * Move @clp from wherever it is hashed into both confirmed tables and
+ * renew its lease.
+ */
+static void
+move_to_confirmed(struct nfs4_client *clp)
+{
+	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+	unsigned int strhashval;
+
+	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
+	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
+	strhashval = clientstr_hashval(clp->cl_recdir);
+	list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+	renew_client(clp);
+}
+/*
+ * Find the confirmed client with clientid @clid, or NULL.  A successful
+ * lookup also renews the client's lease.
+ */
+static struct nfs4_client *
+find_confirmed_client(clientid_t *clid)
+{
+	struct nfs4_client *clp;
+	unsigned int idhashval = clientid_hashval(clid->cl_id);
+
+	list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
+		if (same_clid(&clp->cl_clientid, clid)) {
+			renew_client(clp);
+			return clp;
+		}
+	}
+	return NULL;
+}
+/*
+ * Find the unconfirmed client with clientid @clid, or NULL.  Unlike
+ * find_confirmed_client() this does not renew the lease.
+ */
+static struct nfs4_client *
+find_unconfirmed_client(clientid_t *clid)
+{
+	struct nfs4_client *clp;
+	unsigned int idhashval = clientid_hashval(clid->cl_id);
+
+	list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
+		if (same_clid(&clp->cl_clientid, clid))
+			return clp;
+	}
+	return NULL;
+}
+/*
+ * True if any exchange flags are set on @clp.  nfsd4_set_ex_flags() sets
+ * them for clients created through EXCHANGE_ID.
+ */
+static bool clp_used_exchangeid(struct nfs4_client *clp)
+{
+	return clp->cl_exchange_flags != 0;
+} 
+/*
+ * Find the confirmed client whose recovery directory name is @dname in
+ * bucket @hashval, or NULL.
+ */
+static struct nfs4_client *
+find_confirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
+		if (same_name(clp->cl_recdir, dname))
+			return clp;
+	}
+	return NULL;
+}
+/* As find_confirmed_client_by_str(), but over the unconfirmed table. */
+static struct nfs4_client *
+find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
+		if (same_name(clp->cl_recdir, dname))
+			return clp;
+	}
+	return NULL;
+}
+/*
+ * Record the callback channel described by the SETCLIENTID arguments
+ * @se in clp->cl_cb_conn.  If the netid is neither "tcp" nor "tcp6", or
+ * the address does not parse or has the wrong family, the callback
+ * address is marked unusable (AF_UNSPEC, length 0) instead.
+ */
+static void
+gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp)
+{
+	struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
+	struct sockaddr	*sa = svc_addr(rqstp);
+	u32 scopeid = rpc_get_scope_id(sa);
+	unsigned short expected_family;
+
+	/* Currently, we only support tcp and tcp6 for the callback channel */
+	if (se->se_callback_netid_len == 3 &&
+	    !memcmp(se->se_callback_netid_val, "tcp", 3))
+		expected_family = AF_INET;
+	else if (se->se_callback_netid_len == 4 &&
+		 !memcmp(se->se_callback_netid_val, "tcp6", 4))
+		expected_family = AF_INET6;
+	else
+		goto out_err;
+
+	conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
+					    se->se_callback_addr_len,
+					    (struct sockaddr *)&conn->cb_addr,
+					    sizeof(conn->cb_addr));
+
+	if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
+		goto out_err;
+
+	if (conn->cb_addr.ss_family == AF_INET6)	/* reuse the scope id of the address the request came from */
+		((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
+
+	conn->cb_prog = se->se_callback_prog;
+	conn->cb_ident = se->se_callback_ident;
+	memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
+	return;
+out_err:
+	conn->cb_addr.ss_family = AF_UNSPEC;
+	conn->cb_addrlen = 0;
+	dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+		"will not receive delegations\n",
+		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+
+	return;
+}
+
+/*
+ * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
+ */ +void +nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) +{ + struct nfsd4_slot *slot = resp->cstate.slot; + unsigned int base; + + dprintk("--> %s slot %p\n", __func__, slot); + + slot->sl_opcnt = resp->opcnt; + slot->sl_status = resp->cstate.status; + + if (nfsd4_not_cached(resp)) { + slot->sl_datalen = 0; + return; + } + slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; + base = (char *)resp->cstate.datap - + (char *)resp->xbuf->head[0].iov_base; + if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, + slot->sl_datalen)) + WARN("%s: sessions DRC could not cache compound\n", __func__); + return; +} + +/* + * Encode the replay sequence operation from the slot values. + * If cachethis is FALSE encode the uncached rep error on the next + * operation which sets resp->p and increments resp->opcnt for + * nfs4svc_encode_compoundres. + * + */ +static __be32 +nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, + struct nfsd4_compoundres *resp) +{ + struct nfsd4_op *op; + struct nfsd4_slot *slot = resp->cstate.slot; + + dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__, + resp->opcnt, resp->cstate.slot->sl_cachethis); + + /* Encode the replayed sequence operation */ + op = &args->ops[resp->opcnt - 1]; + nfsd4_encode_operation(resp, op); + + /* Return nfserr_retry_uncached_rep in next operation. */ + if (args->opcnt > 1 && slot->sl_cachethis == 0) { + op = &args->ops[resp->opcnt++]; + op->status = nfserr_retry_uncached_rep; + nfsd4_encode_operation(resp, op); + } + return op->status; +} + +/* + * The sequence operation is not cached because we can use the slot and + * session values. 
+ */ +__be32 +nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, + struct nfsd4_sequence *seq) +{ + struct nfsd4_slot *slot = resp->cstate.slot; + __be32 status; + + dprintk("--> %s slot %p\n", __func__, slot); + + /* Either returns 0 or nfserr_retry_uncached */ + status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); + if (status == nfserr_retry_uncached_rep) + return status; + + /* The sequence operation has been encoded, cstate->datap set. */ + memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); + + resp->opcnt = slot->sl_opcnt; + resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); + status = slot->sl_status; + + return status; +} + +/* + * Set the exchange_id flags returned by the server. + */ +static void +nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) +{ + /* pNFS is not supported */ + new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; + + /* Referrals are supported, Migration is not. */ + new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; + + /* set the wire flags to return to client. 
*/ + clid->flags = new->cl_exchange_flags; +} + +__be32 +nfsd4_exchange_id(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_exchange_id *exid) +{ + struct nfs4_client *unconf, *conf, *new; + int status; + unsigned int strhashval; + char dname[HEXDIR_LEN]; + char addr_str[INET6_ADDRSTRLEN]; + nfs4_verifier verf = exid->verifier; + struct sockaddr *sa = svc_addr(rqstp); + + rpc_ntop(sa, addr_str, sizeof(addr_str)); + dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " + "ip_addr=%s flags %x, spa_how %d\n", + __func__, rqstp, exid, exid->clname.len, exid->clname.data, + addr_str, exid->flags, exid->spa_how); + + if (exid->flags & ~EXCHGID4_FLAG_MASK_A) + return nfserr_inval; + + /* Currently only support SP4_NONE */ + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_SSV: + return nfserr_serverfault; + default: + BUG(); /* checked by xdr code */ + case SP4_MACH_CRED: + return nfserr_serverfault; /* no excuse :-/ */ + } + + status = nfs4_make_rec_clidname(dname, &exid->clname); + + if (status) + goto error; + + strhashval = clientstr_hashval(dname); + + nfs4_lock_state(); + status = nfs_ok; + + conf = find_confirmed_client_by_str(dname, strhashval); + if (conf) { + if (!clp_used_exchangeid(conf)) { + status = nfserr_clid_inuse; /* XXX: ? */ + goto out; + } + if (!same_verf(&verf, &conf->cl_verifier)) { + /* 18.35.4 case 8 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_not_same; + goto out; + } + /* Client reboot: destroy old state */ + expire_client(conf); + goto out_new; + } + if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { + /* 18.35.4 case 9 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_perm; + goto out; + } + expire_client(conf); + goto out_new; + } + /* + * Set bit when the owner id and verifier map to an already + * confirmed client id (18.35.3). 
+ */ + exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; + + /* + * Falling into 18.35.4 case 2, possible router replay. + * Leave confirmed record intact and return same result. + */ + copy_verf(conf, &verf); + new = conf; + goto out_copy; + } + + /* 18.35.4 case 7 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_noent; + goto out; + } + + unconf = find_unconfirmed_client_by_str(dname, strhashval); + if (unconf) { + /* + * Possible retry or client restart. Per 18.35.4 case 4, + * a new unconfirmed record should be generated regardless + * of whether any properties have changed. + */ + expire_client(unconf); + } + +out_new: + /* Normal case */ + new = create_client(exid->clname, dname, rqstp, &verf); + if (new == NULL) { + status = nfserr_jukebox; + goto out; + } + + gen_clid(new); + add_to_unconfirmed(new, strhashval); +out_copy: + exid->clientid.cl_boot = new->cl_clientid.cl_boot; + exid->clientid.cl_id = new->cl_clientid.cl_id; + + exid->seqid = 1; + nfsd4_set_ex_flags(new, exid); + + dprintk("nfsd4_exchange_id seqid %d flags %x\n", + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); + status = nfs_ok; + +out: + nfs4_unlock_state(); +error: + dprintk("nfsd4_exchange_id returns %d\n", ntohl(status)); + return status; +} + +static int +check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) +{ + dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, + slot_seqid); + + /* The slot is in use, and no response has been sent. 
*/ + if (slot_inuse) { + if (seqid == slot_seqid) + return nfserr_jukebox; + else + return nfserr_seq_misordered; + } + /* Normal */ + if (likely(seqid == slot_seqid + 1)) + return nfs_ok; + /* Replay */ + if (seqid == slot_seqid) + return nfserr_replay_cache; + /* Wraparound */ + if (seqid == 1 && (slot_seqid + 1) == 0) + return nfs_ok; + /* Misordered replay or misordered new request */ + return nfserr_seq_misordered; +} + +/* + * Cache the create session result into the create session single DRC + * slot cache by saving the xdr structure. sl_seqid has been set. + * Do this for solo or embedded create session operations. + */ +static void +nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot, int nfserr) +{ + slot->sl_status = nfserr; + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); +} + +static __be32 +nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot) +{ + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); + return slot->sl_status; +} + +#define NFSD_MIN_REQ_HDR_SEQ_SZ ((\ + 2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \ + 1 + /* MIN tag is length with zero, only length */ \ + 3 + /* version, opcount, opcode */ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + /* seqid, slotID, slotID, cache */ \ + 4 ) * sizeof(__be32)) + +#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\ + 2 + /* verifier: AUTH_NULL, length 0 */\ + 1 + /* status */ \ + 1 + /* MIN tag is length with zero, only length */ \ + 3 + /* opcount, opcode, opstatus*/ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + /* seqid, slotID, slotID, slotID, status */ \ + 5 ) * sizeof(__be32)) + +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) +{ + return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ + || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; +} + +__be32 +nfsd4_create_session(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_create_session *cr_ses) +{ + struct sockaddr 
*sa = svc_addr(rqstp); + struct nfs4_client *conf, *unconf; + struct nfsd4_session *new; + struct nfsd4_clid_slot *cs_slot = NULL; + bool confirm_me = false; + int status = 0; + + if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) + return nfserr_inval; + + nfs4_lock_state(); + unconf = find_unconfirmed_client(&cr_ses->clientid); + conf = find_confirmed_client(&cr_ses->clientid); + + if (conf) { + cs_slot = &conf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); + if (status == nfserr_replay_cache) { + dprintk("Got a create_session replay! seqid= %d\n", + cs_slot->sl_seqid); + /* Return the cached reply status */ + status = nfsd4_replay_create_session(cr_ses, cs_slot); + goto out; + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { + status = nfserr_seq_misordered; + dprintk("Sequence misordered!\n"); + dprintk("Expected seqid= %d but got seqid= %d\n", + cs_slot->sl_seqid, cr_ses->seqid); + goto out; + } + } else if (unconf) { + if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { + status = nfserr_clid_inuse; + goto out; + } + + cs_slot = &unconf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); + if (status) { + /* an unconfirmed replay returns misordered */ + status = nfserr_seq_misordered; + goto out; + } + + confirm_me = true; + conf = unconf; + } else { + status = nfserr_stale_clientid; + goto out; + } + + /* + * XXX: we should probably set this at creation time, and check + * for consistent minorversion use throughout: + */ + conf->cl_minorversion = 1; + /* + * We do not support RDMA or persistent sessions + */ + cr_ses->flags &= ~SESSION4_PERSIST; + cr_ses->flags &= ~SESSION4_RDMA; + + status = nfserr_toosmall; + if (check_forechannel_attrs(cr_ses->fore_channel)) + goto out; + + status = nfserr_jukebox; + new = alloc_init_session(rqstp, conf, cr_ses); + if (!new) + goto out; + status = nfs_ok; + memcpy(cr_ses->sessionid.data, 
new->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN); + memcpy(&cr_ses->fore_channel, &new->se_fchannel, + sizeof(struct nfsd4_channel_attrs)); + cs_slot->sl_seqid++; + cr_ses->seqid = cs_slot->sl_seqid; + + /* cache solo and embedded create sessions under the state lock */ + nfsd4_cache_create_session(cr_ses, cs_slot, status); + if (confirm_me) + move_to_confirmed(conf); +out: + nfs4_unlock_state(); + dprintk("%s returns %d\n", __func__, ntohl(status)); + return status; +} + +/* + * Compares the op count of the compound arguments with the op count recorded + * in the response so far; callers return nfserr_not_only_op when it is false. + */ +static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + return argp->opcnt == resp->opcnt; +} + +/* + * Validate the requested channel direction in *dir. FORE and BACK are + * accepted unchanged, the two *_OR_BOTH values are rewritten to + * NFS4_CDFC4_BOTH, and any other value yields nfserr_inval. + */ +static __be32 nfsd4_map_bcts_dir(u32 *dir) +{ + switch (*dir) { + case NFS4_CDFC4_FORE: + case NFS4_CDFC4_BACK: + return nfs_ok; + case NFS4_CDFC4_FORE_OR_BOTH: + case NFS4_CDFC4_BACK_OR_BOTH: + *dir = NFS4_CDFC4_BOTH; + return nfs_ok; + } + return nfserr_inval; +} + +/* + * Look up the session named by bcts->sessionid under client_lock, take a + * session and client reference, and, if the requested direction is valid, + * hand the connection to nfsd4_new_conn(). + */ +__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_bind_conn_to_session *bcts) +{ + __be32 status; + + if (!nfsd4_last_compound_op(rqstp)) + return nfserr_not_only_op; + spin_lock(&client_lock); + cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid); + /* Sorta weird: we only need the refcnt'ing because new_conn acquires + * client_lock itself: */ + if (cstate->session) { + nfsd4_get_session(cstate->session); + atomic_inc(&cstate->session->se_client->cl_refcount); + } + spin_unlock(&client_lock); + if (!cstate->session) + return nfserr_badsession; + + status = nfsd4_map_bcts_dir(&bcts->dir); + if (!status) + nfsd4_new_conn(rqstp, cstate->session, bcts->dir); + return status; +} + +/* True when session is non-NULL and its se_sessionid equals *sid. */ +static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) +{ + if (!session) + return false; + return !memcmp(sid, &session->se_sessionid, sizeof(*sid)); +} + +__be32 +nfsd4_destroy_session(struct svc_rqst *r, + struct nfsd4_compound_state
*cstate, + struct nfsd4_destroy_session *sessionid) +{ + struct nfsd4_session *ses; + /* __be32, not u32: holds nfserr_* codes, is passed to ntohl() and returned as __be32 */ + __be32 status = nfserr_badsession; + + /* Notes: + * - The confirmed nfs4_client->cl_sessionid holds destroyed sessionid + * - Should we return nfserr_back_chan_busy if waiting for + * callbacks on to-be-destroyed session? + * - Do we need to clear any callback info from previous session? + */ + + if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { + if (!nfsd4_last_compound_op(r)) + return nfserr_not_only_op; + } + dump_sessionid(__func__, &sessionid->sessionid); + spin_lock(&client_lock); + ses = find_in_sessionid_hashtbl(&sessionid->sessionid); + if (!ses) { + spin_unlock(&client_lock); + goto out; + } + + unhash_session(ses); + spin_unlock(&client_lock); + + nfs4_lock_state(); + nfsd4_probe_callback_sync(ses->se_client); + nfs4_unlock_state(); + + nfsd4_del_conns(ses); + + nfsd4_put_session(ses); + status = nfs_ok; +out: + dprintk("%s returns %d\n", __func__, ntohl(status)); + return status; +} + +/* Return the connection on s->se_conns whose cn_xprt is xpt, or NULL if there is none. */ +static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) +{ + struct nfsd4_conn *c; + + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_xprt == xpt) { + return c; + } + } + return NULL; +} + +/* + * Consumes "new": if the session already has a connection for the same + * transport, new is freed; otherwise it is hashed into the session under + * cl_lock and then registered, and a registration failure is treated as a + * lost connection. + */ +static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd4_conn *c; + int ret; + + spin_lock(&clp->cl_lock); + c = __nfsd4_find_conn(new->cn_xprt, ses); + if (c) { + spin_unlock(&clp->cl_lock); + free_conn(new); + return; + } + __nfsd4_hash_conn(new, ses); + spin_unlock(&clp->cl_lock); + ret = nfsd4_register_conn(new); + if (ret) + /* oops; xprt is already down: */ + nfsd4_conn_lost(&new->cn_xpt_user); + return; +} + +/* True when the compound carries more ops than the session's fore channel maxops. */ +static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) +{ + struct nfsd4_compoundargs *args = rqstp->rq_argp; + + return args->opcnt > session->se_fchannel.maxops; +} + +static bool
nfsd4_request_too_big(struct svc_rqst *rqstp, + struct nfsd4_session *session) +{ + struct xdr_buf *xb = &rqstp->rq_arg; + + return xb->len > session->se_fchannel.maxreq_sz; +} + +/* + * Look up the session named in seq, check the compound against the fore + * channel limits (op count, request size, slot id) and the slot's seqid, and + * on success mark the slot in use and record slot and session in cstate. + * A replay returns the cached reply status and sets cstate->status to + * nfserr_replay_cache. + */ +__be32 +nfsd4_sequence(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_sequence *seq) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_session *session; + struct nfsd4_slot *slot; + struct nfsd4_conn *conn; + /* __be32, not int: holds nfserr_* codes, is passed to ntohl() and returned as __be32 */ + __be32 status; + + if (resp->opcnt != 1) + return nfserr_sequence_pos; + + /* + * Will be either used or freed by nfsd4_sequence_check_conn + * below. + */ + conn = alloc_conn(rqstp, NFS4_CDFC4_FORE); + if (!conn) + return nfserr_jukebox; + + spin_lock(&client_lock); + status = nfserr_badsession; + session = find_in_sessionid_hashtbl(&seq->sessionid); + if (!session) + goto out; + + status = nfserr_too_many_ops; + if (nfsd4_session_too_many_ops(rqstp, session)) + goto out; + + status = nfserr_req_too_big; + if (nfsd4_request_too_big(rqstp, session)) + goto out; + + status = nfserr_badslot; + if (seq->slotid >= session->se_fchannel.maxreqs) + goto out; + + slot = session->se_slots[seq->slotid]; + dprintk("%s: slotid %d\n", __func__, seq->slotid); + + /* We do not negotiate the number of slots yet, so set the + * maxslots to the session maxreqs which is used to encode + * sr_highest_slotid and the sr_target_slot id to maxslots */ + seq->maxslots = session->se_fchannel.maxreqs; + + status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse); + if (status == nfserr_replay_cache) { + cstate->slot = slot; + cstate->session = session; + /* Return the cached reply status and set cstate->status + * for nfsd4_proc_compound processing */ + status = nfsd4_replay_cache_entry(resp, seq); + cstate->status = nfserr_replay_cache; + goto out; + } + if (status) + goto out; + + nfsd4_sequence_check_conn(conn, session); + conn = NULL; + + /* Success!
bump slot seqid */ + slot->sl_inuse = true; + slot->sl_seqid = seq->seqid; + slot->sl_cachethis = seq->cachethis; + + cstate->slot = slot; + cstate->session = session; + +out: + /* Hold a session reference until done processing the compound. */ + if (cstate->session) { + struct nfs4_client *clp = session->se_client; + + nfsd4_get_session(cstate->session); + atomic_inc(&clp->cl_refcount); + switch (clp->cl_cb_state) { + case NFSD4_CB_DOWN: + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; + break; + case NFSD4_CB_FAULT: + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; + break; + default: + seq->status_flags = 0; + } + } + kfree(conn); + spin_unlock(&client_lock); + dprintk("%s: return %d\n", __func__, ntohl(status)); + return status; +} + +static inline bool has_resources(struct nfs4_client *clp) +{ + return !list_empty(&clp->cl_openowners) + || !list_empty(&clp->cl_delegations) + || !list_empty(&clp->cl_sessions); +} + +__be32 +nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) +{ + struct nfs4_client *conf, *unconf, *clp; + int status = 0; + + nfs4_lock_state(); + unconf = find_unconfirmed_client(&dc->clientid); + conf = find_confirmed_client(&dc->clientid); + + if (conf) { + clp = conf; + + if (!is_client_expired(conf) && has_resources(conf)) { + status = nfserr_clientid_busy; + goto out; + } + + /* rfc5661 18.50.3 */ + if (cstate->session && conf == cstate->session->se_client) { + status = nfserr_clientid_busy; + goto out; + } + } else if (unconf) + clp = unconf; + else { + status = nfserr_stale_clientid; + goto out; + } + + expire_client(clp); +out: + nfs4_unlock_state(); + dprintk("%s return %d\n", __func__, ntohl(status)); + return status; +} + +__be32 +nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) +{ + int status = 0; + + if (rc->rca_one_fs) { + if (!cstate->current_fh.fh_dentry) + return nfserr_nofilehandle; + /* + * 
We don't take advantage of the rca_one_fs case. + * That's OK, it's optional, we can safely ignore it. + */ + return nfs_ok; + } + + nfs4_lock_state(); + status = nfserr_complete_already; + if (cstate->session->se_client->cl_firststate) + goto out; + + status = nfserr_stale_clientid; + if (is_client_expired(cstate->session->se_client)) + /* + * The following error isn't really legal. + * But we only get here if the client just explicitly + * destroyed the client. Surely it no longer cares what + * error it gets back on an operation for the dead + * client. + */ + goto out; + + status = nfs_ok; + nfsd4_create_clid_dir(cstate->session->se_client); +out: + nfs4_unlock_state(); + return status; +} + +/* + * SETCLIENTID: create a new unconfirmed client record for the name and + * verifier supplied in setclid, following the RFC 3530 14.2.33 cases noted + * in the body, and fill in the clientid and confirm verifier of the result. + */ +__be32 +nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_setclientid *setclid) +{ + struct xdr_netobj clname = setclid->se_name; + nfs4_verifier clverifier = setclid->se_verf; + unsigned int strhashval; + struct nfs4_client *conf, *unconf, *new; + __be32 status; + char dname[HEXDIR_LEN]; + + status = nfs4_make_rec_clidname(dname, &clname); + if (status) + return status; + + /* + * XXX The Duplicate Request Cache (DRC) has been checked (??) + * We get here on a DRC miss. + */ + + strhashval = clientstr_hashval(dname); + + nfs4_lock_state(); + conf = find_confirmed_client_by_str(dname, strhashval); + if (conf) { + /* RFC 3530 14.2.33 CASE 0: */ + status = nfserr_clid_inuse; + if (clp_used_exchangeid(conf)) + goto out; + if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, + sizeof(addr_str)); + dprintk("NFSD: setclientid: string in use by client " + "at %s\n", addr_str); + goto out; + } + } + /* + * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION") + * has a description of SETCLIENTID request processing consisting + * of 5 bullet points, labeled as CASE0 - CASE4 below.
+ */ + unconf = find_unconfirmed_client_by_str(dname, strhashval); + /* status returned if create_client() fails in any of the cases below */ + status = nfserr_jukebox; + if (!conf) { + /* + * RFC 3530 14.2.33 CASE 4: + * placed first, because it is the normal case + */ + if (unconf) + expire_client(unconf); + new = create_client(clname, dname, rqstp, &clverifier); + if (new == NULL) + goto out; + gen_clid(new); + } else if (same_verf(&conf->cl_verifier, &clverifier)) { + /* + * RFC 3530 14.2.33 CASE 1: + * probable callback update + */ + if (unconf) { + /* Note this is removing unconfirmed {*x***}, + * which is stronger than RFC recommended {vxc**}. + * This has the advantage that there is at most + * one {*x***} in either list at any time. + */ + expire_client(unconf); + } + new = create_client(clname, dname, rqstp, &clverifier); + if (new == NULL) + goto out; + /* same verifier: the new record reuses the confirmed client's clientid */ + copy_clid(new, conf); + } else if (!unconf) { + /* + * RFC 3530 14.2.33 CASE 2: + * probable client reboot; state will be removed if + * confirmed. + */ + new = create_client(clname, dname, rqstp, &clverifier); + if (new == NULL) + goto out; + gen_clid(new); + } else { + /* + * RFC 3530 14.2.33 CASE 3: + * probable client reboot; state will be removed if + * confirmed.
+ */ + expire_client(unconf); + new = create_client(clname, dname, rqstp, &clverifier); + if (new == NULL) + goto out; + gen_clid(new); + } + /* + * XXX: we should probably set this at creation time, and check + * for consistent minorversion use throughout: + */ + new->cl_minorversion = 0; + gen_callback(new, setclid, rqstp); + add_to_unconfirmed(new, strhashval); + /* copy the new record's clientid and confirm verifier into the setclientid result */ + setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; + setclid->se_clientid.cl_id = new->cl_clientid.cl_id; + memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); + status = nfs_ok; +out: + nfs4_unlock_state(); + return status; +} + + +/* + * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has + * a description of SETCLIENTID_CONFIRM request processing consisting of 4 + * bullets, labeled as CASE1 - CASE4 below. + */ +__be32 +nfsd4_setclientid_confirm(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_setclientid_confirm *setclientid_confirm) +{ + struct sockaddr *sa = svc_addr(rqstp); + struct nfs4_client *conf, *unconf; + nfs4_verifier confirm = setclientid_confirm->sc_confirm; + clientid_t * clid = &setclientid_confirm->sc_clientid; + __be32 status; + + if (STALE_CLIENTID(clid)) + return nfserr_stale_clientid; + /* + * XXX The Duplicate Request Cache (DRC) has been checked (??) + * We get here on a DRC miss. + */ + + nfs4_lock_state(); + + conf = find_confirmed_client(clid); + unconf = find_unconfirmed_client(clid); + + /* a request from a different address than the recorded client is refused */ + status = nfserr_clid_inuse; + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) + goto out; + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) + goto out; + + /* + * section 14.2.34 of RFC 3530 has a description of + * SETCLIENTID_CONFIRM request processing consisting + * of 4 bullet points, labeled as CASE1 - CASE4 below.
+ */ + if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) { + /* + * RFC 3530 14.2.34 CASE 1: + * callback update + */ + if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) + status = nfserr_clid_inuse; + else { + /* move the callback info to the confirmed client, then drop the unconfirmed record */ + nfsd4_change_callback(conf, &unconf->cl_cb_conn); + nfsd4_probe_callback(conf); + expire_client(unconf); + status = nfs_ok; + + } + } else if (conf && !unconf) { + /* + * RFC 3530 14.2.34 CASE 2: + * probable retransmitted request; play it safe and + * do nothing. + */ + if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) + status = nfserr_clid_inuse; + else + status = nfs_ok; + } else if (!conf && unconf + && same_verf(&unconf->cl_confirm, &confirm)) { + /* + * RFC 3530 14.2.34 CASE 3: + * Normal case; new or rebooted client: + */ + if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { + status = nfserr_clid_inuse; + } else { + unsigned int hash = + clientstr_hashval(unconf->cl_recdir); + /* expire any confirmed client recorded under the same cl_recdir */ + conf = find_confirmed_client_by_str(unconf->cl_recdir, + hash); + if (conf) { + nfsd4_remove_clid_dir(conf); + expire_client(conf); + } + move_to_confirmed(unconf); + conf = unconf; + nfsd4_probe_callback(conf); + status = nfs_ok; + } + } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) + && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, + &confirm)))) { + /* + * RFC 3530 14.2.34 CASE 4: + * Client probably hasn't noticed that we rebooted yet.
+ */ + status = nfserr_stale_clientid; + } else { + /* check that we have hit one of the cases...*/ + status = nfserr_clid_inuse; + } +out: + nfs4_unlock_state(); + return status; +} + +/* Allocate an uninitialized nfs4_file from file_slab; callers check for NULL. */ +static struct nfs4_file *nfsd4_alloc_file(void) +{ + return kmem_cache_alloc(file_slab, GFP_KERNEL); +} + +/* OPEN Share state helper functions */ +static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) +{ + unsigned int hashval = file_hashval(ino); + + atomic_set(&fp->fi_ref, 1); + INIT_LIST_HEAD(&fp->fi_hash); + INIT_LIST_HEAD(&fp->fi_stateids); + INIT_LIST_HEAD(&fp->fi_delegations); + fp->fi_inode = igrab(ino); + fp->fi_had_conflict = false; + fp->fi_lease = NULL; + memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); + memset(fp->fi_access, 0, sizeof(fp->fi_access)); + spin_lock(&recall_lock); + list_add(&fp->fi_hash, &file_hashtbl[hashval]); + spin_unlock(&recall_lock); +} + +/* + * Destroy *slab if it was created and reset the pointer to NULL, so the + * function can be called on caches that were never set up or already freed. + */ +static void +nfsd4_free_slab(struct kmem_cache **slab) +{ + if (*slab == NULL) + return; + kmem_cache_destroy(*slab); + *slab = NULL; +} + +/* Destroy all five NFSv4 state slab caches. */ +void +nfsd4_free_slabs(void) +{ + nfsd4_free_slab(&openowner_slab); + nfsd4_free_slab(&lockowner_slab); + nfsd4_free_slab(&file_slab); + nfsd4_free_slab(&stateid_slab); + nfsd4_free_slab(&deleg_slab); +} + +/* + * Create the slab caches for open owners, lock owners, files, stateids and + * delegations. Returns 0 on success; if any cache cannot be created, the + * ones already created are destroyed and -ENOMEM is returned. + */ +int +nfsd4_init_slabs(void) +{ + openowner_slab = kmem_cache_create("nfsd4_openowners", + sizeof(struct nfs4_openowner), 0, 0, NULL); + if (openowner_slab == NULL) + goto out_nomem; + /* objects in this cache are struct nfs4_lockowner (see nfs4_free_lockowner) */ + lockowner_slab = kmem_cache_create("nfsd4_lockowners", + sizeof(struct nfs4_lockowner), 0, 0, NULL); + if (lockowner_slab == NULL) + goto out_nomem; + file_slab = kmem_cache_create("nfsd4_files", + sizeof(struct nfs4_file), 0, 0, NULL); + if (file_slab == NULL) + goto out_nomem; + stateid_slab = kmem_cache_create("nfsd4_stateids", + sizeof(struct nfs4_ol_stateid), 0, 0, NULL); + if (stateid_slab == NULL) + goto out_nomem; + deleg_slab = kmem_cache_create("nfsd4_delegations", + sizeof(struct nfs4_delegation), 0, 0, NULL); + if (deleg_slab == NULL) + goto out_nomem; + return 0;
+out_nomem: + nfsd4_free_slabs(); + dprintk("nfsd4: out of memory while initializing nfsv4\n"); + return -ENOMEM; +} + +/* Free the owner name buffer, then return the openowner to its slab. */ +void nfs4_free_openowner(struct nfs4_openowner *oo) +{ + kfree(oo->oo_owner.so_owner.data); + kmem_cache_free(openowner_slab, oo); +} + +/* Free the owner name buffer, then return the lockowner to its slab. */ +void nfs4_free_lockowner(struct nfs4_lockowner *lo) +{ + kfree(lo->lo_owner.so_owner.data); + kmem_cache_free(lockowner_slab, lo); +} + +/* + * Reset replay state: status nfserr_serverfault, zero length, and rp_buf + * pointing at the inline rp_ibuf. + */ +static void init_nfs4_replay(struct nfs4_replay *rp) +{ + rp->rp_status = nfserr_serverfault; + rp->rp_buflen = 0; + rp->rp_buf = rp->rp_ibuf; +} + +/* + * Allocate an object from slab, treat its start as a struct nfs4_stateowner, + * duplicate the owner name into it and initialize the common fields. + * Returns NULL if either allocation fails. + * NOTE(review): returning void * presumably relies on the stateowner being + * the first member of the open/lock owner types -- confirm in state.h. + */ +static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) +{ + struct nfs4_stateowner *sop; + + sop = kmem_cache_alloc(slab, GFP_KERNEL); + if (!sop) + return NULL; + + sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL); + if (!sop->so_owner.data) { + kmem_cache_free(slab, sop); + return NULL; + } + sop->so_owner.len = owner->len; + + INIT_LIST_HEAD(&sop->so_stateids); + sop->so_client = clp; + init_nfs4_replay(&sop->so_replay); + return sop; +} + +/* Add oo to ownerstr_hashtbl[strhashval] and to the client's cl_openowners list. */ +static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) +{ + list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_perclient, &clp->cl_openowners); +} + +/* + * Allocate an open owner for open->op_owner, mark it NFS4_OO_NEW with the + * seqid taken from the OPEN, and hash it. Returns NULL on allocation failure. + */ +static struct nfs4_openowner * +alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { + struct nfs4_openowner *oo; + + oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); + if (!oo) + return NULL; + oo->oo_owner.so_is_open_owner = 1; + oo->oo_owner.so_seqid = open->op_seqid; + oo->oo_flags = NFS4_OO_NEW; + oo->oo_time = 0; + oo->oo_last_closed_stid = NULL; + INIT_LIST_HEAD(&oo->oo_close_lru); + hash_openowner(oo, clp, strhashval); + return oo; +} + +/* + * Initialize stp as an open stateid for fp owned by open->op_openowner: link + * it on the owner's and the file's stateid lists, take a file reference, and + * record the requested share access and deny bits. + */ +static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { + struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_client *clp =
oo->oo_owner.so_client; + + init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); + INIT_LIST_HEAD(&stp->st_lockowners); + list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); + list_add(&stp->st_perfile, &fp->fi_stateids); + stp->st_stateowner = &oo->oo_owner; + get_nfs4_file(fp); + stp->st_file = fp; + stp->st_access_bmap = 0; + stp->st_deny_bmap = 0; + __set_bit(open->op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_deny, &stp->st_deny_bmap); + stp->st_openstp = NULL; +} + +/* Move oo to the tail of close_lru and stamp oo_time with the current time. */ +static void +move_to_close_lru(struct nfs4_openowner *oo) +{ + dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); + + list_move_tail(&oo->oo_close_lru, &close_lru); + oo->oo_time = get_seconds(); +} + +/* + * Nonzero when sop has the same owner name as owner (length and bytes) and + * belongs to the client whose cl_id matches clid->cl_id. + */ +static int +same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, + clientid_t *clid) +{ + return (sop->so_owner.len == owner->len) && + 0 == memcmp(sop->so_owner.data, owner->data, owner->len) && + (sop->so_client->cl_clientid.cl_id == clid->cl_id); +} + +/* + * Search ownerstr_hashtbl[hashval] for an open owner matching the owner name + * and clientid in open. On a hit the owning client is renewed and the open + * owner returned; otherwise NULL. + */ +static struct nfs4_openowner * +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) +{ + struct nfs4_stateowner *so; + struct nfs4_openowner *oo; + + list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + if (!so->so_is_open_owner) + continue; + if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { + oo = openowner(so); + renew_client(oo->oo_owner.so_client); + return oo; + } + } + return NULL; +} + +/* search file_hashtbl[] for file; a found entry is returned with a reference taken */ +static struct nfs4_file * +find_file(struct inode *ino) +{ + unsigned int hashval = file_hashval(ino); + struct nfs4_file *fp; + + spin_lock(&recall_lock); + list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { + if (fp->fi_inode == ino) { + get_nfs4_file(fp); + spin_unlock(&recall_lock); + return fp; + } + } + spin_unlock(&recall_lock); + return NULL; +} + +/* + * Called to check deny when READ with all zero stateid or + * WRITE with all zero or all one stateid + */ +static __be32 +nfs4_share_conflict(struct svc_fh
*current_fh, unsigned int deny_type) +{ + struct inode *ino = current_fh->fh_dentry->d_inode; + struct nfs4_file *fp; + struct nfs4_ol_stateid *stp; + __be32 ret; + + dprintk("NFSD: nfs4_share_conflict\n"); + + /* no nfs4_file for this inode means there is no share state to conflict with */ + fp = find_file(ino); + if (!fp) + return nfs_ok; + ret = nfserr_locked; + /* Search for conflicting share reservations */ + list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { + if (test_bit(deny_type, &stp->st_deny_bmap) || + test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) + goto out; + } + ret = nfs_ok; +out: + /* drop the reference taken by find_file() */ + put_nfs4_file(fp); + return ret; +} + +/* + * Start the recall of one delegation: take a reference on dp, queue it on + * del_recall_lru, stamp dl_time and issue the recall callback. + */ +static void nfsd_break_one_deleg(struct nfs4_delegation *dp) +{ + /* We're assuming the state code never drops its reference + * without first removing the lease. Since we're in this lease + * callback (and since the lease code is serialized by the kernel + * lock) we know the server hasn't removed the lease yet, we know + * it's safe to take a reference: */ + atomic_inc(&dp->dl_count); + + list_add_tail(&dp->dl_recall_lru, &del_recall_lru); + + /* only place dl_time is set. protected by lock_flocks*/ + dp->dl_time = get_seconds(); + + nfsd4_cb_recall(dp); +} + +/* Called from break_lease() with lock_flocks() held.
*/ +static void nfsd_break_deleg_cb(struct file_lock *fl) +{ + struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; + struct nfs4_delegation *dp; + + BUG_ON(!fp); + /* We assume break_lease is only called once per lease: */ + BUG_ON(fp->fi_had_conflict); + /* + * We don't want the locks code to timeout the lease for us; + * we'll remove it ourself if a delegation isn't returned + * in time: + */ + fl->fl_break_time = 0; + + spin_lock(&recall_lock); + fp->fi_had_conflict = true; + list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) + nfsd_break_one_deleg(dp); + spin_unlock(&recall_lock); +} + +/* + * lm_change callback: requests with F_UNLCK set are passed on to + * lease_modify(); any other change is refused with -EAGAIN. + */ +static +int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) +{ + if (arg & F_UNLCK) + return lease_modify(onlist, arg); + else + return -EAGAIN; +} + +/* lock manager callbacks installed on leases created by nfs4_alloc_init_lease() */ +static const struct lock_manager_operations nfsd_lease_mng_ops = { + .lm_break = nfsd_break_deleg_cb, + .lm_change = nfsd_change_deleg_cb, +}; + +/* + * Seqid check against state owner so. Always OK when the compound has a + * session; otherwise so_seqid - 1 is a replay (nfserr_replay_me), so_seqid + * is accepted, and anything else is nfserr_bad_seqid. + */ +static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid) +{ + if (nfsd4_has_session(cstate)) + return nfs_ok; + if (seqid == so->so_seqid - 1) + return nfserr_replay_me; + if (seqid == so->so_seqid) + return nfs_ok; + return nfserr_bad_seqid; +} + +/* + * First stage of OPEN: preallocate the nfs4_file, find or create the open + * owner, and preallocate the stateid. The preallocated objects are left in + * open->op_file / open->op_stp for later stages and for cleanup. + */ +__be32 +nfsd4_process_open1(struct nfsd4_compound_state *cstate, + struct nfsd4_open *open) +{ + clientid_t *clientid = &open->op_clientid; + struct nfs4_client *clp = NULL; + unsigned int strhashval; + struct nfs4_openowner *oo = NULL; + __be32 status; + + if (STALE_CLIENTID(&open->op_clientid)) + return nfserr_stale_clientid; + /* + * In case we need it later, after we've already created the + * file and don't want to risk a further failure: + */ + open->op_file = nfsd4_alloc_file(); + if (open->op_file == NULL) + return nfserr_jukebox; + + strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); + oo = find_openstateowner_str(strhashval, open); + open->op_openowner = oo; + if (!oo) { + clp = find_confirmed_client(clientid); + if (clp == NULL) + return
nfserr_expired; + goto new_owner; + } + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + /* Replace unconfirmed owners without checking for replay. */ + clp = oo->oo_owner.so_client; + release_openowner(oo); + open->op_openowner = NULL; + goto new_owner; + } + status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); + if (status) + return status; + clp = oo->oo_owner.so_client; + goto alloc_stateid; +new_owner: + oo = alloc_init_open_stateowner(strhashval, clp, open); + if (oo == NULL) + return nfserr_jukebox; + open->op_openowner = oo; +alloc_stateid: + /* preallocated here; consumed by nfsd4_process_open2 or freed by nfsd4_cleanup_open_state */ + open->op_stp = nfs4_alloc_stateid(clp); + if (!open->op_stp) + return nfserr_jukebox; + return nfs_ok; +} + +/* + * A request needing write state (WR_STATE) against a read delegation fails + * with nfserr_openmode; every other combination is OK. + */ +static inline __be32 +nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) +{ + if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) + return nfserr_openmode; + else + return nfs_ok; +} + +/* + * Map a share access value, ignoring the NFS4_SHARE_WANT_MASK bits, to + * RD_STATE for read-only access and WR_STATE for anything else. + */ +static int share_access_to_flags(u32 share_access) +{ + share_access &= ~NFS4_SHARE_WANT_MASK; + + return share_access == NFS4_SHARE_ACCESS_READ ?
RD_STATE : WR_STATE; +} + +/* Look up stateid s of type NFS4_DELEG_STID for client cl; NULL if not found. */ +static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s) +{ + struct nfs4_stid *ret; + + ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); + if (!ret) + return NULL; + return delegstateid(ret); +} + +/* True when the OPEN uses one of the two "delegate current" claim types. */ +static bool nfsd4_is_deleg_cur(struct nfsd4_open *open) +{ + return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH; +} + +/* + * Look up the delegation named by open->op_delegate_stateid and check it + * against the requested share access; *dp is left NULL if either step fails. + * A failure is only reported for "delegate current" claims; for those, a + * valid delegation also marks the open owner confirmed. + */ +static __be32 +nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open, + struct nfs4_delegation **dp) +{ + int flags; + __be32 status = nfserr_bad_stateid; + + *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); + if (*dp == NULL) + goto out; + flags = share_access_to_flags(open->op_share_access); + status = nfs4_check_delegmode(*dp, flags); + if (status) + *dp = NULL; +out: + if (!nfsd4_is_deleg_cur(open)) + return nfs_ok; + if (status) + return status; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + return nfs_ok; +} + +/* + * Walk the open stateids on fp: remember in *stpp the one that belongs to + * this OPEN's owner (leaving *stpp untouched if there is none), and fail with + * nfserr_share_denied as soon as test_share() rejects the request. + */ +static __be32 +nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) +{ + struct nfs4_ol_stateid *local; + struct nfs4_openowner *oo = open->op_openowner; + + list_for_each_entry(local, &fp->fi_stateids, st_perfile) { + /* ignore lock owners */ + if (local->st_stateowner->so_is_open_owner == 0) + continue; + /* remember if we have seen this open owner */ + if (local->st_stateowner == &oo->oo_owner) + *stpp = local; + /* check for conflicting share reservations */ + if (!test_share(local, open)) + return nfserr_share_denied; + } + return nfs_ok; +} + +/* Return a stateid to stateid_slab. */ +static void nfs4_free_stateid(struct nfs4_ol_stateid *s) +{ + kmem_cache_free(stateid_slab, s); +} + +/* Translate NFS4_SHARE_ACCESS_READ/WRITE bits into NFSD_MAY_READ/NFSD_MAY_WRITE. */ +static inline int nfs4_access_to_access(u32 nfs4_access) +{ + int flags = 0; + + if (nfs4_access & NFS4_SHARE_ACCESS_READ) + flags |= NFSD_MAY_READ; + if (nfs4_access & NFS4_SHARE_ACCESS_WRITE) + flags |= NFSD_MAY_WRITE; + return flags; +} + +/* + * Open the file for the requested access mode if fp has no struct file for + * that mode yet, then take an access reference for it. + */ +static __be32 nfs4_get_vfs_file(struct
svc_rqst *rqstp, struct nfs4_file *fp, + struct svc_fh *cur_fh, struct nfsd4_open *open) +{ + __be32 status; + int oflag = nfs4_access_to_omode(open->op_share_access); + int access = nfs4_access_to_access(open->op_share_access); + + if (!fp->fi_fds[oflag]) { + status = nfsd_open(rqstp, cur_fh, S_IFREG, access, + &fp->fi_fds[oflag]); + if (status) + return status; + } + nfs4_file_get_access(fp, oflag); + + return nfs_ok; +} + +/* + * Truncate the file to size 0 when the OPEN asked for it (op_truncate). + * Truncation requires write share access; without it nfserr_inval is + * returned. Does nothing and returns 0 when no truncate was requested. + */ +static inline __be32 +nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, + struct nfsd4_open *open) +{ + struct iattr iattr = { + .ia_valid = ATTR_SIZE, + .ia_size = 0, + }; + if (!open->op_truncate) + return 0; + if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) + return nfserr_inval; + return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); +} + +/* + * Upgrade an existing open stateid: obtain file access if the requested + * share access is not yet in st_access_bmap, truncate if requested (dropping + * the newly taken access again on failure), then record the access and deny + * bits in the stateid. + */ +static __be32 +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) +{ + u32 op_share_access = open->op_share_access; + bool new_access; + __be32 status; + + new_access = !test_bit(op_share_access, &stp->st_access_bmap); + if (new_access) { + status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); + if (status) + return status; + } + status = nfsd4_truncate(rqstp, cur_fh, open); + if (status) { + if (new_access) { + int oflag = nfs4_access_to_omode(op_share_access); + nfs4_file_put_access(fp, oflag); + } + return status; + } + /* remember the open */ + __set_bit(op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_deny, &stp->st_deny_bmap); + + return nfs_ok; +} + + +/* Mark the open owner confirmed and set cl_firststate on its client. */ +static void +nfs4_set_claim_prev(struct nfsd4_open *open) +{ + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + open->op_openowner->oo_owner.so_client->cl_firststate = 1; +} + +/* Should we give out recallable state?: */ +static bool nfsd4_cb_channel_good(struct nfs4_client *clp) +{ + if (clp->cl_cb_state == NFSD4_CB_UP) + return true; + /* + * In the sessions case, since we don't have to establish a + * separate connection
for callbacks, we assume it's OK + * until we hear otherwise: + */ + return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; +} + +static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) +{ + struct file_lock *fl; + + fl = locks_alloc_lock(); + if (!fl) + return NULL; + locks_init_lock(fl); + fl->fl_lmops = &nfsd_lease_mng_ops; + fl->fl_flags = FL_LEASE; + fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; + fl->fl_end = OFFSET_MAX; + fl->fl_owner = (fl_owner_t)(dp->dl_file); + fl->fl_pid = current->tgid; + return fl; +} + +static int nfs4_setlease(struct nfs4_delegation *dp, int flag) +{ + struct nfs4_file *fp = dp->dl_file; + struct file_lock *fl; + int status; + + fl = nfs4_alloc_init_lease(dp, flag); + if (!fl) + return -ENOMEM; + fl->fl_file = find_readable_file(fp); + list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); + if (status) { + list_del_init(&dp->dl_perclnt); + locks_free_lock(fl); + return -ENOMEM; + } + fp->fi_lease = fl; + fp->fi_deleg_file = fl->fl_file; + get_file(fp->fi_deleg_file); + atomic_set(&fp->fi_delegees, 1); + list_add(&dp->dl_perfile, &fp->fi_delegations); + return 0; +} + +static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) +{ + struct nfs4_file *fp = dp->dl_file; + + if (!fp->fi_lease) + return nfs4_setlease(dp, flag); + spin_lock(&recall_lock); + if (fp->fi_had_conflict) { + spin_unlock(&recall_lock); + return -EAGAIN; + } + atomic_inc(&fp->fi_delegees); + list_add(&dp->dl_perfile, &fp->fi_delegations); + spin_unlock(&recall_lock); + list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + return 0; +} + +/* + * Attempt to hand out a delegation. 
+ */ +static void +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_stateid *stp) +{ + struct nfs4_delegation *dp; + struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); + int cb_up; + int status, flag = 0; + + cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); + flag = NFS4_OPEN_DELEGATE_NONE; + open->op_recall = 0; + switch (open->op_claim_type) { + case NFS4_OPEN_CLAIM_PREVIOUS: + /* reclaim: grant the type the client asks for, flagging a recall if the callback channel is not good */ + if (!cb_up) + open->op_recall = 1; + flag = open->op_delegate_type; + if (flag == NFS4_OPEN_DELEGATE_NONE) + goto out; + break; + case NFS4_OPEN_CLAIM_NULL: + /* Let's not give out any delegations till everyone's + * had the chance to reclaim theirs.... */ + if (locks_in_grace()) + goto out; + if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) + goto out; + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + flag = NFS4_OPEN_DELEGATE_WRITE; + else + flag = NFS4_OPEN_DELEGATE_READ; + break; + default: + goto out; + } + + dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); + if (dp == NULL) + goto out_no_deleg; + status = nfs4_set_delegation(dp, flag); + if (status) + goto out_free; + + memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); + + dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", + STATEID_VAL(&dp->dl_stid.sc_stateid)); +out: + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS + && flag == NFS4_OPEN_DELEGATE_NONE + && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) + dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + /* the delegation type actually granted (possibly NONE) goes back in the result */ + open->op_delegate_type = flag; + return; +out_free: + nfs4_put_delegation(dp); +out_no_deleg: + /* allocation or lease setup failed: fall back to reporting no delegation */ + flag = NFS4_OPEN_DELEGATE_NONE; + goto out; +} + +/* + * called with nfs4_lock_state() held.
+ */ +__be32 +nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; + struct nfs4_file *fp = NULL; + struct inode *ino = current_fh->fh_dentry->d_inode; + struct nfs4_ol_stateid *stp = NULL; + struct nfs4_delegation *dp = NULL; + __be32 status; + + /* + * Lookup file; if found, lookup stateid and check open request, + * and check for delegations in the process of being recalled. + * If not found, create the nfs4_file struct + */ + fp = find_file(ino); + if (fp) { + if ((status = nfs4_check_open(fp, open, &stp))) + goto out; + status = nfs4_check_deleg(cl, fp, open, &dp); + if (status) + goto out; + } else { + status = nfserr_bad_stateid; + if (nfsd4_is_deleg_cur(open)) + goto out; + status = nfserr_jukebox; + fp = open->op_file; + open->op_file = NULL; + nfsd4_init_file(fp, ino); + } + + /* + * OPEN the file, or upgrade an existing OPEN. + * If truncate fails, the OPEN fails. + */ + if (stp) { + /* Stateid was found, this is an OPEN upgrade */ + status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); + if (status) + goto out; + } else { + status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); + if (status) + goto out; + stp = open->op_stp; + open->op_stp = NULL; + init_open_stateid(stp, fp, open); + status = nfsd4_truncate(rqstp, current_fh, open); + if (status) { + release_open_stateid(stp); + goto out; + } + } + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + + if (nfsd4_has_session(&resp->cstate)) + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + + /* + * Attempt to hand out a delegation. No error return, because the + * OPEN succeeds even if we fail. 
+ */ + nfs4_open_delegation(current_fh, open, stp); + + status = nfs_ok; + + dprintk("%s: stateid=" STATEID_FMT "\n", __func__, + STATEID_VAL(&stp->st_stid.sc_stateid)); +out: + if (fp) + put_nfs4_file(fp); + if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + nfs4_set_claim_prev(open); + /* + * To finish the open response, we just need to set the rflags. + */ + open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; + if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && + !nfsd4_has_session(&resp->cstate)) + open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; + + return status; +} + +void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) +{ + if (open->op_openowner) { + struct nfs4_openowner *oo = open->op_openowner; + + if (!list_empty(&oo->oo_owner.so_stateids)) + list_del_init(&oo->oo_close_lru); + if (oo->oo_flags & NFS4_OO_NEW) { + if (status) { + release_openowner(oo); + open->op_openowner = NULL; + } else + oo->oo_flags &= ~NFS4_OO_NEW; + } + } + if (open->op_file) + nfsd4_free_file(open->op_file); + if (open->op_stp) + nfs4_free_stateid(open->op_stp); +} + +__be32 +nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + clientid_t *clid) +{ + struct nfs4_client *clp; + __be32 status; + + nfs4_lock_state(); + dprintk("process_renew(%08x/%08x): starting\n", + clid->cl_boot, clid->cl_id); + status = nfserr_stale_clientid; + if (STALE_CLIENTID(clid)) + goto out; + clp = find_confirmed_client(clid); + status = nfserr_expired; + if (clp == NULL) { + /* We assume the client took too long to RENEW. 
*/ + dprintk("nfsd4_renew: clientid not found!\n"); + goto out; + } + status = nfserr_cb_path_down; + if (!list_empty(&clp->cl_delegations) + && clp->cl_cb_state != NFSD4_CB_UP) + goto out; + status = nfs_ok; +out: + nfs4_unlock_state(); + return status; +} + +static struct lock_manager nfsd4_manager = { +}; + +static void +nfsd4_end_grace(void) +{ + dprintk("NFSD: end of grace period\n"); + nfsd4_recdir_purge_old(); + locks_end_grace(&nfsd4_manager); + /* + * Now that every NFSv4 client has had the chance to recover and + * to see the (possibly new, possibly shorter) lease time, we + * can safely set the next grace time to the current lease time: + */ + nfsd4_grace = nfsd4_lease; +} + +static time_t +nfs4_laundromat(void) +{ + struct nfs4_client *clp; + struct nfs4_openowner *oo; + struct nfs4_delegation *dp; + struct list_head *pos, *next, reaplist; + time_t cutoff = get_seconds() - nfsd4_lease; + time_t t, clientid_val = nfsd4_lease; + time_t u, test_val = nfsd4_lease; + + nfs4_lock_state(); + + dprintk("NFSD: laundromat service - starting\n"); + if (locks_in_grace()) + nfsd4_end_grace(); + INIT_LIST_HEAD(&reaplist); + spin_lock(&client_lock); + list_for_each_safe(pos, next, &client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { + t = clp->cl_time - cutoff; + if (clientid_val > t) + clientid_val = t; + break; + } + if (atomic_read(&clp->cl_refcount)) { + dprintk("NFSD: client in use (clientid %08x)\n", + clp->cl_clientid.cl_id); + continue; + } + unhash_client_locked(clp); + list_add(&clp->cl_lru, &reaplist); + } + spin_unlock(&client_lock); + list_for_each_safe(pos, next, &reaplist) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + dprintk("NFSD: purging unused client (clientid %08x)\n", + clp->cl_clientid.cl_id); + nfsd4_remove_clid_dir(clp); + expire_client(clp); + } + spin_lock(&recall_lock); + list_for_each_safe(pos, next, &del_recall_lru) { + dp = list_entry (pos, 
struct nfs4_delegation, dl_recall_lru); + if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { + u = dp->dl_time - cutoff; + if (test_val > u) + test_val = u; + break; + } + list_move(&dp->dl_recall_lru, &reaplist); + } + spin_unlock(&recall_lock); + list_for_each_safe(pos, next, &reaplist) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + unhash_delegation(dp); + } + test_val = nfsd4_lease; + list_for_each_safe(pos, next, &close_lru) { + oo = container_of(pos, struct nfs4_openowner, oo_close_lru); + if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { + u = oo->oo_time - cutoff; + if (test_val > u) + test_val = u; + break; + } + release_openowner(oo); + } + if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) + clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; + nfs4_unlock_state(); + return clientid_val; +} + +static struct workqueue_struct *laundry_wq; +static void laundromat_main(struct work_struct *); +static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); + +static void +laundromat_main(struct work_struct *not_used) +{ + time_t t; + + t = nfs4_laundromat(); + dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); + queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); +} + +static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) +{ + if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) + return nfserr_bad_stateid; + return nfs_ok; +} + +static int +STALE_STATEID(stateid_t *stateid) +{ + if (stateid->si_opaque.so_clid.cl_boot == boot_time) + return 0; + dprintk("NFSD: stale stateid " STATEID_FMT "!\n", + STATEID_VAL(stateid)); + return 1; +} + +static inline int +access_permit_read(unsigned long access_bmap) +{ + return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap); +} + +static inline int +access_permit_write(unsigned long access_bmap) +{ + return 
test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); +} + +static +__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) +{ + __be32 status = nfserr_openmode; + + /* For lock stateid's, we test the parent open, not the lock: */ + if (stp->st_openstp) + stp = stp->st_openstp; + if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) + goto out; + if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) + goto out; + status = nfs_ok; +out: + return status; +} + +static inline __be32 +check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) +{ + if (ONE_STATEID(stateid) && (flags & RD_STATE)) + return nfs_ok; + else if (locks_in_grace()) { + /* Answer in remaining cases depends on existence of + * conflicting state; so we must wait out the grace period. */ + return nfserr_grace; + } else if (flags & WR_STATE) + return nfs4_share_conflict(current_fh, + NFS4_SHARE_DENY_WRITE); + else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */ + return nfs4_share_conflict(current_fh, + NFS4_SHARE_DENY_READ); +} + +/* + * Allow READ/WRITE during grace period on recovered state only for files + * that are not able to provide mandatory locking. + */ +static inline int +grace_disallows_io(struct inode *inode) +{ + return locks_in_grace() && mandatory_lock(inode); +} + +/* Returns true iff a is later than b: */ +static bool stateid_generation_after(stateid_t *a, stateid_t *b) +{ + return (s32)a->si_generation - (s32)b->si_generation > 0; +} + +static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) +{ + /* + * When sessions are used the stateid generation number is ignored + * when it is zero. 
+ */ + if (has_session && in->si_generation == 0) + return nfs_ok; + + if (in->si_generation == ref->si_generation) + return nfs_ok; + + /* If the client sends us a stateid from the future, it's buggy: */ + if (stateid_generation_after(in, ref)) + return nfserr_bad_stateid; + /* + * However, we could see a stateid from the past, even from a + * non-buggy client. For example, if the client sends a lock + * while some IO is outstanding, the lock may bump si_generation + * while the IO is still in flight. The client could avoid that + * situation by waiting for responses on all the IO requests, + * but better performance may result in retrying IO that + * receives an old_stateid error if requests are rarely + * reordered in flight: + */ + return nfserr_old_stateid; +} + +__be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) +{ + struct nfs4_stid *s; + struct nfs4_ol_stateid *ols; + __be32 status; + + if (STALE_STATEID(stateid)) + return nfserr_stale_stateid; + + s = find_stateid(cl, stateid); + if (!s) + return nfserr_stale_stateid; + status = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (status) + return status; + if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID))) + return nfs_ok; + ols = openlockstateid(s); + if (ols->st_stateowner->so_is_open_owner + && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; + return nfs_ok; +} + +static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s) +{ + struct nfs4_client *cl; + + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + return nfserr_bad_stateid; + if (STALE_STATEID(stateid)) + return nfserr_stale_stateid; + cl = find_confirmed_client(&stateid->si_opaque.so_clid); + if (!cl) + return nfserr_expired; + *s = find_stateid_by_type(cl, stateid, typemask); + if (!*s) + return nfserr_bad_stateid; + return nfs_ok; + +} + +/* +* Checks for stateid operations +*/ +__be32 +nfs4_preprocess_stateid_op(struct 
nfsd4_compound_state *cstate, + stateid_t *stateid, int flags, struct file **filpp) +{ + struct nfs4_stid *s; + struct nfs4_ol_stateid *stp = NULL; + struct nfs4_delegation *dp = NULL; + struct svc_fh *current_fh = &cstate->current_fh; + struct inode *ino = current_fh->fh_dentry->d_inode; + __be32 status; + + if (filpp) + *filpp = NULL; + + if (grace_disallows_io(ino)) + return nfserr_grace; + + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + return check_special_stateids(current_fh, stateid, flags); + + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s); + if (status) + return status; + status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); + if (status) + goto out; + switch (s->sc_type) { + case NFS4_DELEG_STID: + dp = delegstateid(s); + status = nfs4_check_delegmode(dp, flags); + if (status) + goto out; + if (filpp) { + *filpp = dp->dl_file->fi_deleg_file; + BUG_ON(!*filpp); + } + break; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + stp = openlockstateid(s); + status = nfs4_check_fh(current_fh, stp); + if (status) + goto out; + if (stp->st_stateowner->so_is_open_owner + && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + goto out; + status = nfs4_check_openmode(stp, flags); + if (status) + goto out; + if (filpp) { + if (flags & RD_STATE) + *filpp = find_readable_file(stp->st_file); + else + *filpp = find_writeable_file(stp->st_file); + } + break; + default: + return nfserr_bad_stateid; + } + status = nfs_ok; +out: + return status; +} + +static __be32 +nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) +{ + if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner))) + return nfserr_locks_held; + release_lock_stateid(stp); + return nfs_ok; +} + +/* + * Test if the stateid is valid + */ +__be32 +nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_test_stateid *test_stateid) +{ + /* real work is done during encoding */ + 
return nfs_ok; +} + +__be32 +nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_free_stateid *free_stateid) +{ + stateid_t *stateid = &free_stateid->fr_stateid; + struct nfs4_stid *s; + struct nfs4_client *cl = cstate->session->se_client; + __be32 ret = nfserr_bad_stateid; + + nfs4_lock_state(); + s = find_stateid(cl, stateid); + if (!s) + goto out; + switch (s->sc_type) { + case NFS4_DELEG_STID: + ret = nfserr_locks_held; + goto out; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + ret = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (ret) + goto out; + if (s->sc_type == NFS4_LOCK_STID) + ret = nfsd4_free_lock_stateid(openlockstateid(s)); + else + ret = nfserr_locks_held; + break; + default: + ret = nfserr_bad_stateid; + } +out: + nfs4_unlock_state(); + return ret; +} + +static inline int +setlkflg (int type) +{ + return (type == NFS4_READW_LT || type == NFS4_READ_LT) ? + RD_STATE : WR_STATE; +} + +static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp) +{ + struct svc_fh *current_fh = &cstate->current_fh; + struct nfs4_stateowner *sop = stp->st_stateowner; + __be32 status; + + status = nfsd4_check_seqid(cstate, sop, seqid); + if (status) + return status; + if (stp->st_stid.sc_type == NFS4_CLOSED_STID) + /* + * "Closed" stateid's exist *only* to return + * nfserr_replay_me from the previous step. + */ + return nfserr_bad_stateid; + status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); + if (status) + return status; + return nfs4_check_fh(current_fh, stp); +} + +/* + * Checks for sequence id mutating operations. 
+ */ +static __be32 +nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, + stateid_t *stateid, char typemask, + struct nfs4_ol_stateid **stpp) +{ + __be32 status; + struct nfs4_stid *s; + + dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, + seqid, STATEID_VAL(stateid)); + + *stpp = NULL; + status = nfsd4_lookup_stateid(stateid, typemask, &s); + if (status) + return status; + *stpp = openlockstateid(s); + cstate->replay_owner = (*stpp)->st_stateowner; + + return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); +} + +static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp) +{ + __be32 status; + struct nfs4_openowner *oo; + + status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, + NFS4_OPEN_STID, stpp); + if (status) + return status; + oo = openowner((*stpp)->st_stateowner); + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; + return nfs_ok; +} + +__be32 +nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_open_confirm *oc) +{ + __be32 status; + struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; + + dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", + (int)cstate->current_fh.fh_dentry->d_name.len, + cstate->current_fh.fh_dentry->d_name.name); + + status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); + if (status) + return status; + + nfs4_lock_state(); + + status = nfs4_preprocess_seqid_op(cstate, + oc->oc_seqid, &oc->oc_req_stateid, + NFS4_OPEN_STID, &stp); + if (status) + goto out; + oo = openowner(stp->st_stateowner); + status = nfserr_bad_stateid; + if (oo->oo_flags & NFS4_OO_CONFIRMED) + goto out; + oo->oo_flags |= NFS4_OO_CONFIRMED; + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", + __func__, oc->oc_seqid, 
STATEID_VAL(&stp->st_stid.sc_stateid)); + + nfsd4_create_clid_dir(oo->oo_owner.so_client); + status = nfs_ok; +out: + if (!cstate->replay_owner) + nfs4_unlock_state(); + return status; +} + +static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) +{ + if (!test_bit(access, &stp->st_access_bmap)) + return; + nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); + __clear_bit(access, &stp->st_access_bmap); +} + +static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) +{ + switch (to_access) { + case NFS4_SHARE_ACCESS_READ: + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE); + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); + break; + case NFS4_SHARE_ACCESS_WRITE: + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ); + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); + break; + case NFS4_SHARE_ACCESS_BOTH: + break; + default: + BUG(); + } +} + +static void +reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) +{ + int i; + for (i = 0; i < 4; i++) { + if ((i & deny) != i) + __clear_bit(i, bmap); + } +} + +__be32 +nfsd4_open_downgrade(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_open_downgrade *od) +{ + __be32 status; + struct nfs4_ol_stateid *stp; + + dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", + (int)cstate->current_fh.fh_dentry->d_name.len, + cstate->current_fh.fh_dentry->d_name.name); + + /* We don't yet support WANT bits: */ + od->od_share_access &= NFS4_SHARE_ACCESS_MASK; + + nfs4_lock_state(); + status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, + &od->od_stateid, &stp); + if (status) + goto out; + status = nfserr_inval; + if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { + dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", + stp->st_access_bmap, od->od_share_access); + goto out; + } + if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) { + 
dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", + stp->st_deny_bmap, od->od_share_deny); + goto out; + } + nfs4_stateid_downgrade(stp, od->od_share_access); + + reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); + + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + status = nfs_ok; +out: + if (!cstate->replay_owner) + nfs4_unlock_state(); + return status; +} + +void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so) +{ + struct nfs4_openowner *oo; + struct nfs4_ol_stateid *s; + + if (!so->so_is_open_owner) + return; + oo = openowner(so); + s = oo->oo_last_closed_stid; + if (!s) + return; + if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) { + /* Release the last_closed_stid on the next seqid bump: */ + oo->oo_flags |= NFS4_OO_PURGE_CLOSE; + return; + } + oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE; + release_last_closed_stateid(oo); +} + +static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) +{ + unhash_open_stateid(s); + s->st_stid.sc_type = NFS4_CLOSED_STID; +} + +/* + * nfs4_unlock_state() called after encode + */ +__be32 +nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_close *close) +{ + __be32 status; + struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; + + dprintk("NFSD: nfsd4_close on file %.*s\n", + (int)cstate->current_fh.fh_dentry->d_name.len, + cstate->current_fh.fh_dentry->d_name.name); + + nfs4_lock_state(); + status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, + &close->cl_stateid, + NFS4_OPEN_STID|NFS4_CLOSED_STID, + &stp); + if (status) + goto out; + oo = openowner(stp->st_stateowner); + status = nfs_ok; + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + + nfsd4_close_open_stateid(stp); + oo->oo_last_closed_stid = stp; + + /* place unused nfs4_stateowners on so_close_lru list to be + * released by the laundromat service after 
the lease period + * to enable us to handle CLOSE replay + */ + if (list_empty(&oo->oo_owner.so_stateids)) + move_to_close_lru(oo); +out: + if (!cstate->replay_owner) + nfs4_unlock_state(); + return status; +} + +__be32 +nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_delegreturn *dr) +{ + struct nfs4_delegation *dp; + stateid_t *stateid = &dr->dr_stateid; + struct nfs4_stid *s; + struct inode *inode; + __be32 status; + + if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) + return status; + inode = cstate->current_fh.fh_dentry->d_inode; + + nfs4_lock_state(); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s); + if (status) + goto out; + dp = delegstateid(s); + status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); + if (status) + goto out; + + unhash_delegation(dp); +out: + nfs4_unlock_state(); + + return status; +} + + +#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) + +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) +#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) + +static inline u64 +end_offset(u64 start, u64 len) +{ + u64 end; + + end = start + len; + return end >= start ? end: NFS4_MAX_UINT64; +} + +/* last octet in a range */ +static inline u64 +last_byte_offset(u64 start, u64 len) +{ + u64 end; + + BUG_ON(!len); + end = start + len; + return end > start ? 
end - 1: NFS4_MAX_UINT64; +} + +static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) +{ + return (file_hashval(inode) + cl_id + + opaque_hashval(ownername->data, ownername->len)) + & LOCKOWNER_INO_HASH_MASK; +} + +static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; + +/* + * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that + * we can't properly handle lock requests that go beyond the (2^63 - 1)-th + * byte, because of sign extension problems. Since NFSv4 calls for 64-bit + * locking, this prevents us from being completely protocol-compliant. The + * real solution to this problem is to start using unsigned file offsets in + * the VFS, but this is a very deep change! + */ +static inline void +nfs4_transform_lock_offset(struct file_lock *lock) +{ + if (lock->fl_start < 0) + lock->fl_start = OFFSET_MAX; + if (lock->fl_end < 0) + lock->fl_end = OFFSET_MAX; +} + +/* Hack!: For now, we're defining this just so we can use a pointer to it + * as a unique cookie to identify our (NFSv4's) posix locks. 
*/ +static const struct lock_manager_operations nfsd_posix_mng_ops = { +}; + +static inline void +nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) +{ + struct nfs4_lockowner *lo; + + if (fl->fl_lmops == &nfsd_posix_mng_ops) { + lo = (struct nfs4_lockowner *) fl->fl_owner; + deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data, + lo->lo_owner.so_owner.len, GFP_KERNEL); + if (!deny->ld_owner.data) + /* We just don't care that much */ + goto nevermind; + deny->ld_owner.len = lo->lo_owner.so_owner.len; + deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; + } else { +nevermind: + deny->ld_owner.len = 0; + deny->ld_owner.data = NULL; + deny->ld_clientid.cl_boot = 0; + deny->ld_clientid.cl_id = 0; + } + deny->ld_start = fl->fl_start; + deny->ld_length = NFS4_MAX_UINT64; + if (fl->fl_end != NFS4_MAX_UINT64) + deny->ld_length = fl->fl_end - fl->fl_start + 1; + deny->ld_type = NFS4_READ_LT; + if (fl->fl_type != F_RDLCK) + deny->ld_type = NFS4_WRITE_LT; +} + +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ + struct nfs4_ol_stateid *lst; + + if (!same_owner_str(&lo->lo_owner, owner, clid)) + return false; + lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return lst->st_file->fi_inode == inode; +} + +static struct nfs4_lockowner * +find_lockowner_str(struct inode *inode, clientid_t *clid, + struct xdr_netobj *owner) +{ + unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); + struct nfs4_lockowner *lo; + + list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { + if (same_lockowner_ino(lo, inode, clid, owner)) + return lo; + } + return NULL; +} + +static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) +{ + struct inode *inode = open_stp->st_file->fi_inode; + unsigned int inohash = 
lockowner_ino_hashval(inode, + clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + + list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); + list_add(&lo->lo_perstateid, &open_stp->st_lockowners); +} + +/* + * Alloc a lock owner structure. + * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has + * occurred. + * + * strhashval = ownerstr_hashval + */ + +static struct nfs4_lockowner * +alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { + struct nfs4_lockowner *lo; + + lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); + if (!lo) + return NULL; + INIT_LIST_HEAD(&lo->lo_owner.so_stateids); + lo->lo_owner.so_is_open_owner = 0; + /* It is the openowner seqid that will be incremented in encode in the + * case of new lockowners; so increment the lock seqid manually: */ + lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; + hash_lockowner(lo, strhashval, clp, open_stp); + return lo; +} + +static struct nfs4_ol_stateid * +alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) +{ + struct nfs4_ol_stateid *stp; + struct nfs4_client *clp = lo->lo_owner.so_client; + + stp = nfs4_alloc_stateid(clp); + if (stp == NULL) + return NULL; + init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); + list_add(&stp->st_perfile, &fp->fi_stateids); + list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); + stp->st_stateowner = &lo->lo_owner; + get_nfs4_file(fp); + stp->st_file = fp; + stp->st_access_bmap = 0; + stp->st_deny_bmap = open_stp->st_deny_bmap; + stp->st_openstp = open_stp; + return stp; +} + +static int +check_lock_length(u64 offset, u64 length) +{ + return ((length == 0) || ((length != NFS4_MAX_UINT64) && + LOFF_OVERFLOW(offset, length))); +} + +static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) +{ + struct 
nfs4_file *fp = lock_stp->st_file; + int oflag = nfs4_access_to_omode(access); + + if (test_bit(access, &lock_stp->st_access_bmap)) + return; + nfs4_file_get_access(fp, oflag); + __set_bit(access, &lock_stp->st_access_bmap); +} + +__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +{ + struct nfs4_file *fi = ost->st_file; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *cl = oo->oo_owner.so_client; + struct nfs4_lockowner *lo; + unsigned int strhashval; + + lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); + if (lo) { + if (!cstate->minorversion) + return nfserr_bad_seqid; + /* XXX: a lockowner always has exactly one stateid: */ + *lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return nfs_ok; + } + strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, + &lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + *lst = alloc_init_lock_stateid(lo, fi, ost); + if (*lst == NULL) { + release_lockowner(lo); + return nfserr_jukebox; + } + *new = true; + return nfs_ok; +} + +/* + * LOCK operation + */ +__be32 +nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_lock *lock) +{ + struct nfs4_openowner *open_sop = NULL; + struct nfs4_lockowner *lock_sop = NULL; + struct nfs4_ol_stateid *lock_stp; + struct nfs4_file *fp; + struct file *filp = NULL; + struct file_lock file_lock; + struct file_lock conflock; + __be32 status = 0; + bool new_state = false; + int lkflg; + int err; + + dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", + (long long) lock->lk_offset, + (long long) lock->lk_length); + + if (check_lock_length(lock->lk_offset, lock->lk_length)) + return nfserr_inval; + + if ((status = fh_verify(rqstp, &cstate->current_fh, + S_IFREG, 
NFSD_MAY_LOCK))) { + dprintk("NFSD: nfsd4_lock: permission denied!\n"); + return status; + } + + nfs4_lock_state(); + + if (lock->lk_is_new) { + /* + * Client indicates that this is a new lockowner. + * Use open owner and open stateid to create lock owner and + * lock stateid. + */ + struct nfs4_ol_stateid *open_stp = NULL; + + if (nfsd4_has_session(cstate)) + /* See rfc 5661 18.10.3: given clientid is ignored: */ + memcpy(&lock->v.new.clientid, + &cstate->session->se_client->cl_clientid, + sizeof(clientid_t)); + + status = nfserr_stale_clientid; + if (STALE_CLIENTID(&lock->lk_new_clientid)) + goto out; + + /* validate and update open stateid and open seqid */ + status = nfs4_preprocess_confirmed_seqid_op(cstate, + lock->lk_new_open_seqid, + &lock->lk_new_open_stateid, + &open_stp); + if (status) + goto out; + open_sop = openowner(open_stp->st_stateowner); + status = nfserr_bad_stateid; + if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, + &lock->v.new.clientid)) + goto out; + status = lookup_or_create_lock_state(cstate, open_stp, lock, + &lock_stp, &new_state); + if (status) + goto out; + } else { + /* lock (lock owner + lock stateid) already exists */ + status = nfs4_preprocess_seqid_op(cstate, + lock->lk_old_lock_seqid, + &lock->lk_old_lock_stateid, + NFS4_LOCK_STID, &lock_stp); + if (status) + goto out; + } + lock_sop = lockowner(lock_stp->st_stateowner); + fp = lock_stp->st_file; + + lkflg = setlkflg(lock->lk_type); + status = nfs4_check_openmode(lock_stp, lkflg); + if (status) + goto out; + + status = nfserr_grace; + if (locks_in_grace() && !lock->lk_reclaim) + goto out; + status = nfserr_no_grace; + if (!locks_in_grace() && lock->lk_reclaim) + goto out; + + locks_init_lock(&file_lock); + switch (lock->lk_type) { + case NFS4_READ_LT: + case NFS4_READW_LT: + filp = find_readable_file(lock_stp->st_file); + if (filp) + get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); + file_lock.fl_type = F_RDLCK; + break; + case NFS4_WRITE_LT: + case NFS4_WRITEW_LT: 
+ filp = find_writeable_file(lock_stp->st_file); + if (filp) + get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); + file_lock.fl_type = F_WRLCK; + break; + default: + status = nfserr_inval; + goto out; + } + if (!filp) { + status = nfserr_openmode; + goto out; + } + file_lock.fl_owner = (fl_owner_t)lock_sop; + file_lock.fl_pid = current->tgid; + file_lock.fl_file = filp; + file_lock.fl_flags = FL_POSIX; + file_lock.fl_lmops = &nfsd_posix_mng_ops; + + file_lock.fl_start = lock->lk_offset; + file_lock.fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); + nfs4_transform_lock_offset(&file_lock); + + /* + * Try to lock the file in the VFS. + * Note: locks.c uses the BKL to protect the inode's lock list. + */ + + err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); + switch (-err) { + case 0: /* success! */ + update_stateid(&lock_stp->st_stid.sc_stateid); + memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, + sizeof(stateid_t)); + status = 0; + break; + case (EAGAIN): /* conflock holds conflicting lock */ + status = nfserr_denied; + dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); + nfs4_set_lock_denied(&conflock, &lock->lk_denied); + break; + case (EDEADLK): + status = nfserr_deadlock; + break; + default: + dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err); + status = nfserrno(err); + break; + } +out: + if (status && new_state) + release_lockowner(lock_sop); + if (!cstate->replay_owner) + nfs4_unlock_state(); + return status; +} + +/* + * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN, + * so we do a temporary open here just to get an open file to pass to + * vfs_test_lock. (Arguably perhaps test_lock should be done with an + * inode operation.) 
+ */ +static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) +{ + struct file *file; + int err; + + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + if (err) + return err; + err = vfs_test_lock(file, lock); + nfsd_close(file); + return err; +} + +/* + * LOCKT operation + */ +__be32 +nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_lockt *lockt) +{ + struct inode *inode; + struct file_lock file_lock; + struct nfs4_lockowner *lo; + int error; + __be32 status; + + if (locks_in_grace()) + return nfserr_grace; + + if (check_lock_length(lockt->lt_offset, lockt->lt_length)) + return nfserr_inval; + + nfs4_lock_state(); + + status = nfserr_stale_clientid; + if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) + goto out; + + if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) + goto out; + + inode = cstate->current_fh.fh_dentry->d_inode; + locks_init_lock(&file_lock); + switch (lockt->lt_type) { + case NFS4_READ_LT: + case NFS4_READW_LT: + file_lock.fl_type = F_RDLCK; + break; + case NFS4_WRITE_LT: + case NFS4_WRITEW_LT: + file_lock.fl_type = F_WRLCK; + break; + default: + dprintk("NFSD: nfs4_lockt: bad lock type!\n"); + status = nfserr_inval; + goto out; + } + + lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner); + if (lo) + file_lock.fl_owner = (fl_owner_t)lo; + file_lock.fl_pid = current->tgid; + file_lock.fl_flags = FL_POSIX; + + file_lock.fl_start = lockt->lt_offset; + file_lock.fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); + + nfs4_transform_lock_offset(&file_lock); + + status = nfs_ok; + error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock); + if (error) { + status = nfserrno(error); + goto out; + } + if (file_lock.fl_type != F_UNLCK) { + status = nfserr_denied; + nfs4_set_lock_denied(&file_lock, &lockt->lt_denied); + } +out: + nfs4_unlock_state(); + return status; +} + +__be32 +nfsd4_locku(struct 
svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_locku *locku) +{ + struct nfs4_ol_stateid *stp; + struct file *filp = NULL; + struct file_lock file_lock; + __be32 status; + int err; + + dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", + (long long) locku->lu_offset, + (long long) locku->lu_length); + + if (check_lock_length(locku->lu_offset, locku->lu_length)) + return nfserr_inval; + + nfs4_lock_state(); + + status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, + &locku->lu_stateid, NFS4_LOCK_STID, &stp); + if (status) + goto out; + filp = find_any_file(stp->st_file); + if (!filp) { + status = nfserr_lock_range; + goto out; + } + BUG_ON(!filp); + locks_init_lock(&file_lock); + file_lock.fl_type = F_UNLCK; + file_lock.fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); + file_lock.fl_pid = current->tgid; + file_lock.fl_file = filp; + file_lock.fl_flags = FL_POSIX; + file_lock.fl_lmops = &nfsd_posix_mng_ops; + file_lock.fl_start = locku->lu_offset; + + file_lock.fl_end = last_byte_offset(locku->lu_offset, locku->lu_length); + nfs4_transform_lock_offset(&file_lock); + + /* + * Try to unlock the file in the VFS. + */ + err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL); + if (err) { + dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); + goto out_nfserr; + } + /* + * OK, unlock succeeded; the only thing left to do is update the stateid. 
+ */ + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + +out: + if (!cstate->replay_owner) + nfs4_unlock_state(); + return status; + +out_nfserr: + status = nfserrno(err); + goto out; +} + +/* + * returns + * 1: locks held by lockowner + * 0: no locks held by lockowner + */ +static int +check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) +{ + struct file_lock **flpp; + struct inode *inode = filp->fi_inode; + int status = 0; + + lock_flocks(); + for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { + if ((*flpp)->fl_owner == (fl_owner_t)lowner) { + status = 1; + goto out; + } + } +out: + unlock_flocks(); + return status; +} + +__be32 +nfsd4_release_lockowner(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_release_lockowner *rlockowner) +{ + clientid_t *clid = &rlockowner->rl_clientid; + struct nfs4_stateowner *sop; + struct nfs4_lockowner *lo; + struct nfs4_ol_stateid *stp; + struct xdr_netobj *owner = &rlockowner->rl_owner; + struct list_head matches; + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); + __be32 status; + + dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", + clid->cl_boot, clid->cl_id); + + /* XXX check for lease expiration */ + + status = nfserr_stale_clientid; + if (STALE_CLIENTID(clid)) + return status; + + nfs4_lock_state(); + + status = nfserr_locks_held; + INIT_LIST_HEAD(&matches); + + list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { + if (sop->so_is_open_owner) + continue; + if (!same_owner_str(sop, owner, clid)) + continue; + list_for_each_entry(stp, &sop->so_stateids, + st_perstateowner) { + lo = lockowner(sop); + if (check_for_locks(stp->st_file, lo)) + goto out; + list_add(&lo->lo_list, &matches); + } + } + /* Clients probably won't expect us to return with some (but not all) + * of the lockowner state released; so don't release any until all + * have been checked. 
*/ + status = nfs_ok; + while (!list_empty(&matches)) { + lo = list_entry(matches.next, struct nfs4_lockowner, + lo_list); + /* unhash_stateowner deletes so_perclient only + * for openowners. */ + list_del(&lo->lo_list); + release_lockowner(lo); + } +out: + nfs4_unlock_state(); + return status; +} + +static inline struct nfs4_client_reclaim * +alloc_reclaim(void) +{ + return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); +} + +int +nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) +{ + unsigned int strhashval = clientstr_hashval(name); + struct nfs4_client *clp; + + clp = find_confirmed_client_by_str(name, strhashval); + return clp ? 1 : 0; +} + +/* + * failure => all reset bets are off, nfserr_no_grace... + */ +int +nfs4_client_to_reclaim(const char *name) +{ + unsigned int strhashval; + struct nfs4_client_reclaim *crp = NULL; + + dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name); + crp = alloc_reclaim(); + if (!crp) + return 0; + strhashval = clientstr_hashval(name); + INIT_LIST_HEAD(&crp->cr_strhash); + list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); + memcpy(crp->cr_recdir, name, HEXDIR_LEN); + reclaim_str_hashtbl_size++; + return 1; +} + +static void +nfs4_release_reclaim(void) +{ + struct nfs4_client_reclaim *crp = NULL; + int i; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&reclaim_str_hashtbl[i])) { + crp = list_entry(reclaim_str_hashtbl[i].next, + struct nfs4_client_reclaim, cr_strhash); + list_del(&crp->cr_strhash); + kfree(crp); + reclaim_str_hashtbl_size--; + } + } + BUG_ON(reclaim_str_hashtbl_size); +} + +/* + * called from OPEN, CLAIM_PREVIOUS with a new clientid. 
*/ +static struct nfs4_client_reclaim * +nfs4_find_reclaim_client(clientid_t *clid) +{ + unsigned int strhashval; + struct nfs4_client *clp; + struct nfs4_client_reclaim *crp = NULL; + + + /* find clientid in conf_id_hashtbl */ + clp = find_confirmed_client(clid); + if (clp == NULL) + return NULL; + + dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n", + clp->cl_name.len, clp->cl_name.data, + clp->cl_recdir); + + /* find clp->cl_name in reclaim_str_hashtbl */ + strhashval = clientstr_hashval(clp->cl_recdir); + list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { + if (same_name(crp->cr_recdir, clp->cl_recdir)) { + return crp; + } + } + return NULL; +} + +/* +* Called from OPEN. Look for clientid in reclaim list. +*/ +__be32 +nfs4_check_open_reclaim(clientid_t *clid) +{ + return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; +} + +#ifdef CONFIG_NFSD_FAULT_INJECTION + +void nfsd_forget_clients(u64 num) +{ + struct nfs4_client *clp, *next; + int count = 0; + + nfs4_lock_state(); + list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { + nfsd4_remove_clid_dir(clp); + expire_client(clp); + if (++count == num) + break; + } + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d clients", count); +} + +static void release_lockowner_sop(struct nfs4_stateowner *sop) +{ + release_lockowner(lockowner(sop)); +} + +static void release_openowner_sop(struct nfs4_stateowner *sop) +{ + release_openowner(openowner(sop)); +} + +static int nfsd_release_n_owners(u64 num, bool is_open_owner, + void (*release_sop)(struct nfs4_stateowner *)) +{ + int i, count = 0; + struct nfs4_stateowner *sop, *next; + + for (i = 0; i < OWNER_HASH_SIZE; i++) { + list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { + if (sop->so_is_open_owner != is_open_owner) + continue; + release_sop(sop); + if (++count == num) + return count; + } + } + return count; +} + +void nfsd_forget_locks(u64 num) +{ + int count; + + nfs4_lock_state(); 
+ count = nfsd_release_n_owners(num, false, release_lockowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d locks", count); +} + +void nfsd_forget_openowners(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, true, release_openowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d open owners", count); +} + +int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) +{ + int i, count = 0; + struct nfs4_file *fp, *fnext; + struct nfs4_delegation *dp, *dnext; + + for (i = 0; i < FILE_HASH_SIZE; i++) { + list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { + list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { + deleg_func(dp); + if (++count == num) + return count; + } + } + } + + return count; +} + +void nfsd_forget_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + count = nfsd_process_n_delegations(num, unhash_delegation); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d delegations", count); +} + +void nfsd_recall_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + spin_lock(&recall_lock); + count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); + spin_unlock(&recall_lock); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Recalled %d delegations", count); +} + +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + +/* initialization to perform at module load time: */ + +void +nfs4_state_init(void) +{ + int i; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + INIT_LIST_HEAD(&conf_id_hashtbl[i]); + INIT_LIST_HEAD(&conf_str_hashtbl[i]); + INIT_LIST_HEAD(&unconf_str_hashtbl[i]); + INIT_LIST_HEAD(&unconf_id_hashtbl[i]); + INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); + } + for (i = 0; i < SESSION_HASH_SIZE; i++) + INIT_LIST_HEAD(&sessionid_hashtbl[i]); + for (i = 0; i < FILE_HASH_SIZE; i++) { + INIT_LIST_HEAD(&file_hashtbl[i]); + } + for (i = 0; i < OWNER_HASH_SIZE; i++) { + 
INIT_LIST_HEAD(&ownerstr_hashtbl[i]); + } + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); + INIT_LIST_HEAD(&close_lru); + INIT_LIST_HEAD(&client_lru); + INIT_LIST_HEAD(&del_recall_lru); + reclaim_str_hashtbl_size = 0; +} + +static void +nfsd4_load_reboot_recovery_data(void) +{ + int status; + + nfs4_lock_state(); + nfsd4_init_recdir(); + status = nfsd4_recdir_load(); + nfs4_unlock_state(); + if (status) + printk("NFSD: Failure reading reboot recovery data\n"); +} + +/* + * Since the lifetime of a delegation isn't limited to that of an open, a + * client may quite reasonably hang on to a delegation as long as it has + * the inode cached. This becomes an obvious problem the first time a + * client's inode cache approaches the size of the server's total memory. + * + * For now we avoid this problem by imposing a hard limit on the number + * of delegations, which varies according to the server's memory size. + */ +static void +set_max_delegations(void) +{ + /* + * Allow at most 4 delegations per megabyte of RAM. Quick + * estimates suggest that in the worst case (where every delegation + * is for a different inode), a delegation could take about 1.5K, + * giving a worst case usage of about 6% of memory. 
+ */ + max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); +} + +/* initialization to perform when the nfsd service is started: */ + +static int +__nfs4_state_start(void) +{ + int ret; + + boot_time = get_seconds(); + locks_start_grace(&nfsd4_manager); + printk(KERN_INFO "NFSD: starting %ld-second grace period\n", + nfsd4_grace); + ret = set_callback_cred(); + if (ret) + return -ENOMEM; + laundry_wq = create_singlethread_workqueue("nfsd4"); + if (laundry_wq == NULL) + return -ENOMEM; + ret = nfsd4_create_callback_queue(); + if (ret) + goto out_free_laundry; + queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ); + set_max_delegations(); + return 0; +out_free_laundry: + destroy_workqueue(laundry_wq); + return ret; +} + +int +nfs4_state_start(void) +{ + nfsd4_load_reboot_recovery_data(); + return __nfs4_state_start(); +} + +static void +__nfs4_state_shutdown(void) +{ + int i; + struct nfs4_client *clp = NULL; + struct nfs4_delegation *dp = NULL; + struct list_head *pos, *next, reaplist; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&conf_id_hashtbl[i])) { + clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + expire_client(clp); + } + while (!list_empty(&unconf_str_hashtbl[i])) { + clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); + expire_client(clp); + } + } + INIT_LIST_HEAD(&reaplist); + spin_lock(&recall_lock); + list_for_each_safe(pos, next, &del_recall_lru) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + list_move(&dp->dl_recall_lru, &reaplist); + } + spin_unlock(&recall_lock); + list_for_each_safe(pos, next, &reaplist) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + unhash_delegation(dp); + } + + nfsd4_shutdown_recdir(); +} + +void +nfs4_state_shutdown(void) +{ + cancel_delayed_work_sync(&laundromat_work); + destroy_workqueue(laundry_wq); + locks_end_grace(&nfsd4_manager); + nfs4_lock_state(); + nfs4_release_reclaim(); + 
__nfs4_state_shutdown(); + nfs4_unlock_state(); + nfsd4_destroy_callback_queue(); +} diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c new file mode 100644 index 00000000000..0ec5a1b9700 --- /dev/null +++ b/fs/nfsd/nfs4xdr.c @@ -0,0 +1,3673 @@ +/* + * Server-side XDR for NFSv4 + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * TODO: Neil Brown made the following observation: We currently + * initially reserve NFSD_BUFSIZE space on the transmit queue and + * never release any of that until the request is complete. + * It would be good to calculate a new maximum response size while + * decoding the COMPOUND, and call svc_reserve with this number + * at the end of nfs4svc_decode_compoundargs. + */ + +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/statfs.h> +#include <linux/utsname.h> +#include <linux/pagemap.h> +#include <linux/sunrpc/svcauth_gss.h> + +#include "idmap.h" +#include "acl.h" +#include "xdr4.h" +#include "vfs.h" +#include "state.h" +#include "cache.h" + +#define NFSDDBG_FACILITY NFSDDBG_XDR + +/* + * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing + * directory in order to indicate to the client that a filesystem boundary is present + * We use a fixed fsid for a referral + */ +#define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL +#define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL + +static __be32 +check_filename(char *str, int len, __be32 err) +{ + int i; + + if (len == 0) + return nfserr_inval; + if (isdotent(str, len)) + return err; + for (i = 0; i < len; i++) + if (str[i] == '/') + return err; + return 0; +} + +#define DECODE_HEAD \ + __be32 *p; \ + __be32 status +#define DECODE_TAIL \ + status = 0; \ +out: \ + return status; \ +xdr_error: \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ + status = nfserr_bad_xdr; \ + goto out + +#define READ32(x) (x) = ntohl(*p++) +#define READ64(x) do { \ + (x) = (u64)ntohl(*p++) << 32; \ + (x) |= ntohl(*p++); \ +} while (0) +#define READTIME(x) do { \ + p++; \ + (x) = ntohl(*p++); \ + p++; \ +} while (0) +#define READMEM(x,nbytes) do { \ + x = (char *)p; \ + p += XDR_QUADLEN(nbytes); \ +} while (0) +#define SAVEMEM(x,nbytes) do { \ + if (!(x = (p==argp->tmp || p == argp->tmpp) ? 
\ + savemem(argp, p, nbytes) : \ + (char *)p)) { \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ + goto xdr_error; \ + } \ + p += XDR_QUADLEN(nbytes); \ +} while (0) +#define COPYMEM(x,nbytes) do { \ + memcpy((x), p, nbytes); \ + p += XDR_QUADLEN(nbytes); \ +} while (0) + +/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */ +#define READ_BUF(nbytes) do { \ + if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \ + p = argp->p; \ + argp->p += XDR_QUADLEN(nbytes); \ + } else if (!(p = read_buf(argp, nbytes))) { \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ + goto xdr_error; \ + } \ +} while (0) + +static void save_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) +{ + savep->p = argp->p; + savep->end = argp->end; + savep->pagelen = argp->pagelen; + savep->pagelist = argp->pagelist; +} + +static void restore_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) +{ + argp->p = savep->p; + argp->end = savep->end; + argp->pagelen = savep->pagelen; + argp->pagelist = savep->pagelist; +} + +static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) +{ + /* We want more bytes than seem to be available. + * Maybe we need a new page, maybe we have just run out + */ + unsigned int avail = (char *)argp->end - (char *)argp->p; + __be32 *p; + if (avail + argp->pagelen < nbytes) + return NULL; + if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */ + return NULL; + /* ok, we can do it with the current plus the next page */ + if (nbytes <= sizeof(argp->tmp)) + p = argp->tmp; + else { + kfree(argp->tmpp); + p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL); + if (!p) + return NULL; + + } + /* + * The following memcpy is safe because read_buf is always + * called with nbytes > avail, and the two cases above both + * guarantee p points to at least nbytes bytes. 
+ */ + memcpy(p, argp->p, avail); + /* step to next page */ + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } + memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); + argp->p += XDR_QUADLEN(nbytes - avail); + return p; +} + +static int zero_clientid(clientid_t *clid) +{ + return (clid->cl_boot == 0) && (clid->cl_id == 0); +} + +static int +defer_free(struct nfsd4_compoundargs *argp, + void (*release)(const void *), void *p) +{ + struct tmpbuf *tb; + + tb = kmalloc(sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + tb->buf = p; + tb->release = release; + tb->next = argp->to_free; + argp->to_free = tb; + return 0; +} + +static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) +{ + if (p == argp->tmp) { + p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); + if (!p) + return NULL; + } else { + BUG_ON(p != argp->tmpp); + argp->tmpp = NULL; + } + if (defer_free(argp, kfree, p)) { + kfree(p); + return NULL; + } else + return (char *)p; +} + +static __be32 +nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) +{ + u32 bmlen; + DECODE_HEAD; + + bmval[0] = 0; + bmval[1] = 0; + bmval[2] = 0; + + READ_BUF(4); + READ32(bmlen); + if (bmlen > 1000) + goto xdr_error; + + READ_BUF(bmlen << 2); + if (bmlen > 0) + READ32(bmval[0]); + if (bmlen > 1) + READ32(bmval[1]); + if (bmlen > 2) + READ32(bmval[2]); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, + struct iattr *iattr, struct nfs4_acl **acl) +{ + int expected_len, len = 0; + u32 dummy32; + char *buf; + int host_err; + + DECODE_HEAD; + iattr->ia_valid = 0; + if ((status = nfsd4_decode_bitmap(argp, bmval))) + return status; + + READ_BUF(4); + READ32(expected_len); + + if (bmval[0] & FATTR4_WORD0_SIZE) { + READ_BUF(8); + len += 8; + 
READ64(iattr->ia_size); + iattr->ia_valid |= ATTR_SIZE; + } + if (bmval[0] & FATTR4_WORD0_ACL) { + int nace; + struct nfs4_ace *ace; + + READ_BUF(4); len += 4; + READ32(nace); + + if (nace > NFS4_ACL_MAX) + return nfserr_resource; + + *acl = nfs4_acl_new(nace); + if (*acl == NULL) { + host_err = -ENOMEM; + goto out_nfserr; + } + defer_free(argp, kfree, *acl); + + (*acl)->naces = nace; + for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { + READ_BUF(16); len += 16; + READ32(ace->type); + READ32(ace->flag); + READ32(ace->access_mask); + READ32(dummy32); + READ_BUF(dummy32); + len += XDR_QUADLEN(dummy32) << 2; + READMEM(buf, dummy32); + ace->whotype = nfs4_acl_get_whotype(buf, dummy32); + status = nfs_ok; + if (ace->whotype != NFS4_ACL_WHO_NAMED) + ace->who = 0; + else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) + status = nfsd_map_name_to_gid(argp->rqstp, + buf, dummy32, &ace->who); + else + status = nfsd_map_name_to_uid(argp->rqstp, + buf, dummy32, &ace->who); + if (status) + return status; + } + } else + *acl = NULL; + if (bmval[1] & FATTR4_WORD1_MODE) { + READ_BUF(4); + len += 4; + READ32(iattr->ia_mode); + iattr->ia_mode &= (S_IFMT | S_IALLUGO); + iattr->ia_valid |= ATTR_MODE; + } + if (bmval[1] & FATTR4_WORD1_OWNER) { + READ_BUF(4); + len += 4; + READ32(dummy32); + READ_BUF(dummy32); + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) + return status; + iattr->ia_valid |= ATTR_UID; + } + if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { + READ_BUF(4); + len += 4; + READ32(dummy32); + READ_BUF(dummy32); + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) + return status; + iattr->ia_valid |= ATTR_GID; + } + if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { + READ_BUF(4); + len += 4; + READ32(dummy32); + switch (dummy32) { + case NFS4_SET_TO_CLIENT_TIME: + /* We require the high 32 
bits of 'seconds' to be 0, and we ignore + all 32 bits of 'nseconds'. */ + READ_BUF(12); + len += 12; + READ32(dummy32); + if (dummy32) + return nfserr_inval; + READ32(iattr->ia_atime.tv_sec); + READ32(iattr->ia_atime.tv_nsec); + if (iattr->ia_atime.tv_nsec >= (u32)1000000000) + return nfserr_inval; + iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); + break; + case NFS4_SET_TO_SERVER_TIME: + iattr->ia_valid |= ATTR_ATIME; + break; + default: + goto xdr_error; + } + } + if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { + READ_BUF(4); + len += 4; + READ32(dummy32); + switch (dummy32) { + case NFS4_SET_TO_CLIENT_TIME: + /* We require the high 32 bits of 'seconds' to be 0, and we ignore + all 32 bits of 'nseconds'. */ + READ_BUF(12); + len += 12; + READ32(dummy32); + if (dummy32) + return nfserr_inval; + READ32(iattr->ia_mtime.tv_sec); + READ32(iattr->ia_mtime.tv_nsec); + if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) + return nfserr_inval; + iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); + break; + case NFS4_SET_TO_SERVER_TIME: + iattr->ia_valid |= ATTR_MTIME; + break; + default: + goto xdr_error; + } + } + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 + || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 + || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) + READ_BUF(expected_len - len); + else if (len != expected_len) + goto xdr_error; + + DECODE_TAIL; + +out_nfserr: + status = nfserrno(host_err); + goto out; +} + +static __be32 +nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + READ32(sid->si_generation); + COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(access->ac_req_access); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +{ + DECODE_HEAD; + + 
READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); + COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); + READ32(bcts->dir); + /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker + * could help us figure out we should be using it. */ + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(close->cl_seqid); + return nfsd4_decode_stateid(argp, &close->cl_stateid); + + DECODE_TAIL; +} + + +static __be32 +nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) +{ + DECODE_HEAD; + + READ_BUF(12); + READ64(commit->co_offset); + READ32(commit->co_count); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(create->cr_type); + switch (create->cr_type) { + case NF4LNK: + READ_BUF(4); + READ32(create->cr_linklen); + READ_BUF(create->cr_linklen); + SAVEMEM(create->cr_linkname, create->cr_linklen); + break; + case NF4BLK: + case NF4CHR: + READ_BUF(8); + READ32(create->cr_specdata1); + READ32(create->cr_specdata2); + break; + case NF4SOCK: + case NF4FIFO: + case NF4DIR: + default: + break; + } + + READ_BUF(4); + READ32(create->cr_namelen); + READ_BUF(create->cr_namelen); + SAVEMEM(create->cr_name, create->cr_namelen); + if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) + return status; + + status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, + &create->cr_acl); + if (status) + goto out; + + DECODE_TAIL; +} + +static inline __be32 +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) +{ + return nfsd4_decode_stateid(argp, &dr->dr_stateid); +} + +static inline __be32 +nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) +{ + return nfsd4_decode_bitmap(argp, getattr->ga_bmval); +} + +static __be32 +nfsd4_decode_link(struct 
nfsd4_compoundargs *argp, struct nfsd4_link *link) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(link->li_namelen); + READ_BUF(link->li_namelen); + SAVEMEM(link->li_name, link->li_namelen); + if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval))) + return status; + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +{ + DECODE_HEAD; + + /* + * type, reclaim(boolean), offset, length, new_lock_owner(boolean) + */ + READ_BUF(28); + READ32(lock->lk_type); + if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) + goto xdr_error; + READ32(lock->lk_reclaim); + READ64(lock->lk_offset); + READ64(lock->lk_length); + READ32(lock->lk_is_new); + + if (lock->lk_is_new) { + READ_BUF(4); + READ32(lock->lk_new_open_seqid); + status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); + if (status) + return status; + READ_BUF(8 + sizeof(clientid_t)); + READ32(lock->lk_new_lock_seqid); + COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); + READ32(lock->lk_new_owner.len); + READ_BUF(lock->lk_new_owner.len); + READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); + } else { + status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); + if (status) + return status; + READ_BUF(4); + READ32(lock->lk_old_lock_seqid); + } + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) +{ + DECODE_HEAD; + + READ_BUF(32); + READ32(lockt->lt_type); + if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) + goto xdr_error; + READ64(lockt->lt_offset); + READ64(lockt->lt_length); + COPYMEM(&lockt->lt_clientid, 8); + READ32(lockt->lt_owner.len); + READ_BUF(lockt->lt_owner.len); + READMEM(lockt->lt_owner.data, lockt->lt_owner.len); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) +{ + DECODE_HEAD; + + READ_BUF(8); + READ32(locku->lu_type); + if 
((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) + goto xdr_error; + READ32(locku->lu_seqid); + status = nfsd4_decode_stateid(argp, &locku->lu_stateid); + if (status) + return status; + READ_BUF(16); + READ64(locku->lu_offset); + READ64(locku->lu_length); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(lookup->lo_len); + READ_BUF(lookup->lo_len); + SAVEMEM(lookup->lo_name, lookup->lo_len); + if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent))) + return status; + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x) +{ + __be32 *p; + u32 w; + + READ_BUF(4); + READ32(w); + *x = w; + switch (w & NFS4_SHARE_ACCESS_MASK) { + case NFS4_SHARE_ACCESS_READ: + case NFS4_SHARE_ACCESS_WRITE: + case NFS4_SHARE_ACCESS_BOTH: + break; + default: + return nfserr_bad_xdr; + } + w &= ~NFS4_SHARE_ACCESS_MASK; + if (!w) + return nfs_ok; + if (!argp->minorversion) + return nfserr_bad_xdr; + switch (w & NFS4_SHARE_WANT_MASK) { + case NFS4_SHARE_WANT_NO_PREFERENCE: + case NFS4_SHARE_WANT_READ_DELEG: + case NFS4_SHARE_WANT_WRITE_DELEG: + case NFS4_SHARE_WANT_ANY_DELEG: + case NFS4_SHARE_WANT_NO_DELEG: + case NFS4_SHARE_WANT_CANCEL: + break; + default: + return nfserr_bad_xdr; + } + w &= ~NFS4_SHARE_WANT_MASK; + if (!w) + return nfs_ok; + switch (w) { + case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL: + case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED: + case (NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL | + NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED): + return nfs_ok; + } +xdr_error: + return nfserr_bad_xdr; +} + +static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) +{ + __be32 *p; + + READ_BUF(4); + READ32(*x); + /* Note: unlinke access bits, deny bits may be zero. 
*/ + if (*x & ~NFS4_SHARE_DENY_BOTH) + return nfserr_bad_xdr; + return nfs_ok; +xdr_error: + return nfserr_bad_xdr; +} + +static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) +{ + __be32 *p; + + READ_BUF(4); + READ32(o->len); + + if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) + return nfserr_bad_xdr; + + READ_BUF(o->len); + SAVEMEM(o->data, o->len); + return nfs_ok; +xdr_error: + return nfserr_bad_xdr; +} + +static __be32 +nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +{ + DECODE_HEAD; + + memset(open->op_bmval, 0, sizeof(open->op_bmval)); + open->op_iattr.ia_valid = 0; + open->op_openowner = NULL; + + /* seqid, share_access, share_deny, clientid, ownerlen */ + READ_BUF(4); + READ32(open->op_seqid); + status = nfsd4_decode_share_access(argp, &open->op_share_access); + if (status) + goto xdr_error; + status = nfsd4_decode_share_deny(argp, &open->op_share_deny); + if (status) + goto xdr_error; + READ_BUF(sizeof(clientid_t)); + COPYMEM(&open->op_clientid, sizeof(clientid_t)); + status = nfsd4_decode_opaque(argp, &open->op_owner); + if (status) + goto xdr_error; + READ_BUF(4); + READ32(open->op_create); + switch (open->op_create) { + case NFS4_OPEN_NOCREATE: + break; + case NFS4_OPEN_CREATE: + READ_BUF(4); + READ32(open->op_createmode); + switch (open->op_createmode) { + case NFS4_CREATE_UNCHECKED: + case NFS4_CREATE_GUARDED: + status = nfsd4_decode_fattr(argp, open->op_bmval, + &open->op_iattr, &open->op_acl); + if (status) + goto out; + break; + case NFS4_CREATE_EXCLUSIVE: + READ_BUF(8); + COPYMEM(open->op_verf.data, 8); + break; + case NFS4_CREATE_EXCLUSIVE4_1: + if (argp->minorversion < 1) + goto xdr_error; + READ_BUF(8); + COPYMEM(open->op_verf.data, 8); + status = nfsd4_decode_fattr(argp, open->op_bmval, + &open->op_iattr, &open->op_acl); + if (status) + goto out; + break; + default: + goto xdr_error; + } + break; + default: + goto xdr_error; + } + + /* open_claim */ + READ_BUF(4); + 
READ32(open->op_claim_type); + switch (open->op_claim_type) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + READ_BUF(4); + READ32(open->op_fname.len); + READ_BUF(open->op_fname.len); + SAVEMEM(open->op_fname.data, open->op_fname.len); + if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + return status; + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + READ_BUF(4); + READ32(open->op_delegate_type); + break; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); + if (status) + return status; + READ_BUF(4); + READ32(open->op_fname.len); + READ_BUF(open->op_fname.len); + SAVEMEM(open->op_fname.data, open->op_fname.len); + if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + return status; + break; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + if (argp->minorversion < 1) + goto xdr_error; + /* void */ + break; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + if (argp->minorversion < 1) + goto xdr_error; + status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); + if (status) + return status; + break; + default: + goto xdr_error; + } + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) +{ + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); + if (status) + return status; + READ_BUF(4); + READ32(open_conf->oc_seqid); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) +{ + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &open_down->od_stateid); + if (status) + return status; + READ_BUF(4); + READ32(open_down->od_seqid); + status = nfsd4_decode_share_access(argp, &open_down->od_share_access); + if (status) + return status; + status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); + if (status) + return 
status; + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(putfh->pf_fhlen); + if (putfh->pf_fhlen > NFS4_FHSIZE) + goto xdr_error; + READ_BUF(putfh->pf_fhlen); + SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) +{ + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &read->rd_stateid); + if (status) + return status; + READ_BUF(12); + READ64(read->rd_offset); + READ32(read->rd_length); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) +{ + DECODE_HEAD; + + READ_BUF(24); + READ64(readdir->rd_cookie); + COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); + READ32(readdir->rd_dircount); /* just in case you needed a useless field... */ + READ32(readdir->rd_maxcount); + if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) + goto out; + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(remove->rm_namelen); + READ_BUF(remove->rm_namelen); + SAVEMEM(remove->rm_name, remove->rm_namelen); + if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent))) + return status; + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(rename->rn_snamelen); + READ_BUF(rename->rn_snamelen + 4); + SAVEMEM(rename->rn_sname, rename->rn_snamelen); + READ32(rename->rn_tnamelen); + READ_BUF(rename->rn_tnamelen); + SAVEMEM(rename->rn_tname, rename->rn_tnamelen); + if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent))) + return status; + if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval))) + return 
status; + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(clientid_t)); + COPYMEM(clientid, sizeof(clientid_t)); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, + struct nfsd4_secinfo *secinfo) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(secinfo->si_namelen); + READ_BUF(secinfo->si_namelen); + SAVEMEM(secinfo->si_name, secinfo->si_namelen); + status = check_filename(secinfo->si_name, secinfo->si_namelen, + nfserr_noent); + if (status) + return status; + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, + struct nfsd4_secinfo_no_name *sin) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(sin->sin_style); + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +{ + __be32 status; + + status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); + if (status) + return status; + return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, + &setattr->sa_acl); +} + +static __be32 +nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) +{ + DECODE_HEAD; + + READ_BUF(8); + COPYMEM(setclientid->se_verf.data, 8); + + status = nfsd4_decode_opaque(argp, &setclientid->se_name); + if (status) + return nfserr_bad_xdr; + READ_BUF(8); + READ32(setclientid->se_callback_prog); + READ32(setclientid->se_callback_netid_len); + + READ_BUF(setclientid->se_callback_netid_len + 4); + SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); + READ32(setclientid->se_callback_addr_len); + + READ_BUF(setclientid->se_callback_addr_len + 4); + SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); + READ32(setclientid->se_callback_ident); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct 
nfsd4_setclientid_confirm *scd_c) +{ + DECODE_HEAD; + + READ_BUF(8 + sizeof(nfs4_verifier)); + COPYMEM(&scd_c->sc_clientid, 8); + COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier)); + + DECODE_TAIL; +} + +/* Also used for NVERIFY */ +static __be32 +nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) +{ +#if 0 + struct nfsd4_compoundargs save = { + .p = argp->p, + .end = argp->end, + .rqstp = argp->rqstp, + }; + u32 ve_bmval[2]; + struct iattr ve_iattr; /* request */ + struct nfs4_acl *ve_acl; /* request */ +#endif + DECODE_HEAD; + + if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) + goto out; + + /* For convenience's sake, we compare raw xdr'd attributes in + * nfsd4_proc_verify; however we still decode here just to return + * correct error in case of bad xdr. */ +#if 0 + status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl); + if (status == nfserr_inval) { + status = nfserrno(status); + goto out; + } +#endif + READ_BUF(4); + READ32(verify->ve_attrlen); + READ_BUF(verify->ve_attrlen); + SAVEMEM(verify->ve_attrval, verify->ve_attrlen); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) +{ + int avail; + int v; + int len; + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &write->wr_stateid); + if (status) + return status; + READ_BUF(16); + READ64(write->wr_offset); + READ32(write->wr_stable_how); + if (write->wr_stable_how > 2) + goto xdr_error; + READ32(write->wr_buflen); + + /* Sorry .. no magic macros for this.. 
* + * READ_BUF(write->wr_buflen); + * SAVEMEM(write->wr_buf, write->wr_buflen); + */ + avail = (char*)argp->end - (char*)argp->p; + if (avail + argp->pagelen < write->wr_buflen) { + dprintk("NFSD: xdr error (%s:%d)\n", + __FILE__, __LINE__); + goto xdr_error; + } + argp->rqstp->rq_vec[0].iov_base = p; + argp->rqstp->rq_vec[0].iov_len = avail; + v = 0; + len = write->wr_buflen; + while (len > argp->rqstp->rq_vec[v].iov_len) { + len -= argp->rqstp->rq_vec[v].iov_len; + v++; + argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen >= PAGE_SIZE) { + argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE; + argp->pagelen -= PAGE_SIZE; + } else { + argp->rqstp->rq_vec[v].iov_len = argp->pagelen; + argp->pagelen -= len; + } + } + argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); + argp->p = (__be32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); + argp->rqstp->rq_vec[v].iov_len = len; + write->wr_vlen = v+1; + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) +{ + DECODE_HEAD; + + READ_BUF(12); + COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); + READ32(rlockowner->rl_owner.len); + READ_BUF(rlockowner->rl_owner.len); + READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); + + if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid)) + return nfserr_inval; + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + int dummy, tmp; + DECODE_HEAD; + + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); + + status = nfsd4_decode_opaque(argp, &exid->clname); + if (status) + return nfserr_bad_xdr; + + READ_BUF(4); + READ32(exid->flags); + + /* Ignore state_protect4_a */ + READ_BUF(4); + READ32(exid->spa_how); + switch (exid->spa_how) { + case SP4_NONE: + break; + case 
SP4_MACH_CRED: + /* spo_must_enforce */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + p += dummy; + + /* spo_must_allow */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + p += dummy; + break; + case SP4_SSV: + /* ssp_ops */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + p += dummy; + + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + p += dummy; + + /* ssp_hash_algs<> */ + READ_BUF(4); + READ32(tmp); + while (tmp--) { + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + } + + /* ssp_encr_algs<> */ + READ_BUF(4); + READ32(tmp); + while (tmp--) { + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + } + + /* ssp_window and ssp_num_gss_handles */ + READ_BUF(8); + READ32(dummy); + READ32(dummy); + break; + default: + goto xdr_error; + } + + /* Ignore Implementation ID */ + READ_BUF(4); /* nfs_impl_id4 array length */ + READ32(dummy); + + if (dummy > 1) + goto xdr_error; + + if (dummy == 1) { + /* nii_domain */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + + /* nii_name */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + + /* nii_date */ + READ_BUF(12); + p += 3; + } + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, + struct nfsd4_create_session *sess) +{ + DECODE_HEAD; + + u32 dummy; + char *machine_name; + int i; + int nr_secflavs; + + READ_BUF(16); + COPYMEM(&sess->clientid, 8); + READ32(sess->seqid); + READ32(sess->flags); + + /* Fore channel attrs */ + READ_BUF(28); + READ32(dummy); /* headerpadsz is always 0 */ + READ32(sess->fore_channel.maxreq_sz); + READ32(sess->fore_channel.maxresp_sz); + READ32(sess->fore_channel.maxresp_cached); + READ32(sess->fore_channel.maxops); + READ32(sess->fore_channel.maxreqs); + READ32(sess->fore_channel.nr_rdma_attrs); + if (sess->fore_channel.nr_rdma_attrs == 1) { + READ_BUF(4); + READ32(sess->fore_channel.rdma_attrs); + } else if 
(sess->fore_channel.nr_rdma_attrs > 1) { + dprintk("Too many fore channel attr bitmaps!\n"); + goto xdr_error; + } + + /* Back channel attrs */ + READ_BUF(28); + READ32(dummy); /* headerpadsz is always 0 */ + READ32(sess->back_channel.maxreq_sz); + READ32(sess->back_channel.maxresp_sz); + READ32(sess->back_channel.maxresp_cached); + READ32(sess->back_channel.maxops); + READ32(sess->back_channel.maxreqs); + READ32(sess->back_channel.nr_rdma_attrs); + if (sess->back_channel.nr_rdma_attrs == 1) { + READ_BUF(4); + READ32(sess->back_channel.rdma_attrs); + } else if (sess->back_channel.nr_rdma_attrs > 1) { + dprintk("Too many back channel attr bitmaps!\n"); + goto xdr_error; + } + + READ_BUF(8); + READ32(sess->callback_prog); + + /* callback_sec_params4 */ + READ32(nr_secflavs); + for (i = 0; i < nr_secflavs; ++i) { + READ_BUF(4); + READ32(dummy); + switch (dummy) { + case RPC_AUTH_NULL: + /* Nothing to read */ + break; + case RPC_AUTH_UNIX: + READ_BUF(8); + /* stamp */ + READ32(dummy); + + /* machine name */ + READ32(dummy); + READ_BUF(dummy); + SAVEMEM(machine_name, dummy); + + /* uid, gid */ + READ_BUF(8); + READ32(sess->uid); + READ32(sess->gid); + + /* more gids */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + break; + case RPC_AUTH_GSS: + dprintk("RPC_AUTH_GSS callback secflavor " + "not supported!\n"); + READ_BUF(8); + /* gcbp_service */ + READ32(dummy); + /* gcbp_handle_from_server */ + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + /* gcbp_handle_from_client */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + break; + default: + dprintk("Illegal callback secflavor\n"); + return nfserr_inval; + } + } + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, + struct nfsd4_destroy_session *destroy_session) +{ + DECODE_HEAD; + READ_BUF(NFS4_MAX_SESSIONID_LEN); + COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN); + + DECODE_TAIL; +} + +static __be32 
+nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, + struct nfsd4_free_stateid *free_stateid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + READ32(free_stateid->fr_stateid.si_generation); + COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, + struct nfsd4_sequence *seq) +{ + DECODE_HEAD; + + READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); + COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); + READ32(seq->seqid); + READ32(seq->slotid); + READ32(seq->maxslots); + READ32(seq->cachethis); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +{ + unsigned int nbytes; + stateid_t si; + int i; + __be32 *p; + __be32 status; + + READ_BUF(4); + test_stateid->ts_num_ids = ntohl(*p++); + + nbytes = test_stateid->ts_num_ids * sizeof(stateid_t); + if (nbytes > (u32)((char *)argp->end - (char *)argp->p)) + goto xdr_error; + + test_stateid->ts_saved_args = argp; + save_buf(argp, &test_stateid->ts_savedp); + + for (i = 0; i < test_stateid->ts_num_ids; i++) { + status = nfsd4_decode_stateid(argp, &si); + if (status) + return status; + } + + status = 0; +out: + return status; +xdr_error: + dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__); + status = nfserr_bad_xdr; + goto out; +} + +static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc) +{ + DECODE_HEAD; + + READ_BUF(8); + COPYMEM(&dc->clientid, 8); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(rc->rca_one_fs); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) +{ + return nfs_ok; +} + +static __be32 +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) +{ + return 
nfserr_notsupp; +} + +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); + +static nfsd4_dec nfsd4_dec_ops[] = { + [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] = (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, + [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, +}; + 
+static nfsd4_dec nfsd41_dec_ops[] = { + [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] = (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp, + [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, + + /* new operations for NFSv4.1 */ + [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BIND_CONN_TO_SESSION]= 
(nfsd4_dec)nfsd4_decode_bind_conn_to_session, + [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, + [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, + [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, + [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, + [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, + [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, +}; + +struct nfsd4_minorversion_ops { + nfsd4_dec *decoders; + int nops; +}; + +static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { + [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, + [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, +}; + +static __be32 +nfsd4_decode_compound(struct nfsd4_compoundargs *argp) +{ + DECODE_HEAD; + struct nfsd4_op *op; + struct nfsd4_minorversion_ops *ops; + bool cachethis = false; + int i; + + /* + * XXX: According to spec, we should check the tag + * for UTF-8 compliance. I'm postponing this for + * now because it seems that some clients do use + * binary tags. 
+ */ + READ_BUF(4); + READ32(argp->taglen); + READ_BUF(argp->taglen + 8); + SAVEMEM(argp->tag, argp->taglen); + READ32(argp->minorversion); + READ32(argp->opcnt); + + if (argp->taglen > NFSD4_MAX_TAGLEN) + goto xdr_error; + if (argp->opcnt > 100) + goto xdr_error; + + if (argp->opcnt > ARRAY_SIZE(argp->iops)) { + argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); + if (!argp->ops) { + argp->ops = argp->iops; + dprintk("nfsd: couldn't allocate room for COMPOUND\n"); + goto xdr_error; + } + } + + if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion)) + argp->opcnt = 0; + + ops = &nfsd4_minorversion[argp->minorversion]; + for (i = 0; i < argp->opcnt; i++) { + op = &argp->ops[i]; + op->replay = NULL; + + /* + * We can't use READ_BUF() here because we need to handle + * a missing opcode as an OP_WRITE + 1. So we need to check + * to see if we're truly at the end of our buffer or if there + * is another page we need to flip to. + */ + + if (argp->p == argp->end) { + if (argp->pagelen < 4) { + /* There isn't an opcode still on the wire */ + op->opnum = OP_WRITE + 1; + op->status = nfserr_bad_xdr; + argp->opcnt = i+1; + break; + } + + /* + * False alarm. We just hit a page boundary, but there + * is still data available. Move pointer across page + * boundary. 
*snip from READ_BUF* + */ + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } + } + op->opnum = ntohl(*argp->p++); + + if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) + op->status = ops->decoders[op->opnum](argp, &op->u); + else { + op->opnum = OP_ILLEGAL; + op->status = nfserr_op_illegal; + } + + if (op->status) { + argp->opcnt = i+1; + break; + } + /* + * We'll try to cache the result in the DRC if any one + * op in the compound wants to be cached: + */ + cachethis |= nfsd4_cache_this_op(op); + } + /* Sessions make the DRC unnecessary: */ + if (argp->minorversion) + cachethis = false; + argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; + + DECODE_TAIL; +} + +#define WRITE32(n) *p++ = htonl(n) +#define WRITE64(n) do { \ + *p++ = htonl((u32)((n) >> 32)); \ + *p++ = htonl((u32)(n)); \ +} while (0) +#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \ + *(p + XDR_QUADLEN(nbytes) -1) = 0; \ + memcpy(p, ptr, nbytes); \ + p += XDR_QUADLEN(nbytes); \ +}} while (0) + +static void write32(__be32 **p, u32 n) +{ + *(*p)++ = n; +} + +static void write64(__be32 **p, u64 n) +{ + write32(p, (u32)(n >> 32)); + write32(p, (u32)n); +} + +static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +{ + if (IS_I_VERSION(inode)) { + write64(p, inode->i_version); + } else { + write32(p, stat->ctime.tv_sec); + write32(p, stat->ctime.tv_nsec); + } +} + +static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +{ + write32(p, c->atomic); + if (c->change_supported) { + write64(p, c->before_change); + write64(p, c->after_change); + } else { + write32(p, c->before_ctime_sec); + write32(p, c->before_ctime_nsec); + write32(p, c->after_ctime_sec); + write32(p, c->after_ctime_nsec); + } +} + +#define RESERVE_SPACE(nbytes) do { \ + p = resp->p; \ 
+ BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \ +} while (0) +#define ADJUST_ARGS() resp->p = p + +/* + * Header routine to setup seqid operation replay cache + */ +#define ENCODE_SEQID_OP_HEAD \ + __be32 *save; \ + \ + save = resp->p; + +/* + * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This + * is where sequence id's are incremented, and the replay cache is filled. + * Note that we increment sequence id's here, at the last moment, so we're sure + * we know whether the error to be returned is a sequence id mutating error. + */ + +static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr) +{ + struct nfs4_stateowner *stateowner = resp->cstate.replay_owner; + + if (seqid_mutating_err(ntohl(nfserr)) && stateowner) { + stateowner->so_seqid++; + stateowner->so_replay.rp_status = nfserr; + stateowner->so_replay.rp_buflen = + (char *)resp->p - (char *)save; + memcpy(stateowner->so_replay.rp_buf, save, + stateowner->so_replay.rp_buflen); + nfsd4_purge_closed_stateid(stateowner); + } +} + +/* Encode as an array of strings the string given with components + * separated @sep. 
+ */ +static __be32 nfsd4_encode_components(char sep, char *components, + __be32 **pp, int *buflen) +{ + __be32 *p = *pp; + __be32 *countp = p; + int strlen, count=0; + char *str, *end; + + dprintk("nfsd4_encode_components(%s)\n", components); + if ((*buflen -= 4) < 0) + return nfserr_resource; + WRITE32(0); /* We will fill this in with @count later */ + end = str = components; + while (*end) { + for (; *end && (*end != sep); end++) + ; /* Point to end of component */ + strlen = end - str; + if (strlen) { + if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) + return nfserr_resource; + WRITE32(strlen); + WRITEMEM(str, strlen); + count++; + } + else + end++; + str = end; + } + *pp = p; + p = countp; + WRITE32(count); + return 0; +} + +/* + * encode a location element of a fs_locations structure + */ +static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, + __be32 **pp, int *buflen) +{ + __be32 status; + __be32 *p = *pp; + + status = nfsd4_encode_components(':', location->hosts, &p, buflen); + if (status) + return status; + status = nfsd4_encode_components('/', location->path, &p, buflen); + if (status) + return status; + *pp = p; + return 0; +} + +/* + * Encode a path in RFC3530 'pathname4' format + */ +static __be32 nfsd4_encode_path(const struct path *root, + const struct path *path, __be32 **pp, int *buflen) +{ + struct path cur = { + .mnt = path->mnt, + .dentry = path->dentry, + }; + __be32 *p = *pp; + struct dentry **components = NULL; + unsigned int ncomponents = 0; + __be32 err = nfserr_jukebox; + + dprintk("nfsd4_encode_components("); + + path_get(&cur); + /* First walk the path up to the nfsd root, and store the + * dentries/path components in an array. 
+ */ + for (;;) { + if (cur.dentry == root->dentry && cur.mnt == root->mnt) + break; + if (cur.dentry == cur.mnt->mnt_root) { + if (follow_up(&cur)) + continue; + goto out_free; + } + if ((ncomponents & 15) == 0) { + struct dentry **new; + new = krealloc(components, + sizeof(*new) * (ncomponents + 16), + GFP_KERNEL); + if (!new) + goto out_free; + components = new; + } + components[ncomponents++] = cur.dentry; + cur.dentry = dget_parent(cur.dentry); + } + + *buflen -= 4; + if (*buflen < 0) + goto out_free; + WRITE32(ncomponents); + + while (ncomponents) { + struct dentry *dentry = components[ncomponents - 1]; + unsigned int len = dentry->d_name.len; + + *buflen -= 4 + (XDR_QUADLEN(len) << 2); + if (*buflen < 0) + goto out_free; + WRITE32(len); + WRITEMEM(dentry->d_name.name, len); + dprintk("/%s", dentry->d_name.name); + dput(dentry); + ncomponents--; + } + + *pp = p; + err = 0; +out_free: + dprintk(")\n"); + while (ncomponents) + dput(components[--ncomponents]); + kfree(components); + path_put(&cur); + return err; +} + +static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, + const struct path *path, __be32 **pp, int *buflen) +{ + struct svc_export *exp_ps; + __be32 res; + + exp_ps = rqst_find_fsidzero_export(rqstp); + if (IS_ERR(exp_ps)) + return nfserrno(PTR_ERR(exp_ps)); + res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen); + exp_put(exp_ps); + return res; +} + +/* + * encode a fs_locations structure + */ +static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, + struct svc_export *exp, + __be32 **pp, int *buflen) +{ + __be32 status; + int i; + __be32 *p = *pp; + struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; + + status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen); + if (status) + return status; + if ((*buflen -= 4) < 0) + return nfserr_resource; + WRITE32(fslocs->locations_count); + for (i=0; i<fslocs->locations_count; i++) { + status = nfsd4_encode_fs_location4(&fslocs->locations[i], + &p, buflen); + if 
(status) + return status; + } + *pp = p; + return 0; +} + +static u32 nfs4_file_type(umode_t mode) +{ + switch (mode & S_IFMT) { + case S_IFIFO: return NF4FIFO; + case S_IFCHR: return NF4CHR; + case S_IFDIR: return NF4DIR; + case S_IFBLK: return NF4BLK; + case S_IFLNK: return NF4LNK; + case S_IFREG: return NF4REG; + case S_IFSOCK: return NF4SOCK; + default: return NF4BAD; + }; +} + +static __be32 +nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, + __be32 **p, int *buflen) +{ + int status; + + if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4) + return nfserr_resource; + if (whotype != NFS4_ACL_WHO_NAMED) + status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); + else if (group) + status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); + else + status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); + if (status < 0) + return nfserrno(status); + *p = xdr_encode_opaque(*p, NULL, status); + *buflen -= (XDR_QUADLEN(status) << 2) + 4; + BUG_ON(*buflen < 0); + return 0; +} + +static inline __be32 +nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) +{ + return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); +} + +static inline __be32 +nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) +{ + return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); +} + +static inline __be32 +nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, + __be32 **p, int *buflen) +{ + return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); +} + +#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ + FATTR4_WORD0_RDATTR_ERROR) +#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID + +static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) +{ + /* As per referral draft: */ + if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS || + *bmval1 & ~WORD1_ABSENT_FS_ATTRS) { + if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR || 
+ *bmval0 & FATTR4_WORD0_FS_LOCATIONS) + *rdattr_err = NFSERR_MOVED; + else + return nfserr_moved; + } + *bmval0 &= WORD0_ABSENT_FS_ATTRS; + *bmval1 &= WORD1_ABSENT_FS_ATTRS; + return 0; +} + +/* + * Note: @fhp can be NULL; in this case, we might have to compose the filehandle + * ourselves. + * + * @countp is the buffer size in _words_; upon successful return this becomes + * replaced with the number of words written. + */ +__be32 +nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, + struct svc_rqst *rqstp, int ignore_crossmnt) +{ + u32 bmval0 = bmval[0]; + u32 bmval1 = bmval[1]; + u32 bmval2 = bmval[2]; + struct kstat stat; + struct svc_fh tempfh; + struct kstatfs statfs; + int buflen = *countp << 2; + __be32 *attrlenp; + u32 dummy; + u64 dummy64; + u32 rdattr_err = 0; + __be32 *p = buffer; + __be32 status; + int err; + int aclsupport = 0; + struct nfs4_acl *acl = NULL; + struct nfsd4_compoundres *resp = rqstp->rq_resp; + u32 minorversion = resp->cstate.minorversion; + struct path path = { + .mnt = exp->ex_path.mnt, + .dentry = dentry, + }; + + BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); + BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); + BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion)); + BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); + + if (exp->ex_fslocs.migrated) { + BUG_ON(bmval[2]); + status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); + if (status) + goto out; + } + + err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); + if (err) + goto out_nfserr; + if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | + FATTR4_WORD0_MAXNAME)) || + (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | + FATTR4_WORD1_SPACE_TOTAL))) { + err = vfs_statfs(&path, &statfs); + if (err) + goto out_nfserr; + } + if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { + fh_init(&tempfh, NFS4_FHSIZE); + status = fh_compose(&tempfh, exp, dentry, NULL); 
+ if (status) + goto out; + fhp = &tempfh; + } + if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT + | FATTR4_WORD0_SUPPORTED_ATTRS)) { + err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); + aclsupport = (err == 0); + if (bmval0 & FATTR4_WORD0_ACL) { + if (err == -EOPNOTSUPP) + bmval0 &= ~FATTR4_WORD0_ACL; + else if (err == -EINVAL) { + status = nfserr_attrnotsupp; + goto out; + } else if (err != 0) + goto out_nfserr; + } + } + + if (bmval2) { + if ((buflen -= 16) < 0) + goto out_resource; + WRITE32(3); + WRITE32(bmval0); + WRITE32(bmval1); + WRITE32(bmval2); + } else if (bmval1) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(2); + WRITE32(bmval0); + WRITE32(bmval1); + } else { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE32(1); + WRITE32(bmval0); + } + attrlenp = p++; /* to be backfilled later */ + + if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { + u32 word0 = nfsd_suppattrs0(minorversion); + u32 word1 = nfsd_suppattrs1(minorversion); + u32 word2 = nfsd_suppattrs2(minorversion); + + if (!aclsupport) + word0 &= ~FATTR4_WORD0_ACL; + if (!word2) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(2); + WRITE32(word0); + WRITE32(word1); + } else { + if ((buflen -= 16) < 0) + goto out_resource; + WRITE32(3); + WRITE32(word0); + WRITE32(word1); + WRITE32(word2); + } + } + if (bmval0 & FATTR4_WORD0_TYPE) { + if ((buflen -= 4) < 0) + goto out_resource; + dummy = nfs4_file_type(stat.mode); + if (dummy == NF4BAD) + goto out_serverfault; + WRITE32(dummy); + } + if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { + if ((buflen -= 4) < 0) + goto out_resource; + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) + WRITE32(NFS4_FH_PERSISTENT); + else + WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); + } + if (bmval0 & FATTR4_WORD0_CHANGE) { + if ((buflen -= 8) < 0) + goto out_resource; + write_change(&p, &stat, dentry->d_inode); + } + if (bmval0 & FATTR4_WORD0_SIZE) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64(stat.size); + } + if (bmval0 & 
FATTR4_WORD0_LINK_SUPPORT) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(0); + } + if (bmval0 & FATTR4_WORD0_FSID) { + if ((buflen -= 16) < 0) + goto out_resource; + if (exp->ex_fslocs.migrated) { + WRITE64(NFS4_REFERRAL_FSID_MAJOR); + WRITE64(NFS4_REFERRAL_FSID_MINOR); + } else switch(fsid_source(fhp)) { + case FSIDSOURCE_FSID: + WRITE64((u64)exp->ex_fsid); + WRITE64((u64)0); + break; + case FSIDSOURCE_DEV: + WRITE32(0); + WRITE32(MAJOR(stat.dev)); + WRITE32(0); + WRITE32(MINOR(stat.dev)); + break; + case FSIDSOURCE_UUID: + WRITEMEM(exp->ex_uuid, 16); + break; + } + } + if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(0); + } + if (bmval0 & FATTR4_WORD0_LEASE_TIME) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(nfsd4_lease); + } + if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(rdattr_err); + } + if (bmval0 & FATTR4_WORD0_ACL) { + struct nfs4_ace *ace; + + if (acl == NULL) { + if ((buflen -= 4) < 0) + goto out_resource; + + WRITE32(0); + goto out_acl; + } + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(acl->naces); + + for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { + if ((buflen -= 4*3) < 0) + goto out_resource; + WRITE32(ace->type); + WRITE32(ace->flag); + WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); + status = nfsd4_encode_aclname(rqstp, ace->whotype, + ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP, + &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + } +out_acl: + if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(aclsupport ? 
+ ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); + } + if (bmval0 & FATTR4_WORD0_CANSETTIME) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_FILEHANDLE) { + buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4; + if (buflen < 0) + goto out_resource; + WRITE32(fhp->fh_handle.fh_size); + WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); + } + if (bmval0 & FATTR4_WORD0_FILEID) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64(stat.ino); + } + if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) statfs.f_ffree); + } + if (bmval0 & FATTR4_WORD0_FILES_FREE) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) statfs.f_ffree); + } + if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) statfs.f_files); + } + if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { + status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64(~(u64)0); + } + if (bmval0 & FATTR4_WORD0_MAXLINK) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(255); + } + if (bmval0 & FATTR4_WORD0_MAXNAME) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(statfs.f_namelen); + } + if (bmval0 & FATTR4_WORD0_MAXREAD) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) svc_max_payload(rqstp)); + } + if 
(bmval0 & FATTR4_WORD0_MAXWRITE) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) svc_max_payload(rqstp)); + } + if (bmval1 & FATTR4_WORD1_MODE) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(stat.mode & S_IALLUGO); + } + if (bmval1 & FATTR4_WORD1_NO_TRUNC) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval1 & FATTR4_WORD1_NUMLINKS) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(stat.nlink); + } + if (bmval1 & FATTR4_WORD1_OWNER) { + status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { + status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + if (bmval1 & FATTR4_WORD1_RAWDEV) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE32((u32) MAJOR(stat.rdev)); + WRITE32((u32) MINOR(stat.rdev)); + } + if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { + if ((buflen -= 8) < 0) + goto out_resource; + dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; + WRITE64(dummy64); + } + if (bmval1 & FATTR4_WORD1_SPACE_FREE) { + if ((buflen -= 8) < 0) + goto out_resource; + dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; + WRITE64(dummy64); + } + if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { + if ((buflen -= 8) < 0) + goto out_resource; + dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; + WRITE64(dummy64); + } + if (bmval1 & FATTR4_WORD1_SPACE_USED) { + if ((buflen -= 8) < 0) + goto out_resource; + dummy64 = (u64)stat.blocks << 9; + WRITE64(dummy64); + } + if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(0); + WRITE32(stat.atime.tv_sec); + WRITE32(stat.atime.tv_nsec); + } + if (bmval1 & FATTR4_WORD1_TIME_DELTA) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(0); + WRITE32(1); + WRITE32(0); + } + if (bmval1 & FATTR4_WORD1_TIME_METADATA) { + if 
((buflen -= 12) < 0) + goto out_resource; + WRITE32(0); + WRITE32(stat.ctime.tv_sec); + WRITE32(stat.ctime.tv_nsec); + } + if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(0); + WRITE32(stat.mtime.tv_sec); + WRITE32(stat.mtime.tv_nsec); + } + if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { + if ((buflen -= 8) < 0) + goto out_resource; + /* + * Get parent's attributes if not ignoring crossmount + * and this is the root of a cross-mounted filesystem. + */ + if (ignore_crossmnt == 0 && + dentry == exp->ex_path.mnt->mnt_root) { + struct path path = exp->ex_path; + path_get(&path); + while (follow_up(&path)) { + if (path.dentry != path.mnt->mnt_root) + break; + } + err = vfs_getattr(path.mnt, path.dentry, &stat); + path_put(&path); + if (err) + goto out_nfserr; + } + WRITE64(stat.ino); + } + if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { + WRITE32(3); + WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); + WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); + WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); + } + + *attrlenp = htonl((char *)p - (char *)attrlenp - 4); + *countp = p - buffer; + status = nfs_ok; + +out: + kfree(acl); + if (fhp == &tempfh) + fh_put(&tempfh); + return status; +out_nfserr: + status = nfserrno(err); + goto out; +out_resource: + *countp = 0; + status = nfserr_resource; + goto out; +out_serverfault: + status = nfserr_serverfault; + goto out; +} + +static inline int attributes_need_mount(u32 *bmval) +{ + if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME)) + return 1; + if (bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) + return 1; + return 0; +} + +static __be32 +nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, + const char *name, int namlen, __be32 *p, int *buflen) +{ + struct svc_export *exp = cd->rd_fhp->fh_export; + struct dentry *dentry; + __be32 nfserr; + int ignore_crossmnt = 0; + + dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); + if (IS_ERR(dentry)) + return nfserrno(PTR_ERR(dentry)); + if 
(!dentry->d_inode) { + /* + * nfsd_buffered_readdir drops the i_mutex between + * readdir and calling this callback, leaving a window + * where this directory entry could have gone away. + */ + dput(dentry); + return nfserr_noent; + } + + exp_get(exp); + /* + * In the case of a mountpoint, the client may be asking for + * attributes that are only properties of the underlying filesystem + * as opposed to the cross-mounted file system. In such a case, + * we will not follow the cross mount and will fill the attribtutes + * directly from the mountpoint dentry. + */ + if (nfsd_mountpoint(dentry, exp)) { + int err; + + if (!(exp->ex_flags & NFSEXP_V4ROOT) + && !attributes_need_mount(cd->rd_bmval)) { + ignore_crossmnt = 1; + goto out_encode; + } + /* + * Why the heck aren't we just using nfsd_lookup?? + * Different "."/".." handling? Something else? + * At least, add a comment here to explain.... + */ + err = nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp); + if (err) { + nfserr = nfserrno(err); + goto out_put; + } + nfserr = check_nfsd_access(exp, cd->rd_rqstp); + if (nfserr) + goto out_put; + + } +out_encode: + nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, + cd->rd_rqstp, ignore_crossmnt); +out_put: + dput(dentry); + exp_put(exp); + return nfserr; +} + +static __be32 * +nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) +{ + __be32 *attrlenp; + + if (buflen < 6) + return NULL; + *p++ = htonl(2); + *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ + *p++ = htonl(0); /* bmval1 */ + + attrlenp = p++; + *p++ = nfserr; /* no htonl */ + *attrlenp = htonl((char *)p - (char *)attrlenp - 4); + return p; +} + +static int +nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = ccdv; + struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); + int buflen; + __be32 *p = cd->buffer; + __be32 *cookiep; + __be32 nfserr = nfserr_toosmall; + + /* 
In nfsv4, "." and ".." never make it onto the wire.. */ + if (name && isdotent(name, namlen)) { + cd->common.err = nfs_ok; + return 0; + } + + if (cd->offset) + xdr_encode_hyper(cd->offset, (u64) offset); + + buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); + if (buflen < 0) + goto fail; + + *p++ = xdr_one; /* mark entry present */ + cookiep = p; + p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ + p = xdr_encode_array(p, name, namlen); /* name length & name */ + + nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + switch (nfserr) { + case nfs_ok: + p += buflen; + break; + case nfserr_resource: + nfserr = nfserr_toosmall; + goto fail; + case nfserr_noent: + goto skip_entry; + default: + /* + * If the client requested the RDATTR_ERROR attribute, + * we stuff the error code into this attribute + * and continue. If this attribute was not requested, + * then in accordance with the spec, we fail the + * entire READDIR operation(!) + */ + if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) + goto fail; + p = nfsd4_encode_rdattr_error(p, buflen, nfserr); + if (p == NULL) { + nfserr = nfserr_toosmall; + goto fail; + } + } + cd->buflen -= (p - cd->buffer); + cd->buffer = p; + cd->offset = cookiep; +skip_entry: + cd->common.err = nfs_ok; + return 0; +fail: + cd->common.err = nfserr; + return -EINVAL; +} + +static void +nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) +{ + __be32 *p; + + RESERVE_SPACE(sizeof(stateid_t)); + WRITE32(sid->si_generation); + WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + ADJUST_ARGS(); +} + +static __be32 +nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(8); + WRITE32(access->ac_supported); + WRITE32(access->ac_resp_access); + ADJUST_ARGS(); + } + return nfserr; +} + +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct 
nfsd4_bind_conn_to_session *bcts) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); + WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(bcts->dir); + /* XXX: ? */ + WRITE32(0); + ADJUST_ARGS(); + } + return nfserr; +} + +static __be32 +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) + nfsd4_encode_stateid(resp, &close->cl_stateid); + + encode_seqid_op_tail(resp, save, nfserr); + return nfserr; +} + + +static __be32 +nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(8); + WRITEMEM(commit->co_verf.data, 8); + ADJUST_ARGS(); + } + return nfserr; +} + +static __be32 +nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(32); + write_cinfo(&p, &create->cr_cinfo); + WRITE32(2); + WRITE32(create->cr_bmval[0]); + WRITE32(create->cr_bmval[1]); + ADJUST_ARGS(); + } + return nfserr; +} + +static __be32 +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) +{ + struct svc_fh *fhp = getattr->ga_fhp; + int buflen; + + if (nfserr) + return nfserr; + + buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); + nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, + resp->p, &buflen, getattr->ga_bmval, + resp->rqstp, 0); + if (!nfserr) + resp->p += buflen; + return nfserr; +} + +static __be32 +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) +{ + struct svc_fh *fhp = *fhpp; + unsigned int len; + __be32 *p; + + if (!nfserr) { + len = fhp->fh_handle.fh_size; + RESERVE_SPACE(len + 4); + WRITE32(len); + WRITEMEM(&fhp->fh_handle.fh_base, len); + ADJUST_ARGS(); + } + return nfserr; +} + +/* +* Including all fields other than the name, a LOCK4denied structure requires 
+* 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes. +*/ +static void +nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) +{ + struct xdr_netobj *conf = &ld->ld_owner; + __be32 *p; + + RESERVE_SPACE(32 + XDR_LEN(conf->len)); + WRITE64(ld->ld_start); + WRITE64(ld->ld_length); + WRITE32(ld->ld_type); + if (conf->len) { + WRITEMEM(&ld->ld_clientid, 8); + WRITE32(conf->len); + WRITEMEM(conf->data, conf->len); + kfree(conf->data); + } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ + WRITE64((u64)0); /* clientid */ + WRITE32(0); /* length of owner name */ + } + ADJUST_ARGS(); +} + +static __be32 +nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) + nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); + else if (nfserr == nfserr_denied) + nfsd4_encode_lock_denied(resp, &lock->lk_denied); + + encode_seqid_op_tail(resp, save, nfserr); + return nfserr; +} + +static __be32 +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) +{ + if (nfserr == nfserr_denied) + nfsd4_encode_lock_denied(resp, &lockt->lt_denied); + return nfserr; +} + +static __be32 +nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) + nfsd4_encode_stateid(resp, &locku->lu_stateid); + + encode_seqid_op_tail(resp, save, nfserr); + return nfserr; +} + + +static __be32 +nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(20); + write_cinfo(&p, &link->li_cinfo); + ADJUST_ARGS(); + } + return nfserr; +} + + +static __be32 +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) +{ + __be32 *p; + ENCODE_SEQID_OP_HEAD; + + if (nfserr) + goto out; + + nfsd4_encode_stateid(resp, &open->op_stateid); + RESERVE_SPACE(40); + 
write_cinfo(&p, &open->op_cinfo); + WRITE32(open->op_rflags); + WRITE32(2); + WRITE32(open->op_bmval[0]); + WRITE32(open->op_bmval[1]); + WRITE32(open->op_delegate_type); + ADJUST_ARGS(); + + switch (open->op_delegate_type) { + case NFS4_OPEN_DELEGATE_NONE: + break; + case NFS4_OPEN_DELEGATE_READ: + nfsd4_encode_stateid(resp, &open->op_delegate_stateid); + RESERVE_SPACE(20); + WRITE32(open->op_recall); + + /* + * TODO: ACE's in delegations + */ + WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + WRITE32(0); + WRITE32(0); + WRITE32(0); /* XXX: is NULL principal ok? */ + ADJUST_ARGS(); + break; + case NFS4_OPEN_DELEGATE_WRITE: + nfsd4_encode_stateid(resp, &open->op_delegate_stateid); + RESERVE_SPACE(32); + WRITE32(0); + + /* + * TODO: space_limit's in delegations + */ + WRITE32(NFS4_LIMIT_SIZE); + WRITE32(~(u32)0); + WRITE32(~(u32)0); + + /* + * TODO: ACE's in delegations + */ + WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + WRITE32(0); + WRITE32(0); + WRITE32(0); /* XXX: is NULL principal ok? */ + ADJUST_ARGS(); + break; + default: + BUG(); + } + /* XXX save filehandle here */ +out: + encode_seqid_op_tail(resp, save, nfserr); + return nfserr; +} + +static __be32 +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) + nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); + + encode_seqid_op_tail(resp, save, nfserr); + return nfserr; +} + +static __be32 +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) + nfsd4_encode_stateid(resp, &od->od_stateid); + + encode_seqid_op_tail(resp, save, nfserr); + return nfserr; +} + +static __be32 +nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_read *read) +{ + u32 eof; + int v, pn; + unsigned long maxcount; + long len; + __be32 *p; + + if (nfserr) + return nfserr; + if (resp->xbuf->page_len) + return nfserr_resource; + 
+ RESERVE_SPACE(8); /* eof flag and byte count */ + + maxcount = svc_max_payload(resp->rqstp); + if (maxcount > read->rd_length) + maxcount = read->rd_length; + + len = maxcount; + v = 0; + while (len > 0) { + pn = resp->rqstp->rq_resused++; + resp->rqstp->rq_vec[v].iov_base = + page_address(resp->rqstp->rq_respages[pn]); + resp->rqstp->rq_vec[v].iov_len = + len < PAGE_SIZE ? len : PAGE_SIZE; + v++; + len -= PAGE_SIZE; + } + read->rd_vlen = v; + + nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, + read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, + &maxcount); + + if (nfserr) + return nfserr; + eof = (read->rd_offset + maxcount >= + read->rd_fhp->fh_dentry->d_inode->i_size); + + WRITE32(eof); + WRITE32(maxcount); + ADJUST_ARGS(); + resp->xbuf->head[0].iov_len = (char*)p + - (char*)resp->xbuf->head[0].iov_base; + resp->xbuf->page_len = maxcount; + + /* Use rest of head for padding and remaining ops: */ + resp->xbuf->tail[0].iov_base = p; + resp->xbuf->tail[0].iov_len = 0; + if (maxcount&3) { + RESERVE_SPACE(4); + WRITE32(0); + resp->xbuf->tail[0].iov_base += maxcount&3; + resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); + ADJUST_ARGS(); + } + return 0; +} + +static __be32 +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +{ + int maxcount; + char *page; + __be32 *p; + + if (nfserr) + return nfserr; + if (resp->xbuf->page_len) + return nfserr_resource; + + page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + + maxcount = PAGE_SIZE; + RESERVE_SPACE(4); + + /* + * XXX: By default, the ->readlink() VFS op will truncate symlinks + * if they would overflow the buffer. Is this kosher in NFSv4? If + * not, one easy fix is: if ->readlink() precisely fills the buffer, + * assume that truncation occurred, and return NFS4ERR_RESOURCE. 
+ */ + nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); + if (nfserr == nfserr_isdir) + return nfserr_inval; + if (nfserr) + return nfserr; + + WRITE32(maxcount); + ADJUST_ARGS(); + resp->xbuf->head[0].iov_len = (char*)p + - (char*)resp->xbuf->head[0].iov_base; + resp->xbuf->page_len = maxcount; + + /* Use rest of head for padding and remaining ops: */ + resp->xbuf->tail[0].iov_base = p; + resp->xbuf->tail[0].iov_len = 0; + if (maxcount&3) { + RESERVE_SPACE(4); + WRITE32(0); + resp->xbuf->tail[0].iov_base += maxcount&3; + resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); + ADJUST_ARGS(); + } + return 0; +} + +static __be32 +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) +{ + int maxcount; + loff_t offset; + __be32 *page, *savep, *tailbase; + __be32 *p; + + if (nfserr) + return nfserr; + if (resp->xbuf->page_len) + return nfserr_resource; + + RESERVE_SPACE(8); /* verifier */ + savep = p; + + /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ + WRITE32(0); + WRITE32(0); + ADJUST_ARGS(); + resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; + tailbase = p; + + maxcount = PAGE_SIZE; + if (maxcount > readdir->rd_maxcount) + maxcount = readdir->rd_maxcount; + + /* + * Convert from bytes to words, account for the two words already + * written, make sure to leave two words at the end for the next + * pointer and eof field. 
+ */ + maxcount = (maxcount >> 2) - 4; + if (maxcount < 0) { + nfserr = nfserr_toosmall; + goto err_no_verf; + } + + page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + readdir->common.err = 0; + readdir->buflen = maxcount; + readdir->buffer = page; + readdir->offset = NULL; + + offset = readdir->rd_cookie; + nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, + &offset, + &readdir->common, nfsd4_encode_dirent); + if (nfserr == nfs_ok && + readdir->common.err == nfserr_toosmall && + readdir->buffer == page) + nfserr = nfserr_toosmall; + if (nfserr) + goto err_no_verf; + + if (readdir->offset) + xdr_encode_hyper(readdir->offset, offset); + + p = readdir->buffer; + *p++ = 0; /* no more entries */ + *p++ = htonl(readdir->common.err == nfserr_eof); + resp->xbuf->page_len = ((char*)p) - (char*)page_address( + resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + + /* Use rest of head for padding and remaining ops: */ + resp->xbuf->tail[0].iov_base = tailbase; + resp->xbuf->tail[0].iov_len = 0; + resp->p = resp->xbuf->tail[0].iov_base; + resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4; + + return 0; +err_no_verf: + p = savep; + ADJUST_ARGS(); + return nfserr; +} + +static __be32 +nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(20); + write_cinfo(&p, &remove->rm_cinfo); + ADJUST_ARGS(); + } + return nfserr; +} + +static __be32 +nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(40); + write_cinfo(&p, &rename->rn_sinfo); + write_cinfo(&p, &rename->rn_tinfo); + ADJUST_ARGS(); + } + return nfserr; +} + +static __be32 +nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, + __be32 nfserr,struct svc_export *exp) +{ + int i = 0; + u32 nflavs; + struct exp_flavor_info *flavs; + struct exp_flavor_info def_flavs[2]; + __be32 *p; + + if 
(nfserr) + goto out; + if (exp->ex_nflavors) { + flavs = exp->ex_flavors; + nflavs = exp->ex_nflavors; + } else { /* Handling of some defaults in absence of real secinfo: */ + flavs = def_flavs; + if (exp->ex_client->flavour->flavour == RPC_AUTH_UNIX) { + nflavs = 2; + flavs[0].pseudoflavor = RPC_AUTH_UNIX; + flavs[1].pseudoflavor = RPC_AUTH_NULL; + } else if (exp->ex_client->flavour->flavour == RPC_AUTH_GSS) { + nflavs = 1; + flavs[0].pseudoflavor + = svcauth_gss_flavor(exp->ex_client); + } else { + nflavs = 1; + flavs[0].pseudoflavor + = exp->ex_client->flavour->flavour; + } + } + + RESERVE_SPACE(4); + WRITE32(nflavs); + ADJUST_ARGS(); + for (i = 0; i < nflavs; i++) { + u32 flav = flavs[i].pseudoflavor; + struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); + + if (gm) { + RESERVE_SPACE(4); + WRITE32(RPC_AUTH_GSS); + ADJUST_ARGS(); + RESERVE_SPACE(4 + gm->gm_oid.len); + WRITE32(gm->gm_oid.len); + WRITEMEM(gm->gm_oid.data, gm->gm_oid.len); + ADJUST_ARGS(); + RESERVE_SPACE(4); + WRITE32(0); /* qop */ + ADJUST_ARGS(); + RESERVE_SPACE(4); + WRITE32(gss_pseudoflavor_to_service(gm, flav)); + ADJUST_ARGS(); + gss_mech_put(gm); + } else { + RESERVE_SPACE(4); + WRITE32(flav); + ADJUST_ARGS(); + } + } +out: + if (exp) + exp_put(exp); + return nfserr; +} + +static __be32 +nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_secinfo *secinfo) +{ + return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp); +} + +static __be32 +nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_secinfo_no_name *secinfo) +{ + return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp); +} + +/* + * The SETATTR encode routine is special -- it always encodes a bitmap, + * regardless of the error status. 
+ */ +static __be32 +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) +{ + __be32 *p; + + RESERVE_SPACE(12); + if (nfserr) { + WRITE32(2); + WRITE32(0); + WRITE32(0); + } + else { + WRITE32(2); + WRITE32(setattr->sa_bmval[0]); + WRITE32(setattr->sa_bmval[1]); + } + ADJUST_ARGS(); + return nfserr; +} + +static __be32 +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(8 + sizeof(nfs4_verifier)); + WRITEMEM(&scd->se_clientid, 8); + WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier)); + ADJUST_ARGS(); + } + else if (nfserr == nfserr_clid_inuse) { + RESERVE_SPACE(8); + WRITE32(0); + WRITE32(0); + ADJUST_ARGS(); + } + return nfserr; +} + +static __be32 +nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(16); + WRITE32(write->wr_bytes_written); + WRITE32(write->wr_how_written); + WRITEMEM(write->wr_verifier.data, 8); + ADJUST_ARGS(); + } + return nfserr; +} + +static __be32 +nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_exchange_id *exid) +{ + __be32 *p; + char *major_id; + char *server_scope; + int major_id_sz; + int server_scope_sz; + uint64_t minor_id = 0; + + if (nfserr) + return nfserr; + + major_id = utsname()->nodename; + major_id_sz = strlen(major_id); + server_scope = utsname()->nodename; + server_scope_sz = strlen(server_scope); + + RESERVE_SPACE( + 8 /* eir_clientid */ + + 4 /* eir_sequenceid */ + + 4 /* eir_flags */ + + 4 /* spr_how (SP4_NONE) */ + + 8 /* so_minor_id */ + + 4 /* so_major_id.len */ + + (XDR_QUADLEN(major_id_sz) * 4) + + 4 /* eir_server_scope.len */ + + (XDR_QUADLEN(server_scope_sz) * 4) + + 4 /* eir_server_impl_id.count (0) */); + + WRITEMEM(&exid->clientid, 8); + WRITE32(exid->seqid); + WRITE32(exid->flags); + + /* state_protect4_r. 
Currently only support SP4_NONE */ + BUG_ON(exid->spa_how != SP4_NONE); + WRITE32(exid->spa_how); + + /* The server_owner struct */ + WRITE64(minor_id); /* Minor id */ + /* major id */ + WRITE32(major_id_sz); + WRITEMEM(major_id, major_id_sz); + + /* Server scope */ + WRITE32(server_scope_sz); + WRITEMEM(server_scope, server_scope_sz); + + /* Implementation id */ + WRITE32(0); /* zero length nfs_impl_id4 array */ + ADJUST_ARGS(); + return 0; +} + +static __be32 +nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_create_session *sess) +{ + __be32 *p; + + if (nfserr) + return nfserr; + + RESERVE_SPACE(24); + WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(sess->seqid); + WRITE32(sess->flags); + ADJUST_ARGS(); + + RESERVE_SPACE(28); + WRITE32(0); /* headerpadsz */ + WRITE32(sess->fore_channel.maxreq_sz); + WRITE32(sess->fore_channel.maxresp_sz); + WRITE32(sess->fore_channel.maxresp_cached); + WRITE32(sess->fore_channel.maxops); + WRITE32(sess->fore_channel.maxreqs); + WRITE32(sess->fore_channel.nr_rdma_attrs); + ADJUST_ARGS(); + + if (sess->fore_channel.nr_rdma_attrs) { + RESERVE_SPACE(4); + WRITE32(sess->fore_channel.rdma_attrs); + ADJUST_ARGS(); + } + + RESERVE_SPACE(28); + WRITE32(0); /* headerpadsz */ + WRITE32(sess->back_channel.maxreq_sz); + WRITE32(sess->back_channel.maxresp_sz); + WRITE32(sess->back_channel.maxresp_cached); + WRITE32(sess->back_channel.maxops); + WRITE32(sess->back_channel.maxreqs); + WRITE32(sess->back_channel.nr_rdma_attrs); + ADJUST_ARGS(); + + if (sess->back_channel.nr_rdma_attrs) { + RESERVE_SPACE(4); + WRITE32(sess->back_channel.rdma_attrs); + ADJUST_ARGS(); + } + return 0; +} + +static __be32 +nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_destroy_session *destroy_session) +{ + return nfserr; +} + +static __be32 +nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_free_stateid *free_stateid) +{ + __be32 *p; + + 
if (nfserr) + return nfserr; + + RESERVE_SPACE(4); + WRITE32(nfserr); + ADJUST_ARGS(); + return nfserr; +} + +static __be32 +nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_sequence *seq) +{ + __be32 *p; + + if (nfserr) + return nfserr; + + RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); + WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(seq->seqid); + WRITE32(seq->slotid); + /* Note slotid's are numbered from zero: */ + WRITE32(seq->maxslots - 1); /* sr_highest_slotid */ + WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ + WRITE32(seq->status_flags); + + ADJUST_ARGS(); + resp->cstate.datap = p; /* DRC cache data pointer */ + return 0; +} + +__be32 +nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_test_stateid *test_stateid) +{ + struct nfsd4_compoundargs *argp; + struct nfs4_client *cl = resp->cstate.session->se_client; + stateid_t si; + __be32 *p; + int i; + int valid; + + restore_buf(test_stateid->ts_saved_args, &test_stateid->ts_savedp); + argp = test_stateid->ts_saved_args; + + RESERVE_SPACE(4); + *p++ = htonl(test_stateid->ts_num_ids); + resp->p = p; + + nfs4_lock_state(); + for (i = 0; i < test_stateid->ts_num_ids; i++) { + nfsd4_decode_stateid(argp, &si); + valid = nfs4_validate_stateid(cl, &si); + RESERVE_SPACE(4); + *p++ = htonl(valid); + resp->p = p; + } + nfs4_unlock_state(); + + return nfserr; +} + +static __be32 +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) +{ + return nfserr; +} + +typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); + +/* + * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 + * since we don't need to filter out obsolete ops as this is + * done in the decoding phase. 
+ */ +static nfsd4_enc nfsd4_enc_ops[] = { + [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, + [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, + [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, + [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, + [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, + [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, + [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, + [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, + [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, + [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, + [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, + [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, + [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, + [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_READ] = (nfsd4_enc)nfsd4_encode_read, + [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, + [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, + [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, + [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, + [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, + [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, + [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, + [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, + [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, + + /* NFSv4.1 operations */ + [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = 
(nfsd4_enc)nfsd4_encode_bind_conn_to_session, + [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, + [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_free_stateid, + [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, + [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, + [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, + [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, + [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, +}; + +/* + * Calculate the total amount of memory that the compound response has taken + * after encoding the current operation with pad. + * + * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop() + * which was specified at nfsd4_operation, else pad is zero. + * + * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached. + * + * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so + * will be at least a page and will therefore hold the xdr_buf head. 
+ */ +int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) +{ + struct xdr_buf *xb = &resp->rqstp->rq_res; + struct nfsd4_session *session = NULL; + struct nfsd4_slot *slot = resp->cstate.slot; + u32 length, tlen = 0; + + if (!nfsd4_has_session(&resp->cstate)) + return 0; + + session = resp->cstate.session; + if (session == NULL) + return 0; + + if (xb->page_len == 0) { + length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; + } else { + if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) + tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; + + length = xb->head[0].iov_len + xb->page_len + tlen + pad; + } + dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, + length, xb->page_len, tlen, pad); + + if (length > session->se_fchannel.maxresp_sz) + return nfserr_rep_too_big; + + if (slot->sl_cachethis == 1 && + length > session->se_fchannel.maxresp_cached) + return nfserr_rep_too_big_to_cache; + + return 0; +} + +void +nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +{ + __be32 *statp; + __be32 *p; + + RESERVE_SPACE(8); + WRITE32(op->opnum); + statp = p++; /* to be backfilled at the end */ + ADJUST_ARGS(); + + if (op->opnum == OP_ILLEGAL) + goto status; + BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || + !nfsd4_enc_ops[op->opnum]); + op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); + /* nfsd4_check_drc_limit guarantees enough room for error status */ + if (!op->status) + op->status = nfsd4_check_resp_size(resp, 0); +status: + /* + * Note: We write the status directly, instead of using WRITE32(), + * since it is already in network byte order. + */ + *statp = op->status; +} + +/* + * Encode the reply stored in the stateowner reply cache + * + * XDR note: do not encode rp->rp_buflen: the buffer contains the + * previously sent already encoded operation. 
+ * + * called with nfs4_lock_state() held + */ +void +nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +{ + __be32 *p; + struct nfs4_replay *rp = op->replay; + + BUG_ON(!rp); + + RESERVE_SPACE(8); + WRITE32(op->opnum); + *p++ = rp->rp_status; /* already xdr'ed */ + ADJUST_ARGS(); + + RESERVE_SPACE(rp->rp_buflen); + WRITEMEM(rp->rp_buf, rp->rp_buflen); + ADJUST_ARGS(); +} + +int +nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) +{ + struct svc_rqst *rqstp = rq; + struct nfsd4_compoundargs *args = rqstp->rq_argp; + + if (args->ops != args->iops) { + kfree(args->ops); + args->ops = args->iops; + } + kfree(args->tmpp); + args->tmpp = NULL; + while (args->to_free) { + struct tmpbuf *tb = args->to_free; + args->to_free = tb->next; + tb->release(tb->buf); + kfree(tb); + } + return 1; +} + +int +nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) +{ + args->p = p; + args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; + args->pagelist = rqstp->rq_arg.pages; + args->pagelen = rqstp->rq_arg.page_len; + args->tmpp = NULL; + args->to_free = NULL; + args->ops = args->iops; + args->rqstp = rqstp; + + return !nfsd4_decode_compound(args); +} + +int +nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp) +{ + /* + * All that remains is to write the tag and operation count... 
+ */ + struct nfsd4_compound_state *cs = &resp->cstate; + struct kvec *iov; + p = resp->tagp; + *p++ = htonl(resp->taglen); + memcpy(p, resp->tag, resp->taglen); + p += XDR_QUADLEN(resp->taglen); + *p++ = htonl(resp->opcnt); + + if (rqstp->rq_res.page_len) + iov = &rqstp->rq_res.tail[0]; + else + iov = &rqstp->rq_res.head[0]; + iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; + BUG_ON(iov->iov_len > PAGE_SIZE); + if (nfsd4_has_session(cs)) { + if (cs->status != nfserr_replay_cache) { + nfsd4_store_cache_entry(resp); + dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); + cs->slot->sl_inuse = false; + } + /* Renew the clientid on success and on replay */ + release_session_client(cs->session); + nfsd4_put_session(cs->session); + } + return 1; +} + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c new file mode 100644 index 00000000000..2cbac34a55d --- /dev/null +++ b/fs/nfsd/nfscache.c @@ -0,0 +1,323 @@ +/* + * Request reply cache. This is currently a global cache, but this may + * change in the future and be a per-client cache. + * + * This code is heavily inspired by the 44BSD implementation, although + * it does things a bit differently. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/slab.h> + +#include "nfsd.h" +#include "cache.h" + +/* Size of reply cache. Common values are: + * 4.3BSD: 128 + * 4.4BSD: 256 + * Solaris2: 1024 + * DEC Unix: 512-4096 + */ +#define CACHESIZE 1024 +#define HASHSIZE 64 + +static struct hlist_head * cache_hash; +static struct list_head lru_head; +static int cache_disabled = 1; + +/* + * Calculate the hash index from an XID. 
+ */ +static inline u32 request_hash(u32 xid) +{ + u32 h = xid; + h ^= (xid >> 24); + return h & (HASHSIZE-1); +} + +static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); + +/* + * locking for the reply cache: + * A cache entry is "single use" if c_state == RC_INPROG + * Otherwise, it when accessing _prev or _next, the lock must be held. + */ +static DEFINE_SPINLOCK(cache_lock); + +int nfsd_reply_cache_init(void) +{ + struct svc_cacherep *rp; + int i; + + INIT_LIST_HEAD(&lru_head); + i = CACHESIZE; + while (i) { + rp = kmalloc(sizeof(*rp), GFP_KERNEL); + if (!rp) + goto out_nomem; + list_add(&rp->c_lru, &lru_head); + rp->c_state = RC_UNUSED; + rp->c_type = RC_NOCACHE; + INIT_HLIST_NODE(&rp->c_hash); + i--; + } + + cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); + if (!cache_hash) + goto out_nomem; + + cache_disabled = 0; + return 0; +out_nomem: + printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); + nfsd_reply_cache_shutdown(); + return -ENOMEM; +} + +void nfsd_reply_cache_shutdown(void) +{ + struct svc_cacherep *rp; + + while (!list_empty(&lru_head)) { + rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); + if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) + kfree(rp->c_replvec.iov_base); + list_del(&rp->c_lru); + kfree(rp); + } + + cache_disabled = 1; + + kfree (cache_hash); + cache_hash = NULL; +} + +/* + * Move cache entry to end of LRU list + */ +static void +lru_put_end(struct svc_cacherep *rp) +{ + list_move_tail(&rp->c_lru, &lru_head); +} + +/* + * Move a cache entry from one hash list to another + */ +static void +hash_refile(struct svc_cacherep *rp) +{ + hlist_del_init(&rp->c_hash); + hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); +} + +/* + * Try to find an entry matching the current call in the cache. When none + * is found, we grab the oldest unlocked entry off the LRU list. + * Note that no operation within the loop may sleep. 
+ */ +int +nfsd_cache_lookup(struct svc_rqst *rqstp) +{ + struct hlist_node *hn; + struct hlist_head *rh; + struct svc_cacherep *rp; + __be32 xid = rqstp->rq_xid; + u32 proto = rqstp->rq_prot, + vers = rqstp->rq_vers, + proc = rqstp->rq_proc; + unsigned long age; + int type = rqstp->rq_cachetype; + int rtn; + + rqstp->rq_cacherep = NULL; + if (cache_disabled || type == RC_NOCACHE) { + nfsdstats.rcnocache++; + return RC_DOIT; + } + + spin_lock(&cache_lock); + rtn = RC_DOIT; + + rh = &cache_hash[request_hash(xid)]; + hlist_for_each_entry(rp, hn, rh, c_hash) { + if (rp->c_state != RC_UNUSED && + xid == rp->c_xid && proc == rp->c_proc && + proto == rp->c_prot && vers == rp->c_vers && + time_before(jiffies, rp->c_timestamp + 120*HZ) && + memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { + nfsdstats.rchits++; + goto found_entry; + } + } + nfsdstats.rcmisses++; + + /* This loop shouldn't take more than a few iterations normally */ + { + int safe = 0; + list_for_each_entry(rp, &lru_head, c_lru) { + if (rp->c_state != RC_INPROG) + break; + if (safe++ > CACHESIZE) { + printk("nfsd: loop in repcache LRU list\n"); + cache_disabled = 1; + goto out; + } + } + } + + /* All entries on the LRU are in-progress. 
This should not happen */ + if (&rp->c_lru == &lru_head) { + static int complaints; + + printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); + if (++complaints > 5) { + printk(KERN_WARNING "nfsd: disabling repcache.\n"); + cache_disabled = 1; + } + goto out; + } + + rqstp->rq_cacherep = rp; + rp->c_state = RC_INPROG; + rp->c_xid = xid; + rp->c_proc = proc; + memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr)); + rp->c_prot = proto; + rp->c_vers = vers; + rp->c_timestamp = jiffies; + + hash_refile(rp); + + /* release any buffer */ + if (rp->c_type == RC_REPLBUFF) { + kfree(rp->c_replvec.iov_base); + rp->c_replvec.iov_base = NULL; + } + rp->c_type = RC_NOCACHE; + out: + spin_unlock(&cache_lock); + return rtn; + +found_entry: + /* We found a matching entry which is either in progress or done. */ + age = jiffies - rp->c_timestamp; + rp->c_timestamp = jiffies; + lru_put_end(rp); + + rtn = RC_DROPIT; + /* Request being processed or excessive rexmits */ + if (rp->c_state == RC_INPROG || age < RC_DELAY) + goto out; + + /* From the hall of fame of impractical attacks: + * Is this a user who tries to snoop on the cache? */ + rtn = RC_DOIT; + if (!rqstp->rq_secure && rp->c_secure) + goto out; + + /* Compose RPC reply header */ + switch (rp->c_type) { + case RC_NOCACHE: + break; + case RC_REPLSTAT: + svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat); + rtn = RC_REPLY; + break; + case RC_REPLBUFF: + if (!nfsd_cache_append(rqstp, &rp->c_replvec)) + goto out; /* should not happen */ + rtn = RC_REPLY; + break; + default: + printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); + rp->c_state = RC_UNUSED; + } + + goto out; +} + +/* + * Update a cache entry. This is called from nfsd_dispatch when + * the procedure has been executed and the complete reply is in + * rqstp->rq_res. 
+ * + * We're copying around data here rather than swapping buffers because + * the toplevel loop requires max-sized buffers, which would be a waste + * of memory for a cache with a max reply size of 100 bytes (diropokres). + * + * If we should start to use different types of cache entries tailored + * specifically for attrstat and fh's, we may save even more space. + * + * Also note that a cachetype of RC_NOCACHE can legally be passed when + * nfsd failed to encode a reply that otherwise would have been cached. + * In this case, nfsd_cache_update is called with statp == NULL. + */ +void +nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) +{ + struct svc_cacherep *rp; + struct kvec *resv = &rqstp->rq_res.head[0], *cachv; + int len; + + if (!(rp = rqstp->rq_cacherep) || cache_disabled) + return; + + len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); + len >>= 2; + + /* Don't cache excessive amounts of data and XDR failures */ + if (!statp || len > (256 >> 2)) { + rp->c_state = RC_UNUSED; + return; + } + + switch (cachetype) { + case RC_REPLSTAT: + if (len != 1) + printk("nfsd: RC_REPLSTAT/reply len %d!\n",len); + rp->c_replstat = *statp; + break; + case RC_REPLBUFF: + cachv = &rp->c_replvec; + cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); + if (!cachv->iov_base) { + spin_lock(&cache_lock); + rp->c_state = RC_UNUSED; + spin_unlock(&cache_lock); + return; + } + cachv->iov_len = len << 2; + memcpy(cachv->iov_base, statp, len << 2); + break; + } + spin_lock(&cache_lock); + lru_put_end(rp); + rp->c_secure = rqstp->rq_secure; + rp->c_type = cachetype; + rp->c_state = RC_DONE; + rp->c_timestamp = jiffies; + spin_unlock(&cache_lock); + return; +} + +/* + * Copy cached reply to current reply buffer. Should always fit. + * FIXME as reply is in a page, we should just attach the page, and + * keep a refcount.... 
+ */ +static int +nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) +{ + struct kvec *vec = &rqstp->rq_res.head[0]; + + if (vec->iov_len + data->iov_len > PAGE_SIZE) { + printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n", + data->iov_len); + return 0; + } + memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len); + vec->iov_len += data->iov_len; + return 1; +} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c new file mode 100644 index 00000000000..748eda93ce5 --- /dev/null +++ b/fs/nfsd/nfsctl.c @@ -0,0 +1,1192 @@ +/* + * Syscall interface to knfsd. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/ctype.h> + +#include <linux/sunrpc/svcsock.h> +#include <linux/lockd/lockd.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/gss_krb5_enctypes.h> +#include <linux/module.h> + +#include "idmap.h" +#include "nfsd.h" +#include "cache.h" +#include "fault_inject.h" + +/* + * We have a single directory with several nodes in it. + */ +enum { + NFSD_Root = 1, + NFSD_List, + NFSD_Export_features, + NFSD_Fh, + NFSD_FO_UnlockIP, + NFSD_FO_UnlockFS, + NFSD_Threads, + NFSD_Pool_Threads, + NFSD_Pool_Stats, + NFSD_Versions, + NFSD_Ports, + NFSD_MaxBlkSize, + NFSD_SupportedEnctypes, + /* + * The below MUST come last. Otherwise we leave a hole in nfsd_files[] + * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops + */ +#ifdef CONFIG_NFSD_V4 + NFSD_Leasetime, + NFSD_Gracetime, + NFSD_RecoveryDir, +#endif +}; + +/* + * write() for these nodes. 
+ */ +static ssize_t write_filehandle(struct file *file, char *buf, size_t size); +static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); +static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); +static ssize_t write_threads(struct file *file, char *buf, size_t size); +static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); +static ssize_t write_versions(struct file *file, char *buf, size_t size); +static ssize_t write_ports(struct file *file, char *buf, size_t size); +static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); +#ifdef CONFIG_NFSD_V4 +static ssize_t write_leasetime(struct file *file, char *buf, size_t size); +static ssize_t write_gracetime(struct file *file, char *buf, size_t size); +static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); +#endif + +static ssize_t (*write_op[])(struct file *, char *, size_t) = { + [NFSD_Fh] = write_filehandle, + [NFSD_FO_UnlockIP] = write_unlock_ip, + [NFSD_FO_UnlockFS] = write_unlock_fs, + [NFSD_Threads] = write_threads, + [NFSD_Pool_Threads] = write_pool_threads, + [NFSD_Versions] = write_versions, + [NFSD_Ports] = write_ports, + [NFSD_MaxBlkSize] = write_maxblksize, +#ifdef CONFIG_NFSD_V4 + [NFSD_Leasetime] = write_leasetime, + [NFSD_Gracetime] = write_gracetime, + [NFSD_RecoveryDir] = write_recoverydir, +#endif +}; + +static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) +{ + ino_t ino = file->f_path.dentry->d_inode->i_ino; + char *data; + ssize_t rv; + + if (ino >= ARRAY_SIZE(write_op) || !write_op[ino]) + return -EINVAL; + + data = simple_transaction_get(file, buf, size); + if (IS_ERR(data)) + return PTR_ERR(data); + + rv = write_op[ino](file, data, size); + if (rv >= 0) { + simple_transaction_set(file, rv); + rv = size; + } + return rv; +} + +static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) +{ + if (! 
file->private_data) { + /* An attempt to read a transaction file without writing + * causes a 0-byte write so that the file can return + * state information + */ + ssize_t rv = nfsctl_transaction_write(file, buf, 0, pos); + if (rv < 0) + return rv; + } + return simple_transaction_read(file, buf, size, pos); +} + +static const struct file_operations transaction_ops = { + .write = nfsctl_transaction_write, + .read = nfsctl_transaction_read, + .release = simple_transaction_release, + .llseek = default_llseek, +}; + +static int exports_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nfs_exports_op); +} + +static const struct file_operations exports_operations = { + .open = exports_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +static int export_features_show(struct seq_file *m, void *v) +{ + seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS); + return 0; +} + +static int export_features_open(struct inode *inode, struct file *file) +{ + return single_open(file, export_features_show, NULL); +} + +static struct file_operations export_features_operations = { + .open = export_features_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) +static int supported_enctypes_show(struct seq_file *m, void *v) +{ + seq_printf(m, KRB5_SUPPORTED_ENCTYPES); + return 0; +} + +static int supported_enctypes_open(struct inode *inode, struct file *file) +{ + return single_open(file, supported_enctypes_show, NULL); +} + +static struct file_operations supported_enctypes_ops = { + .open = supported_enctypes_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ + +extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); +extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 
+ +static const struct file_operations pool_stats_operations = { + .open = nfsd_pool_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = nfsd_pool_stats_release, + .owner = THIS_MODULE, +}; + +/*----------------------------------------------------------------------------*/ +/* + * payload - write methods + */ + + +/** + * write_unlock_ip - Release all locks used by a client + * + * Experimental. + * + * Input: + * buf: '\n'-terminated C string containing a + * presentation format IP address + * size: length of C string in @buf + * Output: + * On success: returns zero if all specified locks were released; + * returns one if one or more locks were not released + * On error: return code is negative errno value + */ +static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) +{ + struct sockaddr_storage address; + struct sockaddr *sap = (struct sockaddr *)&address; + size_t salen = sizeof(address); + char *fo_path; + + /* sanity check */ + if (size == 0) + return -EINVAL; + + if (buf[size-1] != '\n') + return -EINVAL; + + fo_path = buf; + if (qword_get(&buf, fo_path, size) < 0) + return -EINVAL; + + if (rpc_pton(fo_path, size, sap, salen) == 0) + return -EINVAL; + + return nlmsvc_unlock_all_by_ip(sap); +} + +/** + * write_unlock_fs - Release all locks on a local file system + * + * Experimental. 
+ * + * Input: + * buf: '\n'-terminated C string containing the + * absolute pathname of a local file system + * size: length of C string in @buf + * Output: + * On success: returns zero if all specified locks were released; + * returns one if one or more locks were not released + * On error: return code is negative errno value + */ +static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) +{ + struct path path; + char *fo_path; + int error; + + /* sanity check */ + if (size == 0) + return -EINVAL; + + if (buf[size-1] != '\n') + return -EINVAL; + + fo_path = buf; + if (qword_get(&buf, fo_path, size) < 0) + return -EINVAL; + + error = kern_path(fo_path, 0, &path); + if (error) + return error; + + /* + * XXX: Needs better sanity checking. Otherwise we could end up + * releasing locks on the wrong file system. + * + * For example: + * 1. Does the path refer to a directory? + * 2. Is that directory a mount point, or + * 3. Is that directory the root of an exported file system? + */ + error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb); + + path_put(&path); + return error; +} + +/** + * write_filehandle - Get a variable-length NFS file handle by path + * + * On input, the buffer contains a '\n'-terminated C string comprised of + * three alphanumeric words separated by whitespace. The string may + * contain escape sequences. 
+ * + * Input: + * buf: + * domain: client domain name + * path: export pathname + * maxsize: numeric maximum size of + * @buf + * size: length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string containing a ASCII hex text version + * of the NFS file handle; + * return code is the size in bytes of the string + * On error: return code is negative errno value + */ +static ssize_t write_filehandle(struct file *file, char *buf, size_t size) +{ + char *dname, *path; + int uninitialized_var(maxsize); + char *mesg = buf; + int len; + struct auth_domain *dom; + struct knfsd_fh fh; + + if (size == 0) + return -EINVAL; + + if (buf[size-1] != '\n') + return -EINVAL; + buf[size-1] = 0; + + dname = mesg; + len = qword_get(&mesg, dname, size); + if (len <= 0) + return -EINVAL; + + path = dname+len+1; + len = qword_get(&mesg, path, size); + if (len <= 0) + return -EINVAL; + + len = get_int(&mesg, &maxsize); + if (len) + return len; + + if (maxsize < NFS_FHSIZE) + return -EINVAL; + if (maxsize > NFS3_FHSIZE) + maxsize = NFS3_FHSIZE; + + if (qword_get(&mesg, mesg, size)>0) + return -EINVAL; + + /* we have all the words, they are in buf.. 
*/ + dom = unix_domain_find(dname); + if (!dom) + return -ENOMEM; + + len = exp_rootfh(dom, path, &fh, maxsize); + auth_domain_put(dom); + if (len) + return len; + + mesg = buf; + len = SIMPLE_TRANSACTION_LIMIT; + qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); + mesg[-1] = '\n'; + return mesg - buf; +} + +/** + * write_threads - Start NFSD, or report the current number of running threads + * + * Input: + * buf: ignored + * size: zero + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string numeric value representing the number of + * running NFSD threads; + * return code is the size in bytes of the string + * On error: return code is zero + * + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the + * number of NFSD threads to start + * size: non-zero length of C string in @buf + * Output: + * On success: NFS service is started; + * passed-in buffer filled with '\n'-terminated C + * string numeric value representing the number of + * running NFSD threads; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_threads(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + int rv; + if (size > 0) { + int newthreads; + rv = get_int(&mesg, &newthreads); + if (rv) + return rv; + if (newthreads < 0) + return -EINVAL; + rv = nfsd_svc(NFS_PORT, newthreads); + if (rv < 0) + return rv; + } else + rv = nfsd_nrthreads(); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); +} + +/** + * write_pool_threads - Set or report the current number of threads per pool + * + * Input: + * buf: ignored + * size: zero + * + * OR + * + * Input: + * buf: C string containing whitespace- + * separated unsigned integer values + * representing the number of NFSD + * threads to start in each pool + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with 
'\n'-terminated C + * string containing integer values representing the + * number of NFSD threads in each pool; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) +{ + /* if size > 0, look for an array of number of threads per node + * and apply them then write out number of threads per node as reply + */ + char *mesg = buf; + int i; + int rv; + int len; + int npools; + int *nthreads; + + mutex_lock(&nfsd_mutex); + npools = nfsd_nrpools(); + if (npools == 0) { + /* + * NFS is shut down. The admin can start it by + * writing to the threads file but NOT the pool_threads + * file, sorry. Report zero threads. + */ + mutex_unlock(&nfsd_mutex); + strcpy(buf, "0\n"); + return strlen(buf); + } + + nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); + rv = -ENOMEM; + if (nthreads == NULL) + goto out_free; + + if (size > 0) { + for (i = 0; i < npools; i++) { + rv = get_int(&mesg, &nthreads[i]); + if (rv == -ENOENT) + break; /* fewer numbers than pools */ + if (rv) + goto out_free; /* syntax error */ + rv = -EINVAL; + if (nthreads[i] < 0) + goto out_free; + } + rv = nfsd_set_nrthreads(i, nthreads); + if (rv) + goto out_free; + } + + rv = nfsd_get_nrthreads(npools, nthreads); + if (rv) + goto out_free; + + mesg = buf; + size = SIMPLE_TRANSACTION_LIMIT; + for (i = 0; i < npools && size > 0; i++) { + snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? 
'\n' : ' ')); + len = strlen(mesg); + size -= len; + mesg += len; + } + rv = mesg - buf; +out_free: + kfree(nthreads); + mutex_unlock(&nfsd_mutex); + return rv; +} + +static ssize_t __write_versions(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + char *vers, *minorp, sign; + int len, num, remaining; + unsigned minor; + ssize_t tlen = 0; + char *sep; + + if (size>0) { + if (nfsd_serv) + /* Cannot change versions without updating + * nfsd_serv->sv_xdrsize, and reallocing + * rq_argp and rq_resp + */ + return -EBUSY; + if (buf[size-1] != '\n') + return -EINVAL; + buf[size-1] = 0; + + vers = mesg; + len = qword_get(&mesg, vers, size); + if (len <= 0) return -EINVAL; + do { + sign = *vers; + if (sign == '+' || sign == '-') + num = simple_strtol((vers+1), &minorp, 0); + else + num = simple_strtol(vers, &minorp, 0); + if (*minorp == '.') { + if (num < 4) + return -EINVAL; + minor = simple_strtoul(minorp+1, NULL, 0); + if (minor == 0) + return -EINVAL; + if (nfsd_minorversion(minor, sign == '-' ? + NFSD_CLEAR : NFSD_SET) < 0) + return -EINVAL; + goto next; + } + switch(num) { + case 2: + case 3: + case 4: + nfsd_vers(num, sign == '-' ? 
NFSD_CLEAR : NFSD_SET); + break; + default: + return -EINVAL; + } + next: + vers += len + 1; + } while ((len = qword_get(&mesg, vers, size)) > 0); + /* If all get turned off, turn them back on, as + * having no versions is BAD + */ + nfsd_reset_versions(); + } + + /* Now write current state into reply buffer */ + len = 0; + sep = ""; + remaining = SIMPLE_TRANSACTION_LIMIT; + for (num=2 ; num <= 4 ; num++) + if (nfsd_vers(num, NFSD_AVAIL)) { + len = snprintf(buf, remaining, "%s%c%d", sep, + nfsd_vers(num, NFSD_TEST)?'+':'-', + num); + sep = " "; + + if (len > remaining) + break; + remaining -= len; + buf += len; + tlen += len; + } + if (nfsd_vers(4, NFSD_AVAIL)) + for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; + minor++) { + len = snprintf(buf, remaining, " %c4.%u", + (nfsd_vers(4, NFSD_TEST) && + nfsd_minorversion(minor, NFSD_TEST)) ? + '+' : '-', + minor); + + if (len > remaining) + break; + remaining -= len; + buf += len; + tlen += len; + } + + len = snprintf(buf, remaining, "\n"); + if (len > remaining) + return -EINVAL; + return tlen + len; +} + +/** + * write_versions - Set or report the available NFS protocol versions + * + * Input: + * buf: ignored + * size: zero + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string containing positive or negative integer + * values representing the current status of each + * protocol version; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + * + * OR + * + * Input: + * buf: C string containing whitespace- + * separated positive or negative + * integer values representing NFS + * protocol versions to enable ("+n") + * or disable ("-n") + * size: non-zero length of C string in @buf + * Output: + * On success: status of zero or more protocol versions has + * been updated; passed-in buffer filled with + * '\n'-terminated C string containing positive + * or negative integer values representing the + * current status of each 
protocol version; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_versions(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_versions(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + +/* + * Zero-length write. Return a list of NFSD's current listener + * transports. + */ +static ssize_t __write_ports_names(char *buf) +{ + if (nfsd_serv == NULL) + return 0; + return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); +} + +/* + * A single 'fd' number was written, in which case it must be for + * a socket of a supported family/protocol, and we use it as an + * nfsd listener. + */ +static ssize_t __write_ports_addfd(char *buf) +{ + char *mesg = buf; + int fd, err; + + err = get_int(&mesg, &fd); + if (err != 0 || fd < 0) + return -EINVAL; + + err = nfsd_create_serv(); + if (err != 0) + return err; + + err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + if (err < 0) { + svc_destroy(nfsd_serv); + return err; + } + + /* Decrease the count, but don't shut down the service */ + nfsd_serv->sv_nrthreads--; + return err; +} + +/* + * A '-' followed by the 'name' of a socket means we close the socket. + */ +static ssize_t __write_ports_delfd(char *buf) +{ + char *toclose; + int len = 0; + + toclose = kstrdup(buf + 1, GFP_KERNEL); + if (toclose == NULL) + return -ENOMEM; + + if (nfsd_serv != NULL) + len = svc_sock_names(nfsd_serv, buf, + SIMPLE_TRANSACTION_LIMIT, toclose); + kfree(toclose); + return len; +} + +/* + * A transport listener is added by writing it's transport name and + * a port number. 
+ */ +static ssize_t __write_ports_addxprt(char *buf) +{ + char transport[16]; + struct svc_xprt *xprt; + int port, err; + + if (sscanf(buf, "%15s %4u", transport, &port) != 2) + return -EINVAL; + + if (port < 1 || port > USHRT_MAX) + return -EINVAL; + + err = nfsd_create_serv(); + if (err != 0) + return err; + + err = svc_create_xprt(nfsd_serv, transport, &init_net, + PF_INET, port, SVC_SOCK_ANONYMOUS); + if (err < 0) + goto out_err; + + err = svc_create_xprt(nfsd_serv, transport, &init_net, + PF_INET6, port, SVC_SOCK_ANONYMOUS); + if (err < 0 && err != -EAFNOSUPPORT) + goto out_close; + + /* Decrease the count, but don't shut down the service */ + nfsd_serv->sv_nrthreads--; + return 0; +out_close: + xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); + if (xprt != NULL) { + svc_close_xprt(xprt); + svc_xprt_put(xprt); + } +out_err: + svc_destroy(nfsd_serv); + return err; +} + +/* + * A transport listener is removed by writing a "-", it's transport + * name, and it's port number. 
+ */ +static ssize_t __write_ports_delxprt(char *buf) +{ + struct svc_xprt *xprt; + char transport[16]; + int port; + + if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) + return -EINVAL; + + if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) + return -EINVAL; + + xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); + if (xprt == NULL) + return -ENOTCONN; + + svc_close_xprt(xprt); + svc_xprt_put(xprt); + return 0; +} + +static ssize_t __write_ports(struct file *file, char *buf, size_t size) +{ + if (size == 0) + return __write_ports_names(buf); + + if (isdigit(buf[0])) + return __write_ports_addfd(buf); + + if (buf[0] == '-' && isdigit(buf[1])) + return __write_ports_delfd(buf); + + if (isalpha(buf[0])) + return __write_ports_addxprt(buf); + + if (buf[0] == '-' && isalpha(buf[1])) + return __write_ports_delxprt(buf); + + return -EINVAL; +} + +/** + * write_ports - Pass a socket file descriptor or transport name to listen on + * + * Input: + * buf: ignored + * size: zero + * Output: + * On success: passed-in buffer filled with a '\n'-terminated C + * string containing a whitespace-separated list of + * named NFSD listeners; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + * + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing a bound + * but unconnected socket that is to be + * used as an NFSD listener; listen(3) + * must be called for a SOCK_STREAM + * socket, otherwise it is ignored + * size: non-zero length of C string in @buf + * Output: + * On success: NFS service is started; + * passed-in buffer filled with a '\n'-terminated C + * string containing a unique alphanumeric name of + * the listener; + * return code is the size in bytes of the string + * On error: return code is a negative errno value + * + * OR + * + * Input: + * buf: C string containing a "-" followed + * by an integer value representing a + * previously passed in socket file 
+ * descriptor + * size: non-zero length of C string in @buf + * Output: + * On success: NFS service no longer listens on that socket; + * passed-in buffer filled with a '\n'-terminated C + * string containing a unique name of the listener; + * return code is the size in bytes of the string + * On error: return code is a negative errno value + * + * OR + * + * Input: + * buf: C string containing a transport + * name and an unsigned integer value + * representing the port to listen on, + * separated by whitespace + * size: non-zero length of C string in @buf + * Output: + * On success: returns zero; NFS service is started + * On error: return code is a negative errno value + * + * OR + * + * Input: + * buf: C string containing a "-" followed + * by a transport name and an unsigned + * integer value representing the port + * to listen on, separated by whitespace + * size: non-zero length of C string in @buf + * Output: + * On success: returns zero; NFS service no longer listens + * on that transport + * On error: return code is a negative errno value + */ +static ssize_t write_ports(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_ports(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + + +int nfsd_max_blksize; + +/** + * write_maxblksize - Set or report the current NFS blksize + * + * Input: + * buf: ignored + * size: zero + * + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the new + * NFS blksize + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C string + * containing numeric value of the current NFS blksize + * setting; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + if (size > 0) { + int bsize; + int rv = 
get_int(&mesg, &bsize); + if (rv) + return rv; + /* force bsize into allowed range and + * required alignment. + */ + if (bsize < 1024) + bsize = 1024; + if (bsize > NFSSVC_MAXBLKSIZE) + bsize = NFSSVC_MAXBLKSIZE; + bsize &= ~(1024-1); + mutex_lock(&nfsd_mutex); + if (nfsd_serv) { + mutex_unlock(&nfsd_mutex); + return -EBUSY; + } + nfsd_max_blksize = bsize; + mutex_unlock(&nfsd_mutex); + } + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", + nfsd_max_blksize); +} + +#ifdef CONFIG_NFSD_V4 +static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +{ + char *mesg = buf; + int rv, i; + + if (size > 0) { + if (nfsd_serv) + return -EBUSY; + rv = get_int(&mesg, &i); + if (rv) + return rv; + /* + * Some sanity checking. We don't have a reason for + * these particular numbers, but problems with the + * extremes are: + * - Too short: the briefest network outage may + * cause clients to lose all their locks. Also, + * the frequent polling may be wasteful. + * - Too long: do you really want reboot recovery + * to take more than an hour? Or to make other + * clients wait an hour before being able to + * revoke a dead client's locks? 
+ */ + if (i < 10 || i > 3600) + return -EINVAL; + *time = i; + } + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); +} + +static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __nfsd4_write_time(file, buf, size, time); + mutex_unlock(&nfsd_mutex); + return rv; +} + +/** + * write_leasetime - Set or report the current NFSv4 lease time + * + * Input: + * buf: ignored + * size: zero + * + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the new + * NFSv4 lease expiry time + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string containing unsigned integer value of the + * current lease expiry time; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +{ + return nfsd4_write_time(file, buf, size, &nfsd4_lease); +} + +/** + * write_gracetime - Set or report current NFSv4 grace period time + * + * As above, but sets the time of the NFSv4 grace period. + * + * Note this should never be set to less than the *previous* + * lease-period time, but we don't try to enforce this. (In the common + * case (a new boot), we don't know what the previous lease time was + * anyway.) 
+ */ +static ssize_t write_gracetime(struct file *file, char *buf, size_t size) +{ + return nfsd4_write_time(file, buf, size, &nfsd4_grace); +} + +extern char *nfs4_recoverydir(void); + +static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + char *recdir; + int len, status; + + if (size > 0) { + if (nfsd_serv) + return -EBUSY; + if (size > PATH_MAX || buf[size-1] != '\n') + return -EINVAL; + buf[size-1] = 0; + + recdir = mesg; + len = qword_get(&mesg, recdir, size); + if (len <= 0) + return -EINVAL; + + status = nfs4_reset_recoverydir(recdir); + if (status) + return status; + } + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", + nfs4_recoverydir()); +} + +/** + * write_recoverydir - Set or report the pathname of the recovery directory + * + * Input: + * buf: ignored + * size: zero + * + * OR + * + * Input: + * buf: C string containing the pathname + * of the directory on a local file + * system containing permanent NFSv4 + * recovery data + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C string + * containing the current recovery pathname setting; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_recoverydir(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + +#endif + +/*----------------------------------------------------------------------------*/ +/* + * populating the filesystem. 
+ */ + +static int nfsd_fill_super(struct super_block * sb, void * data, int silent) +{ + static struct tree_descr nfsd_files[] = { + [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_Export_features] = {"export_features", + &export_features_operations, S_IRUGO}, + [NFSD_FO_UnlockIP] = {"unlock_ip", + &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_FO_UnlockFS] = {"unlock_filesystem", + &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, + [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) + [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ +#ifdef CONFIG_NFSD_V4 + [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, +#endif + /* last one */ {""} + }; + return simple_fill_super(sb, 0x6e667364, nfsd_files); +} + +static struct dentry *nfsd_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_single(fs_type, flags, data, nfsd_fill_super); +} + +static struct file_system_type nfsd_fs_type = { + .owner = THIS_MODULE, + .name = "nfsd", + .mount = nfsd_mount, + .kill_sb = kill_litter_super, +}; + +#ifdef CONFIG_PROC_FS +static int create_proc_exports_entry(void) +{ + struct proc_dir_entry *entry; + + entry = proc_mkdir("fs/nfs", NULL); + if (!entry) + 
return -ENOMEM; + entry = proc_create("exports", 0, entry, &exports_operations); + if (!entry) + return -ENOMEM; + return 0; +} +#else /* CONFIG_PROC_FS */ +static int create_proc_exports_entry(void) +{ + return 0; +} +#endif + +static int __init init_nfsd(void) +{ + int retval; + printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); + + retval = nfsd4_init_slabs(); + if (retval) + return retval; + nfs4_state_init(); + retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ + if (retval) + goto out_free_slabs; + nfsd_stat_init(); /* Statistics */ + retval = nfsd_reply_cache_init(); + if (retval) + goto out_free_stat; + retval = nfsd_export_init(); + if (retval) + goto out_free_cache; + nfsd_lockd_init(); /* lockd->nfsd callbacks */ + retval = nfsd_idmap_init(); + if (retval) + goto out_free_lockd; + retval = create_proc_exports_entry(); + if (retval) + goto out_free_idmap; + retval = register_filesystem(&nfsd_fs_type); + if (retval) + goto out_free_all; + return 0; +out_free_all: + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); +out_free_idmap: + nfsd_idmap_shutdown(); +out_free_lockd: + nfsd_lockd_shutdown(); + nfsd_export_shutdown(); +out_free_cache: + nfsd_reply_cache_shutdown(); +out_free_stat: + nfsd_stat_shutdown(); + nfsd_fault_inject_cleanup(); +out_free_slabs: + nfsd4_free_slabs(); + return retval; +} + +static void __exit exit_nfsd(void) +{ + nfsd_export_shutdown(); + nfsd_reply_cache_shutdown(); + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); + nfsd_stat_shutdown(); + nfsd_lockd_shutdown(); + nfsd_idmap_shutdown(); + nfsd4_free_slabs(); + nfsd_fault_inject_cleanup(); + unregister_filesystem(&nfsd_fs_type); +} + +MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +MODULE_LICENSE("GPL"); +module_init(init_nfsd) +module_exit(exit_nfsd) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h new file mode 100644 index 00000000000..1d1e8589b4c --- /dev/null +++ 
b/fs/nfsd/nfsd.h @@ -0,0 +1,375 @@ +/* + * Hodge-podge collection of knfsd-related stuff. + * I will sort this out later. + * + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef LINUX_NFSD_NFSD_H +#define LINUX_NFSD_NFSD_H + +#include <linux/types.h> +#include <linux/mount.h> + +#include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs3.h> +#include <linux/nfs4.h> +#include <linux/sunrpc/msg_prot.h> + +#include <linux/nfsd/debug.h> +#include <linux/nfsd/export.h> +#include <linux/nfsd/stats.h> + +/* + * nfsd version + */ +#define NFSD_SUPPORTED_MINOR_VERSION 1 +/* + * Maximum blocksizes supported by daemon under various circumstances. + */ +#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD +/* NFSv2 is limited by the protocol specification, see RFC 1094 */ +#define NFSSVC_MAXBLKSIZE_V2 (8*1024) + + +/* + * Largest number of bytes we need to allocate for an NFS + * call or reply. Used to control buffer sizes. We use + * the length of v3 WRITE, READDIR and READDIR replies + * which are an RPC header, up to 26 XDR units of reply + * data, and some page data. + * + * Note that accuracy here doesn't matter too much as the + * size is rounded up to a page size when allocating space. + */ +#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE) + +struct readdir_cd { + __be32 err; /* 0, nfserr, or nfserr_eof */ +}; + + +extern struct svc_program nfsd_program; +extern struct svc_version nfsd_version2, nfsd_version3, + nfsd_version4; +extern u32 nfsd_supported_minorversion; +extern struct mutex nfsd_mutex; +extern struct svc_serv *nfsd_serv; +extern spinlock_t nfsd_drc_lock; +extern unsigned int nfsd_drc_max_mem; +extern unsigned int nfsd_drc_mem_used; + +extern const struct seq_operations nfs_exports_op; + +/* + * Function prototypes. 
+ */ +int nfsd_svc(unsigned short port, int nrservs); +int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); + +int nfsd_nrthreads(void); +int nfsd_nrpools(void); +int nfsd_get_nrthreads(int n, int *); +int nfsd_set_nrthreads(int n, int *); + +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +#ifdef CONFIG_NFSD_V2_ACL +extern struct svc_version nfsd_acl_version2; +#else +#define nfsd_acl_version2 NULL +#endif +#ifdef CONFIG_NFSD_V3_ACL +extern struct svc_version nfsd_acl_version3; +#else +#define nfsd_acl_version3 NULL +#endif +#endif + +enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; +int nfsd_vers(int vers, enum vers_op change); +int nfsd_minorversion(u32 minorversion, enum vers_op change); +void nfsd_reset_versions(void); +int nfsd_create_serv(void); + +extern int nfsd_max_blksize; + +static inline int nfsd_v4client(struct svc_rqst *rq) +{ + return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; +} + +/* + * NFSv4 State + */ +#ifdef CONFIG_NFSD_V4 +extern unsigned int max_delegations; +void nfs4_state_init(void); +int nfsd4_init_slabs(void); +void nfsd4_free_slabs(void); +int nfs4_state_start(void); +void nfs4_state_shutdown(void); +void nfs4_reset_lease(time_t leasetime); +int nfs4_reset_recoverydir(char *recdir); +#else +static inline void nfs4_state_init(void) { } +static inline int nfsd4_init_slabs(void) { return 0; } +static inline void nfsd4_free_slabs(void) { } +static inline int nfs4_state_start(void) { return 0; } +static inline void nfs4_state_shutdown(void) { } +static inline void nfs4_reset_lease(time_t leasetime) { } +static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } +#endif + +/* + * lockd binding + */ +void nfsd_lockd_init(void); +void nfsd_lockd_shutdown(void); + + +/* + * These macros provide pre-xdr'ed values for faster operation. 
+ */ +#define nfs_ok cpu_to_be32(NFS_OK) +#define nfserr_perm cpu_to_be32(NFSERR_PERM) +#define nfserr_noent cpu_to_be32(NFSERR_NOENT) +#define nfserr_io cpu_to_be32(NFSERR_IO) +#define nfserr_nxio cpu_to_be32(NFSERR_NXIO) +#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN) +#define nfserr_acces cpu_to_be32(NFSERR_ACCES) +#define nfserr_exist cpu_to_be32(NFSERR_EXIST) +#define nfserr_xdev cpu_to_be32(NFSERR_XDEV) +#define nfserr_nodev cpu_to_be32(NFSERR_NODEV) +#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR) +#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR) +#define nfserr_inval cpu_to_be32(NFSERR_INVAL) +#define nfserr_fbig cpu_to_be32(NFSERR_FBIG) +#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC) +#define nfserr_rofs cpu_to_be32(NFSERR_ROFS) +#define nfserr_mlink cpu_to_be32(NFSERR_MLINK) +#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP) +#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG) +#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY) +#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT) +#define nfserr_stale cpu_to_be32(NFSERR_STALE) +#define nfserr_remote cpu_to_be32(NFSERR_REMOTE) +#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH) +#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE) +#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC) +#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE) +#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP) +#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL) +#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT) +#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE) +#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX) +#define nfserr_denied cpu_to_be32(NFSERR_DENIED) +#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK) +#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED) +#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE) +#define nfserr_same cpu_to_be32(NFSERR_SAME) +#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE) +#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID) +#define 
nfserr_resource cpu_to_be32(NFSERR_RESOURCE) +#define nfserr_moved cpu_to_be32(NFSERR_MOVED) +#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE) +#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH) +#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED) +#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID) +#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID) +#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID) +#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) +#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) +#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) +#define nfserr_lock_range cpu_to_be32(NFSERR_LOCK_RANGE) +#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) +#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) +#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) +#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE) +#define nfserr_badowner cpu_to_be32(NFSERR_BADOWNER) +#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD) +#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL) +#define nfserr_grace cpu_to_be32(NFSERR_GRACE) +#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE) +#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD) +#define nfserr_badname cpu_to_be32(NFSERR_BADNAME) +#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) +#define nfserr_locked cpu_to_be32(NFSERR_LOCKED) +#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) +#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE) +#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT) +#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST) +#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION) +#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT) +#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY) +#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION) +#define nfserr_deleg_already_wanted 
cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED) +#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY) +#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER) +#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE) +#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT) +#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT) +#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE) +#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED) +#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS) +#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG) +#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG) +#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE) +#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP) +#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND) +#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS) +#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION) +#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP) +#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY) +#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE) +#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY) +#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT) +#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION) +#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP) +#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT) +#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP) +#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED) +#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE) +#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL) +#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) +#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) +#define 
nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) + +/* error codes for internal use */ +/* if a request fails due to kmalloc failure, it gets dropped. + * Client should resend eventually + */ +#define nfserr_dropit cpu_to_be32(30000) +/* end-of-file indicator in readdir */ +#define nfserr_eof cpu_to_be32(30001) +/* replay detected */ +#define nfserr_replay_me cpu_to_be32(11001) +/* nfs41 replay detected */ +#define nfserr_replay_cache cpu_to_be32(11002) + +/* Check for dir entries '.' and '..' */ +#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) + +/* + * Time of server startup + */ +extern struct timeval nfssvc_boot; + +#ifdef CONFIG_NFSD_V4 + +extern time_t nfsd4_lease; +extern time_t nfsd4_grace; + +/* before processing a COMPOUND operation, we have to check that there + * is enough space in the buffer for XDR encode to succeed. otherwise, + * we might process an operation with side effects, and be unable to + * tell the client that the operation succeeded. + * + * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space + * needed to encode an "ordinary" _successful_ operation. (GETATTR, + * READ, READDIR, and READLINK have their own buffer checks.) if we + * fall below this level, we fail the next operation with NFS4ERR_RESOURCE. + * + * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space + * needed to encode an operation which has failed with NFS4ERR_RESOURCE. + * care is taken to ensure that we never fall below this level for any + * reason. 
+ */ +#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ +#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ + +#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ + +/* + * The following attributes are currently not supported by the NFSv4 server: + * ARCHIVE (deprecated anyway) + * HIDDEN (unlikely to be supported any time soon) + * MIMETYPE (unlikely to be supported any time soon) + * QUOTA_* (will be supported in a forthcoming patch) + * SYSTEM (unlikely to be supported any time soon) + * TIME_BACKUP (unlikely to be supported any time soon) + * TIME_CREATE (unlikely to be supported any time soon) + */ +#define NFSD4_SUPPORTED_ATTRS_WORD0 \ +(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \ + | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \ + | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \ + | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \ + | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \ + | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \ + | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \ + | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \ + | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ + | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) + +#define NFSD4_SUPPORTED_ATTRS_WORD1 \ +(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ + | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ + | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ + | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \ + | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ + | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) + +#define 
NFSD4_SUPPORTED_ATTRS_WORD2 0 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ + NFSD4_SUPPORTED_ATTRS_WORD0 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ + NFSD4_SUPPORTED_ATTRS_WORD1 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) + +static inline u32 nfsd_suppattrs0(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 + : NFSD4_SUPPORTED_ATTRS_WORD0; +} + +static inline u32 nfsd_suppattrs1(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1 + : NFSD4_SUPPORTED_ATTRS_WORD1; +} + +static inline u32 nfsd_suppattrs2(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 + : NFSD4_SUPPORTED_ATTRS_WORD2; +} + +/* These will return ERR_INVAL if specified in GETATTR or READDIR. */ +#define NFSD_WRITEONLY_ATTRS_WORD1 \ + (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) + +/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ +#define NFSD_WRITEABLE_ATTRS_WORD0 \ + (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) +#define NFSD_WRITEABLE_ATTRS_WORD1 \ + (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEABLE_ATTRS_WORD2 0 + +#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ + NFSD_WRITEABLE_ATTRS_WORD0 +/* + * we currently store the exclusive create verifier in the v_{a,m}time + * attributes so the client can't set these at create time using EXCLUSIVE4_1 + */ +#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \ + (NFSD_WRITEABLE_ATTRS_WORD1 & \ + ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)) +#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ + NFSD_WRITEABLE_ATTRS_WORD2 + +extern int nfsd4_is_junction(struct dentry *dentry); +#else +static inline int nfsd4_is_junction(struct dentry *dentry) +{ + return 0; +} + +#endif /* CONFIG_NFSD_V4 */ + +#endif /* LINUX_NFSD_NFSD_H */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c new file mode 100644 index 
00000000000..68454e75fce --- /dev/null +++ b/fs/nfsd/nfsfh.c @@ -0,0 +1,690 @@ +/* + * NFS server file handle treatment. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + * Portions Copyright (C) 1999 G. Allen Morris III <gam3@acm.org> + * Extensive rewrite by Neil Brown <neilb@cse.unsw.edu.au> Southern-Spring 1999 + * ... and again Southern-Winter 2001 to support export_operations + */ + +#include <linux/exportfs.h> + +#include <linux/sunrpc/svcauth_gss.h> +#include "nfsd.h" +#include "vfs.h" +#include "auth.h" + +#define NFSDDBG_FACILITY NFSDDBG_FH + + +/* + * our acceptability function. + * if NOSUBTREECHECK, accept anything + * if not, require that we can walk up to exp->ex_dentry + * doing some checks on the 'x' bits + */ +static int nfsd_acceptable(void *expv, struct dentry *dentry) +{ + struct svc_export *exp = expv; + int rv; + struct dentry *tdentry; + struct dentry *parent; + + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) + return 1; + + tdentry = dget(dentry); + while (tdentry != exp->ex_path.dentry && !IS_ROOT(tdentry)) { + /* make sure parents give x permission to user */ + int err; + parent = dget_parent(tdentry); + err = inode_permission(parent->d_inode, MAY_EXEC); + if (err < 0) { + dput(parent); + break; + } + dput(tdentry); + tdentry = parent; + } + if (tdentry != exp->ex_path.dentry) + dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); + rv = (tdentry == exp->ex_path.dentry); + dput(tdentry); + return rv; +} + +/* Type check. The correct error return for type mismatches does not seem to be + * generally agreed upon. SunOS seems to use EISDIR if file isn't S_IFREG; a + * comment in the NFSv3 spec says this is incorrect (implementation notes for + * the write call). 
+ */ +static inline __be32 +nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, umode_t requested) +{ + mode &= S_IFMT; + + if (requested == 0) /* the caller doesn't care */ + return nfs_ok; + if (mode == requested) + return nfs_ok; + /* + * v4 has an error more specific than err_notdir which we should + * return in preference to err_notdir: + */ + if (rqstp->rq_vers == 4 && mode == S_IFLNK) + return nfserr_symlink; + if (requested == S_IFDIR) + return nfserr_notdir; + if (mode == S_IFDIR) + return nfserr_isdir; + return nfserr_inval; +} + +static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, + struct svc_export *exp) +{ + int flags = nfsexp_flags(rqstp, exp); + + /* Check if the request originated from a secure port. */ + if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) { + RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); + dprintk(KERN_WARNING + "nfsd: request from insecure port %s!\n", + svc_print_addr(rqstp, buf, sizeof(buf))); + return nfserr_perm; + } + + /* Set user creds for this exportpoint */ + return nfserrno(nfsd_setuser(rqstp, exp)); +} + +static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, + struct dentry *dentry, struct svc_export *exp) +{ + if (!(exp->ex_flags & NFSEXP_V4ROOT)) + return nfs_ok; + /* + * v2/v3 clients have no need for the V4ROOT export--they use + * the mount protocl instead; also, further V4ROOT checks may be + * in v4-specific code, in which case v2/v3 clients could bypass + * them. 
+ */ + if (!nfsd_v4client(rqstp)) + return nfserr_stale; + /* + * We're exposing only the directories and symlinks that have to be + * traversed on the way to real exports: + */ + if (unlikely(!S_ISDIR(dentry->d_inode->i_mode) && + !S_ISLNK(dentry->d_inode->i_mode))) + return nfserr_stale; + /* + * A pseudoroot export gives permission to access only one + * single directory; the kernel has to make another upcall + * before granting access to anything else under it: + */ + if (unlikely(dentry != exp->ex_path.dentry)) + return nfserr_stale; + return nfs_ok; +} + +/* + * Use the given filehandle to look up the corresponding export and + * dentry. On success, the results are used to set fh_export and + * fh_dentry. + */ +static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) +{ + struct knfsd_fh *fh = &fhp->fh_handle; + struct fid *fid = NULL, sfid; + struct svc_export *exp; + struct dentry *dentry; + int fileid_type; + int data_left = fh->fh_size/4; + __be32 error; + + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + if (rqstp->rq_vers == 4 && fh->fh_size == 0) + return nfserr_nofilehandle; + + if (fh->fh_version == 1) { + int len; + + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; + } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); + fid = (struct fid *)(fh->fh_auth + len); + } else { + __u32 tfh[2]; + dev_t xdev; + ino_t xino; + + if (fh->fh_size != NFS_FHSIZE) + return error; + /* assume old filehandle format */ + xdev = old_decode_dev(fh->ofh_xdev); + xino = 
u32_to_ino_t(fh->ofh_xino); + mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); + exp = rqst_exp_find(rqstp, FSID_DEV, tfh); + } + + error = nfserr_stale; + if (PTR_ERR(exp) == -ENOENT) + return error; + + if (IS_ERR(exp)) + return nfserrno(PTR_ERR(exp)); + + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) { + /* Elevate privileges so that the lack of 'r' or 'x' + * permission on some parent directory will + * not stop exportfs_decode_fh from being able + * to reconnect a directory into the dentry cache. + * The same problem can affect "SUBTREECHECK" exports, + * but as nfsd_acceptable depends on correct + * access control settings being in effect, we cannot + * fix that case easily. + */ + struct cred *new = prepare_creds(); + if (!new) + return nfserrno(-ENOMEM); + new->cap_effective = + cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + put_cred(override_creds(new)); + put_cred(new); + } else { + error = nfsd_setuser_and_check_port(rqstp, exp); + if (error) + goto out; + } + + /* + * Look up the dentry using the NFS file handle. 
+ */ + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + + if (fh->fh_version != 1) { + sfid.i32.ino = fh->ofh_ino; + sfid.i32.gen = fh->ofh_generation; + sfid.i32.parent_ino = fh->ofh_dirino; + fid = &sfid; + data_left = 3; + if (fh->ofh_dirino == 0) + fileid_type = FILEID_INO32_GEN; + else + fileid_type = FILEID_INO32_GEN_PARENT; + } else + fileid_type = fh->fh_fileid_type; + + if (fileid_type == FILEID_ROOT) + dentry = dget(exp->ex_path.dentry); + else { + dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, + data_left, fileid_type, + nfsd_acceptable, exp); + } + if (dentry == NULL) + goto out; + if (IS_ERR(dentry)) { + if (PTR_ERR(dentry) != -EINVAL) + error = nfserrno(PTR_ERR(dentry)); + goto out; + } + + if (S_ISDIR(dentry->d_inode->i_mode) && + (dentry->d_flags & DCACHE_DISCONNECTED)) { + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + } + + fhp->fh_dentry = dentry; + fhp->fh_export = exp; + return 0; +out: + exp_put(exp); + return error; +} + +/** + * fh_verify - filehandle lookup and access checking + * @rqstp: pointer to current rpc request + * @fhp: filehandle to be verified + * @type: expected type of object pointed to by filehandle + * @access: type of access needed to object + * + * Look up a dentry from the on-the-wire filehandle, check the client's + * access to the export, and set the current task's credentials. + * + * Regardless of success or failure of fh_verify(), fh_put() should be + * called on @fhp when the caller is finished with the filehandle. + * + * fh_verify() may be called multiple times on a given filehandle, for + * example, when processing an NFSv4 compound. The first call will look + * up a dentry using the on-the-wire filehandle. Subsequent calls will + * skip the lookup and just perform the other checks and possibly change + * the current task's credentials. 
+ * + * @type specifies the type of object expected using one of the S_IF* + * constants defined in include/linux/stat.h. The caller may use zero + * to indicate that it doesn't care; any other value must equal the + * object's file type (i_mode & S_IFMT), see nfsd_mode_check(). + * + * @access is formed from the NFSD_MAY_* constants defined in + * include/linux/nfsd/nfsd.h. + */ +__be32 +fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) +{ + struct svc_export *exp; + struct dentry *dentry; + __be32 error; + + dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); + + if (!fhp->fh_dentry) { + error = nfsd_set_fh_dentry(rqstp, fhp); + if (error) + goto out; + } + dentry = fhp->fh_dentry; + exp = fhp->fh_export; + /* + * We still have to do all these permission checks, even when + * fh_dentry is already set: + * - fh_verify may be called multiple times with different + * "access" arguments (e.g. nfsd_proc_create calls + * fh_verify(...,NFSD_MAY_EXEC) first, then later (in + * nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE)). + * - in the NFSv4 case, the filehandle may have been filled + * in by fh_compose, and given a dentry, but further + * compound operations performed with that filehandle + * still need permissions checks. In the worst case, a + * mountpoint crossing may have changed the export + * options, and we may now need to use a different uid + * (for example, if different id-squashing options are in + * effect on the new filesystem). + */ + error = check_pseudo_root(rqstp, dentry, exp); + if (error) + goto out; + + error = nfsd_setuser_and_check_port(rqstp, exp); + if (error) + goto out; + + error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); + if (error) + goto out; + + /* + * pseudoflavor restrictions are not enforced on NLM, + * which clients virtually always use auth_sys for, + * even while using RPCSEC_GSS for NFS. 
+ */ + if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) + goto skip_pseudoflavor_check; + /* + * Clients may expect to be able to use auth_sys during mount, + * even if they use gss for everything else; see section 2.3.2 + * of rfc 2623. + */ + if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT + && exp->ex_path.dentry == dentry) + goto skip_pseudoflavor_check; + + error = check_nfsd_access(exp, rqstp); + if (error) + goto out; + +skip_pseudoflavor_check: + /* Finally, check access permissions. */ + error = nfsd_permission(rqstp, exp, dentry, access); + + if (error) { + dprintk("fh_verify: %s/%s permission failure, " + "acc=%x, error=%d\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, + access, ntohl(error)); + } +out: + if (error == nfserr_stale) + nfsdstats.fh_stale++; + return error; +} + + +/* + * Compose a file handle for an NFS reply. + * + * Note that when first composed, the dentry may not yet have + * an inode. In this case a call to fh_update should be made + * before the fh goes out on the wire ... 
+ */ +static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry) +{ + if (dentry != exp->ex_path.dentry) { + struct fid *fid = (struct fid *) + (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); + int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; + int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); + + fhp->fh_handle.fh_fileid_type = + exportfs_encode_fh(dentry, fid, &maxsize, subtreecheck); + fhp->fh_handle.fh_size += maxsize * 4; + } else { + fhp->fh_handle.fh_fileid_type = FILEID_ROOT; + } +} + +/* + * for composing old style file handles + */ +static inline void _fh_update_old(struct dentry *dentry, + struct svc_export *exp, + struct knfsd_fh *fh) +{ + fh->ofh_ino = ino_t_to_u32(dentry->d_inode->i_ino); + fh->ofh_generation = dentry->d_inode->i_generation; + if (S_ISDIR(dentry->d_inode->i_mode) || + (exp->ex_flags & NFSEXP_NOSUBTREECHECK)) + fh->ofh_dirino = 0; +} + +static bool is_root_export(struct svc_export *exp) +{ + return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; +} + +static struct super_block *exp_sb(struct svc_export *exp) +{ + return exp->ex_path.dentry->d_inode->i_sb; +} + +static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) +{ + switch (fsid_type) { + case FSID_DEV: + if (!old_valid_dev(exp_sb(exp)->s_dev)) + return 0; + /* FALL THROUGH */ + case FSID_MAJOR_MINOR: + case FSID_ENCODE_DEV: + return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV; + case FSID_NUM: + return exp->ex_flags & NFSEXP_FSID; + case FSID_UUID8: + case FSID_UUID16: + if (!is_root_export(exp)) + return 0; + /* fall through */ + case FSID_UUID4_INUM: + case FSID_UUID16_INUM: + return exp->ex_uuid != NULL; + } + return 1; +} + + +static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh) +{ + u8 version; + u8 fsid_type; +retry: + version = 1; + if (ref_fh && ref_fh->fh_export == exp) { + version = ref_fh->fh_handle.fh_version; + fsid_type = 
ref_fh->fh_handle.fh_fsid_type; + + ref_fh = NULL; + + switch (version) { + case 0xca: + fsid_type = FSID_DEV; + break; + case 1: + break; + default: + goto retry; + } + + /* + * As the fsid -> filesystem mapping was guided by + * user-space, there is no guarantee that the filesystem + * actually supports that fsid type. If it doesn't we + * loop around again without ref_fh set. + */ + if (!fsid_type_ok_for_exp(fsid_type, exp)) + goto retry; + } else if (exp->ex_flags & NFSEXP_FSID) { + fsid_type = FSID_NUM; + } else if (exp->ex_uuid) { + if (fhp->fh_maxsize >= 64) { + if (is_root_export(exp)) + fsid_type = FSID_UUID16; + else + fsid_type = FSID_UUID16_INUM; + } else { + if (is_root_export(exp)) + fsid_type = FSID_UUID8; + else + fsid_type = FSID_UUID4_INUM; + } + } else if (!old_valid_dev(exp_sb(exp)->s_dev)) + /* for newer device numbers, we must use a newer fsid format */ + fsid_type = FSID_ENCODE_DEV; + else + fsid_type = FSID_DEV; + fhp->fh_handle.fh_version = version; + if (version) + fhp->fh_handle.fh_fsid_type = fsid_type; +} + +__be32 +fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, + struct svc_fh *ref_fh) +{ + /* ref_fh is a reference file handle. + * if it is non-null and for the same filesystem, then we should compose + * a filehandle which is of the same version, where possible. + * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca + * Then create a 32byte filehandle using nfs_fhbase_old + * + */ + + struct inode * inode = dentry->d_inode; + struct dentry *parent = dentry->d_parent; + __u32 *datap; + dev_t ex_dev = exp_sb(exp)->s_dev; + + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", + MAJOR(ex_dev), MINOR(ex_dev), + (long) exp->ex_path.dentry->d_inode->i_ino, + parent->d_name.name, dentry->d_name.name, + (inode ? inode->i_ino : 0)); + + /* Choose filehandle version and fsid type based on + * the reference filehandle (if it is in the same export) + * or the export options. 
+ */ + set_version_and_fsid_type(fhp, exp, ref_fh); + + if (ref_fh == fhp) + fh_put(ref_fh); + + if (fhp->fh_locked || fhp->fh_dentry) { + printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", + parent->d_name.name, dentry->d_name.name); + } + if (fhp->fh_maxsize < NFS_FHSIZE) + printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", + fhp->fh_maxsize, + parent->d_name.name, dentry->d_name.name); + + fhp->fh_dentry = dget(dentry); /* our internal copy */ + fhp->fh_export = exp; + cache_get(&exp->h); + + if (fhp->fh_handle.fh_version == 0xca) { + /* old style filehandle please */ + memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); + fhp->fh_handle.fh_size = NFS_FHSIZE; + fhp->fh_handle.ofh_dcookie = 0xfeebbaca; + fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); + fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; + fhp->fh_handle.ofh_xino = + ino_t_to_u32(exp->ex_path.dentry->d_inode->i_ino); + fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); + if (inode) + _fh_update_old(dentry, exp, &fhp->fh_handle); + } else { + int len; + fhp->fh_handle.fh_auth_type = 0; + datap = fhp->fh_handle.fh_auth+0; + mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, + exp->ex_path.dentry->d_inode->i_ino, + exp->ex_fsid, exp->ex_uuid); + + len = key_len(fhp->fh_handle.fh_fsid_type); + datap += len/4; + fhp->fh_handle.fh_size = 4 + len; + + if (inode) + _fh_update(fhp, exp, dentry); + if (fhp->fh_handle.fh_fileid_type == 255) { + fh_put(fhp); + return nfserr_opnotsupp; + } + } + + return 0; +} + +/* + * Update file handle information after changing a dentry. 
+ * This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create + */ +__be32 +fh_update(struct svc_fh *fhp) +{ + struct dentry *dentry; + + if (!fhp->fh_dentry) + goto out_bad; + + dentry = fhp->fh_dentry; + if (!dentry->d_inode) + goto out_negative; + if (fhp->fh_handle.fh_version != 1) { + _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); + } else { + if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) + goto out; + + _fh_update(fhp, fhp->fh_export, dentry); + if (fhp->fh_handle.fh_fileid_type == 255) + return nfserr_opnotsupp; + } +out: + return 0; + +out_bad: + printk(KERN_ERR "fh_update: fh not verified!\n"); + goto out; +out_negative: + printk(KERN_ERR "fh_update: %s/%s still negative!\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + goto out; +} + +/* + * Release a file handle. + */ +void +fh_put(struct svc_fh *fhp) +{ + struct dentry * dentry = fhp->fh_dentry; + struct svc_export * exp = fhp->fh_export; + if (dentry) { + fh_unlock(fhp); + fhp->fh_dentry = NULL; + dput(dentry); +#ifdef CONFIG_NFSD_V3 + fhp->fh_pre_saved = 0; + fhp->fh_post_saved = 0; +#endif + } + if (exp) { + cache_put(&exp->h, &svc_export_cache); + fhp->fh_export = NULL; + } + return; +} + +/* + * Shorthand for dprintk()'s + */ +char * SVCFH_fmt(struct svc_fh *fhp) +{ + struct knfsd_fh *fh = &fhp->fh_handle; + + static char buf[80]; + sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x", + fh->fh_size, + fh->fh_base.fh_pad[0], + fh->fh_base.fh_pad[1], + fh->fh_base.fh_pad[2], + fh->fh_base.fh_pad[3], + fh->fh_base.fh_pad[4], + fh->fh_base.fh_pad[5]); + return buf; +} + +enum fsid_source fsid_source(struct svc_fh *fhp) +{ + if (fhp->fh_handle.fh_version != 1) + return FSIDSOURCE_DEV; + switch(fhp->fh_handle.fh_fsid_type) { + case FSID_DEV: + case FSID_ENCODE_DEV: + case FSID_MAJOR_MINOR: + if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV) + return FSIDSOURCE_DEV; + break; + case FSID_NUM: + if (fhp->fh_export->ex_flags & NFSEXP_FSID) + return 
FSIDSOURCE_FSID; + break; + default: + break; + } + /* either a UUID type filehandle, or the filehandle doesn't + * match the export. + */ + if (fhp->fh_export->ex_flags & NFSEXP_FSID) + return FSIDSOURCE_FSID; + if (fhp->fh_export->ex_uuid) + return FSIDSOURCE_UUID; + return FSIDSOURCE_DEV; +} diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h new file mode 100644 index 00000000000..e5e6707ba68 --- /dev/null +++ b/fs/nfsd/nfsfh.h @@ -0,0 +1,206 @@ +/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ + +#ifndef _LINUX_NFSD_FH_INT_H +#define _LINUX_NFSD_FH_INT_H + +#include <linux/nfsd/nfsfh.h> + +enum nfsd_fsid { + FSID_DEV = 0, + FSID_NUM, + FSID_MAJOR_MINOR, + FSID_ENCODE_DEV, + FSID_UUID4_INUM, + FSID_UUID8, + FSID_UUID16, + FSID_UUID16_INUM, +}; + +enum fsid_source { + FSIDSOURCE_DEV, + FSIDSOURCE_FSID, + FSIDSOURCE_UUID, +}; +extern enum fsid_source fsid_source(struct svc_fh *fhp); + + +/* This might look a little large to "inline" but in all calls except + * one, 'vers' is constant so most of the function disappears. 
+ */ +static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, + u32 fsid, unsigned char *uuid) +{ + u32 *up; + switch(vers) { + case FSID_DEV: + fsidv[0] = htonl((MAJOR(dev)<<16) | + MINOR(dev)); + fsidv[1] = ino_t_to_u32(ino); + break; + case FSID_NUM: + fsidv[0] = fsid; + break; + case FSID_MAJOR_MINOR: + fsidv[0] = htonl(MAJOR(dev)); + fsidv[1] = htonl(MINOR(dev)); + fsidv[2] = ino_t_to_u32(ino); + break; + + case FSID_ENCODE_DEV: + fsidv[0] = new_encode_dev(dev); + fsidv[1] = ino_t_to_u32(ino); + break; + + case FSID_UUID4_INUM: + /* 4 byte fsid and inode number */ + up = (u32*)uuid; + fsidv[0] = ino_t_to_u32(ino); + fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3]; + break; + + case FSID_UUID8: + /* 8 byte fsid */ + up = (u32*)uuid; + fsidv[0] = up[0] ^ up[2]; + fsidv[1] = up[1] ^ up[3]; + break; + + case FSID_UUID16: + /* 16 byte fsid - NFSv3+ only */ + memcpy(fsidv, uuid, 16); + break; + + case FSID_UUID16_INUM: + /* 8 byte inode and 16 byte fsid */ + *(u64*)fsidv = (u64)ino; + memcpy(fsidv+2, uuid, 16); + break; + default: BUG(); + } +} + +static inline int key_len(int type) +{ + switch(type) { + case FSID_DEV: return 8; + case FSID_NUM: return 4; + case FSID_MAJOR_MINOR: return 12; + case FSID_ENCODE_DEV: return 8; + case FSID_UUID4_INUM: return 8; + case FSID_UUID8: return 8; + case FSID_UUID16: return 16; + case FSID_UUID16_INUM: return 24; + default: return 0; + } +} + +/* + * Shorthand for dprintk()'s + */ +extern char * SVCFH_fmt(struct svc_fh *fhp); + +/* + * Function prototypes + */ +__be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int); +__be32 fh_compose(struct svc_fh *, struct svc_export *, struct d |