summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c13
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c19
-rw-r--r--fs/ext3/resize.c1
-rw-r--r--fs/fuse/dev.c256
-rw-r--r--fs/fuse/dir.c118
-rw-r--r--fs/fuse/file.c56
-rw-r--r--fs/fuse/fuse_i.h61
-rw-r--r--fs/fuse/inode.c135
-rw-r--r--fs/inotify.c2
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfsd/auth.c46
-rw-r--r--fs/nfsd/export.c3
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs4acl.c8
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c150
-rw-r--r--fs/nfsd/nfs4xdr.c62
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/vfs.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c40
-rw-r--r--fs/ocfs2/dlm/userdlm.c74
-rw-r--r--fs/ocfs2/file.c19
-rw-r--r--fs/proc/vmcore.c4
-rw-r--r--fs/select.c30
-rw-r--r--fs/sync.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/xfs_ialloc.c15
-rw-r--r--fs/xfs/xfs_iget.c29
-rw-r--r--fs/xfs/xfs_inode.c27
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_mount.c2
37 files changed, 625 insertions, 609 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index b0a0ae509c0..61c599b4a1e 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -127,12 +127,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
dprintk(DEBUG_ERROR, "problem initiating session\n");
- kfree(v9ses);
- return ERR_PTR(newfid);
+ sb = ERR_PTR(newfid);
+ goto out_free_session;
}
sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
-
+ if (IS_ERR(sb))
+ goto out_close_session;
v9fs_fill_super(sb, v9ses, flags);
inode = v9fs_get_inode(sb, S_IFDIR | mode);
@@ -185,6 +186,12 @@ static struct super_block *v9fs_get_sb(struct file_system_type
return sb;
+out_close_session:
+ v9fs_session_close(v9ses);
+out_free_session:
+ kfree(v9ses);
+ return sb;
+
put_back_sb:
/* deactivate_super calls v9fs_kill_super which will frees the rest */
up_write(&sb->s_umount);
diff --git a/fs/Kconfig b/fs/Kconfig
index e207be68d4c..2524629dc83 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -799,6 +799,7 @@ config PROC_KCORE
config PROC_VMCORE
bool "/proc/vmcore support (EXPERIMENTAL)"
depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
+ default y
help
Exports the dump image of crashed kernel in ELF format.
@@ -861,7 +862,7 @@ config RAMFS
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on SYSFS && EXPERIMENTAL
help
configfs is a ram-based filesystem that provides the converse
of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8ed9b06a982..5638c8f9362 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -504,7 +504,7 @@ static int populate_groups(struct config_group *group)
int ret = 0;
int i;
- if (group && group->default_groups) {
+ if (group->default_groups) {
/* FYI, we're faking mkdir here
* I'm not sure we need this semaphore, as we're called
* from our parent's mkdir. That holds our parent's
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 242fe1a66ce..1b4491cdd11 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -599,7 +599,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
switch (op) {
case EPOLL_CTL_ADD:
if (!epi) {
- epds.events |= POLLERR | POLLHUP | POLLRDHUP;
+ epds.events |= POLLERR | POLLHUP;
error = ep_insert(ep, &epds, tfile, fd);
} else
@@ -613,7 +613,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
break;
case EPOLL_CTL_MOD:
if (epi) {
- epds.events |= POLLERR | POLLHUP | POLLRDHUP;
+ epds.events |= POLLERR | POLLHUP;
error = ep_modify(ep, epi, &epds);
} else
error = -ENOENT;
diff --git a/fs/exec.c b/fs/exec.c
index 0291a68a362..3234a0c32d5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -678,6 +678,18 @@ static int de_thread(struct task_struct *tsk)
while (leader->exit_state != EXIT_ZOMBIE)
yield();
+ /*
+ * The only record we have of the real-time age of a
+ * process, regardless of execs it's done, is start_time.
+ * All the past CPU time is accumulated in signal_struct
+ * from sister threads now dead. But in this non-leader
+ * exec, nothing survives from the original leader thread,
+ * whose birth marks the true age of this process now.
+ * When we take on its identity by switching to its PID, we
+ * also take its birthdate (always earlier than our own).
+ */
+ current->start_time = leader->start_time;
+
spin_lock(&leader->proc_lock);
spin_lock(&current->proc_lock);
proc_dentry1 = proc_pid_unhash(current);
@@ -723,7 +735,12 @@ static int de_thread(struct task_struct *tsk)
current->parent = current->real_parent = leader->real_parent;
leader->parent = leader->real_parent = child_reaper;
current->group_leader = current;
- leader->group_leader = leader;
+ leader->group_leader = current;
+
+ /* Reduce leader to a thread */
+ detach_pid(leader, PIDTYPE_PGID);
+ detach_pid(leader, PIDTYPE_SID);
+ list_del_init(&leader->tasks);
add_parent(current);
add_parent(leader);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 1041dab6de2..14f5f6ea3e7 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -974,6 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
ext3_warning(sb, __FUNCTION__,
"multiple resizers run on filesystem!");
+ unlock_super(sb);
err = -EBUSY;
goto exit_put;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 23d1f52eb1b..6c740f86066 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1,6 +1,6 @@
/*
FUSE: Filesystem in Userspace
- Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
@@ -23,13 +23,11 @@ static kmem_cache_t *fuse_req_cachep;
static struct fuse_conn *fuse_get_conn(struct file *file)
{
- struct fuse_conn *fc;
- spin_lock(&fuse_lock);
- fc = file->private_data;
- if (fc && !fc->connected)
- fc = NULL;
- spin_unlock(&fuse_lock);
- return fc;
+ /*
+ * Lockless access is OK, because file->private data is set
+ * once during mount and is valid until the file is released.
+ */
+ return file->private_data;
}
static void fuse_request_init(struct fuse_req *req)
@@ -74,10 +72,8 @@ static void restore_sigs(sigset_t *oldset)
*/
void fuse_reset_request(struct fuse_req *req)
{
- int preallocated = req->preallocated;
BUG_ON(atomic_read(&req->count) != 1);
fuse_request_init(req);
- req->preallocated = preallocated;
}
static void __fuse_get_request(struct fuse_req *req)
@@ -92,80 +88,52 @@ static void __fuse_put_request(struct fuse_req *req)
atomic_dec(&req->count);
}
-static struct fuse_req *do_get_request(struct fuse_conn *fc)
+struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
struct fuse_req *req;
-
- spin_lock(&fuse_lock);
- BUG_ON(list_empty(&fc->unused_list));
- req = list_entry(fc->unused_list.next, struct fuse_req, list);
- list_del_init(&req->list);
- spin_unlock(&fuse_lock);
- fuse_request_init(req);
- req->preallocated = 1;
- req->in.h.uid = current->fsuid;
- req->in.h.gid = current->fsgid;
- req->in.h.pid = current->pid;
- return req;
-}
-
-/* This can return NULL, but only in case it's interrupted by a SIGKILL */
-struct fuse_req *fuse_get_request(struct fuse_conn *fc)
-{
- int intr;
sigset_t oldset;
+ int err;
- atomic_inc(&fc->num_waiting);
block_sigs(&oldset);
- intr = down_interruptible(&fc->outstanding_sem);
+ err = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
restore_sigs(&oldset);
- if (intr) {
- atomic_dec(&fc->num_waiting);
- return NULL;
- }
- return do_get_request(fc);
-}
+ if (err)
+ return ERR_PTR(-EINTR);
-/* Must be called with fuse_lock held */
-static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
-{
- if (req->preallocated) {
- atomic_dec(&fc->num_waiting);
- list_add(&req->list, &fc->unused_list);
- } else
- fuse_request_free(req);
+ req = fuse_request_alloc();
+ if (!req)
+ return ERR_PTR(-ENOMEM);
- /* If we are in debt decrease that first */
- if (fc->outstanding_debt)
- fc->outstanding_debt--;
- else
- up(&fc->outstanding_sem);
+ atomic_inc(&fc->num_waiting);
+ fuse_request_init(req);
+ req->in.h.uid = current->fsuid;
+ req->in.h.gid = current->fsgid;
+ req->in.h.pid = current->pid;
+ return req;
}
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
if (atomic_dec_and_test(&req->count)) {
- spin_lock(&fuse_lock);
- fuse_putback_request(fc, req);
- spin_unlock(&fuse_lock);
+ atomic_dec(&fc->num_waiting);
+ fuse_request_free(req);
}
}
-static void fuse_put_request_locked(struct fuse_conn *fc, struct fuse_req *req)
-{
- if (atomic_dec_and_test(&req->count))
- fuse_putback_request(fc, req);
-}
-
-void fuse_release_background(struct fuse_req *req)
+void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
{
iput(req->inode);
iput(req->inode2);
if (req->file)
fput(req->file);
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
list_del(&req->bg_entry);
- spin_unlock(&fuse_lock);
+ if (fc->num_background == FUSE_MAX_BACKGROUND) {
+ fc->blocked = 0;
+ wake_up_all(&fc->blocked_waitq);
+ }
+ fc->num_background--;
+ spin_unlock(&fc->lock);
}
/*
@@ -184,23 +152,23 @@ void fuse_release_background(struct fuse_req *req)
* interrupted and put in the background, it will return with an error
* and hence never be reset and reused.
*
- * Called with fuse_lock, unlocks it
+ * Called with fc->lock, unlocks it
*/
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
{
list_del(&req->list);
req->state = FUSE_REQ_FINISHED;
if (!req->background) {
+ spin_unlock(&fc->lock);
wake_up(&req->waitq);
- fuse_put_request_locked(fc, req);
- spin_unlock(&fuse_lock);
+ fuse_put_request(fc, req);
} else {
void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
req->end = NULL;
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
down_read(&fc->sbput_sem);
if (fc->mounted)
- fuse_release_background(req);
+ fuse_release_background(fc, req);
up_read(&fc->sbput_sem);
if (end)
end(fc, req);
@@ -242,6 +210,9 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
{
req->background = 1;
list_add(&req->bg_entry, &fc->background);
+ fc->num_background++;
+ if (fc->num_background == FUSE_MAX_BACKGROUND)
+ fc->blocked = 1;
if (req->inode)
req->inode = igrab(req->inode);
if (req->inode2)
@@ -250,16 +221,16 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
get_file(req->file);
}
-/* Called with fuse_lock held. Releases, and then reacquires it. */
+/* Called with fc->lock held. Releases, and then reacquires it. */
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
{
sigset_t oldset;
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
block_sigs(&oldset);
wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
restore_sigs(&oldset);
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
return;
@@ -273,9 +244,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
locked state, there mustn't be any filesystem
operation (e.g. page fault), since that could lead
to deadlock */
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
wait_event(req->waitq, !req->locked);
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
}
if (req->state == FUSE_REQ_PENDING) {
list_del(&req->list);
@@ -304,19 +275,10 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
req->in.h.unique = fc->reqctr;
req->in.h.len = sizeof(struct fuse_in_header) +
len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
- if (!req->preallocated) {
- /* If request is not preallocated (either FORGET or
- RELEASE), then still decrease outstanding_sem, so
- user can't open infinite number of files while not
- processing the RELEASE requests. However for
- efficiency do it without blocking, so if down()
- would block, just increase the debt instead */
- if (down_trylock(&fc->outstanding_sem))
- fc->outstanding_debt++;
- }
list_add_tail(&req->list, &fc->pending);
req->state = FUSE_REQ_PENDING;
wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
/*
@@ -325,7 +287,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
req->isreply = 1;
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
if (!fc->connected)
req->out.h.error = -ENOTCONN;
else if (fc->conn_error)
@@ -338,15 +300,16 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
request_wait_answer(fc, req);
}
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
}
static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
+ background_request(fc, req);
if (fc->connected) {
queue_request(fc, req);
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
} else {
req->out.h.error = -ENOTCONN;
request_end(fc, req);
@@ -362,9 +325,6 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
req->isreply = 1;
- spin_lock(&fuse_lock);
- background_request(fc, req);
- spin_unlock(&fuse_lock);
request_send_nowait(fc, req);
}
@@ -373,16 +333,16 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
* anything that could cause a page-fault. If the request was already
* interrupted bail out.
*/
-static int lock_request(struct fuse_req *req)
+static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
int err = 0;
if (req) {
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
if (req->interrupted)
err = -ENOENT;
else
req->locked = 1;
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
}
return err;
}
@@ -392,18 +352,19 @@ static int lock_request(struct fuse_req *req)
* requester thread is currently waiting for it to be unlocked, so
* wake it up.
*/
-static void unlock_request(struct fuse_req *req)
+static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
if (req) {
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
req->locked = 0;
if (req->interrupted)
wake_up(&req->waitq);
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
}
}
struct fuse_copy_state {
+ struct fuse_conn *fc;
int write;
struct fuse_req *req;
const struct iovec *iov;
@@ -416,11 +377,12 @@ struct fuse_copy_state {
unsigned len;
};
-static void fuse_copy_init(struct fuse_copy_state *cs, int write,
- struct fuse_req *req, const struct iovec *iov,
- unsigned long nr_segs)
+static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
+ int write, struct fuse_req *req,
+ const struct iovec *iov, unsigned long nr_segs)
{
memset(cs, 0, sizeof(*cs));
+ cs->fc = fc;
cs->write = write;
cs->req = req;
cs->iov = iov;
@@ -450,7 +412,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
unsigned long offset;
int err;
- unlock_request(cs->req);
+ unlock_request(cs->fc, cs->req);
fuse_copy_finish(cs);
if (!cs->seglen) {
BUG_ON(!cs->nr_segs);
@@ -473,7 +435,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->seglen -= cs->len;
cs->addr += cs->len;
- return lock_request(cs->req);
+ return lock_request(cs->fc, cs->req);
}
/* Do as much copy to/from userspace buffer as we can */
@@ -585,9 +547,9 @@ static void request_wait(struct fuse_conn *fc)
if (signal_pending(current))
break;
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
schedule();
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&fc->waitq, &wait);
@@ -606,18 +568,21 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *off)
{
int err;
- struct fuse_conn *fc;
struct fuse_req *req;
struct fuse_in *in;
struct fuse_copy_state cs;
unsigned reqsize;
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return -EPERM;
restart:
- spin_lock(&fuse_lock);
- fc = file->private_data;
- err = -EPERM;
- if (!fc)
+ spin_lock(&fc->lock);
+ err = -EAGAIN;
+ if ((file->f_flags & O_NONBLOCK) && fc->connected &&
+ list_empty(&fc->pending))
goto err_unlock;
+
request_wait(fc);
err = -ENODEV;
if (!fc->connected)
@@ -641,14 +606,14 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
request_end(fc, req);
goto restart;
}
- spin_unlock(&fuse_lock);
- fuse_copy_init(&cs, 1, req, iov, nr_segs);
+ spin_unlock(&fc->lock);
+ fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
if (!err)
err = fuse_copy_args(&cs, in->numargs, in->argpages,
(struct fuse_arg *) in->args, 0);
fuse_copy_finish(&cs);
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
req->locked = 0;
if (!err && req->interrupted)
err = -ENOENT;
@@ -663,12 +628,12 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
else {
req->state = FUSE_REQ_SENT;
list_move_tail(&req->list, &fc->processing);
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
}
return reqsize;
err_unlock:
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
return err;
}
@@ -735,9 +700,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
struct fuse_copy_state cs;
struct fuse_conn *fc = fuse_get_conn(file);
if (!fc)
- return -ENODEV;
+ return -EPERM;
- fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
+ fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
if (nbytes < sizeof(struct fuse_out_header))
return -EINVAL;
@@ -749,7 +714,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
oh.len != nbytes)
goto err_finish;
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
err = -ENOENT;
if (!fc->connected)
goto err_unlock;
@@ -760,9 +725,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
goto err_unlock;
if (req->interrupted) {
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
fuse_copy_finish(&cs);
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
request_end(fc, req);
return -ENOENT;
}
@@ -770,12 +735,12 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
req->out.h = oh;
req->locked = 1;
cs.req = req;
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
err = copy_out_args(&cs, &req->out, nbytes);
fuse_copy_finish(&cs);
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
req->locked = 0;
if (!err) {
if (req->interrupted)
@@ -787,7 +752,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
return err ? err : nbytes;
err_unlock:
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
err_finish:
fuse_copy_finish(&cs);
return err;
@@ -804,18 +769,19 @@ static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
- struct fuse_conn *fc = fuse_get_conn(file);
unsigned mask = POLLOUT | POLLWRNORM;
-
+ struct fuse_conn *fc = fuse_get_conn(file);
if (!fc)
- return -ENODEV;
+ return POLLERR;
poll_wait(file, &fc->waitq, wait);
- spin_lock(&fuse_lock);
- if (!list_empty(&fc->pending))
- mask |= POLLIN | POLLRDNORM;
- spin_unlock(&fuse_lock);
+ spin_lock(&fc->lock);
+ if (!fc->connected)
+ mask = POLLERR;
+ else if (!list_empty(&fc->pending))
+ mask |= POLLIN | POLLRDNORM;
+ spin_unlock(&fc->lock);
return mask;
}
@@ -823,7 +789,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
/*
* Abort all requests on the given list (pending or processing)
*
- * This function releases and reacquires fuse_lock
+ * This function releases and reacquires fc->lock
*/
static void end_requests(struct fuse_conn *fc, struct list_head *head)
{
@@ -832,7 +798,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
req = list_entry(head->next, struct fuse_req, list);
req->out.h.error = -ECONNABORTED;
request_end(fc, req);
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
}
}
@@ -863,10 +829,10 @@ static void end_io_requests(struct fuse_conn *fc)
req->end = NULL;
/* The end function will consume this reference */
__fuse_get_request(req);
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
wait_event(req->waitq, !req->locked);
end(fc, req);
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
}
}
}
@@ -893,35 +859,44 @@ static void end_io_requests(struct fuse_conn *fc)
*/
void fuse_abort_conn(struct fuse_conn *fc)
{
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
if (fc->connected) {
fc->connected = 0;
end_io_requests(fc);
end_requests(fc, &fc->pending);
end_requests(fc, &fc->processing);
wake_up_all(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
}
static int fuse_dev_release(struct inode *inode, struct file *file)
{
- struct fuse_conn *fc;
-
- spin_lock(&fuse_lock);
- fc = file->private_data;
+ struct fuse_conn *fc = fuse_get_conn(file);
if (fc) {
+ spin_lock(&fc->lock);
fc->connected = 0;
end_requests(fc, &fc->pending);
end_requests(fc, &fc->processing);
- }
- spin_unlock(&fuse_lock);
- if (fc)
+ spin_unlock(&fc->lock);
+ fasync_helper(-1, file, 0, &fc->fasync);
kobject_put(&fc->kobj);
+ }
return 0;
}
+static int fuse_dev_fasync(int fd, struct file *file, int on)
+{
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return -EPERM;
+
+ /* No locking - fasync_helper does its own locking */
+ return fasync_helper(fd, file, on, &fc->fasync);
+}
+
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.llseek = no_llseek,
@@ -931,6 +906,7 @@ const struct file_operations fuse_dev_operations = {
.writev = fuse_dev_writev,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
+ .fasync = fuse_dev_fasync,
};
static struct miscdevice fuse_miscdevice = {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 256355b8025..8d7546e832e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -117,8 +117,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
return 0;
fc = get_fuse_conn(inode);
- req = fuse_get_request(fc);
- if (!req)
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
return 0;
fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
@@ -188,9 +188,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
if (entry->d_name.len > FUSE_NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
- req = fuse_get_request(fc);
- if (!req)
- return ERR_PTR(-EINTR);
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return ERR_PTR(PTR_ERR(req));
fuse_lookup_init(req, dir, entry, &outarg);
request_send(fc, req);
@@ -244,15 +244,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
struct file *file;
int flags = nd->intent.open.flags - 1;
- err = -ENOSYS;
if (fc->no_create)
- goto out;
+ return -ENOSYS;
- err = -EINTR;
- req = fuse_get_request(fc);
- if (!req)
- goto out;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ err = -ENOMEM;
ff = fuse_file_alloc();
if (!ff)
goto out_put_request;
@@ -314,7 +313,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
fuse_file_free(ff);
out_put_request:
fuse_put_request(fc, req);
- out:
return err;
}
@@ -375,9 +373,9 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
{
struct fuse_mknod_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
@@ -407,9 +405,9 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
{
struct fuse_mkdir_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
@@ -427,9 +425,9 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
{
struct fuse_conn *fc = get_fuse_conn(dir);
unsigned len = strlen(link) + 1;
- struct fuse_req *req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
req->in.h.opcode = FUSE_SYMLINK;
req->in.numargs = 2;
@@ -444,9 +442,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
req->in.h.opcode = FUSE_UNLINK;
req->in.h.nodeid = get_node_id(dir);
@@ -476,9 +474,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
req->in.h.opcode = FUSE_RMDIR;
req->in.h.nodeid = get_node_id(dir);
@@ -504,9 +502,9 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
int err;
struct fuse_rename_in inarg;
struct fuse_conn *fc = get_fuse_conn(olddir);
- struct fuse_req *req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.newdir = get_node_id(newdir);
@@ -553,9 +551,9 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
struct fuse_link_in inarg;
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.oldnodeid = get_node_id(inode);
@@ -583,9 +581,9 @@ int fuse_do_getattr(struct inode *inode)
int err;
struct fuse_attr_out arg;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
req->in.h.opcode = FUSE_GETATTR;
req->in.h.nodeid = get_node_id(inode);
@@ -673,9 +671,9 @@ static int fuse_access(struct inode *inode, int mask)
if (fc->no_access)
return 0;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.mask = mask;
@@ -780,9 +778,9 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
if (is_bad_inode(inode))
return -EIO;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
page = alloc_page(GFP_KERNEL);
if (!page) {
@@ -809,11 +807,11 @@ static char *read_link(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_request(fc);
+ struct fuse_req *req = fuse_get_req(fc);
char *link;
- if (!req)
- return ERR_PTR(-EINTR);
+ if (IS_ERR(req))
+ return ERR_PTR(PTR_ERR(req));
link = (char *) __get_free_page(GFP_KERNEL);
if (!link) {
@@ -933,9 +931,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
}
}
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
iattr_to_fattr(attr, &inarg);
@@ -995,9 +993,9 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
if (fc->no_setxattr)
return -EOPNOTSUPP;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
@@ -1035,9 +1033,9 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
if (fc->no_getxattr)
return -EOPNOTSUPP;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
@@ -1085,9 +1083,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
if (fc->no_listxattr)
return -EOPNOTSUPP;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
@@ -1131,9 +1129,9 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
if (fc->no_removexattr)
return -EOPNOTSUPP;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
req->in.h.opcode = FUSE_REMOVEXATTR;
req->in.h.nodeid = get_node_id(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 975f2697e86..e4f041a11bb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -22,9 +22,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
struct fuse_req *req;
int err;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -184,9 +184,9 @@ static int fuse_flush(struct file *file)
if (fc->no_flush)
return 0;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
@@ -223,9 +223,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
return 0;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
@@ -297,9 +297,9 @@ static int fuse_readpage(struct file *file, struct page *page)
if (is_bad_inode(inode))
goto out;
- err = -EINTR;
- req = fuse_get_request(fc);
- if (!req)
+ req = fuse_get_req(fc);
+ err = PTR_ERR(req);
+ if (IS_ERR(req))
goto out;
req->out.page_zeroing = 1;
@@ -368,10 +368,10 @@ static int fuse_readpages_fill(void *_data, struct page *page)
(req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
fuse_send_readpages(req, data->file, inode);
- data->req = req = fuse_get_request(fc);
- if (!req) {
+ data->req = req = fuse_get_req(fc);
+ if (IS_ERR(req)) {
unlock_page(page);
- return -EINTR;
+ return PTR_ERR(req);
}
}
req->pages[req->num_pages] = page;
@@ -392,13 +392,17 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
data.file = file;
data.inode = inode;
- data.req = fuse_get_request(fc);
- if (!data.req)
- return -EINTR;
+ data.req = fuse_get_req(fc);
+ if (IS_ERR(data.req))
+ return PTR_ERR(data.req);
err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
- if (!err)
- fuse_send_readpages(data.req, file, inode);
+ if (!err) {
+ if (data.req->num_pages)
+ fuse_send_readpages(data.req, file, inode);
+ else
+ fuse_put_request(fc, data.req);
+ }
return err;
}
@@ -451,9 +455,9 @@ static int fuse_commit_write(struct file *file, struct page *page,
if (is_bad_inode(inode))
return -EIO;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
req->num_pages = 1;
req->pages[0] = page;
@@ -528,9 +532,9 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
if (is_bad_inode(inode))
return -EIO;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
while (count) {
size_t nres;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a16a04fcf41..19c7185a754 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1,6 +1,6 @@
/*
FUSE: Filesystem in Userspace
- Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
@@ -18,8 +18,8 @@
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
-/** If more requests are outstanding, then the operation will block */
-#define FUSE_MAX_OUTSTANDING 10
+/** Maximum number of outstanding background requests */
+#define FUSE_MAX_BACKGROUND 10
/** It could be as large as PATH_MAX, but would that have any uses? */
#define FUSE_NAME_MAX 1024
@@ -131,8 +131,8 @@ struct fuse_conn;
* A request to the client
*/
struct fuse_req {
- /** This can be on either unused_list, pending processing or
- io lists in fuse_conn */
+ /** This can be on either pending processing or io lists in
+ fuse_conn */
struct list_head list;
/** Entry on the background list */
@@ -144,15 +144,12 @@ struct fuse_req {
/*
* The following bitfields are either set once before the
* request is queued or setting/clearing them is protected by
- * fuse_lock
+ * fuse_conn->lock
*/
/** True if the request has reply */
unsigned isreply:1;
- /** The request is preallocated */
- unsigned preallocated:1;
-
/** The request was interrupted */
unsigned interrupted:1;
@@ -213,6 +210,9 @@ struct fuse_req {
* unmounted.
*/
struct fuse_conn {
+ /** Lock protecting accessess to members of this structure */
+ spinlock_t lock;
+
/** The user id for this mount */
uid_t user_id;
@@ -244,19 +244,20 @@ struct fuse_conn {
interrupted request) */
struct list_head background;
- /** Controls the maximum number of outstanding requests */
- struct semaphore outstanding_sem;
+ /** Number of requests currently in the background */
+ unsigned num_background;
+
+ /** Flag indicating if connection is blocked. This will be
+ the case before the INIT reply is received, and if there
+ are too many outstading backgrounds requests */
+ int blocked;
- /** This counts the number of outstanding requests if
- outstanding_sem would go negative */
- unsigned outstanding_debt;
+ /** waitq for blocked connection */
+ wait_queue_head_t blocked_waitq;
/** RW semaphore for exclusion with fuse_put_super() */
struct rw_semaphore sbput_sem;
- /** The list of unused requests */
- struct list_head unused_list;
-
/** The next unique request id */
u64 reqctr;
@@ -318,6 +319,9 @@ struct fuse_conn {
/** kobject */
struct kobject kobj;
+
+ /** O_ASYNC requests */
+ struct fasync_struct *fasync;
};
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -349,21 +353,6 @@ static inline u64 get_node_id(struct inode *inode)
extern const struct file_operations fuse_dev_operations;
/**
- * This is the single global spinlock which protects FUSE's structures
- *
- * The following data is protected by this lock:
- *
- * - the private_data field of the device file
- * - the s_fs_info field of the super block
- * - unused_list, pending, processing lists in fuse_conn
- * - background list in fuse_conn
- * - the unique request ID counter reqctr in fuse_conn
- * - the sb (super_block) field in fuse_conn
- * - the file (device file) field in fuse_conn
- */
-extern spinlock_t fuse_lock;
-
-/**
* Get a filled in inode
*/
struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
@@ -461,11 +450,11 @@ void fuse_reset_request(struct fuse_req *req);
/**
* Reserve a preallocated request
*/
-struct fuse_req *fuse_get_request(struct fuse_conn *fc);
+struct fuse_req *fuse_get_req(struct fuse_conn *fc);
/**
- * Decrement reference count of a request. If count goes to zero put
- * on unused list (preallocated) or free request (not preallocated).
+ * Decrement reference count of a request. If count goes to zero free
+ * the request.
*/
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
@@ -487,7 +476,7 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
/**
* Release inodes and file associated with background request
*/
-void fuse_release_background(struct fuse_req *req);
+void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
/* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 879e6fba948..fd34037b058 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1,6 +1,6 @@
/*
FUSE: Filesystem in Userspace
- Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
@@ -22,7 +22,6 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");
-spinlock_t fuse_lock;
static kmem_cache_t *fuse_inode_cachep;
static struct subsystem connections_subsys;
@@ -207,15 +206,17 @@ static void fuse_put_super(struct super_block *sb)
down_write(&fc->sbput_sem);
while (!list_empty(&fc->background))
- fuse_release_background(list_entry(fc->background.next,
+ fuse_release_background(fc,
+ list_entry(fc->background.next,
struct fuse_req, bg_entry));
- spin_lock(&fuse_lock);
+ spin_lock(&fc->lock);
fc->mounted = 0;
fc->connected = 0;
- spin_unlock(&fuse_lock);
+ spin_unlock(&fc->lock);
up_write(&fc->sbput_sem);
/* Flush all readers on this fs */
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
wake_up_all(&fc->waitq);
kobject_del(&fc->kobj);
kobject_put(&fc->kobj);
@@ -242,9 +243,9 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
struct fuse_statfs_out outarg;
int err;
- req = fuse_get_request(fc);
- if (!req)
- return -EINTR;
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
memset(&outarg, 0, sizeof(outarg));
req->in.numargs = 0;
@@ -369,15 +370,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
static void fuse_conn_release(struct kobject *kobj)
{
- struct fuse_conn *fc = get_fuse_conn_kobj(kobj);
-
- while (!list_empty(&fc->unused_list)) {
- struct fuse_req *req;
- req = list_entry(fc->unused_list.next, struct fuse_req, list);
- list_del(&req->list);
- fuse_request_free(req);
- }
- kfree(fc);
+ kfree(get_fuse_conn_kobj(kobj));
}
static struct fuse_conn *new_conn(void)
@@ -386,64 +379,25 @@ static struct fuse_conn *new_conn(void)
fc = kzalloc(sizeof(*fc), GFP_KERNEL);
if (fc) {
- int i;
+ spin_lock_init(&fc->lock);
init_waitqueue_head(&fc->waitq);
+ init_waitqueue_head(&fc->blocked_waitq);
INIT_LIST_HEAD(&fc->pending);
INIT_LIST_HEAD(&fc->processing);
INIT_LIST_HEAD(&fc->io);
- INIT_LIST_HEAD(&fc->unused_list);
INIT_LIST_HEAD(&fc->background);
- sema_init(&fc->outstanding_sem, 1); /* One for INIT */
init_rwsem(&fc->sbput_sem);
kobj_set_kset_s(fc, connections_subsys);
kobject_init(&fc->kobj);
atomic_set(&fc->num_waiting, 0);
- for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
- struct fuse_req *req = fuse_request_alloc();
- if (!req) {
- kobject_put(&fc->kobj);
- return NULL;
- }
- list_add(&req->list, &fc->unused_list);
- }
fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
fc->bdi.unplug_io_fn = default_unplug_io_fn;
fc->reqctr = 0;
+ fc->blocked = 1;
}
return fc;
}
-static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
-{
- struct fuse_conn *fc;
- int err;
-
- err = -EINVAL;
- if (file->f_op != &fuse_dev_operations)
- goto out_err;
-
- err = -ENOMEM;
- fc = new_conn();
- if (!fc)
- goto out_err;
-
- spin_lock(&fuse_lock);
- err = -EINVAL;
- if (file->private_data)
- goto out_unlock;
-
- kobject_get(&fc->kobj);
- file->private_data = fc;
- spin_unlock(&fuse_lock);
- return fc;
-
- out_unlock:
- spin_unlock(&fuse_lock);
- kobject_put(&fc->kobj);
- out_err:
- return ERR_PTR(err);
-}
-
static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
{
struct fuse_attr attr;
@@ -467,7 +421,6 @@ static struct super_operations fuse_super_operations = {
static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
- int i;
struct fuse_init_out *arg = &req->misc.init_out;
if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
@@ -486,22 +439,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->minor = arg->minor;
fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
}
-
- /* After INIT reply is received other requests can go
- out. So do (FUSE_MAX_OUTSTANDING - 1) number of
- up()s on outstanding_sem. The last up() is done in
- fuse_putback_request() */
- for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
- up(&fc->outstanding_sem);
-
fuse_put_request(fc, req);
+ fc->blocked = 0;
+ wake_up_all(&fc->blocked_waitq);
}
-static void fuse_send_init(struct fuse_conn *fc)
+static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
{
- /* This is called from fuse_read_super() so there's guaranteed
- to be exactly one request available */
- struct fuse_req *req = fuse_get_request(fc);
struct fuse_init_in *arg = &req->misc.init_in;
arg->major = FUSE_KERNEL_VERSION;
@@ -525,12 +469,9 @@ static void fuse_send_init(struct fuse_conn *fc)
static unsigned long long conn_id(void)
{
+ /* BKL is held for ->get_sb() */
static unsigned long long ctr = 1;
- unsigned long long val;
- spin_lock(&fuse_lock);
- val = ctr++;
- spin_unlock(&fuse_lock);
- return val;
+ return ctr++;
}
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
@@ -540,6 +481,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
struct fuse_mount_data d;
struct file *file;
struct dentry *root_dentry;
+ struct fuse_req *init_req;
int err;
if (!parse_fuse_opt((char *) data, &d))
@@ -555,10 +497,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (!file)
return -EINVAL;
- fc = get_conn(file, sb);
- fput(file);
- if (IS_ERR(fc))
- return PTR_ERR(fc);
+ if (file->f_op != &fuse_dev_operations)
+ return -EINVAL;
+
+ /* Setting file->private_data can't race with other mount()
+ instances, since BKL is held for ->get_sb() */
+ if (file->private_data)
+ return -EINVAL;
+
+ fc = new_conn();
+ if (!fc)
+ return -ENOMEM;
fc->flags = d.flags;
fc->user_id = d.user_id;
@@ -579,27 +528,40 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err;
}
+ init_req = fuse_request_alloc();
+ if (!init_req)
+ goto err_put_root;
+
err = kobject_set_name(&fc->kobj, "%llu", conn_id());
if (err)
- goto err_put_root;
+ goto err_free_req;
err = kobject_add(&fc->kobj);
if (err)
- goto err_put_root;
+ goto err_free_req;
sb->s_root = root_dentry;
- spin_lock(&fuse_lock);
fc->mounted = 1;
fc->connected = 1;
- spin_unlock(&fuse_lock);
+ kobject_get(&fc->kobj);
+ file->private_data = fc;
+ /*
+ * atomic_dec_and_test() in fput() provides the necessary
+ * memory barrier for file->private_data to be visible on all
+ * CPUs after this
+ */
+ fput(file);
- fuse_send_init(fc);
+ fuse_send_init(fc, init_req);
return 0;
+ err_free_req:
+ fuse_request_free(init_req);
err_put_root:
dput(root_dentry);
err:
+ fput(file);
kobject_put(&fc->kobj);
return err;
}
@@ -753,7 +715,6 @@ static int __init fuse_init(void)
printk("fuse init (API version %i.%i)\n",
FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
- spin_lock_init(&fuse_lock);
res = fuse_fs_init();
if (res)
goto err;
diff --git a/fs/inotify.c b/fs/inotify.c
index 367c487c014..1f50302849c 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -538,7 +538,7 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
spin_lock(&entry->d_lock);
parent = entry->d_parent;
- if (inotify_inode_watched(parent->d_inode))
+ if (parent->d_inode && inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
spin_unlock(&entry->d_lock);
}
diff --git a/fs/namespace.c b/fs/namespace.c
index bf478addb85..2c5f1f80bdc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -899,11 +899,13 @@ static int do_change_type(struct nameidata *nd, int flag)
/*
* do loopback mount.
*/
-static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
+static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
{
struct nameidata old_nd;
struct vfsmount *mnt = NULL;
+ int recurse = flags & MS_REC;
int err = mount_is_safe(nd);
+
if (err)
return err;
if (!old_name || !*old_name)
@@ -937,6 +939,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
spin_unlock(&vfsmount_lock);
release_mounts(&umount_list);
}
+ mnt->mnt_flags = mnt_flags;
out:
up_write(&namespace_sem);
@@ -1350,7 +1353,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
data_page);
else if (flags & MS_BIND)
- retval = do_loopback(&nd, dev_name, flags & MS_REC);
+ retval = do_loopback(&nd, dev_name, flags, mnt_flags);
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&nd, flags);
else if (flags & MS_MOVE)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index cfe9ce88161..6e92b0fe532 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -14,46 +14,46 @@
int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
{
- struct svc_cred *cred = &rqstp->rq_cred;
+ struct svc_cred cred = rqstp->rq_cred;
int i;
int ret;
if (exp->ex_flags & NFSEXP_ALLSQUASH) {
- cred->cr_uid = exp->ex_anon_uid;
- cred->cr_gid = exp->ex_anon_gid;
- put_group_info(cred->cr_group_info);
- cred->cr_group_info = groups_alloc(0);
+ cred.cr_uid = exp->ex_anon_uid;
+ cred.cr_gid = exp->ex_anon_gid;
+ cred.cr_group_info = groups_alloc(0);
} else if (exp->ex_flags & NFSEXP_ROOTSQUASH) {
struct group_info *gi;
- if (!cred->cr_uid)
- cred->cr_uid = exp->ex_anon_uid;
- if (!cred->cr_gid)
- cred->cr_gid = exp->ex_anon_gid;
- gi = groups_alloc(cred->cr_group_info->ngroups);
+ if (!cred.cr_uid)
+ cred.cr_uid = exp->ex_anon_uid;
+ if (!cred.cr_gid)
+ cred.cr_gid = exp->ex_anon_gid;
+ gi = groups_alloc(cred.cr_group_info->ngroups);
if (gi)
- for (i = 0; i < cred->cr_group_info->ngroups; i++) {
- if (!GROUP_AT(cred->cr_group_info, i))
+ for (i = 0; i < cred.cr_group_info->ngroups; i++) {
+ if (!GROUP_AT(cred.cr_group_info, i))
GROUP_AT(gi, i) = exp->ex_anon_gid;
else
- GROUP_AT(gi, i) = GROUP_AT(cred->cr_group_info, i);
+ GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i);
}
- put_group_info(cred->cr_group_info);
- cred->cr_group_info = gi;
- }
+ cred.cr_group_info = gi;
+ } else
+ get_group_info(cred.cr_group_info);
- if (cred->cr_uid != (uid_t) -1)
- current->fsuid = cred->cr_uid;
+ if (cred.cr_uid != (uid_t) -1)
+ current->fsuid = cred.cr_uid;
else
current->fsuid = exp->ex_anon_uid;
- if (cred->cr_gid != (gid_t) -1)
- current->fsgid = cred->cr_gid;
+ if (cred.cr_gid != (gid_t) -1)
+ current->fsgid = cred.cr_gid;
else
current->fsgid = exp->ex_anon_gid;
- if (!cred->cr_group_info)
+ if (!cred.cr_group_info)
return -ENOMEM;
- ret = set_current_groups(cred->cr_group_info);
- if ((cred->cr_uid)) {
+ ret = set_current_groups(cred.cr_group_info);
+ put_group_info(cred.cr_group_info);
+ if ((cred.cr_uid)) {
cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
} else {
cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c340be0a3f5..4e0578121d9 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -422,7 +422,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
goto out;
err = path_lookup(buf, 0, &nd);
- if (err) goto out;
+ if (err) goto out_no_path;
exp.h.flags = 0;
exp.ex_client = dom;
@@ -475,6 +475,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
out:
if (nd.dentry)
path_release(&nd);
+ out_no_path:
if (dom)
auth_domain_put(dom);
kfree(buf);
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 6d2dfed1de0..f61142afea4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -682,7 +682,7 @@ static struct svc_procedure nfsd_procedures3[22] = {
PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1),
PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
- PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE),
+ PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4),
PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 7391f4aabed..edb107e61b9 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -710,9 +710,9 @@ calculate_posix_ace_count(struct nfs4_acl *n4acl)
/* Also, the remaining entries are for named users and
* groups, and come in threes (mask, allow, deny): */
if (n4acl->naces < 7)
- return -1;
+ return -EINVAL;
if ((n4acl->naces - 7) % 3)
- return -1;
+ return -EINVAL;
return 4 + (n4acl->naces - 7)/3;
}
}
@@ -790,7 +790,7 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
continue;
error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
- ace->access_mask, ace->whotype, ace->who) == -1;
+ ace->access_mask, ace->whotype, ace->who);
if (error < 0)
goto out;
@@ -866,7 +866,7 @@ nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
struct nfs4_ace *ace;
if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL)
- return -1;
+ return -ENOMEM;
ace->type = type;
ace->flag = flag;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c872bd07fc1..dbaf3f93f32 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -441,8 +441,9 @@ nfsd4_probe_callback(struct nfs4_client *clp)
goto out_clnt;
}
- /* the task holds a reference to the nfs4_client struct */
cb->cb_client = clnt;
+
+ /* the task holds a reference to the nfs4_client struct */
atomic_inc(&clp->cl_count);
msg.rpc_cred = nfsd4_lookupcred(clp,0);
@@ -460,13 +461,12 @@ nfsd4_probe_callback(struct nfs4_client *clp)
out_rpciod:
atomic_dec(&clp->cl_count);
rpciod_down();
+ cb->cb_client = NULL;
out_clnt:
rpc_shutdown_client(clnt);
- goto out_err;
out_err:
dprintk("NFSD: warning: no callback path to client %.*s\n",
(int)clp->cl_name.len, clp->cl_name.data);
- cb->cb_client = NULL;
}
static void
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6d63f1d9e5f..b0e095ea0c0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -288,8 +288,6 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
fh_put(current_fh);
status = exp_pseudoroot(rqstp->rq_client, current_fh,
&rqstp->rq_chandle);
- if (!status)
- status = nfserrno(nfsd_setuser(rqstp, current_fh->fh_export));
return status;
}
@@ -975,7 +973,7 @@ struct nfsd4_voidargs { int dummy; };
*/
static struct svc_procedure nfsd_procedures4[2] = {
PROC(null, void, void, void, RC_NOCACHE, 1),
- PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE)
+ PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4)
};
struct svc_version nfsd_version4 = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47ec112b266..96c7578cbe1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -147,6 +147,42 @@ get_nfs4_file(struct nfs4_file *fi)
kref_get(&fi->fi_ref);
}
+static int num_delegations;
+
+/*
+ * Open owner state (share locks)
+ */
+
+/* hash tables for nfs4_stateowner */
+#define OWNER_HASH_BITS 8
+#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
+#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
+
+#define ownerid_hashval(id) \
+ ((id) & OWNER_HASH_MASK)
+#define ownerstr_hashval(clientid, ownername) \
+ (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
+
+static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
+static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
+
+/* hash table for nfs4_file */
+#define FILE_HASH_BITS 8
+#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
+#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
+/* hash table for (open)nfs4_stateid */
+#define STATEID_HASH_BITS 10
+#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
+#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
+
+#define file_hashval(x) \
+ hash_ptr(x, FILE_HASH_BITS)
+#define stateid_hashval(owner_id, file_id) \
+ (((owner_id) + (file_id)) & STATEID_HASH_MASK)
+
+static struct list_head file_hashtbl[FILE_HASH_SIZE];
+static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
+
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
{
@@ -155,9 +191,12 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
dprintk("NFSD alloc_init_deleg\n");
+ if (num_delegations > STATEID_HASH_SIZE * 4)
+ return NULL;
dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
if (dp == NULL)
return dp;
+ num_delegations++;
INIT_LIST_HEAD(&dp->dl_perfile);
INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru);
@@ -192,6 +231,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
dprintk("NFSD: freeing dp %p\n",dp);
put_nfs4_file(dp->dl_file);
kmem_cache_free(deleg_slab, dp);
+ num_delegations--;
}
}
@@ -330,22 +370,29 @@ put_nfs4_client(struct nfs4_client *clp)
}
static void
+shutdown_callback_client(struct nfs4_client *clp)
+{
+ struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+
+ /* shutdown rpc client, ending any outstanding recall rpcs */
+ if (clnt) {
+ clp->cl_callback.cb_client = NULL;
+ rpc_shutdown_client(clnt);
+ rpciod_down();
+ }
+}
+
+static void
expire_client(struct nfs4_client *clp)
{
struct nfs4_stateowner *sop;
struct nfs4_delegation *dp;
- struct nfs4_callback *cb = &clp->cl_callback;
- struct rpc_clnt *clnt = clp->cl_callback.cb_client;
struct list_head reaplist;
dprintk("NFSD: expire_client cl_count %d\n",
atomic_read(&clp->cl_count));
- /* shutdown rpc client, ending any outstanding recall rpcs */
- if (atomic_read(&cb->cb_set) == 1 && clnt) {
- rpc_shutdown_client(clnt);
- clnt = clp->cl_callback.cb_client = NULL;
- }
+ shutdown_callback_client(clp);
INIT_LIST_HEAD(&reaplist);
spin_lock(&recall_lock);
@@ -936,40 +983,6 @@ out:
return status;
}
-/*
- * Open owner state (share locks)
- */
-
-/* hash tables for nfs4_stateowner */
-#define OWNER_HASH_BITS 8
-#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
-#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
-
-#define ownerid_hashval(id) \
- ((id) & OWNER_HASH_MASK)
-#define ownerstr_hashval(clientid, ownername) \
- (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
-
-static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
-static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
-
-/* hash table for nfs4_file */
-#define FILE_HASH_BITS 8
-#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
-#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
-/* hash table for (open)nfs4_stateid */
-#define STATEID_HASH_BITS 10
-#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
-#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
-
-#define file_hashval(x) \
- hash_ptr(x, FILE_HASH_BITS)
-#define stateid_hashval(owner_id, file_id) \
- (((owner_id) + (file_id)) & STATEID_HASH_MASK)
-
-static struct list_head file_hashtbl[FILE_HASH_SIZE];
-static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
-
/* OPEN Share state helper functions */
static inline struct nfs4_file *
alloc_init_file(struct inode *ino)
@@ -1186,8 +1199,7 @@ move_to_close_lru(struct nfs4_stateowner *sop)
{
dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
- unhash_stateowner(sop);
- list_add_tail(&sop->so_close_lru, &close_lru);
+ list_move_tail(&sop->so_close_lru, &close_lru);
sop->so_time = get_seconds();
}
@@ -1916,8 +1928,7 @@ nfs4_laundromat(void)
}
dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
sop->so_id);
- list_del(&sop->so_close_lru);
- nfs4_put_stateowner(sop);
+ release_stateowner(sop);
}
if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
@@ -2495,36 +2506,27 @@ nfs4_transform_lock_offset(struct file_lock *lock)
lock->fl_end = OFFSET_MAX;
}
-static int
-nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
-{
- struct nfs4_stateowner *local = NULL;
- int status = 0;
-
- if (hashval >= LOCK_HASH_SIZE)
- goto out;
- list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], so_idhash) {
- if (local == sop) {
- status = 1;
- goto out;
- }
- }
-out:
- return status;
-}
-
+/* Hack!: For now, we're defining this just so we can use a pointer to it
+ * as a unique cookie to identify our (NFSv4's) posix locks. */
+static struct lock_manager_operations nfsd_posix_mng_ops = {
+};
static inline void
nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
{
- struct nfs4_stateowner *sop = (struct nfs4_stateowner *) fl->fl_owner;
- unsigned int hval = lockownerid_hashval(sop->so_id);
+ struct nfs4_stateowner *sop;
+ unsigned int hval;
- deny->ld_sop = NULL;
- if (nfs4_verify_lock_stateowner(sop, hval)) {
+ if (fl->fl_lmops == &nfsd_posix_mng_ops) {
+ sop = (struct nfs4_stateowner *) fl->fl_owner;
+ hval = lockownerid_hashval(sop->so_id);
kref_get(&sop->so_ref);
deny->ld_sop = sop;
deny->ld_clientid = sop->so_client->cl_clientid;
+ } else {
+ deny->ld_sop = NULL;
+ deny->ld_clientid.cl_boot = 0;
+ deny->ld_clientid.cl_id = 0;
}
deny->ld_start = fl->fl_start;
deny->ld_length = ~(u64)0;
@@ -2736,6 +2738,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
file_lock.fl_pid = current->tgid;
file_lock.fl_file = filp;
file_lock.fl_flags = FL_POSIX;
+ file_lock.fl_lmops = &nfsd_posix_mng_ops;
file_lock.fl_start = lock->lk_offset;
if ((lock->lk_length == ~(u64)0) ||
@@ -2841,6 +2844,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
file_lock.fl_pid = current->tgid;
file_lock.fl_flags = FL_POSIX;
+ file_lock.fl_lmops = &nfsd_posix_mng_ops;
file_lock.fl_start = lockt->lt_offset;
if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length))
@@ -2900,6 +2904,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
file_lock.fl_pid = current->tgid;
file_lock.fl_file = filp;
file_lock.fl_flags = FL_POSIX;
+ file_lock.fl_lmops = &nfsd_posix_mng_ops;
file_lock.fl_start = locku->lu_offset;
if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
@@ -3211,15 +3216,8 @@ __nfs4_state_shutdown(void)
int i;
struct nfs4_client *clp = NULL;
struct nfs4_delegation *dp = NULL;
- struct nfs4_stateowner *sop = NULL;
struct list_head *pos, *next, reaplist;
- list_for_each_safe(pos, next, &close_lru) {
- sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
- list_del(&sop->so_close_lru);
- nfs4_put_stateowner(sop);
- }
-
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&conf_id_hashtbl[i])) {
clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -3244,8 +3242,6 @@ __nfs4_state_shutdown(void)
}
cancel_delayed_work(&laundromat_work);
- flush_workqueue(laundry_wq);
- destroy_workqueue(laundry_wq);
nfsd4_shutdown_recdir();
nfs4_init = 0;
}
@@ -3253,6 +3249,8 @@ __nfs4_state_shutdown(void)
void
nfs4_state_shutdown(void)
{
+ cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
+ destroy_workqueue(laundry_wq);
nfs4_lock_state();
nfs4_release_reclaim();
__nfs4_state_shutdown();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 03857fd8112..de3998f15f1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -299,11 +299,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
buf, dummy32, &ace.who);
if (status)
goto out_nfserr;
- if (nfs4_acl_add_ace(*acl, ace.type, ace.flag,
- ace.access_mask, ace.whotype, ace.who) != 0) {
- status = -ENOMEM;
+ status = nfs4_acl_add_ace(*acl, ace.type, ace.flag,
+ ace.access_mask, ace.whotype, ace.who);
+ if (status)
goto out_nfserr;
- }
}
} else
*acl = NULL;
@@ -2085,27 +2084,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
WRITE32(eof);
WRITE32(maxcount);
ADJUST_ARGS();
- resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
-
+ resp->xbuf->head[0].iov_len = (char*)p
+ - (char*)resp->xbuf->head[0].iov_base;
resp->xbuf->page_len = maxcount;
- /* read zero bytes -> don't set up tail */
- if(!maxcount)
- return 0;
-
- /* set up page for remaining responses */
- svc_take_page(resp->rqstp);
- resp->xbuf->tail[0].iov_base =
- page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
- resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+ /* Use rest of head for padding and remaining ops: */
+ resp->rqstp->rq_restailpage = 0;
+ resp->xbuf->tail[0].iov_base = p;
resp->xbuf->tail[0].iov_len = 0;
- resp->p = resp->xbuf->tail[0].iov_base;
- resp->end = resp->p + PAGE_SIZE/4;
-
if (maxcount&3) {
- *(resp->p)++ = 0;
+ RESERVE_SPACE(4);
+ WRITE32(0);
resp->xbuf->tail[0].iov_base += maxcount&3;
resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
+ ADJUST_ARGS();
}
return 0;
}
@@ -2142,21 +2134,20 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
WRITE32(maxcount);
ADJUST_ARGS();
- resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+ resp->xbuf->head[0].iov_len = (char*)p
+ - (char*)resp->xbuf->head[0].iov_base;
+ resp->xbuf->page_len = maxcount;
- svc_take_page(resp->rqstp);
- resp->xbuf->tail[0].iov_base =
- page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
- resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+ /* Use rest of head for padding and remaining ops: */
+ resp->rqstp->rq_restailpage = 0;
+ resp->xbuf->tail[0].iov_base = p;
resp->xbuf->tail[0].iov_len = 0;
- resp->p = resp->xbuf->tail[0].iov_base;
- resp->end = resp->p + PAGE_SIZE/4;
-
- resp->xbuf->page_len = maxcount;
if (maxcount&3) {
- *(resp->p)++ = 0;
+ RESERVE_SPACE(4);
+ WRITE32(0);
resp->xbuf->tail[0].iov_base += maxcount&3;
resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
+ ADJUST_ARGS();
}
return 0;
}
@@ -2166,7 +2157,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
{
int maxcount;
loff_t offset;
- u32 *page, *savep;
+ u32 *page, *savep, *tailbase;
ENCODE_HEAD;
if (nfserr)
@@ -2182,6 +2173,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
WRITE32(0);
ADJUST_ARGS();
resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+ tailbase = p;
maxcount = PAGE_SIZE;
if (maxcount > readdir->rd_maxcount)
@@ -2226,14 +2218,12 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
*p++ = htonl(readdir->common.err == nfserr_eof);
resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
- /* allocate a page for the tail */
- svc_take_page(resp->rqstp);
- resp->xbuf->tail[0].iov_base =
- page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
- resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+ /* Use rest of head for padding and remaining ops: */
+ resp->rqstp->rq_restailpage = 0;
+ resp->xbuf->tail[0].iov_base = tailbase;
resp->xbuf->tail[0].iov_len = 0;
resp->p = resp->xbuf->tail[0].iov_base;
- resp->end = resp->p + PAGE_SIZE/4;
+ resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;
return 0;
err_no_verf:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 3e6b75cd90f..06cd0db0f32 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
PROC(none, void, void, none, RC_NOCACHE, ST),
PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
- PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE),
+ PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
PROC(none, void, void, none, RC_NOCACHE, ST),
PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 31018333dc3..6aa92d0e687 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -371,7 +371,6 @@ out_nfserr:
static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
{
ssize_t buflen;
- int error;
buflen = vfs_getxattr(dentry, key, NULL, 0);
if (buflen <= 0)
@@ -381,10 +380,7 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
if (!*buf)
return -ENOMEM;
- error = vfs_getxattr(dentry, key, *buf, buflen);
- if (error < 0)
- return error;
- return buflen;
+ return vfs_getxattr(dentry, key, *buf, buflen);
}
#endif
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d0686..21f38accd03 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
struct o2hb_bio_wait_ctxt {
atomic_t wc_num_reqs;
struct completion wc_io_complete;
+ int wc_error;
};
static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
{
atomic_set(&wc->wc_num_reqs, num_ios);
init_completion(&wc->wc_io_complete);
+ wc->wc_error = 0;
}
/* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *bio,
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
- if (error)
+ if (error) {
mlog(ML_ERROR, "IO Error %d\n", error);
+ wc->wc_error = error;
+ }
if (bio->bi_size)
return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
bail_and_wait:
o2hb_wait_on_io(reg, &wc);
+ if (wc.wc_error && !status)
+ status = wc.wc_error;
if (bios) {
for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned long *nodes,
return highest;
}
-static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
+static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
{
int i, ret, highest_node, change = 0;
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
struct bio *write_bio;
struct o2hb_bio_wait_ctxt write_wc;
- if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
- return;
+ ret = o2nm_configured_node_map(configured_nodes,
+ sizeof(configured_nodes));
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
if (highest_node >= O2NM_MAX_NODES) {
mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
- return;
+ return -EINVAL;
}
/* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
ret = o2hb_read_slots(reg, highest_node + 1);
if (ret < 0) {
mlog_errno(ret);
- return;
+ return ret;
}
/* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
if (ret < 0) {
mlog_errno(ret);
- return;
+ return ret;
}
i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
*/
o2hb_wait_on_io(reg, &write_wc);
bio_put(write_bio);
+ if (write_wc.wc_error) {
+ /* Do not re-arm the write timeout on I/O error - we
+ * can't be sure that the new block ever made it to
+ * disk */
+ mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
+ write_wc.wc_error, reg->hr_dev_name);
+ return write_wc.wc_error;
+ }
+
o2hb_arm_write_timeout(reg);
/* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
if (atomic_dec_and_test(&reg->hr_steady_iterations))
wake_up(&o2hb_steady_queue);
}
+
+ return 0;
}
/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
* likely to time itself out. */
do_gettimeofday(&before_hb);
- o2hb_do_disk_heartbeat(reg);
+ i = 0;
+ do {
+ ret = o2hb_do_disk_heartbeat(reg);
+ } while (ret && ++i < 2);
do_gettimeofday(&after_hb);
elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4744e..74ca4e5f976 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
return;
}
+ mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
+ "Lockres %s, requested ivmode. flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+
/* we're downconverting. */
if (lockres->l_requested < lockres->l_level) {
if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
- if (status != DLM_NORMAL)
+ if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
mlog(ML_ERROR, "Dlm returns status %d\n", status);
spin_lock(&lockres->l_lock);
- if (lockres->l_flags & USER_LOCK_IN_TEARDOWN)
+ /* The teardown flag gets set early during the unlock process,
+ * so test the cancel flag to make sure that this ast isn't
+ * for a concurrent cancel. */
+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
+ && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
lockres->l_level = LKM_IVMODE;
- else {
+ } else if (status == DLM_CANCELGRANT) {
+ mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+ /* We tried to cancel a convert request, but it was
+ * already granted. Don't clear the busy flag - the
+ * ast should've done this already. */
+ BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+ lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
+ goto out_noclear;
+ } else {
+ BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+ /* Cancel succeeded, we want to re-queue */
+ mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
lockres->l_requested = LKM_IVMODE; /* cancel an
* upconvert
* request. */
lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
/* we want the unblock thread to look at it again
* now. */
- __user_dlm_queue_lockres(lockres);
+ if (lockres->l_flags & USER_LOCK_BLOCKED)
+ __user_dlm_queue_lockres(lockres);
}
lockres->l_flags &= ~USER_LOCK_BUSY;
+out_noclear:
spin_unlock(&lockres->l_lock);
wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *opaque)
spin_lock(&lockres->l_lock);
- BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
- BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED));
+ mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
+ "Lockres %s, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
- /* notice that we don't clear USER_LOCK_BLOCKED here. That's
- * for user_ast to do. */
+ /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
+ * set, we want user_ast clear it. */
lockres->l_flags &= ~USER_LOCK_QUEUED;
+ /* It's valid to get here and no longer be blocked - if we get
+ * several basts in a row, we might be queued by the first
+ * one, the unblock thread might run and clear the queued
+ * flag, and finally we might get another bast which re-queues
+ * us before our ast for the downconvert is called. */
+ if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
+ mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
+ lockres->l_name, lockres->l_flags);
+ spin_unlock(&lockres->l_lock);
+ goto drop_ref;
+ }
+
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
mlog(0, "lock is in teardown so we do nothing\n");
spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *opaque)
}
if (lockres->l_flags & USER_LOCK_BUSY) {
- mlog(0, "BUSY flag detected...\n");
+ mlog(0, "Cancel lock %s, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+
if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
spin_unlock(&lockres->l_lock);
goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *opaque)
LKM_CANCEL,
user_unlock_ast,
lockres);
- if (status == DLM_CANCELGRANT) {
- /* If we got this, then the ast was fired
- * before we could cancel. We cleanup our
- * state, and restart the function. */
- spin_lock(&lockres->l_lock);
- lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
- spin_unlock(&lockres->l_lock);
- } else if (status != DLM_NORMAL)
+ if (status != DLM_NORMAL)
user_log_dlm_error("dlmunlock", status, lockres);
goto drop_ref;
}
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
mlog(0, "asked to destroy %s\n", lockres->l_name);
spin_lock(&lockres->l_lock);
+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+ mlog(0, "Lock is already torn down\n");
+ spin_unlock(&lockres->l_lock);
+ return 0;
+ }
+
+ lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
+
while (lockres->l_flags & USER_LOCK_BUSY) {
spin_unlock(&lockres->l_lock);
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
lockres->l_flags &= ~USER_LOCK_ATTACHED;
lockres->l_flags |= USER_LOCK_BUSY;
- lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
spin_unlock(&lockres->l_lock);
mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a6a46..581eb451a41 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct inode *inode,
if (new_i_size == le64_to_cpu(fe->i_size))
goto bail;
+ /* This forces other nodes to sync and drop their pages. Do
+ * this even if we have a truncate without allocation change -
+ * ocfs2 cluster sizes can be much greater than page size, so
+ * we have to truncate them anyway. */
+ status = ocfs2_data_lock(inode, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ ocfs2_data_unlock(inode, 1);
+
if (le32_to_cpu(fe->i_clusters) ==
ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct inode *inode,
goto bail;
}
- /* This forces other nodes to sync and drop their pages */
- status = ocfs2_data_lock(inode, 1);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- ocfs2_data_unlock(inode, 1);
-
/* alright, we're going to need to do a full blown alloc size
* change. Orphan the inode so that recovery can complete the
* truncate if necessary. This does the task of marking
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7efa73d44c9..20d4b2237fc 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -103,8 +103,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
size_t buflen, loff_t *fpos)
{
ssize_t acc = 0, tmp;
- size_t tsz, nr_bytes;
- u64 start;
+ size_t tsz;
+ u64 start, nr_bytes;
struct vmcore *curr_m = NULL;
if (buflen == 0 || *fpos >= vmcore_size)
diff --git a/fs/select.c b/fs/select.c
index 071660fa7b0..a8109baa5e4 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -310,8 +310,9 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
fd_set __user *exp, s64 *timeout)
{
fd_set_bits fds;
- char *bits;
- int ret, size, max_fdset;
+ void *bits;
+ int ret, max_fdset;
+ unsigned int size;
struct fdtable *fdt;
/* Allocate small arguments on the stack to save memory and be faster */
long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
@@ -333,20 +334,21 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
* since we used fdset we need to allocate memory in units of
* long-words.
*/
- ret = -ENOMEM;
size = FDS_BYTES(n);
- if (6*size < SELECT_STACK_ALLOC)
- bits = stack_fds;
- else
+ bits = stack_fds;
+ if (size > sizeof(stack_fds) / 6) {
+ /* Not enough space in on-stack array; must use kmalloc */
+ ret = -ENOMEM;
bits = kmalloc(6 * size, GFP_KERNEL);
- if (!bits)
- goto out_nofds;
- fds.in = (unsigned long *) bits;
- fds.out = (unsigned long *) (bits + size);
- fds.ex = (unsigned long *) (bits + 2*size);
- fds.res_in = (unsigned long *) (bits + 3*size);
- fds.res_out = (unsigned long *) (bits + 4*size);
- fds.res_ex = (unsigned long *) (bits + 5*size);
+ if (!bits)
+ goto out_nofds;
+ }
+ fds.in = bits;
+ fds.out = bits + size;
+ fds.ex = bits + 2*size;
+ fds.res_in = bits + 3*size;
+ fds.res_out = bits + 4*size;
+ fds.res_ex = bits + 5*size;
if ((ret = get_fd_set(n, inp, fds.in)) ||
(ret = get_fd_set(n, outp, fds.out)) ||
diff --git a/fs/sync.c b/fs/sync.c
index 8616006d209..aab5ffe77e9 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -61,7 +61,7 @@
* will be available after a crash.
*/
asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
- int flags)
+ unsigned int flags)
{
int ret;
struct file *file;
@@ -126,7 +126,7 @@ out:
* `endbyte' is inclusive
*/
int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
- int flags)
+ unsigned int flags)
{
int ret;
struct address_space *mapping;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 6cbbd165c60..4d191ef39b6 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -870,12 +870,14 @@ xfs_page_state_convert(
pgoff_t end_index, last_index, tlast;
ssize_t size, len;
int flags, err, iomap_valid = 0, uptodate = 1;
- int page_dirty, count = 0, trylock_flag = 0;
+ int page_dirty, count = 0;
+ int trylock = 0;
int all_bh = unmapped;
- /* wait for other IO threads? */
- if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
- trylock_flag |= BMAPI_TRYLOCK;
+ if (startio) {
+ if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
+ trylock |= BMAPI_TRYLOCK;
+ }
/* Is this page beyond the end of the file? */
offset = i_size_read(inode);
@@ -956,15 +958,13 @@ xfs_page_state_convert(
if (buffer_unwritten(bh)) {
type = IOMAP_UNWRITTEN;
- flags = BMAPI_WRITE|BMAPI_IGNSTATE;
+ flags = BMAPI_WRITE | BMAPI_IGNSTATE;
} else if (buffer_delay(bh)) {
type = IOMAP_DELAY;
- flags = BMAPI_ALLOCATE;
- if (!startio)
- flags |= trylock_flag;
+ flags = BMAPI_ALLOCATE | trylock;
} else {
type = IOMAP_NEW;
- flags = BMAPI_WRITE|BMAPI_MMAP;
+ flags = BMAPI_WRITE | BMAPI_MMAP;
}
if (!iomap_valid) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9fb0312665c..26fed0756f0 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -182,7 +182,7 @@ free_address(
{
a_list_t *aentry;
- aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
+ aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
if (likely(aentry)) {
spin_lock(&as_lock);
aentry->next = as_free_head;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 149237304fb..2e2e275c786 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,8 +673,7 @@ xfs_vn_setattr(
if (ia_valid & ATTR_ATIME) {
vattr.va_mask |= XFS_AT_ATIME;
vattr.va_atime = attr->ia_atime;
- if (ia_valid & ATTR_ATIME_SET)
- inode->i_atime = attr->ia_atime;
+ inode->i_atime = attr->ia_atime;
}
if (ia_valid & ATTR_MTIME) {
vattr.va_mask |= XFS_AT_MTIME;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 4eeb856183b..deddbd03c16 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -158,9 +158,10 @@ xfs_ialloc_ag_alloc(
*/
agi = XFS_BUF_TO_AGI(agbp);
newino = be32_to_cpu(agi->agi_newino);
- if(likely(newino != NULLAGINO)) {
- args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
- XFS_IALLOC_BLOCKS(args.mp);
+ args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
+ XFS_IALLOC_BLOCKS(args.mp);
+ if (likely(newino != NULLAGINO &&
+ (args.agbno < be32_to_cpu(agi->agi_length)))) {
args.fsbno = XFS_AGB_TO_FSB(args.mp,
be32_to_cpu(agi->agi_seqno), args.agbno);
args.type = XFS_ALLOCTYPE_THIS_BNO;
@@ -182,8 +183,8 @@ xfs_ialloc_ag_alloc(
* Set the alignment for the allocation.
* If stripe alignment is turned on then align at stripe unit
* boundary.
- * If the cluster size is smaller than a filesystem block
- * then we're doing I/O for inodes in filesystem block size
+ * If the cluster size is smaller than a filesystem block
+ * then we're doing I/O for inodes in filesystem block size
* pieces, so don't need alignment anyway.
*/
isaligned = 0;
@@ -192,7 +193,7 @@ xfs_ialloc_ag_alloc(
args.alignment = args.mp->m_dalign;
isaligned = 1;
} else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
- args.mp->m_sb.sb_inoalignmt >=
+ args.mp->m_sb.sb_inoalignmt >=
XFS_B_TO_FSBT(args.mp,
XFS_INODE_CLUSTER_SIZE(args.mp)))
args.alignment = args.mp->m_sb.sb_inoalignmt;
@@ -220,7 +221,7 @@ xfs_ialloc_ag_alloc(
if ((error = xfs_alloc_vextent(&args)))
return error;
}
-
+
/*
* If stripe alignment is turned on, then try again with cluster
* alignment.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index bb33113eef9..b5385432526 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -421,7 +421,10 @@ finish_inode:
ip->i_chash = chlnew;
chlnew->chl_ip = ip;
chlnew->chl_blkno = ip->i_blkno;
+ if (ch->ch_list)
+ ch->ch_list->chl_prev = chlnew;
chlnew->chl_next = ch->ch_list;
+ chlnew->chl_prev = NULL;
ch->ch_list = chlnew;
chlnew = NULL;
}
@@ -723,23 +726,15 @@ xfs_iextract(
ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
ASSERT(ip->i_chash != NULL);
chm=NULL;
- for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
- if (chl->chl_blkno == ip->i_blkno) {
- if (chm == NULL) {
- /* first item on the list */
- ch->ch_list = chl->chl_next;
- } else {
- chm->chl_next = chl->chl_next;
- }
- kmem_zone_free(xfs_chashlist_zone, chl);
- break;
- } else {
- ASSERT(chl->chl_ip != ip);
- chm = chl;
- }
- }
- ASSERT_ALWAYS(chl != NULL);
- } else {
+ chl = ip->i_chash;
+ if (chl->chl_prev)
+ chl->chl_prev->chl_next = chl->chl_next;
+ else
+ ch->ch_list = chl->chl_next;
+ if (chl->chl_next)
+ chl->chl_next->chl_prev = chl->chl_prev;
+ kmem_zone_free(xfs_chashlist_zone, chl);
+ } else {
/* delete one inode from a non-empty list */
iq = ip->i_cnext;
iq->i_cprev = ip->i_cprev;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 48146bdc6bd..94b60dd0380 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2732,16 +2732,29 @@ xfs_iunpin(
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount)) {
- vnode_t *vp = XFS_ITOV_NULL(ip);
+ /*
+ * If the inode is currently being reclaimed, the
+ * linux inode _and_ the xfs vnode may have been
+ * freed so we cannot reference either of them safely.
+ * Hence we should not try to do anything to them
+ * if the xfs inode is currently in the reclaim
+ * path.
+ *
+ * However, we still need to issue the unpin wakeup
+ * call as the inode reclaim may be blocked waiting for
+ * the inode to become unpinned.
+ */
+ if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
+ vnode_t *vp = XFS_ITOV_NULL(ip);
- /* make sync come back and flush this inode */
- if (vp) {
- struct inode *inode = vn_to_inode(vp);
+ /* make sync come back and flush this inode */
+ if (vp) {
+ struct inode *inode = vn_to_inode(vp);
- if (!(inode->i_state & I_NEW))
- mark_inode_dirty_sync(inode);
+ if (!(inode->i_state & I_NEW))
+ mark_inode_dirty_sync(inode);
+ }
}
-
wake_up(&ip->i_ipin_wait);
}
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 39ef9c36ea5..3b544db1790 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -189,6 +189,7 @@ typedef struct xfs_ihash {
*/
typedef struct xfs_chashlist {
struct xfs_chashlist *chl_next;
+ struct xfs_chashlist *chl_prev;
struct xfs_inode *chl_ip;
xfs_daddr_t chl_blkno; /* starting block number of
* the cluster */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 049fabb7f7e..c0b1c290688 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -270,7 +270,7 @@ xfs_mount_validate_sb(
(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
- (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) {
+ (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
return XFS_ERROR(EFSCORRUPTED);
}