From 0cc6eee130b0c062feec8446d9cecdb17d2cfad3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:14:53 -0400 Subject: xfs: avoid memory allocation under m_peraglock in growfs code Allocate the memory for the larger m_perag array before taking the per-AG lock as the per-AG lock can be taken under the i_lock which can be taken from reclaim context. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_fsops.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index cbd451bb484..2d0b3e1da9e 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -167,17 +167,25 @@ xfs_growfs_data_private( new = nb - mp->m_sb.sb_dblocks; oagcount = mp->m_sb.sb_agcount; if (nagcount > oagcount) { + void *new_perag, *old_perag; + xfs_filestream_flush(mp); + + new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount, + KM_MAYFAIL); + if (!new_perag) + return XFS_ERROR(ENOMEM); + down_write(&mp->m_peraglock); - mp->m_perag = kmem_realloc(mp->m_perag, - sizeof(xfs_perag_t) * nagcount, - sizeof(xfs_perag_t) * oagcount, - KM_SLEEP); - memset(&mp->m_perag[oagcount], 0, - (nagcount - oagcount) * sizeof(xfs_perag_t)); + memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount); + old_perag = mp->m_perag; + mp->m_perag = new_perag; + mp->m_flags |= XFS_MOUNT_32BITINODES; nagimax = xfs_initialize_perag(mp, nagcount); up_write(&mp->m_peraglock); + + kmem_free(old_perag); } tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); tp->t_flags |= XFS_TRANS_RESERVE; -- cgit From ca35dcd6cae7d4a780c484c53f45548c4719f82c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:14:54 -0400 Subject: xfs: switch to NOFS allocation under i_lock in xfs_getbmap xfs_getbmap allocates memory with i_lock held, but i_lock is taken in reclaim context so all allocations under it must avoid recursions into the filesystem. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 7928b9983c1..8ee5b5a76a2 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -6009,7 +6009,7 @@ xfs_getbmap( */ error = ENOMEM; subnex = 16; - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); + map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); if (!map) goto out_unlock_ilock; -- cgit From f41d7fb9da05b604f8a69fb6cac2a0563c8ede4e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:14:55 -0400 Subject: xfs: switch to NOFS allocation under i_lock in xfs_da_state_alloc xfs_da_state_alloc is always called with i_lock held, but i_lock is taken in reclaim context so all allocations under it must avoid recursions into the filesystem. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_da_btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 9ff6e57a507..bd0bb6dfcdf 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */ xfs_da_state_t * xfs_da_state_alloc(void) { - return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); + return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS); } /* -- cgit From 73195ed7864ae4a1fb0bea2ed9df59d19b4fde90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:14:56 -0400 Subject: xfs: switch to NOFS allocation under i_lock in xfs_da_buf_make i_lock is taken in the reclaim context so all allocations under it must avoid recursions into the filesystem. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_da_btree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index bd0bb6dfcdf..2847bbc1c53 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) int off; if (nbuf == 1) - dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); + dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS); else - dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); + dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); dabuf->dirty = 0; #ifdef XFS_DABUF_DEBUG dabuf->ra = ra; -- cgit From 3f52c2f0a07c23771909cc53f2e9451a7f1bf253 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:14:57 -0400 Subject: xfs: switch to NOFS allocation under i_lock in xfs_dir_cilookup_result xfs_dir_cilookup_result is always called with i_lock held, but i_lock is taken in reclaim context so all allocations under it must avoid recursions into the filesystem. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_dir2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index c657bec6d95..bb1d58eb398 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -256,7 +256,7 @@ xfs_dir_cilookup_result( !(args->op_flags & XFS_DA_OP_CILOOKUP)) return EEXIST; - args->value = kmem_alloc(len, KM_MAYFAIL); + args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); if (!args->value) return ENOMEM; -- cgit From 36fae17a648e0aee5d9560514d08477ef48dc87f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:14:58 -0400 Subject: xfs: switch to NOFS allocation under i_lock in xfs_buf_associate_memory xfs_buf_associate_memory is used for setting up the spare buffer for the log wrap case in xlog_sync which can happen under i_lock when called from xfs_fsync. The i_lock mutex is taken in reclaim context so all allocations under it must avoid recursions into the filesystem. There are a couple more uses of xfs_buf_associate_memory in the log recovery code that are also affected by this, but I'd rather keep the code simple than passing on a gfp_mask argument. Longer term we should just stop requiring the memoery allocation in xlog_sync by some smaller rework of the buffer layer. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 1418b916fc2..178c20c13e8 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -770,7 +770,7 @@ xfs_buf_associate_memory( bp->b_pages = NULL; bp->b_addr = mem; - rval = _xfs_buf_get_pages(bp, page_count, 0); + rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); if (rval) return rval; -- cgit From 10746e47e722b5688fcd6eba9fbf9b2e64a248a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:14:59 -0400 Subject: xfs: switch to NOFS allocation under i_lock in xfs_attr_rmtval_set xfs_attr_rmtval_set is always called with i_lock held, and i_lock is taken in reclaim context so all allocations under it must avoid recursions into the filesystem. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_attr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index db15feb906f..bfb583791fe 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -2141,8 +2141,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, - blkcnt, XFS_BUF_LOCK); + bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt, + XFS_BUF_LOCK | XBF_DONT_BLOCK); ASSERT(bp); ASSERT(!XFS_BUF_GETERROR(bp)); -- cgit From 7b02ecb3031b192823bc732ae717febc0a59aa92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:15:00 -0400 Subject: xfs: switch to NOFS allocation under i_lock in xfs_readlink_bmap xfs_readlink_bmap is called with i_lock held, but i_lock is taken in reclaim context so all allocations under it must avoid recursions into the filesystem. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_vnodeops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c4eca5ed5da..492d75bae2b 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -538,7 +538,9 @@ xfs_readlink_bmap( d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); + bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt), + XBF_LOCK | XBF_MAPPED | + XBF_DONT_BLOCK); error = XFS_BUF_GETERROR(bp); if (error) { xfs_ioerror_alert("xfs_readlink", -- cgit From ddd3a14e0f030f0f7b900621f67532285b8657ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 18 Jul 2009 18:15:01 -0400 Subject: xfs: switch to NOFS allocation under i_lock in xfs_attr_rmtval_get xfs_attr_rmtval_get is always called with i_lock held, but i_lock is taken in reclaim context so all allocations under it must avoid recursions into the filesystem. Reported by the new reclaim context tracing in lockdep. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_attr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index bfb583791fe..4ece1906bd4 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, - blkcnt, XFS_BUF_LOCK, &bp); + blkcnt, + XFS_BUF_LOCK | XBF_DONT_BLOCK, + &bp); if (error) return(error); -- cgit From e0c222c411e22f086e929cd69fdcc89336164ec1 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 20 Jul 2009 10:52:15 -0500 Subject: use XFS_CORRUPTION_ERROR in xfs_btree_check_sblock In Red Hat Bug 512552 - Can't write to XFS mount during raid5 resync a user ran into corruption while resyncing a raid, and we failed a consistency test, but didn't get much more info; it'd be nice to call XFS_CORRUPTION_ERROR here so we can see the buffer contents. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Felix Blyakher --- fs/xfs/xfs_btree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e9df9957482..26717388acf 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -120,8 +120,8 @@ xfs_btree_check_sblock( XFS_RANDOM_BTREE_CHECK_SBLOCK))) { if (bp) xfs_buftrace("SBTREE ERROR", bp); - XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, - cur->bc_mp); + XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", + XFS_ERRLEVEL_LOW, cur->bc_mp, block); return XFS_ERROR(EFSCORRUPTED); } return 0; -- cgit From b89d4208de3de442c9025919c4261be0b38e79a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2009 11:32:18 -0300 Subject: xfs: check for dinode realtime flag corruption Ramon tested XFS with a modified version of fsfuzzer and hit a NULL pointer dereference in __xfs_get_blocks due to the RT device target pointer being NULL. To fix this reject inode with the realtime bit set on a a filesystem without an RT subvolume during inode read. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Felix Blyakher Reported-by: Ramon de Carvalho Valle Tested-by: Ramon de Carvalho Valle Signed-off-by: Felix Blyakher --- fs/xfs/xfs_inode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1f22d65fed0..da428b3fe0f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -343,6 +343,16 @@ xfs_iformat( return XFS_ERROR(EFSCORRUPTED); } + if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && + !ip->i_mount->m_rtdev_targp)) { + xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + "corrupt dinode %Lu, has realtime flag set.", + ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", + XFS_ERRLEVEL_LOW, ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + switch (ip->i_d.di_mode & S_IFMT) { case S_IFIFO: case S_IFCHR: -- cgit From a8914f3a6d72c97328597a556a99daaf5cc288ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2009 11:32:44 -0300 Subject: xfs: fix spin_is_locked assert on uni-processor builds Without SMP or preemption spin_is_locked always returns false, so we can't do an assert with it. Instead use assert_spin_locked, which does the right thing on all builds. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reported-by: Johannes Engel Tested-by: Johannes Engel Signed-off-by: Felix Blyakher --- fs/xfs/xfs_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3750f04ede0..9dbdff3ea48 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3180,7 +3180,7 @@ try_again: STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) { - ASSERT(spin_is_locked(&log->l_icloglock)); + assert_spin_locked(&log->l_icloglock); if (iclog->ic_state == XLOG_STATE_ACTIVE) { xlog_state_switch_iclogs(log, iclog, 0); -- cgit From bc990f5cb424cdca9dda866785d088e2c2110ecc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 16 Aug 2009 20:36:34 -0400 Subject: xfs: fix locking in xfs_iget_cache_hit The locking in xfs_iget_cache_hit currently has numerous problems: - we clear the reclaim tag without i_flags_lock which protects modifications to it - we call inode_init_always which can sleep with pag_ici_lock held (this is oss.sgi.com BZ #819) - we acquire and drop i_flags_lock a lot and thus provide no consistency between the various flags we set/clear under it This patch fixes all that with a major revamp of the locking in the function. The new version acquires i_flags_lock early and only drops it once we need to call into inode_init_always or before calling xfs_ilock. This patch fixes a bug seen in the wild where we race modifying the reclaim tag. Signed-off-by: Christoph Hellwig Reviewed-by: Felix Blyakher Reviewed-by: Eric Sandeen Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_sync.c | 13 ++++- fs/xfs/linux-2.6/xfs_sync.h | 1 + fs/xfs/xfs_iget.c | 113 +++++++++++++++++++++++--------------------- 3 files changed, 70 insertions(+), 57 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b619d6b8ca4..98ef624d9ba 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -708,6 +708,16 @@ xfs_reclaim_inode( return 0; } +void +__xfs_inode_set_reclaim_tag( + struct xfs_perag *pag, + struct xfs_inode *ip) +{ + radix_tree_tag_set(&pag->pag_ici_root, + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); +} + /* * We set the inode flag atomically with the radix tree tag. * Once we get tag lookups on the radix tree, this inode flag @@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag( read_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); - radix_tree_tag_set(&pag->pag_ici_root, - XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_inode_set_reclaim_tag(pag, ip); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2a10301c99c..59120602588 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); +void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 34ec86923f7..ecbf8b4d2e2 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -191,80 +191,82 @@ xfs_iget_cache_hit( int flags, int lock_flags) __releases(pag->pag_ici_lock) { + struct inode *inode = VFS_I(ip); struct xfs_mount *mp = ip->i_mount; - int error = EAGAIN; + int error; + + spin_lock(&ip->i_flags_lock); /* - * If INEW is set this inode is being set up - * If IRECLAIM is set this inode is being torn down - * Pause and try again. + * If we are racing with another cache hit that is currently + * instantiating this inode or currently recycling it out of + * reclaimabe state, wait for the initialisation to complete + * before continuing. + * + * XXX(hch): eventually we should do something equivalent to + * wait_on_inode to wait for these flags to be cleared + * instead of polling for it. */ - if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { + if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { XFS_STATS_INC(xs_ig_frecycle); + error = EAGAIN; goto out_error; } - /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ - if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { - - /* - * If lookup is racing with unlink, then we should return an - * error immediately so we don't remove it from the reclaim - * list and potentially leak the inode. - */ - if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { - error = ENOENT; - goto out_error; - } + /* + * If lookup is racing with unlink return an error immediately. + */ + if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { + error = ENOENT; + goto out_error; + } + /* + * If IRECLAIMABLE is set, we've torn down the VFS inode already. + * Need to carefully get it back into useable state. + */ + if (ip->i_flags & XFS_IRECLAIMABLE) { xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); /* - * We need to re-initialise the VFS inode as it has been - * 'freed' by the VFS. Do this here so we can deal with - * errors cleanly, then tag it so it can be set up correctly - * later. + * We need to set XFS_INEW atomically with clearing the + * reclaimable tag so that we do have an indicator of the + * inode still being initialized. */ - if (inode_init_always(mp->m_super, VFS_I(ip))) { - error = ENOMEM; - goto out_error; - } + ip->i_flags |= XFS_INEW; + ip->i_flags &= ~XFS_IRECLAIMABLE; + __xfs_inode_clear_reclaim_tag(mp, pag, ip); - /* - * We must set the XFS_INEW flag before clearing the - * XFS_IRECLAIMABLE flag so that if a racing lookup does - * not find the XFS_IRECLAIMABLE above but has the igrab() - * below succeed we can safely check XFS_INEW to detect - * that this inode is still being initialised. - */ - xfs_iflags_set(ip, XFS_INEW); - xfs_iflags_clear(ip, XFS_IRECLAIMABLE); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); - /* clear the radix tree reclaim flag as well. */ - __xfs_inode_clear_reclaim_tag(mp, pag, ip); - } else if (!igrab(VFS_I(ip))) { + error = -inode_init_always(mp->m_super, inode); + if (error) { + /* + * Re-initializing the inode failed, and we are in deep + * trouble. Try to re-add it to the reclaim list. + */ + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + + ip->i_flags &= ~XFS_INEW; + ip->i_flags |= XFS_IRECLAIMABLE; + __xfs_inode_set_reclaim_tag(pag, ip); + goto out_error; + } + inode->i_state = I_LOCK|I_NEW; + } else { /* If the VFS inode is being torn down, pause and try again. */ - XFS_STATS_INC(xs_ig_frecycle); - goto out_error; - } else if (xfs_iflags_test(ip, XFS_INEW)) { - /* - * We are racing with another cache hit that is - * currently recycling this inode out of the XFS_IRECLAIMABLE - * state. Wait for the initialisation to complete before - * continuing. - */ - wait_on_inode(VFS_I(ip)); - } + if (!igrab(inode)) { + error = EAGAIN; + goto out_error; + } - if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { - error = ENOENT; - iput(VFS_I(ip)); - goto out_error; + /* We've got a live one. */ + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); } - /* We've got a live one. */ - read_unlock(&pag->pag_ici_lock); - if (lock_flags != 0) xfs_ilock(ip, lock_flags); @@ -274,6 +276,7 @@ xfs_iget_cache_hit( return 0; out_error: + spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); return error; } -- cgit From 3725867dccfb83e4b0cff64e916a04258f300591 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2009 14:03:08 -0400 Subject: xfs: actually enable the swapext compat handler Fix a small typo in the compat ioctl handler that cause the swapext compat handler to never be called. Signed-off-by: Christoph Hellwig Reviewed-by: Torsten Kaiser Tested-by: Torsten Kaiser Reviewed-by: Eric Sandeen Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_ioctl32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0882d166239..eafcc7c1870 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -619,7 +619,7 @@ xfs_file_compat_ioctl( case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); return xfs_file_ioctl(filp, cmd, p); - case XFS_IOC_SWAPEXT: { + case XFS_IOC_SWAPEXT_32: { struct xfs_swapext sxp; struct compat_xfs_swapext __user *sxu = arg; -- cgit