From 55bd725aa3a83b3935988f37275b5a80e10d4169 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 15 Feb 2008 12:47:21 -0500 Subject: ext4: Fix locking hierarchy violation in ext4_fallocate() ext4_fallocate() was trying to acquire i_data_sem outside of jbd2_start_transaction/jbd2_journal_stop, which violates ext4's locking hierarchy. So we take i_mutex to prevent writes and truncates during the complete fallocate operation, and use ext4_get_blocks_wrap() which acquires and releases i_data_sem for each block allocation. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bc7081f1fbe..e856f660fc3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2623,7 +2623,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) * modify 1 super block, 1 block bitmap and 1 group descriptor. */ credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; - down_write((&EXT4_I(inode)->i_data_sem)); + mutex_lock(&inode->i_mutex); retry: while (ret >= 0 && ret < max_blocks) { block = block + ret; @@ -2634,7 +2634,7 @@ retry: break; } - ret = ext4_ext_get_blocks(handle, inode, block, + ret = ext4_get_blocks_wrap(handle, inode, block, max_blocks, &map_bh, EXT4_CREATE_UNINITIALIZED_EXT, 0); WARN_ON(ret <= 0); @@ -2680,7 +2680,6 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - up_write((&EXT4_I(inode)->i_data_sem)); /* * Time to update the file size. * Update only when preallocation was requested beyond the file size. 
@@ -2692,21 +2691,18 @@ retry: * if no error, we assume preallocation succeeded * completely */ - mutex_lock(&inode->i_mutex); i_size_write(inode, offset + len); EXT4_I(inode)->i_disksize = i_size_read(inode); - mutex_unlock(&inode->i_mutex); } else if (ret < 0 && nblocks) { /* Handle partial allocation scenario */ loff_t newsize; - mutex_lock(&inode->i_mutex); newsize = (nblocks << blkbits) + i_size_read(inode); i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits)); EXT4_I(inode)->i_disksize = i_size_read(inode); - mutex_unlock(&inode->i_mutex); } } + mutex_unlock(&inode->i_mutex); return ret > 0 ? ret2 : ret; } -- cgit From b35905c16ad6428551eb9e49525011bd2700cf56 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Feb 2008 16:54:37 -0500 Subject: ext4: Fix memory and buffer head leak in callers to ext4_ext_find_extent() The path variable returned via ext4_ext_find_extent is a kmalloc variable and needs to be freed. It also contains a reference to buffer_head which needs to be dropped. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e856f660fc3..995ac16102a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -349,7 +349,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) #define ext4_ext_show_leaf(inode,path) #endif -static void ext4_ext_drop_refs(struct ext4_ext_path *path) +void ext4_ext_drop_refs(struct ext4_ext_path *path) { int depth = path->p_depth; int i; @@ -2200,10 +2200,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, newdepth = ext_depth(inode); if (newdepth != depth) { depth = newdepth; - path = ext4_ext_find_extent(inode, iblock, NULL); + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, iblock, path); if (IS_ERR(path)) { err = PTR_ERR(path); - path = NULL; goto out; } eh = path[depth].p_hdr; -- cgit From 9df5643ad135c7f8c02d3b69020de4ec910f9fc0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 22 Feb 2008 06:17:31 -0500 Subject: ext4: Get journal write access before modifying the extent tree When the user was writing into an uninitialized extent, ext4_ext_convert_to_initialized() was not requesting journal write access before it started to modify the extent tree. Fix this oversight. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 995ac16102a..c4d6f19faf3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2168,6 +2168,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, newblock = iblock - ee_block + ext_pblock(ex); ex2 = ex; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + /* ex1: ee_block to iblock - 1 : uninitialized */ if (iblock > ee_block) { ex1 = ex; @@ -2210,6 +2214,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex = path[depth].p_ext; if (ex2 != &newex) ex2 = ex; + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; } allocated = max_blocks; } @@ -2230,9 +2238,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex2->ee_len = cpu_to_le16(allocated); if (ex2 != ex) goto insert; - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; /* * New (initialized) extent starts from the first block * in the current extent. i.e., ex2 == ex -- cgit From 74d3487fc8aa58cec16dff7239dea1ca59bdab0e Mon Sep 17 00:00:00 2001 From: Valerie Clement Date: Fri, 15 Feb 2008 13:43:07 -0500 Subject: ext4: modify block allocation algorithm for the last group When a directory inode is allocated in the last group and the last group contains less than s_blocks_per_group blocks, the initial block allocated for the directory is not always allocated in the same group as the directory inode, but in one of the first groups of the filesystem (group 1 for example). Depending on the current process's pid, ext4_find_near() and ext4_ext_find_goal() can return a block number greater than the maximum blocks count in the filesystem and in that case the block will not be allocated in the same group as the inode. 
The following patch fixes the problem. Should the modification also be done in ext2/3 code? Signed-off-by: Valerie Clement Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c4d6f19faf3..8a59f7ba30e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -148,6 +148,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, { struct ext4_inode_info *ei = EXT4_I(inode); ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; ext4_grpblk_t colour; int depth; @@ -169,8 +170,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, /* OK. use inode's group */ bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); - colour = (current->pid % 16) * + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); return bg_start + colour + block; } -- cgit From f5ab0d1f8f7df937778c60c3da6f4ef939a54a7b Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Mon, 25 Feb 2008 15:29:55 -0500 Subject: ext4: Fix BUG when writing to an unitialized extent This patch fixes a bug when writing to preallocated but uninitialized blocks, which resulted in a BUG in fs/buffer.c saying that the buffer is not mapped. When writing to a file, ext4_get_blocks_wrap() is called with create=1 in order to request that blocks be allocated if necessary. It currently calls ext4_get_blocks() with create=0 in order to do a lookup first. If the inode contains an uninitialized data block, the buffer head is left unmapped, which ext4_get_blocks_wrap() returns, causing the BUG. 
We fix this by checking to see if the buffer head is unmapped, and if so, we make sure the buffer head is mapped by calling ext4_ext_get_blocks with create=1. Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8a59f7ba30e..bcf5d040e32 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2287,9 +2287,22 @@ out: } /* + * Block allocation/map/preallocation routine for extents based files + * + * * Need to be called with * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) + * + * return > 0, number of of blocks already mapped/allocated + * if create == 0 and these are pre-allocated blocks + * buffer head is unmapped + * otherwise blocks are mapped + * + * return = 0, if plain look up failed (blocks have not been allocated) + * buffer head is unmapped + * + * return < 0, error case. */ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, -- cgit From 2c98615d3b64ce7888cd46cc668023f456daf287 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Feb 2008 15:41:35 -0500 Subject: ext4: Don't mark filesystem error if fallocate fails If we fail to allocate blocks don't call ext4_error. 
Also don't hide errors from ext4_get_blocks_wrap Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bcf5d040e32..9ae6e67090c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2661,13 +2661,14 @@ retry: ret = ext4_get_blocks_wrap(handle, inode, block, max_blocks, &map_bh, EXT4_CREATE_UNINITIALIZED_EXT, 0); - WARN_ON(ret <= 0); if (ret <= 0) { - ext4_error(inode->i_sb, "ext4_fallocate", - "ext4_ext_get_blocks returned error: " - "inode#%lu, block=%u, max_blocks=%lu", +#ifdef EXT4FS_DEBUG + WARN_ON(ret <= 0); + printk(KERN_ERR "%s: ext4_ext_get_blocks " + "returned error inode#%lu, block=%u, " + "max_blocks=%lu", __func__, inode->i_ino, block, max_blocks); - ret = -EIO; +#endif ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); break; -- cgit