diff options
Diffstat (limited to 'ext4-fix-races-between-buffered-IO-and-collapse-inse.patch')
-rw-r--r-- | ext4-fix-races-between-buffered-IO-and-collapse-inse.patch | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/ext4-fix-races-between-buffered-IO-and-collapse-inse.patch b/ext4-fix-races-between-buffered-IO-and-collapse-inse.patch new file mode 100644 index 000000000..0c89ea77a --- /dev/null +++ b/ext4-fix-races-between-buffered-IO-and-collapse-inse.patch @@ -0,0 +1,119 @@ +From 32ebffd3bbb4162da5ff88f9a35dd32d0a28ea70 Mon Sep 17 00:00:00 2001 +From: Jan Kara <jack@suse.com> +Date: Mon, 7 Dec 2015 14:31:11 -0500 +Subject: [PATCH 3/4] ext4: fix races between buffered IO and collapse / insert + range + +Current code implementing FALLOC_FL_COLLAPSE_RANGE and +FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page +faults. If buffered write or write via mmap manages to squeeze between +filemap_write_and_wait_range() and truncate_pagecache() in the fallocate +implementations, the written data is simply discarded by +truncate_pagecache() although it should have been shifted. + +Fix the problem by moving filemap_write_and_wait_range() call inside +i_mutex and i_mmap_sem. That way we are protected against races with +both buffered writes and page faults. + +Signed-off-by: Jan Kara <jack@suse.com> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +--- + fs/ext4/extents.c | 59 +++++++++++++++++++++++++++++-------------------------- + 1 file changed, 31 insertions(+), 28 deletions(-) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 65b5ada2833f..4b105c96df08 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -5487,21 +5487,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) + return ret; + } + +- /* +- * Need to round down offset to be aligned with page size boundary +- * for page size > block size. +- */ +- ioffset = round_down(offset, PAGE_SIZE); +- +- /* Write out all dirty pages */ +- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, +- LLONG_MAX); +- if (ret) +- return ret; +- +- /* Take mutex lock */ + mutex_lock(&inode->i_mutex); +- + /* + * There is no need to overlap collapse range with EOF, in which case + * it is effectively a truncate operation +@@ -5526,6 +5512,27 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); ++ /* ++ * Need to round down offset to be aligned with page size boundary ++ * for page size > block size. ++ */ ++ ioffset = round_down(offset, PAGE_SIZE); ++ /* ++ * Write tail of the last page before removed range since it will get ++ * removed from the page cache below. ++ */ ++ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset); ++ if (ret) ++ goto out_mmap; ++ /* ++ * Write data that will be shifted to preserve them when discarding ++ * page cache below. We are also protected from pages becoming dirty ++ * by i_mmap_sem. ++ */ ++ ret = filemap_write_and_wait_range(inode->i_mapping, offset + len, ++ LLONG_MAX); ++ if (ret) ++ goto out_mmap; + truncate_pagecache(inode, ioffset); + + credits = ext4_writepage_trans_blocks(inode); +@@ -5626,21 +5633,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) + return ret; + } + +- /* +- * Need to round down to align start offset to page size boundary +- * for page size > block size. +- */ +- ioffset = round_down(offset, PAGE_SIZE); +- +- /* Write out all dirty pages */ +- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, +- LLONG_MAX); +- if (ret) +- return ret; +- +- /* Take mutex lock */ + mutex_lock(&inode->i_mutex); +- + /* Currently just for extent based files */ + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + ret = -EOPNOTSUPP; +@@ -5668,6 +5661,16 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); ++ /* ++ * Need to round down to align start offset to page size boundary ++ * for page size > block size. ++ */ ++ ioffset = round_down(offset, PAGE_SIZE); ++ /* Write out all dirty pages */ ++ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, ++ LLONG_MAX); ++ if (ret) ++ goto out_mmap; + truncate_pagecache(inode, ioffset); + + credits = ext4_writepage_trans_blocks(inode); +-- +2.5.5 + |